aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.h
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2024-10-14 11:58:59 +0100
committerTamar Christina <tamar.christina@arm.com>2024-10-14 11:58:59 +0100
commitaccb85345edb91368221fd07b74e74df427b7de0 (patch)
tree1f699e4a27e58b95cfa1daa4b252c54dc5d73cac /gcc/tree-vectorizer.h
parentdf25d528d36352af9dd677a4d67e44fa5ed5dc43 (diff)
downloadgcc-accb85345edb91368221fd07b74e74df427b7de0.zip
gcc-accb85345edb91368221fd07b74e74df427b7de0.tar.gz
gcc-accb85345edb91368221fd07b74e74df427b7de0.tar.bz2
middle-end: support SLP early break
This patch introduces feature parity for early break int the SLP only vectorizer. The approach taken here is to treat the early exits as root statements for an SLP tree. This means that we don't need any changes to build_slp to support gconds. Codegen for the gcond itself now has to be done out of line but the body of the SLP blocks itself is simply driven by SLP scheduling. There is a slight awkwardness in having re-used vectorizable_early_exit for both SLP and non-SLP but I've documented the differences and when I did try to refactor it it wasn't really worth it given that this is a temporary state anyway. This version is restricted to lane = 1, as such we can re-use the existing move_early_break function instead of having to do safety update through scheduling. I have a branch where I'm working on that but lane > 1 is out of scope for GCC 15 anyway. The only reason I will try to get moving through scheduling done as a stretch goal is so we get epilogue vectorization back for early break. The example: unsigned test4(unsigned x) { unsigned ret = 0; for (int i = 0; i < N; i++) { vect_b[i] = x + i; if (vect_a[i]*2 != x) break; vect_a[i] = x; } return ret; } builds the following SLP instance for early break: note: Analyzing vectorizable control flow: if (patt_6 != 0) note: Starting SLP discovery for note: patt_6 = _4 != x_9(D); note: starting SLP discovery for node 0x63abc80 note: Build SLP for patt_6 = _4 != x_9(D); note: precomputed vectype: vector(4) <signed-boolean:32> note: nunits = 4 note: vect_is_simple_use: operand x_9(D), type of def: external note: vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 0][2, +INF] MASK 0xffff _3 * 2, type of def: internal note: starting SLP discovery for node 0x63abdc0 note: Build SLP for _4 = _3 * 2; note: precomputed vectype: vector(4) unsigned int note: nunits = 4 note: vect_is_simple_use: operand # vect_aD.4416[i_15], type of def: internal note: vect_is_simple_use: operand 2, type of def: constant note: starting SLP discovery for node 0x63abe60 note: Build SLP for _3 = vect_a[i_15]; note: precomputed vectype: vector(4) unsigned int note: nunits = 4 note: SLP discovery for node 0x63abe60 succeeded note: SLP discovery for node 0x63abdc0 succeeded note: SLP discovery for node 0x63abc80 succeeded note: SLP size 3 vs. limit 10. note: Final SLP tree for instance 0x6474190: note: node 0x63abc80 (max_nunits=4, refcnt=2) vector(4) <signed-boolean:32> note: op template: patt_6 = _4 != x_9(D); note: stmt 0 patt_6 = _4 != x_9(D); note: children 0x63abd20 0x63abdc0 note: node (external) 0x63abd20 (max_nunits=1, refcnt=1) note: { x_9(D) } note: node 0x63abdc0 (max_nunits=4, refcnt=2) vector(4) unsigned int note: op template: _4 = _3 * 2; note: stmt 0 _4 = _3 * 2; note: children 0x63abe60 0x63abf00 note: node 0x63abe60 (max_nunits=4, refcnt=2) vector(4) unsigned int note: op template: _3 = vect_a[i_15]; note: stmt 0 _3 = vect_a[i_15]; note: load permutation { 0 } note: node (constant) 0x63abf00 (max_nunits=1, refcnt=1) note: { 2 } and during codegen: note: ------>vectorizing SLP node starting from: patt_6 = _4 != x_9(D); note: vect_is_simple_use: operand # RANGE [irange] unsigned int [0, 0][2, +INF] MASK 0xffff _3 * 2, type of def: internal note: add new stmt: mask_patt_6.18_58 = _53 != vect__4.17_57; note: === vectorizable_early_exit === note: transform early-exit. note: vectorizing stmts using SLP. note: Vectorizing SLP tree: note: node 0x63abfa0 (max_nunits=4, refcnt=1) vector(4) int note: op template: i_12 = i_15 + 1; note: stmt 0 i_12 = i_15 + 1; note: children 0x63aba00 0x63ac040 note: node 0x63aba00 (max_nunits=4, refcnt=2) vector(4) int note: op template: i_15 = PHI <i_12(6), 0(14)> note: [l] stmt 0 i_15 = PHI <i_12(6), 0(14)> note: children (nil) (nil) note: node (constant) 0x63ac040 (max_nunits=1, refcnt=1) vector(4) int note: { 1 } gcc/ChangeLog: * tree-vect-loop.cc (vect_analyze_loop_2): Handle SLP trees with no children. * tree-vectorizer.h (enum slp_instance_kind): Add slp_inst_kind_gcond. (LOOP_VINFO_EARLY_BREAKS_LIVE_IVS): New. (vectorizable_early_exit): Expose. (class _loop_vec_info): Add early_break_live_stmts. * tree-vect-slp.cc (vect_build_slp_instance, vect_analyze_slp_instance): Support gcond instances. (vect_analyze_slp): Analyze gcond roots and early break live statements. (maybe_push_to_hybrid_worklist): Don't sink gconds. (vect_slp_analyze_operations): Support gconds. (vect_slp_check_for_roots): Update comments. (vectorize_slp_instance_root_stmt): Support gconds. (vect_schedule_slp): Pass vinfo to vectorize_slp_instance_root_stmt. * tree-vect-stmts.cc (vect_stmt_relevant_p): Record early break live statements. (vectorizable_early_exit): Support SLP. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-early-break_126.c: New test. * gcc.dg/vect/vect-early-break_127.c: New test. * gcc.dg/vect/vect-early-break_128.c: New test.
Diffstat (limited to 'gcc/tree-vectorizer.h')
-rw-r--r--gcc/tree-vectorizer.h12
1 files changed, 11 insertions, 1 deletions
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2775d87..11f921f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -296,7 +296,8 @@ enum slp_instance_kind {
slp_inst_kind_reduc_group,
slp_inst_kind_reduc_chain,
slp_inst_kind_bb_reduc,
- slp_inst_kind_ctor
+ slp_inst_kind_ctor,
+ slp_inst_kind_gcond
};
/* SLP instance is a sequence of stmts in a loop that can be packed into
@@ -1022,6 +1023,10 @@ public:
/* Statements whose VUSES need updating if early break vectorization is to
happen. */
auto_vec<gimple*> early_break_vuses;
+
+ /* Record statements that are needed to be live for early break vectorization
+ but may not have an LC PHI node materialized yet in the exits. */
+ auto_vec<stmt_vec_info> early_break_live_ivs;
} *loop_vec_info;
/* Access Functions. */
@@ -1081,6 +1086,8 @@ public:
#define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
(single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
+#define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L) \
+ (L)->early_break_live_ivs
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
@@ -2546,6 +2553,9 @@ extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
stmt_vector_for_cost *);
extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
gimple **, slp_tree, stmt_vector_for_cost *);
+extern bool vectorizable_early_exit (vec_info *, stmt_vec_info,
+ gimple_stmt_iterator *, gimple **,
+ slp_tree, stmt_vector_for_cost *);
extern bool vect_emulated_vector_p (tree);
extern bool vect_can_vectorize_without_simd_p (tree_code);
extern bool vect_can_vectorize_without_simd_p (code_helper);