author     Yuri Rumyantsev <ysrumyan@gmail.com>    2016-02-02 09:46:26 +0000
committer  Ilya Enkovich <ienkovich@gcc.gnu.org>   2016-02-02 09:46:26 +0000
commit     2d4dc2233b4a3aef3edeb23d6acaaa8a5327c137 (patch)
tree       406eb48f3246bc92d314640f0b58301e7c2c6257 /gcc/tree-vect-loop.c
parent     65c98fdec722720ae9b2a6765232fe4535e06d4e (diff)
re PR middle-end/68542 (10% 481.wrf performance regression)
gcc/

2016-02-02  Yuri Rumyantsev  <ysrumyan@gmail.com>

    PR middle-end/68542
    * config/i386/i386.c (ix86_expand_branch): Add support for
    conditional branch with vector comparison.
    * config/i386/sse.md (VI48_AVX): New mode iterator.
    (define_expand "cbranch<mode>4"): Add support for conditional
    branch with vector comparison.
    * tree-vect-loop.c (optimize_mask_stores): New function.
    * tree-vect-stmts.c (vectorizable_mask_load_store): Initialize
    has_mask_store field of vect_info.
    * tree-vectorizer.c (vectorize_loops): Invoke optimize_mask_stores
    for vectorized loops having masked stores after vec_info destroy.
    * tree-vectorizer.h (loop_vec_info): Add new has_mask_store field
    and corresponding macros.
    (optimize_mask_stores): Add prototype.

gcc/testsuite

2016-02-02  Yuri Rumyantsev  <ysrumyan@gmail.com>

    PR middle-end/68542
    * gcc.dg/vect/vect-mask-store-move-1.c: New test.
    * gcc.target/i386/avx2-vect-mask-store-move1.c: New test.

From-SVN: r233068
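For reference, the shape of loop this change targets (in the spirit of the new tests listed above) is sketched below; the function name, array names, sizes and dg- directives are illustrative assumptions, not the committed test sources.

/* Illustrative only: a loop whose conditional stores the vectorizer turns
   into masked stores; names and directives are assumptions, not the
   committed tests.  */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */

#define N 256
int c[N], p1[N], p2[N], p3[N];

void
foo (int n)
{
  int i;
  for (i = 0; i < n; i++)
    if (c[i])
      {
        p1[i] += 1;           /* becomes MASK_LOAD / MASK_STORE  */
        p2[i] = p3[i] + 2;    /* becomes MASK_LOAD / MASK_STORE  */
      }
}

With -mavx2 the vectorizer emits IFN_MASK_STORE for the conditional stores, and after this patch the stores and their value producers can be sunk into a block that is skipped at run time when the mask is all zeros.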
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--  gcc/tree-vect-loop.c  192
1 file changed, 192 insertions, 0 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b8303ad..976e192 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6938,3 +6938,195 @@ vect_transform_loop (loop_vec_info loop_vinfo)
vect_free_slp_instance (instance);
LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
}
+
+/* The code below performs a simple optimization: it reverts
+ if-conversion for masked stores, i.e. if the mask of a store is all
+ zeros, the store and, where possible, the producers of the stored
+ values are skipped.
+ For example,
+ for (i=0; i<n; i++)
+ if (c[i])
+ {
+ p1[i] += 1;
+ p2[i] = p3[i] +2;
+ }
+ this transformation will produce the following semi-hammock:
+
+ if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
+ {
+ vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
+ vect__12.22_172 = vect__11.19_170 + vect_cst__171;
+ MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
+ vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
+ vect__19.28_184 = vect__18.25_182 + vect_cst__183;
+ MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
+ }
+*/
+
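+/* Implementation outline: collect all IFN_MASK_STORE calls in the loop
+ into a worklist; for each run of stores sharing the same mask, split
+ the block after the last such store, create a new block that is
+ entered only when the mask is not all zeros, sink the stores and those
+ vector value producers whose results are used only in that block, and
+ rewire the virtual operands with a new PHI in the join block.  */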
+void
+optimize_mask_stores (struct loop *loop)
+{
+ basic_block *bbs = get_loop_body (loop);
+ unsigned nbbs = loop->num_nodes;
+ unsigned i;
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+ gimple *stmt, *stmt1 = NULL;
+ auto_vec<gimple *> worklist;
+
+ vect_location = find_loop_location (loop);
+ /* Pick up all masked stores in loop if any. */
+ for (i = 0; i < nbbs; i++)
+ {
+ bb = bbs[i];
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ stmt = gsi_stmt (gsi);
+ if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
+ && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
+ worklist.safe_push (stmt);
+ }
+ }
+
+ free (bbs);
+ if (worklist.is_empty ())
+ return;
+
+ /* Loop has masked stores. */
+ while (!worklist.is_empty ())
+ {
+ gimple *last, *last_store;
+ edge e, efalse;
+ tree mask;
+ basic_block store_bb, join_bb;
+ gimple_stmt_iterator gsi_to;
+ tree vdef, new_vdef;
+ gphi *phi;
+ tree vectype;
+ tree zero;
+
+ last = worklist.pop ();
+ mask = gimple_call_arg (last, 2);
+ bb = gimple_bb (last);
+ /* Create new bb. */
+ e = split_block (bb, last);
+ join_bb = e->dest;
+ store_bb = create_empty_bb (bb);
+ add_bb_to_loop (store_bb, loop);
+ e->flags = EDGE_TRUE_VALUE;
+ efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
+ /* Put STORE_BB to likely part. */
+ efalse->probability = PROB_UNLIKELY;
+ store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
+ make_edge (store_bb, join_bb, EDGE_FALLTHRU);
+ if (dom_info_available_p (CDI_DOMINATORS))
+ set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Create new block %d to sink mask stores.",
+ store_bb->index);
+ /* Create vector comparison with boolean result. */
+ vectype = TREE_TYPE (mask);
+ zero = build_zero_cst (vectype);
+ stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
+ gsi = gsi_last_bb (bb);
+ gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+ /* Create a new PHI node for the vdef of the last masked store:
+ .MEM_2 = VDEF <.MEM_1>
+ will be converted to
+ .MEM_3 = VDEF <.MEM_1>
+ and a new PHI node will be created in the join bb:
+ .MEM_2 = PHI <.MEM_1, .MEM_3>
+ */
+ vdef = gimple_vdef (last);
+ new_vdef = make_ssa_name (gimple_vop (cfun), last);
+ gimple_set_vdef (last, new_vdef);
+ phi = create_phi_node (vdef, join_bb);
+ add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
+
+ /* Put all masked stores with the same mask to STORE_BB if possible. */
+ while (true)
+ {
+ gimple_stmt_iterator gsi_from;
+ /* Move masked store to STORE_BB. */
+ last_store = last;
+ gsi = gsi_for_stmt (last);
+ gsi_from = gsi;
+ /* Shift GSI to the previous stmt for further traversal. */
+ gsi_prev (&gsi);
+ gsi_to = gsi_start_bb (store_bb);
+ gsi_move_before (&gsi_from, &gsi_to);
+ /* Re-point GSI_TO at the start of the now non-empty STORE_BB. */
+ gsi_to = gsi_start_bb (store_bb);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Move stmt to created bb\n");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
+ }
+ /* Move all stored value producers if possible. */
+ while (!gsi_end_p (gsi))
+ {
+ tree lhs;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ bool res;
+ stmt1 = gsi_stmt (gsi);
+ /* Do not consider statements writing to memory. */
+ if (gimple_vdef (stmt1))
+ break;
+ gsi_from = gsi;
+ gsi_prev (&gsi);
+ lhs = gimple_get_lhs (stmt1);
+ if (!lhs)
+ break;
+
+ /* LHS of vectorized stmt must be SSA_NAME. */
+ if (TREE_CODE (lhs) != SSA_NAME)
+ break;
+
+ /* Skip scalar statements. */
+ if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
+ continue;
+
+ /* Check that LHS does not have uses outside of STORE_BB. */
+ res = true;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ gimple *use_stmt;
+ use_stmt = USE_STMT (use_p);
+ if (gimple_bb (use_stmt) != store_bb)
+ {
+ res = false;
+ break;
+ }
+ }
+ if (!res)
+ break;
+
+ if (gimple_vuse (stmt1)
+ && gimple_vuse (stmt1) != gimple_vuse (last_store))
+ break;
+
+ /* Can move STMT1 to STORE_BB. */
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Move stmt to created bb\n");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
+ }
+ gsi_move_before (&gsi_from, &gsi_to);
+ /* Shift GSI_TO for further insertion. */
+ gsi_prev (&gsi_to);
+ }
+ /* Put other masked stores with the same mask to STORE_BB. */
+ if (worklist.is_empty ()
+ || gimple_call_arg (worklist.last (), 2) != mask
+ || worklist.last () != stmt1)
+ break;
+ last = worklist.pop ();
+ }
+ add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
+ }
+}
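
Per the ChangeLog, the new function is invoked from vectorize_loops in tree-vectorizer.c once the loop's vec_info has been destroyed; that file is not part of this diff, so the sketch below only approximates the call site, and the LOOP_VINFO_HAS_MASK_STORE macro and the local variable names are assumptions based on the ChangeLog entry.

/* Approximate call site in vectorize_loops (tree-vectorizer.c); not part
   of this diff, names are assumptions.  */
for (i = 1; i < number_of_loops (cfun); i++)
  {
    loop_vec_info loop_vinfo;
    bool has_mask_store;

    loop = get_loop (cfun, i);
    if (!loop)
      continue;
    loop_vinfo = (loop_vec_info) loop->aux;
    /* Record whether masked stores were generated before the vec_info
       goes away, then run the new optimization on the loop.  */
    has_mask_store = loop_vinfo && LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
    destroy_loop_vec_info (loop_vinfo, true);
    loop->aux = NULL;
    if (has_mask_store)
      optimize_mask_stores (loop);
  }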