diff options
author | Thomas Schwinge <tschwinge@baylibre.com> | 2024-03-11 22:51:28 +0100 |
---|---|---|
committer | Thomas Schwinge <tschwinge@baylibre.com> | 2024-03-11 22:51:28 +0100 |
commit | a95e21151a6366e7344d0f1983f99e318c5a7097 (patch) | |
tree | 11d987406d9ce8399ec1736477d971ef09344df2 /gcc/tree-vect-loop-manip.cc | |
parent | 02d394b2736afa9a24ab3e1b8ad56fd6ac37e0f4 (diff) | |
parent | af4bb221153359f5948da917d5ef2df738bb1e61 (diff) | |
download | gcc-a95e21151a6366e7344d0f1983f99e318c5a7097.zip gcc-a95e21151a6366e7344d0f1983f99e318c5a7097.tar.gz gcc-a95e21151a6366e7344d0f1983f99e318c5a7097.tar.bz2 |
Merge commit 'af4bb221153359f5948da917d5ef2df738bb1e61' into HEAD
Diffstat (limited to 'gcc/tree-vect-loop-manip.cc')
-rw-r--r-- | gcc/tree-vect-loop-manip.cc | 485 |
1 files changed, 245 insertions, 240 deletions
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 0964190..1f7779b 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -252,6 +252,9 @@ adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def) { tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e); + gcc_assert (TREE_CODE (orig_def) != SSA_NAME + || orig_def != new_def); + SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def); if (MAY_HAVE_DEBUG_BIND_STMTS) @@ -803,7 +806,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, final gcond. */ static gcond * -vect_set_loop_condition_partial_vectors (class loop *loop, +vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge, loop_vec_info loop_vinfo, tree niters, tree final_iv, bool niters_maybe_zero, gimple_stmt_iterator loop_cond_gsi) @@ -904,7 +907,6 @@ vect_set_loop_condition_partial_vectors (class loop *loop, add_header_seq (loop, header_seq); /* Get a boolean result that tells us whether to iterate. */ - edge exit_edge = single_exit (loop); gcond *cond_stmt; if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) @@ -935,7 +937,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop, if (final_iv) { gassign *assign = gimple_build_assign (final_iv, orig_niters); - gsi_insert_on_edge_immediate (single_exit (loop), assign); + gsi_insert_on_edge_immediate (exit_edge, assign); } return cond_stmt; @@ -953,6 +955,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop, static gcond * vect_set_loop_condition_partial_vectors_avx512 (class loop *loop, + edge exit_edge, loop_vec_info loop_vinfo, tree niters, tree final_iv, bool niters_maybe_zero, @@ -1144,7 +1147,6 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop, add_preheader_seq (loop, preheader_seq); /* Adjust the exit test using the decrementing IV. */ - edge exit_edge = single_exit (loop); tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR; /* When we peel for alignment with niter_skip != 0 this can cause niter + niter_skip to wrap and since we are comparing the @@ -1183,7 +1185,8 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop, loop handles exactly VF scalars per iteration. */ static gcond * -vect_set_loop_condition_normal (class loop *loop, tree niters, tree step, +vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge, + class loop *loop, tree niters, tree step, tree final_iv, bool niters_maybe_zero, gimple_stmt_iterator loop_cond_gsi) { @@ -1191,13 +1194,12 @@ vect_set_loop_condition_normal (class loop *loop, tree niters, tree step, gcond *cond_stmt; gcond *orig_cond; edge pe = loop_preheader_edge (loop); - edge exit_edge = single_exit (loop); gimple_stmt_iterator incr_gsi; bool insert_after; enum tree_code code; tree niters_type = TREE_TYPE (niters); - orig_cond = get_loop_exit_condition (loop); + orig_cond = get_loop_exit_condition (exit_edge); gcc_assert (orig_cond); loop_cond_gsi = gsi_for_stmt (orig_cond); @@ -1305,19 +1307,18 @@ vect_set_loop_condition_normal (class loop *loop, tree niters, tree step, if (final_iv) { gassign *assign; - edge exit = single_exit (loop); - gcc_assert (single_pred_p (exit->dest)); + gcc_assert (single_pred_p (exit_edge->dest)); tree phi_dest = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr); /* Make sure to maintain LC SSA form here and elide the subtraction if the value is zero. */ - gphi *phi = create_phi_node (phi_dest, exit->dest); - add_phi_arg (phi, indx_after_incr, exit, UNKNOWN_LOCATION); + gphi *phi = create_phi_node (phi_dest, exit_edge->dest); + add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION); if (!integer_zerop (init)) { assign = gimple_build_assign (final_iv, MINUS_EXPR, phi_dest, init); - gimple_stmt_iterator gsi = gsi_after_labels (exit->dest); + gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest); gsi_insert_before (&gsi, assign, GSI_SAME_STMT); } } @@ -1348,29 +1349,33 @@ vect_set_loop_condition_normal (class loop *loop, tree niters, tree step, Assumption: the exit-condition of LOOP is the last stmt in the loop. */ void -vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo, +vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo, tree niters, tree step, tree final_iv, bool niters_maybe_zero) { gcond *cond_stmt; - gcond *orig_cond = get_loop_exit_condition (loop); + gcond *orig_cond = get_loop_exit_condition (loop_e); gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond); if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512) - cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_vinfo, + cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e, + loop_vinfo, niters, final_iv, niters_maybe_zero, loop_cond_gsi); else - cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo, + cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e, + loop_vinfo, niters, final_iv, niters_maybe_zero, loop_cond_gsi); } else - cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv, + cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop, + niters, + step, final_iv, niters_maybe_zero, loop_cond_gsi); @@ -1439,16 +1444,24 @@ slpeel_duplicate_current_defs_from_edges (edge from, edge to) get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from))); } - /* Given LOOP this function generates a new copy of it and puts it on E which is either the entry or exit of LOOP. If SCALAR_LOOP is non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the basic blocks from SCALAR_LOOP instead of LOOP, but to either the - entry or exit of LOOP. */ + entry or exit of LOOP. If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a + continuation. This is correct for cases where one loop continues from the + other like in the vectorizer, but not true for uses in e.g. loop distribution + where the contents of the loop body are split but the iteration space of both + copies remains the same. + + If UPDATED_DOMS is not NULL it is update with the list of basic blocks whoms + dominators were updated during the peeling. */ class loop * -slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, - class loop *scalar_loop, edge e) +slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, + class loop *scalar_loop, + edge scalar_exit, edge e, edge *new_e, + bool flow_loops) { class loop *new_loop; basic_block *new_bbs, *bbs, *pbbs; @@ -1458,13 +1471,30 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge exit, new_exit; bool duplicate_outer_loop = false; - exit = single_exit (loop); + exit = loop_exit; at_exit = (e == exit); if (!at_exit && e != loop_preheader_edge (loop)) return NULL; if (scalar_loop == NULL) - scalar_loop = loop; + { + scalar_loop = loop; + scalar_exit = loop_exit; + } + else if (scalar_loop == loop) + scalar_exit = loop_exit; + else + { + /* Loop has been version, match exits up using the aux index. */ + for (edge exit : get_loop_exit_edges (scalar_loop)) + if (exit->aux == loop_exit->aux) + { + scalar_exit = exit; + break; + } + + gcc_assert (scalar_exit); + } bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); pbbs = bbs + 1; @@ -1490,13 +1520,19 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, bbs[0] = preheader; new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); - exit = single_exit (scalar_loop); copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs, - &exit, 1, &new_exit, NULL, + &scalar_exit, 1, &new_exit, NULL, at_exit ? loop->latch : e->src, true); - exit = single_exit (loop); + exit = loop_exit; basic_block new_preheader = new_bbs[0]; + gcc_assert (new_exit); + + /* Record the new loop exit information. new_loop doesn't have SCEV data and + so we must initialize the exit information. */ + if (new_e) + *new_e = new_exit; + /* Before installing PHI arguments make sure that the edges into them match that of the scalar loop we analyzed. This makes sure the SLP tree matches up between the main vectorized @@ -1530,6 +1566,19 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++) rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); + /* Rename the exit uses. */ + for (edge exit : get_loop_exit_edges (new_loop)) + for (auto gsi = gsi_start_phis (exit->dest); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit); + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit)); + if (MAY_HAVE_DEBUG_BIND_STMTS) + adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest); + } + + /* This condition happens when the loop has been versioned. e.g. due to ifcvt + versioning the loop. */ if (scalar_loop != loop) { /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from @@ -1537,34 +1586,89 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects the LOOP SSA_NAMEs (on the exit edge and edge from latch to header) to have current_def set, so copy them over. */ - slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop), - exit); + slpeel_duplicate_current_defs_from_edges (scalar_exit, exit); slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch, 0), EDGE_SUCC (loop->latch, 0)); } + auto loop_exits = get_loop_exit_edges (loop); + auto_vec<basic_block> doms; + if (at_exit) /* Add the loop copy at exit. */ { - if (scalar_loop != loop) + if (scalar_loop != loop && new_exit->dest != exit_dest) { - gphi_iterator gsi; new_exit = redirect_edge_and_branch (new_exit, exit_dest); + flush_pending_stmts (new_exit); + } - for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gphi *phi = gsi.phi (); - tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e); - location_t orig_locus - = gimple_phi_arg_location_from_edge (phi, e); + auto_vec <gimple *> new_phis; + hash_map <tree, tree> new_phi_args; + /* First create the empty phi nodes so that when we flush the + statements they can be filled in. However because there is no order + between the PHI nodes in the exits and the loop headers we need to + order them base on the order of the two headers. First record the new + phi nodes. */ + for (auto gsi_from = gsi_start_phis (scalar_exit->dest); + !gsi_end_p (gsi_from); gsi_next (&gsi_from)) + { + gimple *from_phi = gsi_stmt (gsi_from); + tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); + gphi *res = create_phi_node (new_res, new_preheader); + new_phis.safe_push (res); + } - add_phi_arg (phi, orig_arg, new_exit, orig_locus); - } + /* Then redirect the edges and flush the changes. This writes out the new + SSA names. */ + for (edge exit : loop_exits) + { + edge temp_e = redirect_edge_and_branch (exit, new_preheader); + flush_pending_stmts (temp_e); } - redirect_edge_and_branch_force (e, new_preheader); - flush_pending_stmts (e); + + /* Record the new SSA names in the cache so that we can skip materializing + them again when we fill in the rest of the LCSSA variables. */ + for (auto phi : new_phis) + { + tree new_arg = gimple_phi_arg (phi, 0)->def; + new_phi_args.put (new_arg, gimple_phi_result (phi)); + } + + /* Copy the current loop LC PHI nodes between the original loop exit + block and the new loop header. This allows us to later split the + preheader block and still find the right LC nodes. */ + edge loop_entry = single_succ_edge (new_preheader); + if (flow_loops) + for (auto gsi_from = gsi_start_phis (loop->header), + gsi_to = gsi_start_phis (new_loop->header); + !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to); + gsi_next (&gsi_from), gsi_next (&gsi_to)) + { + gimple *from_phi = gsi_stmt (gsi_from); + gimple *to_phi = gsi_stmt (gsi_to); + tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, + loop_latch_edge (loop)); + + /* Check if we've already created a new phi node during edge + redirection. If we have, only propagate the value downwards. */ + if (tree *res = new_phi_args.get (new_arg)) + { + adjust_phi_and_debug_stmts (to_phi, loop_entry, *res); + continue; + } + + tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); + gphi *lcssa_phi = create_phi_node (new_res, new_preheader); + + /* Main loop exit should use the final iter value. */ + add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION); + + adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res); + } + set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src); + if (was_imm_dom || duplicate_outer_loop) set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src); @@ -1578,6 +1682,23 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, } else /* Add the copy at entry. */ { + /* Copy the current loop LC PHI nodes between the original loop exit + block and the new loop header. This allows us to later split the + preheader block and still find the right LC nodes. */ + if (flow_loops) + for (auto gsi_from = gsi_start_phis (new_loop->header), + gsi_to = gsi_start_phis (loop->header); + !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to); + gsi_next (&gsi_from), gsi_next (&gsi_to)) + { + gimple *from_phi = gsi_stmt (gsi_from); + gimple *to_phi = gsi_stmt (gsi_to); + tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, + loop_latch_edge (new_loop)); + adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop), + new_arg); + } + if (scalar_loop != loop) { /* Remove the non-necessary forwarder of scalar_loop again. */ @@ -1607,29 +1728,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, loop_preheader_edge (new_loop)->src); } - if (scalar_loop != loop) - { - /* Update new_loop->header PHIs, so that on the preheader - edge they are the ones from loop rather than scalar_loop. */ - gphi_iterator gsi_orig, gsi_new; - edge orig_e = loop_preheader_edge (loop); - edge new_e = loop_preheader_edge (new_loop); - - for (gsi_orig = gsi_start_phis (loop->header), - gsi_new = gsi_start_phis (new_loop->header); - !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new); - gsi_next (&gsi_orig), gsi_next (&gsi_new)) - { - gphi *orig_phi = gsi_orig.phi (); - gphi *new_phi = gsi_new.phi (); - tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e); - location_t orig_locus - = gimple_phi_arg_location_from_edge (orig_phi, orig_e); - - add_phi_arg (new_phi, orig_arg, new_e, orig_locus); - } - } - free (new_bbs); free (bbs); @@ -1696,11 +1794,11 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, */ bool -slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e) +slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e, + const_edge e) { - edge exit_e = single_exit (loop); edge entry_e = loop_preheader_edge (loop); - gcond *orig_cond = get_loop_exit_condition (loop); + gcond *orig_cond = get_loop_exit_condition (exit_e); gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src); unsigned int num_bb = loop->inner? 5 : 2; @@ -1709,7 +1807,7 @@ slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e) if (!loop_outer (loop) || loop->num_nodes != num_bb || !empty_block_p (loop->latch) - || !single_exit (loop) + || !exit_e /* Verify that new loop exit condition can be trivially modified. */ || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi)) || (e != exit_e && e != entry_e)) @@ -1722,7 +1820,7 @@ slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e) return ret; } -/* Function vect_get_loop_location. +/* Function find_loop_location. Extract the location of the loop in the source code. If the loop is not well formed for vectorization, an estimated @@ -1739,11 +1837,19 @@ find_loop_location (class loop *loop) if (!loop) return dump_user_location_t (); - stmt = get_loop_exit_condition (loop); + if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS)) + { + /* We only care about the loop location, so use any exit with location + information. */ + for (edge e : get_loop_exit_edges (loop)) + { + stmt = get_loop_exit_condition (e); - if (stmt - && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) - return stmt; + if (stmt + && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) + return stmt; + } + } /* If we got here the loop is probably not "well formed", try to estimate the loop location */ @@ -1962,7 +2068,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, gphi_iterator gsi, gsi1; class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block update_bb = update_e->dest; - basic_block exit_bb = single_exit (loop)->dest; + + basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; /* Make sure there exists a single-predecessor exit bb: */ gcc_assert (single_pred_p (exit_bb)); @@ -2529,10 +2636,9 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, { /* We should be using a step_vector of VF if VF is variable. */ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant (); - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree type = TREE_TYPE (niters_vector); tree log_vf = build_int_cst (type, exact_log2 (vf)); - basic_block exit_bb = single_exit (loop)->dest; + basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; gcc_assert (niters_vector_mult_vf_ptr != NULL); tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type, @@ -2551,137 +2657,36 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP, this function searches for the corresponding lcssa phi node in exit - bb of LOOP. If it is found, return the phi result; otherwise return - NULL. */ + bb of LOOP following the LCSSA_EDGE to the exit node. If it is found, + return the phi result; otherwise return NULL. */ static tree -find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED, - gphi *lcssa_phi) +find_guard_arg (class loop *loop ATTRIBUTE_UNUSED, const_edge loop_e, + class loop *epilog ATTRIBUTE_UNUSED, gphi *lcssa_phi, + int lcssa_edge = 0) { gphi_iterator gsi; - edge e = single_exit (loop); - gcc_assert (single_pred_p (e->dest)); - for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + for (gsi = gsi_start_phis (loop_e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) { gphi *phi = gsi.phi (); - if (operand_equal_p (PHI_ARG_DEF (phi, 0), - PHI_ARG_DEF (lcssa_phi, 0), 0)) - return PHI_RESULT (phi); - } - return NULL_TREE; -} - -/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND - from SECOND/FIRST and puts it at the original loop's preheader/exit - edge, the two loops are arranged as below: - - preheader_a: - first_loop: - header_a: - i_1 = PHI<i_0, i_2>; - ... - i_2 = i_1 + 1; - if (cond_a) - goto latch_a; - else - goto between_bb; - latch_a: - goto header_a; - - between_bb: - ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST, - - second_loop: - header_b: - i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x, - or with i_2 if no LCSSA phi is created - under condition of CREATE_LCSSA_FOR_IV_PHIS. - ... - i_4 = i_3 + 1; - if (cond_b) - goto latch_b; - else - goto exit_bb; - latch_b: - goto header_b; - - exit_bb: - - This function creates loop closed SSA for the first loop; update the - second loop's PHI nodes by replacing argument on incoming edge with the - result of newly created lcssa PHI nodes. IF CREATE_LCSSA_FOR_IV_PHIS - is false, Loop closed ssa phis will only be created for non-iv phis for - the first loop. - - This function assumes exit bb of the first loop is preheader bb of the - second loop, i.e, between_bb in the example code. With PHIs updated, - the second loop will execute rest iterations of the first. */ - -static void -slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo, - class loop *first, class loop *second, - bool create_lcssa_for_iv_phis) -{ - gphi_iterator gsi_update, gsi_orig; - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - - edge first_latch_e = EDGE_SUCC (first->latch, 0); - edge second_preheader_e = loop_preheader_edge (second); - basic_block between_bb = single_exit (first)->dest; - - gcc_assert (between_bb == second_preheader_e->src); - gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb)); - /* Either the first loop or the second is the loop to be vectorized. */ - gcc_assert (loop == first || loop == second); - - for (gsi_orig = gsi_start_phis (first->header), - gsi_update = gsi_start_phis (second->header); - !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update); - gsi_next (&gsi_orig), gsi_next (&gsi_update)) - { - gphi *orig_phi = gsi_orig.phi (); - gphi *update_phi = gsi_update.phi (); - - tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e); - /* Generate lcssa PHI node for the first loop. */ - gphi *vect_phi = (loop == first) ? orig_phi : update_phi; - stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi); - if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info)) + /* Nested loops with multiple exits can have different no# phi node + arguments between the main loop and epilog as epilog falls to the + second loop. */ + if (gimple_phi_num_args (phi) > loop_e->dest_idx) { - tree new_res = copy_ssa_name (PHI_RESULT (orig_phi)); - gphi *lcssa_phi = create_phi_node (new_res, between_bb); - add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION); - arg = new_res; - } - - /* Update PHI node in the second loop by replacing arg on the loop's - incoming edge. */ - adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg); - } - - /* For epilogue peeling we have to make sure to copy all LC PHIs - for correct vectorization of live stmts. */ - if (loop == first) - { - basic_block orig_exit = single_exit (second)->dest; - for (gsi_orig = gsi_start_phis (orig_exit); - !gsi_end_p (gsi_orig); gsi_next (&gsi_orig)) - { - gphi *orig_phi = gsi_orig.phi (); - tree orig_arg = PHI_ARG_DEF (orig_phi, 0); - if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg)) - continue; - - /* Already created in the above loop. */ - if (find_guard_arg (first, second, orig_phi)) + tree var = PHI_ARG_DEF (phi, loop_e->dest_idx); + if (TREE_CODE (var) != SSA_NAME) continue; - - tree new_res = copy_ssa_name (orig_arg); - gphi *lcphi = create_phi_node (new_res, between_bb); - add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION); + tree def = get_current_def (var); + if (!def) + continue; + if (operand_equal_p (def, + PHI_ARG_DEF (lcssa_phi, lcssa_edge), 0)) + return PHI_RESULT (phi); } } + return NULL_TREE; } /* Function slpeel_add_loop_guard adds guard skipping from the beginning @@ -2766,11 +2771,11 @@ slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop, } } -/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied - from LOOP. Function slpeel_add_loop_guard adds guard skipping from a - point between the two loops to the end of EPILOG. Edges GUARD_EDGE - and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG. - The CFG looks like: +/* LOOP and EPILOG are two consecutive loops in CFG connected by LOOP_EXIT edge + and EPILOG is copied from LOOP. Function slpeel_add_loop_guard adds guard + skipping from a point between the two loops to the end of EPILOG. Edges + GUARD_EDGE and MERGE_EDGE are the two pred edges of merge_bb at the end of + EPILOG. The CFG looks like: loop: header_a: @@ -2821,6 +2826,7 @@ slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop, static void slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog, + const_edge loop_exit, edge guard_edge, edge merge_edge) { gphi_iterator gsi; @@ -2829,13 +2835,11 @@ slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog, gcc_assert (single_succ_p (merge_bb)); edge e = single_succ_edge (merge_bb); basic_block exit_bb = e->dest; - gcc_assert (single_pred_p (exit_bb)); - gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest); for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gphi *update_phi = gsi.phi (); - tree old_arg = PHI_ARG_DEF (update_phi, 0); + tree old_arg = PHI_ARG_DEF (update_phi, e->dest_idx); tree merge_arg = NULL_TREE; @@ -2847,7 +2851,8 @@ slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog, if (!merge_arg) merge_arg = old_arg; - tree guard_arg = find_guard_arg (loop, epilog, update_phi); + tree guard_arg = find_guard_arg (loop, loop_exit, epilog, + update_phi, e->dest_idx); /* If the var is live after loop but not a reduction, we simply use the old arg. */ if (!guard_arg) @@ -2867,21 +2872,6 @@ slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog, } } -/* EPILOG loop is duplicated from the original loop for vectorizing, - the arg of its loop closed ssa PHI needs to be updated. */ - -static void -slpeel_update_phi_nodes_for_lcssa (class loop *epilog) -{ - gphi_iterator gsi; - basic_block exit_bb = single_exit (epilog)->dest; - - gcc_assert (single_pred_p (exit_bb)); - edge e = EDGE_PRED (exit_bb, 0); - for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi)) - rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); -} - /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped. Return a value that equals: @@ -3201,27 +3191,36 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } if (vect_epilogues) - /* Make sure to set the epilogue's epilogue scalar loop, such that we can - use the original scalar loop as remaining epilogue if necessary. */ - LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) - = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + { + /* Make sure to set the epilogue's epilogue scalar loop, such that we can + use the original scalar loop as remaining epilogue if necessary. */ + LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) + = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + LOOP_VINFO_SCALAR_IV_EXIT (epilogue_vinfo) + = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); + } if (prolog_peeling) { e = loop_preheader_edge (loop); - gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e)); + edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo); + gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)); /* Peel prolog and put it on preheader edge of loop. */ - prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); + edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); + edge prolog_e = NULL; + prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e, + scalar_loop, scalar_e, + e, &prolog_e); gcc_assert (prolog); prolog->force_vectorize = false; - slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true); + first_loop = prolog; reset_original_copy_tables (); /* Update the number of iterations for prolog loop. */ tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog)); - vect_set_loop_condition (prolog, NULL, niters_prolog, + vect_set_loop_condition (prolog, prolog_e, loop_vinfo, niters_prolog, step_prolog, NULL_TREE, false); /* Skip the prolog loop. */ @@ -3275,8 +3274,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, if (epilog_peeling) { - e = single_exit (loop); - gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e)); + e = LOOP_VINFO_IV_EXIT (loop_vinfo); + gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e)); /* Peel epilog and put it on exit edge of loop. If we are vectorizing said epilog then we should use a copy of the main loop as a starting @@ -3285,12 +3284,16 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, If we are not vectorizing the epilog then we should use the scalar loop as the transformations mentioned above make less or no sense when not vectorizing. */ + edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; - epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e); + edge epilog_e = vect_epilogues ? e : scalar_e; + edge new_epilog_e = NULL; + epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, + epilog_e, e, + &new_epilog_e); + LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e; gcc_assert (epilog); - epilog->force_vectorize = false; - slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false); bb_before_epilog = loop_preheader_edge (epilog)->src; /* Scalar version loop may be preferred. In this case, add guard @@ -3335,7 +3338,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, free (bbs); free (original_bbs); } - else + else if (old_count.nonzero_p ()) scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1); /* Only need to handle basic block before epilog loop if it's not @@ -3374,16 +3377,18 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, { guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, niters, niters_vector_mult_vf); - guard_bb = single_exit (loop)->dest; - guard_to = split_edge (single_exit (epilog)); + guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; + edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); + guard_to = split_edge (epilog_e); guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to, skip_vector ? anchor : guard_bb, prob_epilog.invert (), irred_flag); if (vect_epilogues) epilogue_vinfo->skip_this_loop_edge = guard_e; - slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e, - single_exit (epilog)); + edge main_iv = LOOP_VINFO_IV_EXIT (loop_vinfo); + slpeel_update_phi_nodes_for_guard2 (loop, epilog, main_iv, guard_e, + epilog_e); /* Only need to handle basic block before epilog loop if it's not the guard_bb, which is the case when skip_vector is true. */ if (guard_bb != bb_before_epilog) @@ -3394,8 +3399,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } scale_loop_profile (epilog, prob_epilog, -1); } - else - slpeel_update_phi_nodes_for_lcssa (epilog); unsigned HOST_WIDE_INT bound; if (bound_scalar.is_constant (&bound)) @@ -3416,6 +3419,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, { epilog->aux = epilogue_vinfo; LOOP_VINFO_LOOP (epilogue_vinfo) = epilog; + LOOP_VINFO_IV_EXIT (epilogue_vinfo) + = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo); loop_constraint_clear (epilog, LOOP_C_INFINITE); |