diff options
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- | gcc/tree-vectorizer.c | 226 |
1 files changed, 206 insertions, 20 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 43b51a7..fcc7416 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -918,20 +918,29 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) static edge slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, - basic_block dom_bb) + basic_block dom_bb) { block_stmt_iterator bsi; edge new_e, enter_e; tree cond_stmt; + tree gimplify_stmt_list; enter_e = EDGE_SUCC (guard_bb, 0); enter_e->flags &= ~EDGE_FALLTHRU; enter_e->flags |= EDGE_FALSE_VALUE; bsi = bsi_last (guard_bb); + cond = + force_gimple_operand (cond, &gimplify_stmt_list, true, + NULL_TREE); cond_stmt = build3 (COND_EXPR, void_type_node, cond, NULL_TREE, NULL_TREE); + if (gimplify_stmt_list) + bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT); + + bsi = bsi_last (guard_bb); bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + /* Add new edge to connect guard block to the merge/loop-exit block. */ new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); @@ -1007,12 +1016,89 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop, } #endif +/* If the run time cost model check determines that vectorization is + not profitable and hence scalar loop should be generated then set + FIRST_NITERS to prologue peeled iterations. This will allow all the + iterations to be executed in the prologue peeled scalar loop. */ + +void +set_prologue_iterations (basic_block bb_before_first_loop, + tree first_niters, + struct loop *loop, + unsigned int th) +{ + edge e; + basic_block cond_bb, then_bb; + tree var, prologue_after_cost_adjust_name, stmt; + block_stmt_iterator bsi; + tree newphi; + edge e_true, e_false, e_fallthru; + tree cond_stmt; + tree gimplify_stmt_list; + tree cost_pre_condition = NULL_TREE; + tree scalar_loop_iters = + LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)); + + e = single_pred_edge (bb_before_first_loop); + cond_bb = split_edge(e); + + e = single_pred_edge (bb_before_first_loop); + then_bb = split_edge(e); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + + e_false = make_single_succ_edge (cond_bb, bb_before_first_loop, + EDGE_FALSE_VALUE); + set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb); + + e_true = EDGE_PRED (then_bb, 0); + e_true->flags &= ~EDGE_FALLTHRU; + e_true->flags |= EDGE_TRUE_VALUE; + + e_fallthru = EDGE_SUCC (then_bb, 0); + + cost_pre_condition = + build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), th)); + cost_pre_condition = + force_gimple_operand (cost_pre_condition, &gimplify_stmt_list, + true, NULL_TREE); + cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition, + NULL_TREE, NULL_TREE); + + bsi = bsi_last (cond_bb); + if (gimplify_stmt_list) + bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT); + + bsi = bsi_last (cond_bb); + bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + + var = create_tmp_var (TREE_TYPE (scalar_loop_iters), + "prologue_after_cost_adjust"); + add_referenced_var (var); + prologue_after_cost_adjust_name = + force_gimple_operand (scalar_loop_iters, &stmt, false, var); + + bsi = bsi_last (then_bb); + if (stmt) + bsi_insert_after (&bsi, stmt, BSI_NEW_STMT); + + newphi = create_phi_node (var, bb_before_first_loop); + add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru); + add_phi_arg (newphi, first_niters, e_false); + + first_niters = PHI_RESULT (newphi); +} + + /* Function slpeel_tree_peel_loop_to_edge. Peel the first (last) iterations of LOOP into a new prolog (epilog) loop that is placed on the entry (exit) edge E of LOOP. After this transformation we have two loops one after the other - first-loop iterates FIRST_NITERS times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. + If the cost model indicates that it is profitable to emit a scalar + loop instead of the vector one, then the prolog (epilog) loop will iterate + for the entire unchanged scalar iterations of the loop. Input: - LOOP: the loop to be peeled. @@ -1027,6 +1113,13 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop, for updating the loop bound of the first-loop to FIRST_NITERS. If it is false, the caller of this function may want to take care of this (this can be useful if we don't want new stmts added to first-loop). + - TH: cost model profitability threshold of iterations for vectorization. + - CHECK_PROFITABILITY: specify whether cost model check has not occured + during versioning and hence needs to occur during + prologue generation or whether cost model check + has not occured during prologue generation and hence + needs to occur during epilogue generation. + Output: The function returns a pointer to the new loop-copy, or NULL if it failed @@ -1048,11 +1141,11 @@ struct loop* slpeel_tree_peel_loop_to_edge (struct loop *loop, edge e, tree first_niters, tree niters, bool update_first_loop_count, - unsigned int th) + unsigned int th, bool check_profitability) { struct loop *new_loop = NULL, *first_loop, *second_loop; edge skip_e; - tree pre_condition; + tree pre_condition = NULL_TREE; bitmap definitions; basic_block bb_before_second_loop, bb_after_second_loop; basic_block bb_before_first_loop; @@ -1060,6 +1153,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, basic_block new_exit_bb; edge exit_e = single_exit (loop); LOC loop_loc; + tree cost_pre_condition = NULL_TREE; + tree scalar_loop_iters = + LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)); if (!slpeel_can_duplicate_loop_p (loop, e)) return NULL; @@ -1116,32 +1212,121 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, rename_variables_in_loop (new_loop); - /* 2. Add the guard that controls whether the first loop is executed. - Resulting CFG would be: + /* 2. Add the guard code in one of the following ways: - bb_before_first_loop: - if (FIRST_NITERS == 0) GOTO bb_before_second_loop - GOTO first-loop + 2.a Add the guard that controls whether the first loop is executed. + This occurs when this function is invoked for prologue or epilogiue + generation and when the cost model check can be done at compile time. - first_loop: - do { - } while ... + Resulting CFG would be: - bb_before_second_loop: + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop + GOTO first-loop - second_loop: - do { - } while ... + first_loop: + do { + } while ... - orig_exit_bb: - */ + bb_before_second_loop: + + second_loop: + do { + } while ... + + orig_exit_bb: + + 2.b Add the cost model check that allows the prologue + to iterate for the entire unchanged scalar + iterations of the loop in the event that the cost + model indicates that the scalar loop is more + profitable than the vector one. This occurs when + this function is invoked for prologue generation + and the cost model check needs to be done at run + time. + + Resulting CFG after prologue peeling would be: + + if (scalar_loop_iterations <= th) + FIRST_NITERS = scalar_loop_iterations + + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop + GOTO first-loop + + first_loop: + do { + } while ... + + bb_before_second_loop: + + second_loop: + do { + } while ... + + orig_exit_bb: + + 2.c Add the cost model check that allows the epilogue + to iterate for the entire unchanged scalar + iterations of the loop in the event that the cost + model indicates that the scalar loop is more + profitable than the vector one. This occurs when + this function is invoked for epilogue generation + and the cost model check needs to be done at run + time. + + Resulting CFG after prologue peeling would be: + + bb_before_first_loop: + if ((scalar_loop_iterations <= th) + || + FIRST_NITERS == 0) GOTO bb_before_second_loop + GOTO first-loop + + first_loop: + do { + } while ... + + bb_before_second_loop: + + second_loop: + do { + } while ... + + orig_exit_bb: + */ bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); bb_before_second_loop = split_edge (single_exit (first_loop)); - pre_condition = - fold_build2 (LE_EXPR, boolean_type_node, first_niters, - build_int_cst (TREE_TYPE (first_niters), th)); + /* Epilogue peeling. */ + if (!update_first_loop_count) + { + pre_condition = + fold_build2 (LE_EXPR, boolean_type_node, first_niters, + build_int_cst (TREE_TYPE (first_niters), 0)); + if (check_profitability) + { + cost_pre_condition = + build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), th)); + + pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, + cost_pre_condition, pre_condition); + } + } + + /* Prologue peeling. */ + else + { + if (check_profitability) + set_prologue_iterations (bb_before_first_loop, first_niters, + loop, th); + + pre_condition = + fold_build2 (LE_EXPR, boolean_type_node, first_niters, + build_int_cst (TREE_TYPE (first_niters), 0)); + } skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, bb_before_second_loop, bb_before_first_loop); @@ -1468,6 +1653,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_BBS (res) = bbs; LOOP_VINFO_NITERS (res) = NULL; + LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_PEELING_FOR_ALIGNMENT (res) = 0; |