diff options
author | Richard Biener <rguenther@suse.de> | 2013-11-21 14:09:15 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2013-11-21 14:09:15 +0000 |
commit | 15e693cc593824fa56a2e52b756e1e2e4bad2a27 (patch) | |
tree | 0586a0be8d7ce253c92bf4eab57e92b0dd307936 /gcc/tree-vect-loop.c | |
parent | b05e02332f86e3168b96708263a88ec8f07c5e5a (diff) | |
download | gcc-15e693cc593824fa56a2e52b756e1e2e4bad2a27.zip gcc-15e693cc593824fa56a2e52b756e1e2e4bad2a27.tar.gz gcc-15e693cc593824fa56a2e52b756e1e2e4bad2a27.tar.bz2 |
re PR tree-optimization/59058 (wrong code at -O3 on x86_64-linux-gnu (affecting gcc 4.6 to trunk))
2013-11-21 Richard Biener <rguenther@suse.de>
PR tree-optimization/59058
* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
(NITERS_KNOWN_P): Fold into ...
(LOOP_VINFO_NITERS_KNOWN_P): ... this.
(LOOP_VINFO_PEELING_FOR_NITER): Add.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Re-use precomputed niter
instead of re-emitting it.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
* tree-vect-loop.c (vect_get_loop_niters): Use
number_of_latch_executions.
(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
(vect_analyze_loop_form): Simplify.
(vect_analyze_loop_operations): Move epilogue peeling code ...
(vect_analyze_loop_2): ... here and adjust it to compute
LOOP_VINFO_PEELING_FOR_NITER.
(vect_estimate_min_profitable_iters): Use
LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_build_loop_niters): Emit on the preheader.
(vect_generate_tmps_on_preheader): Likewise.
(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
of recomputing it. Adjust.
From-SVN: r205217
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 217 |
1 file changed, 110 insertions, 107 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 36393da..02aa090 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -771,11 +771,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); } + /* Function vect_get_loop_niters. - Determine how many iterations the loop is executed. - If an expression that represents the number of iterations - can be constructed, place it in NUMBER_OF_ITERATIONS. + Determine how many iterations the loop is executed and place it + in NUMBER_OF_ITERATIONS. + Return the loop exit condition. */ static gimple @@ -786,20 +787,16 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== get_loop_niters ===\n"); - niters = number_of_exit_cond_executions (loop); - if (niters != NULL_TREE - && niters != chrec_dont_know) - { - *number_of_iterations = niters; - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:"); - dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations); - dump_printf (MSG_NOTE, "\n"); - } - } + niters = number_of_latch_executions (loop); + /* We want the number of loop header executions which is the number + of latch executions plus one. + ??? 
For UINT_MAX latch executions this number overflows to zero + for loops like do { n++; } while (n != 0); */ + if (niters && !chrec_contains_undetermined (niters)) + niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters, + build_int_cst (TREE_TYPE (niters), 1)); + *number_of_iterations = niters; return get_loop_exit_condition (loop); } @@ -907,7 +904,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; - LOOP_PEELING_FOR_ALIGNMENT (res) = 0; + LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; LOOP_VINFO_LOOP_NEST (res).create (3); LOOP_VINFO_DATAREFS (res).create (10); @@ -924,6 +921,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop); LOOP_VINFO_PEELING_FOR_GAPS (res) = false; + LOOP_VINFO_PEELING_FOR_NITER (res) = false; LOOP_VINFO_OPERANDS_SWAPPED (res) = false; return res; @@ -1091,12 +1089,12 @@ vect_analyze_loop_form (struct loop *loop) } if (empty_block_p (loop->header)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "not vectorized: empty loop.\n"); - return NULL; - } + return NULL; + } } else { @@ -1243,7 +1241,8 @@ vect_analyze_loop_form (struct loop *loop) return NULL; } - if (!number_of_iterations) + if (!number_of_iterations + || chrec_contains_undetermined (number_of_iterations)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -1254,17 +1253,21 @@ vect_analyze_loop_form (struct loop *loop) return NULL; } - if (chrec_contains_undetermined (number_of_iterations)) + if (integer_zerop (number_of_iterations)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Infinite number of iterations.\n"); + dump_printf_loc 
(MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: number of iterations = 0.\n"); if (inner_loop_vinfo) - destroy_loop_vec_info (inner_loop_vinfo, true); + destroy_loop_vec_info (inner_loop_vinfo, true); return NULL; } - if (!NITERS_KNOWN_P (number_of_iterations)) + loop_vinfo = new_loop_vec_info (loop); + LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; + LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { if (dump_enabled_p ()) { @@ -1274,19 +1277,6 @@ vect_analyze_loop_form (struct loop *loop) dump_printf (MSG_NOTE, "\n"); } } - else if (TREE_INT_CST_LOW (number_of_iterations) == 0) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: number of iterations = 0.\n"); - if (inner_loop_vinfo) - destroy_loop_vec_info (inner_loop_vinfo, true); - return NULL; - } - - loop_vinfo = new_loop_vec_info (loop); - LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; - LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; @@ -1588,23 +1578,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) return false; } - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) - < exact_log2 (vectorization_factor))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); - if (!vect_can_advance_ivs_p (loop_vinfo) - || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: can't create required " - "epilog loop\n"); - return false; - } - } - return true; } @@ -1760,6 +1733,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) return false; } + /* Decide whether we need to create an epilogue loop to handle + remaining scalar iterations. 
*/ + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) + { + if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo) + - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))) + LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; + } + else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) + < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))) + LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; + + /* If an epilogue loop is required make sure we can create one. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); + if (!vect_can_advance_ivs_p (loop_vinfo) + || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + single_exit (LOOP_VINFO_LOOP + (loop_vinfo)))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't create required " + "epilog loop\n"); + return false; + } + } + return true; } @@ -2689,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, int scalar_single_iter_cost = 0; int scalar_outside_cost = 0; int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); + int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ @@ -2880,7 +2887,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, else { /* Cost model check occurs at prologue generation. */ - if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) + if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) + vect_get_stmt_cost (cond_branch_not_taken); /* Cost model check occurs at epilogue generation. 
*/ @@ -5574,47 +5581,51 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt) /* This function builds ni_name = number of iterations. Statements - are queued onto SEQ. */ + are emitted on the loop preheader edge. */ static tree -vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq) +vect_build_loop_niters (loop_vec_info loop_vinfo) { - tree ni_name, var; - gimple_seq stmts = NULL; tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); + if (TREE_CODE (ni) == INTEGER_CST) + return ni; + else + { + tree ni_name, var; + gimple_seq stmts = NULL; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - var = create_tmp_var (TREE_TYPE (ni), "niters"); - ni_name = force_gimple_operand (ni, &stmts, false, var); - - if (stmts) - gimple_seq_add_seq (seq, stmts); + var = create_tmp_var (TREE_TYPE (ni), "niters"); + ni_name = force_gimple_operand (ni, &stmts, false, var); + if (stmts) + gsi_insert_seq_on_edge_immediate (pe, stmts); - return ni_name; + return ni_name; + } } /* This function generates the following statements: - ni_name = number of iterations loop executes - ratio = ni_name / vf - ratio_mult_vf_name = ratio * vf + ni_name = number of iterations loop executes + ratio = ni_name / vf + ratio_mult_vf_name = ratio * vf - and places them in COND_EXPR_STMT_LIST. */ + and places them on the loop preheader edge. 
*/ static void vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree ni_name, tree *ratio_mult_vf_name_ptr, - tree *ratio_name_ptr, - gimple_seq *cond_expr_stmt_list) + tree *ratio_name_ptr) { - gimple_seq stmts; tree ni_minus_gap_name; tree var; tree ratio_name; tree ratio_mult_vf_name; tree ni = LOOP_VINFO_NITERS (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); tree log_vf; log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); @@ -5630,11 +5641,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, if (!is_gimple_val (ni_minus_gap_name)) { var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); - - stmts = NULL; + gimple stmts = NULL; ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, true, var); - gimple_seq_add_seq (cond_expr_stmt_list, stmts); + gsi_insert_seq_on_edge_immediate (pe, stmts); } } else @@ -5647,10 +5657,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, if (!is_gimple_val (ratio_name)) { var = create_tmp_var (TREE_TYPE (ni), "bnd"); - - stmts = NULL; + gimple stmts = NULL; ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); - gimple_seq_add_seq (cond_expr_stmt_list, stmts); + gsi_insert_seq_on_edge_immediate (pe, stmts); } *ratio_name_ptr = ratio_name; @@ -5663,11 +5672,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, if (!is_gimple_val (ratio_mult_vf_name)) { var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); - - stmts = NULL; + gimple stmts = NULL; ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, true, var); - gimple_seq_add_seq (cond_expr_stmt_list, stmts); + gsi_insert_seq_on_edge_immediate (pe, stmts); } *ratio_mult_vf_name_ptr = ratio_mult_vf_name; } @@ -5739,20 +5747,20 @@ vect_transform_loop (loop_vec_info loop_vinfo) check_profitability = false; } + tree ni_name = vect_build_loop_niters (loop_vinfo); + LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name; + /* 
Peel the loop if there are data refs with unknown alignment. - Only one data ref with unknown store is allowed. - This clobbers LOOP_VINFO_NITERS but retains the original - in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing - niters. */ + Only one data ref with unknown store is allowed. */ - if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) + if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) { - gimple_seq stmts = NULL; - tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts); - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); vect_do_peeling_for_alignment (loop_vinfo, ni_name, th, check_profitability); check_profitability = false; + /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to + be re-computed. */ + ni_name = NULL_TREE; } /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a @@ -5763,16 +5771,14 @@ vect_transform_loop (loop_vec_info loop_vinfo) will remain scalar and will compute the remaining (n%VF) iterations. (VF is the vectorization factor). 
*/ - if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) - < exact_log2 (vectorization_factor) + if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) { - tree ni_name, ratio_mult_vf; - gimple_seq stmts = NULL; - ni_name = vect_build_loop_niters (loop_vinfo, &stmts); + tree ratio_mult_vf; + if (!ni_name) + ni_name = vect_build_loop_niters (loop_vinfo); vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, - &ratio, &stmts); - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + &ratio); vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, th, check_profitability); } @@ -5781,12 +5787,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); else { - tree ni_name; - gimple_seq stmts = NULL; - ni_name = vect_build_loop_niters (loop_vinfo, &stmts); - vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, - &ratio, &stmts); - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + if (!ni_name) + ni_name = vect_build_loop_niters (loop_vinfo); + vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio); } /* 1) Make sure the loop header has exactly two entries |