From e0bdccac582c01c928a05f26edcd8f5ac24669eb Mon Sep 17 00:00:00 2001 From: Bin Cheng Date: Wed, 7 Apr 2021 10:24:32 +0800 Subject: tree-optimization/98736 - use programing order preserved RPO in ldist Tree loop distribution uses RPO to build reduced dependence graph, it's important that RPO preserves the original programing order. Though it usually does so, when distributing loop nest, exit BB can be placed before some loop BBs while after loop header. This patch fixes the issue by calling rev_post_order_and_mark_dfs_back_seme. gcc/ChangeLog: PR tree-optimization/98736 * tree-loop-distribution.c * (loop_distribution::bb_top_order_init): Compute RPO with programing order preserved by calling function rev_post_order_and_mark_dfs_back_seme. gcc/testsuite/ChangeLog: PR tree-optimization/98736 * gcc.c-torture/execute/pr98736.c: New test. --- gcc/tree-loop-distribution.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'gcc/tree-loop-distribution.c') diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 7ee19fc..583bb06 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -3152,11 +3152,19 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, void loop_distribution::bb_top_order_init (void) { int rpo_num; - int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); + edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bitmap exit_bbs = BITMAP_ALLOC (NULL); bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); bb_top_order_index_size = last_basic_block_for_fn (cfun); - rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); + + entry->flags &= ~EDGE_DFS_BACK; + bitmap_set_bit (exit_bbs, EXIT_BLOCK); + rpo_num = rev_post_order_and_mark_dfs_back_seme (cfun, entry, exit_bbs, true, + rpo, NULL); + BITMAP_FREE (exit_bbs); + for (int i = 0; i < rpo_num; i++) bb_top_order_index[rpo[i]] = i; -- cgit v1.1 From c01ae2ab6b227e21835d128c90e974dce4604be9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 7 Apr 2021 13:17:05 +0200 Subject: tree-optimization/99954 - fix loop distribution memcpy classification This fixes bogus classification of a copy as memcpy. We cannot use plain dependence analysis to decide between memcpy and memmove when it computes no dependence. Instead we have to try harder later which the patch does for the gcc.dg/tree-ssa/ldist-24.c testcase by resorting to tree-affine to compute the difference between src and dest and compare against the copy size. 2021-04-07 Richard Biener PR tree-optimization/99954 * tree-loop-distribution.c: Include tree-affine.h. (generate_memcpy_builtin): Try using tree-affine to prove non-overlap. (loop_distribution::classify_builtin_ldst): Always classify as PKIND_MEMMOVE. * gcc.dg/torture/pr99954.c: New testcase. --- gcc/tree-loop-distribution.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'gcc/tree-loop-distribution.c') diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 583bb06..8b91a30 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-vectorizer.h" #include "tree-eh.h" #include "gimple-fold.h" +#include "tree-affine.h" #define MAX_DATAREFS_NUM \ @@ -1212,6 +1213,18 @@ generate_memcpy_builtin (class loop *loop, partition *partition) kind = BUILT_IN_MEMCPY; else kind = BUILT_IN_MEMMOVE; + /* Try harder if we're copying a constant size. */ + if (kind == BUILT_IN_MEMMOVE && poly_int_tree_p (nb_bytes)) + { + aff_tree asrc, adest; + tree_to_aff_combination (src, ptr_type_node, &asrc); + tree_to_aff_combination (dest, ptr_type_node, &adest); + aff_combination_scale (&adest, -1); + aff_combination_add (&asrc, &adest); + if (aff_comb_cannot_overlap_p (&asrc, wi::to_poly_widest (nb_bytes), + wi::to_poly_widest (nb_bytes))) + kind = BUILT_IN_MEMCPY; + } dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, false, GSI_CONTINUE_LINKING); @@ -1759,11 +1772,11 @@ loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg, /* Now check that if there is a dependence. */ ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr); - /* Classify as memcpy if no dependence between load and store. */ + /* Classify as memmove if no dependence between load and store. */ if (DDR_ARE_DEPENDENT (ddr) == chrec_known) { partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size); - partition->kind = PKIND_MEMCPY; + partition->kind = PKIND_MEMMOVE; return; } -- cgit v1.1 From 60af2db18013a0339302928ba98fee893ccc1957 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 10 May 2021 11:37:27 +0200 Subject: tree-optimization/100492 - avoid irreducible regions in loop distribution When we distribute away a condition we rely on the ability to change it to either 1 != 0 or 0 != 0 depending on the direction of the exit branch in the respective loop. But when the loop contains an irreducible sub-region then for the conditions inside this this fails and can lead to infinite loops being generated. Avoid distibuting loops with irreducible sub-regions. 2021-05-10 Richard Biener PR tree-optimization/100492 * tree-loop-distribution.c (find_seed_stmts_for_distribution): Find nothing when the loop contains an irreducible region. * gcc.dg/torture/pr100492.c: New testcase. --- gcc/tree-loop-distribution.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'gcc/tree-loop-distribution.c') diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 8b91a30..65aa1df 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -3203,6 +3203,16 @@ find_seed_stmts_for_distribution (class loop *loop, vec *work_list) /* Initialize the worklist with stmts we seed the partitions with. */ for (unsigned i = 0; i < loop->num_nodes; ++i) { + /* In irreducible sub-regions we don't know how to redirect + conditions, so fail. See PR100492. */ + if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "loop %d contains an irreducible region.\n", + loop->num); + work_list->truncate (0); + break; + } for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) { -- cgit v1.1 From 00dcc88a0ed7bd148ea86d900b6c93574a2e1f26 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 11:14:19 -0600 Subject: Adjust by-value function vec arguments to by-reference. gcc/c-family/ChangeLog: * c-common.c (c_build_shufflevector): Adjust by-value argument to by-const-reference. * c-common.h (c_build_shufflevector): Same. gcc/c/ChangeLog: * c-tree.h (c_build_function_call_vec): Adjust by-value argument to by-const-reference. * c-typeck.c (c_build_function_call_vec): Same. gcc/ChangeLog: * cfgloop.h (single_likely_exit): Adjust by-value argument to by-const-reference. * cfgloopanal.c (single_likely_exit): Same. * cgraph.h (struct cgraph_node): Same. * cgraphclones.c (cgraph_node::create_virtual_clone): Same. * genautomata.c (merge_states): Same. * genextract.c (VEC_char_to_string): Same. * genmatch.c (dt_node::gen_kids_1): Same. (walk_captures): Adjust by-value argument to by-reference. * gimple-ssa-store-merging.c (check_no_overlap): Adjust by-value argument to by-const-reference. * gimple.c (gimple_build_call_vec): Same. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * gimple.h (gimple_build_call_vec): Adjust by-value argument to by-const-reference. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * haifa-sched.c (calc_priorities): Adjust by-value argument to by-const-reference. (sched_init_luids): Same. (haifa_init_h_i_d): Same. * ipa-cp.c (ipa_get_indirect_edge_target_1): Same. (adjust_callers_for_value_intersection): Adjust by-value argument to by-reference. (find_more_scalar_values_for_callers_subset): Adjust by-value argument to by-const-reference. (find_more_contexts_for_caller_subset): Same. (find_aggregate_values_for_callers_subset): Same. (copy_useful_known_contexts): Same. * ipa-fnsummary.c (remap_edge_summaries): Same. (remap_freqcounting_predicate): Same. * ipa-inline.c (add_new_edges_to_heap): Adjust by-value argument to by-reference. * ipa-predicate.c (predicate::remap_after_inlining): Adjust by-value argument to by-const-reference. * ipa-predicate.h (predicate::remap_after_inlining): Same. * ipa-prop.c (ipa_find_agg_cst_for_param): Same. * ipa-prop.h (ipa_find_agg_cst_for_param): Same. * ira-build.c (ira_loop_tree_body_rev_postorder): Same. * read-rtl.c (add_overload_instance): Same. * rtl.h (native_decode_rtx): Same. (native_decode_vector_rtx): Same. * sched-int.h (sched_init_luids): Same. (haifa_init_h_i_d): Same. * simplify-rtx.c (native_decode_vector_rtx): Same. (native_decode_rtx): Same. * tree-call-cdce.c (gen_shrink_wrap_conditions): Same. (shrink_wrap_one_built_in_call_with_conds): Same. (shrink_wrap_conditional_dead_built_in_calls): Same. * tree-data-ref.c (create_runtime_alias_checks): Same. (compute_all_dependences): Same. * tree-data-ref.h (compute_all_dependences): Same. (create_runtime_alias_checks): Same. (index_in_loop_nest): Same. * tree-if-conv.c (mask_exists): Same. * tree-loop-distribution.c (class loop_distribution): Same. (loop_distribution::create_rdg_vertices): Same. (dump_rdg_partitions): Same. (debug_rdg_partitions): Same. (partition_contains_all_rw): Same. (loop_distribution::distribute_loop): Same. * tree-parloops.c (oacc_entry_exit_ok_1): Same. (oacc_entry_exit_single_gang): Same. * tree-ssa-loop-im.c (hoist_memory_references): Same. (loop_suitable_for_sm): Same. * tree-ssa-loop-niter.c (bound_index): Same. * tree-ssa-reassoc.c (update_ops): Same. (swap_ops_for_binary_stmt): Same. (rewrite_expr_tree): Same. (rewrite_expr_tree_parallel): Same. * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Same. * tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Same. * tree-ssa-structalias.c (process_all_all_constraints): Same. (make_constraints_to): Same. (handle_lhs_call): Same. (find_func_aliases_for_builtin_call): Same. (sort_fieldstack): Same. (check_for_overlaps): Same. * tree-vect-loop-manip.c (vect_create_cond_for_align_checks): Same. (vect_create_cond_for_unequal_addrs): Same. (vect_create_cond_for_lower_bounds): Same. (vect_create_cond_for_alias_checks): Same. * tree-vect-slp-patterns.c (vect_validate_multiplication): Same. * tree-vect-slp.c (vect_analyze_slp_instance): Same. (vect_make_slp_decision): Same. (vect_slp_bbs): Same. (duplicate_and_interleave): Same. (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. * tree-vectorizer.h (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. (duplicate_and_interleave): Same. * tree.c (build_vector_from_ctor): Same. (build_vector): Same. (check_vector_cst): Same. (check_vector_cst_duplicate): Same. (check_vector_cst_fill): Same. (check_vector_cst_stepped): Same. * tree.h (build_vector_from_ctor): Same. --- gcc/tree-loop-distribution.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'gcc/tree-loop-distribution.c') diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 65aa1df..a984d21 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -527,7 +527,8 @@ class loop_distribution /* Build the vertices of the reduced dependence graph RDG. Return false if that failed. */ - bool create_rdg_vertices (struct graph *rdg, vec stmts, loop_p loop); + bool create_rdg_vertices (struct graph *rdg, const vec &stmts, + loop_p loop); /* Initialize STMTS with all the statements of LOOP. We use topological order to discover all statements. The order is important because @@ -646,7 +647,7 @@ class loop_distribution statements from STMTS into separate loops. Returns the number of distributed loops. Set NB_CALLS to number of generated builtin calls. Set *DESTROY_P to whether LOOP needs to be destroyed. */ - int distribute_loop (class loop *loop, vec stmts, + int distribute_loop (class loop *loop, const vec &stmts, control_dependences *cd, int *nb_calls, bool *destroy_p, bool only_patterns_p); @@ -699,7 +700,8 @@ bb_top_order_cmp_r (const void *x, const void *y, void *loop) } bool -loop_distribution::create_rdg_vertices (struct graph *rdg, vec stmts, +loop_distribution::create_rdg_vertices (struct graph *rdg, + const vec &stmts, loop_p loop) { int i; @@ -1953,7 +1955,7 @@ loop_distribution::rdg_build_partitions (struct graph *rdg, /* Dump to FILE the PARTITIONS. */ static void -dump_rdg_partitions (FILE *file, vec partitions) +dump_rdg_partitions (FILE *file, const vec &partitions) { int i; partition *partition; @@ -1963,10 +1965,10 @@ dump_rdg_partitions (FILE *file, vec partitions) } /* Debug PARTITIONS. */ -extern void debug_rdg_partitions (vec ); +extern void debug_rdg_partitions (const vec &); DEBUG_FUNCTION void -debug_rdg_partitions (vec partitions) +debug_rdg_partitions (const vec &partitions) { dump_rdg_partitions (stderr, partitions); } @@ -2017,7 +2019,7 @@ number_of_rw_in_partition (struct graph *rdg, partition *partition) static bool partition_contains_all_rw (struct graph *rdg, - vec partitions) + const vec &partitions) { int i; partition *partition; @@ -2921,7 +2923,8 @@ loop_distribution::finalize_partitions (class loop *loop, Set *DESTROY_P to whether LOOP needs to be destroyed. */ int -loop_distribution::distribute_loop (class loop *loop, vec stmts, +loop_distribution::distribute_loop (class loop *loop, + const vec &stmts, control_dependences *cd, int *nb_calls, bool *destroy_p, bool only_patterns_p) { -- cgit v1.1 From e41ba804ba5f5ca433e09238d561b1b4c8b10985 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 29 Jul 2021 22:26:25 -0500 Subject: Use range-based for loops for traversing loops This patch follows Martin's suggestion here[1], to support range based loop for iterating loops, analogously to the patch for vec[2]. For example, use below range-based for loop for (auto loop : loops_list (cfun, 0)) to replace the previous macro FOR_EACH_LOOP FOR_EACH_LOOP (loop, 0) [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573424.html [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html gcc/ChangeLog: * cfgloop.h (as_const): New function. (class loop_iterator): Rename to ... (class loops_list): ... this. (loop_iterator::next): Rename to ... (loops_list::Iter::fill_curr_loop): ... this and adjust. (loop_iterator::loop_iterator): Rename to ... (loops_list::loops_list): ... this and adjust. (loops_list::Iter): New class. (loops_list::iterator): New type. (loops_list::const_iterator): New type. (loops_list::begin): New function. (loops_list::end): Likewise. (loops_list::begin const): Likewise. (loops_list::end const): Likewise. (FOR_EACH_LOOP): Remove. (FOR_EACH_LOOP_FN): Remove. * cfgloop.c (flow_loops_dump): Adjust FOR_EACH_LOOP* with range-based for loop with loops_list instance. (sort_sibling_loops): Likewise. (disambiguate_loops_with_multiple_latches): Likewise. (verify_loop_structure): Likewise. * cfgloopmanip.c (create_preheaders): Likewise. (force_single_succ_latches): Likewise. * config/aarch64/falkor-tag-collision-avoidance.c (execute_tag_collision_avoidance): Likewise. * config/mn10300/mn10300.c (mn10300_scan_for_setlb_lcc): Likewise. * config/s390/s390.c (s390_adjust_loops): Likewise. * doc/loop.texi: Likewise. * gimple-loop-interchange.cc (pass_linterchange::execute): Likewise. * gimple-loop-jam.c (tree_loop_unroll_and_jam): Likewise. * gimple-loop-versioning.cc (loop_versioning::analyze_blocks): Likewise. (loop_versioning::make_versioning_decisions): Likewise. * gimple-ssa-split-paths.c (split_paths): Likewise. * graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl): Likewise. * graphite.c (canonicalize_loop_form): Likewise. (graphite_transform_loops): Likewise. * ipa-fnsummary.c (analyze_function_body): Likewise. * ipa-pure-const.c (analyze_function): Likewise. * loop-doloop.c (doloop_optimize_loops): Likewise. * loop-init.c (loop_optimizer_finalize): Likewise. (fix_loop_structure): Likewise. * loop-invariant.c (calculate_loop_reg_pressure): Likewise. (move_loop_invariants): Likewise. * loop-unroll.c (decide_unrolling): Likewise. (unroll_loops): Likewise. * modulo-sched.c (sms_schedule): Likewise. * predict.c (predict_loops): Likewise. (pass_profile::execute): Likewise. * profile.c (branch_prob): Likewise. * sel-sched-ir.c (sel_finish_pipelining): Likewise. (sel_find_rgns): Likewise. * tree-cfg.c (replace_loop_annotate): Likewise. (replace_uses_by): Likewise. (move_sese_region_to_fn): Likewise. * tree-if-conv.c (pass_if_conversion::execute): Likewise. * tree-loop-distribution.c (loop_distribution::execute): Likewise. * tree-parloops.c (parallelize_loops): Likewise. * tree-predcom.c (tree_predictive_commoning): Likewise. * tree-scalar-evolution.c (scev_initialize): Likewise. (scev_reset): Likewise. * tree-ssa-dce.c (find_obviously_necessary_stmts): Likewise. * tree-ssa-live.c (remove_unused_locals): Likewise. * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. * tree-ssa-loop-im.c (analyze_memory_references): Likewise. (tree_ssa_lim_initialize): Likewise. * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Likewise. * tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize): Likewise. * tree-ssa-loop-manip.c (get_loops_exits): Likewise. * tree-ssa-loop-niter.c (estimate_numbers_of_iterations): Likewise. (free_numbers_of_iterations_estimates): Likewise. * tree-ssa-loop-prefetch.c (tree_ssa_prefetch_arrays): Likewise. * tree-ssa-loop-split.c (tree_ssa_split_loops): Likewise. * tree-ssa-loop-unswitch.c (tree_ssa_unswitch_loops): Likewise. * tree-ssa-loop.c (gate_oacc_kernels): Likewise. (pass_scev_cprop::execute): Likewise. * tree-ssa-propagate.c (clean_up_loop_closed_phi): Likewise. * tree-ssa-sccvn.c (do_rpo_vn): Likewise. * tree-ssa-threadupdate.c (jump_thread_path_registry::thread_through_all_blocks): Likewise. * tree-vectorizer.c (vectorize_loops): Likewise. * tree-vrp.c (vrp_asserts::find_assert_locations): Likewise. --- gcc/tree-loop-distribution.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-loop-distribution.c') diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index a984d21..2df762c 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -3315,7 +3315,7 @@ loop_distribution::execute (function *fun) /* We can at the moment only distribute non-nested loops, thus restrict walking to innermost loops. */ - FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) + for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST)) { /* Don't distribute multiple exit edges loop, or cold loop when not doing pattern detection. */ -- cgit v1.1