From b8b80b8aa3d9a7abbcb59b651ea5e84c2ea12d0b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 11 Jun 2021 12:06:08 +0200 Subject: tree-optimization/101025 - fix store-motion dependence checking This plugs a hole in store-motion where it fails to perform dependence checking on conditionally executed but not store-motioned refs. 2021-06-11 Richard Biener PR tree-optimization/101025 * tree-ssa-loop-im.c (sm_seq_valid_bb): Make sure to process all refs that require dependence checking. * gcc.dg/torture/pr101025.c: New testcase. --- gcc/tree-ssa-loop-im.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 8034cf6..1c865b2 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2169,6 +2169,7 @@ execute_sm (class loop *loop, im_mem_ref *ref, enum sm_kind { sm_ord, sm_unord, sm_other }; struct seq_entry { + seq_entry () {} seq_entry (unsigned f, sm_kind k, tree fr = NULL) : first (f), second (k), from (fr) {} unsigned first; @@ -2352,6 +2353,8 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, unsigned min_len = MIN(first_edge_seq.length (), edge_seq.length ()); /* Incrementally merge seqs into first_edge_seq. */ + int first_uneq = -1; + auto_vec extra_refs; for (unsigned int i = 0; i < min_len; ++i) { /* ??? We can more intelligently merge when we face different @@ -2367,6 +2370,11 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, bitmap_set_bit (refs_not_supported, edge_seq[i].first); first_edge_seq[i].second = sm_other; first_edge_seq[i].from = NULL_TREE; + /* Record the dropped refs for later processing. */ + if (first_uneq == -1) + first_uneq = i; + extra_refs.safe_push (seq_entry (edge_seq[i].first, + sm_other, NULL_TREE)); } /* sm_other prevails. */ else if (first_edge_seq[i].second != edge_seq[i].second) @@ -2399,10 +2407,36 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, } else if (edge_seq.length () > first_edge_seq.length ()) { + if (first_uneq == -1) + first_uneq = first_edge_seq.length (); for (unsigned i = first_edge_seq.length (); i < edge_seq.length (); ++i) - if (edge_seq[i].second == sm_ord) - bitmap_set_bit (refs_not_supported, edge_seq[i].first); + { + if (edge_seq[i].second == sm_ord) + bitmap_set_bit (refs_not_supported, edge_seq[i].first); + extra_refs.safe_push (seq_entry (edge_seq[i].first, + sm_other, NULL_TREE)); + } + } + /* Put unmerged refs at first_uneq to force dependence checking + on them. */ + if (first_uneq != -1) + { + /* Missing ordered_splice_at. */ + if ((unsigned)first_uneq == first_edge_seq.length ()) + first_edge_seq.safe_splice (extra_refs); + else + { + unsigned fes_length = first_edge_seq.length (); + first_edge_seq.safe_grow (fes_length + + extra_refs.length ()); + memmove (&first_edge_seq[first_uneq + extra_refs.length ()], + &first_edge_seq[first_uneq], + (fes_length - first_uneq) * sizeof (seq_entry)); + memcpy (&first_edge_seq[first_uneq], + extra_refs.address (), + extra_refs.length () * sizeof (seq_entry)); + } } } /* Use the sequence from the first edge and push SMs down. */ -- cgit v1.1 From 43fc4234ad3d9302d3460385b6fdb5e3f59b6986 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 16 Jun 2021 09:49:18 +0200 Subject: tree-optimization/101088 - fix SM invalidation issue When we face a sm_ord vs sm_unord for the same ref during store sequence merging we assert that the ref is already marked unsupported. 
But it can be that it will only be marked so during the ongoing merging so instead of asserting mark it here. Also apply some optimization to not waste resources to search for already unsupported refs. 2021-06-16 Richard Biener PR tree-optimization/101088 * tree-ssa-loop-im.c (sm_seq_valid_bb): Only look for supported refs on edges. Do not assert same ref but different kind stores are unsuported but mark them so. (hoist_memory_references): Only look for supported refs on exits. * gcc.dg/torture/pr101088.c: New testcase. --- gcc/tree-ssa-loop-im.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 1c865b2..7de47ed 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2340,7 +2340,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, tree vuse = gimple_phi_arg_def (phi, i); edge e = gimple_phi_arg_edge (phi, i); auto_vec edge_seq; - bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq); + bitmap_and_compl (tem_refs_not_in_seq, + refs_not_in_seq, refs_not_supported); + /* If we've marked all refs we search for as unsupported + we can stop processing and use the sequence as before + the PHI. */ + if (bitmap_empty_p (tem_refs_not_in_seq)) + return 1; eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq, tem_refs_not_in_seq, refs_not_supported, true, fully_visited); @@ -2379,9 +2385,9 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, /* sm_other prevails. */ else if (first_edge_seq[i].second != edge_seq[i].second) { - /* This is just an optimization. */ - gcc_assert (bitmap_bit_p (refs_not_supported, - first_edge_seq[i].first)); + /* Make sure the ref is marked as not supported. */ + bitmap_set_bit (refs_not_supported, + first_edge_seq[i].first); first_edge_seq[i].second = sm_other; first_edge_seq[i].from = NULL_TREE; } @@ -2533,7 +2539,12 @@ hoist_memory_references (class loop *loop, bitmap mem_refs, vec seq; seq.create (4); auto_bitmap refs_not_in_seq (&lim_bitmap_obstack); - bitmap_copy (refs_not_in_seq, mem_refs); + bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported); + if (bitmap_empty_p (refs_not_in_seq)) + { + seq.release (); + break; + } auto_bitmap fully_visited; int res = sm_seq_valid_bb (loop, e->src, NULL_TREE, seq, refs_not_in_seq, -- cgit v1.1 From e9e2bad7251477db92ab9ebcdc010f9282dd9890 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Thu, 24 Jun 2021 19:22:06 -0600 Subject: middle-end: add support for per-location warning groups. gcc/ChangeLog: * builtins.c (warn_string_no_nul): Replace uses of TREE_NO_WARNING, gimple_no_warning_p and gimple_set_no_warning with warning_suppressed_p, and suppress_warning. (c_strlen): Same. (maybe_warn_for_bound): Same. (warn_for_access): Same. (check_access): Same. (expand_builtin_strncmp): Same. (fold_builtin_varargs): Same. * calls.c (maybe_warn_nonstring_arg): Same. (maybe_warn_rdwr_sizes): Same. * cfgexpand.c (expand_call_stmt): Same. * cgraphunit.c (check_global_declaration): Same. * fold-const.c (fold_undefer_overflow_warnings): Same. (fold_truth_not_expr): Same. (fold_unary_loc): Same. (fold_checksum_tree): Same. * gimple-array-bounds.cc (array_bounds_checker::check_array_ref): Same. (array_bounds_checker::check_mem_ref): Same. (array_bounds_checker::check_addr_expr): Same. (array_bounds_checker::check_array_bounds): Same. * gimple-expr.c (copy_var_decl): Same. * gimple-fold.c (gimple_fold_builtin_strcpy): Same. (gimple_fold_builtin_strncat): Same. 
(gimple_fold_builtin_stxcpy_chk): Same. (gimple_fold_builtin_stpcpy): Same. (gimple_fold_builtin_sprintf): Same. (fold_stmt_1): Same. * gimple-ssa-isolate-paths.c (diag_returned_locals): Same. * gimple-ssa-nonnull-compare.c (do_warn_nonnull_compare): Same. * gimple-ssa-sprintf.c (handle_printf_call): Same. * gimple-ssa-store-merging.c (imm_store_chain_info::output_merged_store): Same. * gimple-ssa-warn-restrict.c (maybe_diag_overlap): Same. * gimple-ssa-warn-restrict.h: Adjust declarations. (maybe_diag_access_bounds): Replace uses of TREE_NO_WARNING, gimple_no_warning_p and gimple_set_no_warning with warning_suppressed_p, and suppress_warning. (check_call): Same. (check_bounds_or_overlap): Same. * gimple.c (gimple_build_call_from_tree): Same. * gimplify.c (gimplify_return_expr): Same. (gimplify_cond_expr): Same. (gimplify_modify_expr_complex_part): Same. (gimplify_modify_expr): Same. (gimple_push_cleanup): Same. (gimplify_expr): Same. * omp-expand.c (expand_omp_for_generic): Same. (expand_omp_taskloop_for_outer): Same. * omp-low.c (lower_rec_input_clauses): Same. (lower_lastprivate_clauses): Same. (lower_send_clauses): Same. (lower_omp_target): Same. * tree-cfg.c (pass_warn_function_return::execute): Same. * tree-complex.c (create_one_component_var): Same. * tree-inline.c (remap_gimple_op_r): Same. (copy_tree_body_r): Same. (declare_return_variable): Same. (expand_call_inline): Same. * tree-nested.c (lookup_field_for_decl): Same. * tree-sra.c (create_access_replacement): Same. (generate_subtree_copies): Same. * tree-ssa-ccp.c (pass_post_ipa_warn::execute): Same. * tree-ssa-forwprop.c (combine_cond_expr_cond): Same. * tree-ssa-loop-ch.c (ch_base::copy_headers): Same. * tree-ssa-loop-im.c (execute_sm): Same. * tree-ssa-phiopt.c (cond_store_replacement): Same. * tree-ssa-strlen.c (maybe_warn_overflow): Same. (handle_builtin_strcpy): Same. (maybe_diag_stxncpy_trunc): Same. (handle_builtin_stxncpy_strncat): Same. (handle_builtin_strcat): Same. * tree-ssa-uninit.c (get_no_uninit_warning): Same. (set_no_uninit_warning): Same. (uninit_undefined_value_p): Same. (warn_uninit): Same. (maybe_warn_operand): Same. * tree-vrp.c (compare_values_warnv): Same. * vr-values.c (vr_values::extract_range_for_var_from_comparison_expr): Same. (test_for_singularity): Same. * gimple.h (warning_suppressed_p): New function. (suppress_warning): Same. (copy_no_warning): Same. (gimple_set_block): Call gimple_set_location. (gimple_set_location): Call copy_warning. --- gcc/tree-ssa-loop-im.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 7de47ed..48c952a 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2143,7 +2143,7 @@ execute_sm (class loop *loop, im_mem_ref *ref, /* If not emitting a load mark the uninitialized state on the loop entry as not to be warned for. */ tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var)); - TREE_NO_WARNING (uninit) = 1; + suppress_warning (uninit, OPT_Wuninitialized); load = gimple_build_assign (aux->tmp_var, uninit); } lim_data = init_lim_data (load); -- cgit v1.1 From 4546f423ecff96f223adfbec4963d2ff17f27c7b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 2 Jul 2021 12:57:06 +0200 Subject: tree-optimization/101293 - further enhance LIMs ref canonicalization This makes sure to handle MEM[p + 4] and MEM[p].j with j at offset 4 as the same ref in store motion. 
For hashing we need to be more restrictive in what we handle since there's no poly-int handlers for inchash. For comparison we can compare poly_offsets directly. 2021-07-02 Richard Biener PR tree-optimization/101293 * tree-ssa-loop-im.c (mem_ref_hasher::equal): Compare MEM_REF bases with combined offsets. (gather_mem_refs_stmt): Hash MEM_REFs as if their offset were combined with the rest of the offset. * gcc.dg/tree-ssa/ssa-lim-15.c: New testcase. --- gcc/tree-ssa-loop-im.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 48c952a..e7a3050 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -194,8 +194,14 @@ mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2) { if (obj2->max_size_known_p ()) return (mem1->ref_decomposed - && operand_equal_p (mem1->mem.base, obj2->base, 0) - && known_eq (mem1->mem.offset, obj2->offset) + && ((TREE_CODE (mem1->mem.base) == MEM_REF + && TREE_CODE (obj2->base) == MEM_REF + && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0), + TREE_OPERAND (obj2->base, 0), 0) + && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset, + mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset)) + || (operand_equal_p (mem1->mem.base, obj2->base, 0) + && known_eq (mem1->mem.offset, obj2->offset))) && known_eq (mem1->mem.size, obj2->size) && known_eq (mem1->mem.max_size, obj2->max_size) && mem1->mem.volatile_p == obj2->volatile_p @@ -1500,8 +1506,21 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off))) { ref_decomposed = true; - hash = iterative_hash_expr (ao_ref_base (&aor), 0); - hash = iterative_hash_host_wide_int (offset, hash); + tree base = ao_ref_base (&aor); + poly_int64 moffset; + HOST_WIDE_INT mcoffset; + if (TREE_CODE (base) == MEM_REF + && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset) + && moffset.is_constant (&mcoffset)) + { + hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0); + hash = iterative_hash_host_wide_int (mcoffset, hash); + } + else + { + hash = iterative_hash_expr (base, 0); + hash = iterative_hash_host_wide_int (offset, hash); + } hash = iterative_hash_host_wide_int (size, hash); } else -- cgit v1.1 From 9f489a5731f12b8e6b49994e8f61acb5d26f508e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 2 Jul 2021 13:48:07 +0200 Subject: add -fmove-loop-stores option to control GIMPLE loop store-motion This adds the -fmove-loop-stores option, mainly as a way to disable the store-motion part of GIMPLE invariant motion (-ftree-loop-im) which is enabled by default. It might be sensible to turn off -fmove-loop-stores at -O1 since it can result in compile-time as well as memory usage issues but this patch tries to preserve existing behavior besides introducing the new option with the exception of -Og where I've disabled it. Controlling store-motion has been made easy by earlier refactoring for the invariant motion only use after loop interchange. 2021-07-02 Richard Biener * doc/invoke.texi (fmove-loop-stores): Document. * common.opt (fmove-loop-stores): New option. * opts.c (default_options_table): Enable -fmove-loop-stores at -O1 but not -Og. * tree-ssa-loop-im.c (pass_lim::execute): Pass flag_move_loop_stores instead of true to loop_invariant_motion_in_fun. 
--- gcc/tree-ssa-loop-im.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index e7a3050..9ac390b 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3258,7 +3258,7 @@ pass_lim::execute (function *fun) if (number_of_loops (fun) <= 1) return 0; - unsigned int todo = loop_invariant_motion_in_fun (fun, true); + unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores); if (!in_loop_pipeline) loop_optimizer_finalize (); -- cgit v1.1 From 9f34b780b0461ec7b2b2defe96e44ab616ea2aa3 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 7 Jul 2021 11:41:03 +0200 Subject: tree-optimization/99728 - improve LIM for loops with aggregate copies This improves LIM by recording aggregate copies for disambiguation purposes instead of as UNANALYZABLE_MEM which will prevent any invariant or store motion across it. This allows four of the six references in the loop of the testcase to be promoted. 2021-07-07 Richard Biener PR tree-optimization/99728 * tree-ssa-loop-im.c (gather_mem_refs_stmt): Record aggregate copies. (mem_refs_may_alias_p): Add assert we handled aggregate copies elsewhere. (sm_seq_valid_bb): Give up when running into aggregate copies. (ref_indep_loop_p): Handle aggregate copies as never being invariant themselves but allow other refs to be disambiguated against them. (can_sm_ref_p): Do not try to apply store-motion to aggregate copies. * g++.dg/opt/pr99728.C: New testcase. --- gcc/tree-ssa-loop-im.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 7 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 9ac390b..81b4ec2 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -122,7 +122,9 @@ public: hashval_t hash; /* Its hash value. */ /* The memory access itself and associated caching of alias-oracle - query meta-data. */ + query meta-data. We are using mem.ref == error_mark_node for the + case the reference is represented by its single access stmt + in accesses_in_loop[0]. */ ao_ref mem; bitmap stored; /* The set of loops in that this memory location @@ -130,8 +132,7 @@ public: bitmap loaded; /* The set of loops in that this memory location is loaded from. */ vec accesses_in_loop; - /* The locations of the accesses. Vector - indexed by the loop number. */ + /* The locations of the accesses. */ /* The following set is computed on demand. */ bitmap_head dep_loop; /* The set of loops in that the memory @@ -1465,7 +1466,22 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) return; mem = simple_mem_ref_in_stmt (stmt, &is_stored); - if (!mem) + if (!mem && is_gimple_assign (stmt)) + { + /* For aggregate copies record distinct references but use them + only for disambiguation purposes. */ + id = memory_accesses.refs_list.length (); + ref = mem_ref_alloc (NULL, 0, id); + memory_accesses.refs_list.safe_push (ref); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Unhandled memory reference %u: ", id); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } + record_mem_ref_loc (ref, stmt, mem); + is_stored = gimple_vdef (stmt); + } + else if (!mem) { /* We use the shared mem_ref for all unanalyzable refs. */ id = UNANALYZABLE_MEM_ID; @@ -1595,7 +1611,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) mark_ref_stored (ref, loop); } /* A not simple memory op is also a read when it is a write. 
*/ - if (!is_stored || id == UNANALYZABLE_MEM_ID) + if (!is_stored || id == UNANALYZABLE_MEM_ID + || ref->mem.ref == error_mark_node) { bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id); mark_ref_loaded (ref, loop); @@ -1714,6 +1731,9 @@ mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2, hash_map **ttae_cache, bool tbaa_p) { + gcc_checking_assert (mem1->mem.ref != error_mark_node + && mem2->mem.ref != error_mark_node); + /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same object and their offset differ in such a way that the locations cannot overlap, then they cannot alias. */ @@ -2490,6 +2510,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, gcc_assert (data); if (data->ref == UNANALYZABLE_MEM_ID) return -1; + /* Stop at memory references which we can't move. */ + else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node) + { + /* Mark refs_not_in_seq as unsupported. */ + bitmap_ior_into (refs_not_supported, refs_not_in_seq); + return 1; + } /* One of the stores we want to apply SM to and we've not yet seen. */ else if (bitmap_clear_bit (refs_not_in_seq, data->ref)) { @@ -2798,7 +2825,8 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind) else refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num]; - if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)) + if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID) + || ref->mem.ref == error_mark_node) indep_p = false; else { @@ -2825,7 +2853,20 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind) EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi) { im_mem_ref *aref = memory_accesses.refs_list[i]; - if (!refs_independent_p (ref, aref, kind != sm_waw)) + if (aref->mem.ref == error_mark_node) + { + gimple *stmt = aref->accesses_in_loop[0].stmt; + if ((kind == sm_war + && ref_maybe_used_by_stmt_p (stmt, &ref->mem, + kind != sm_waw)) + || stmt_may_clobber_ref_p_1 (stmt, &ref->mem, + kind != sm_waw)) + { + indep_p = false; + break; + } + } + else if (!refs_independent_p (ref, aref, kind != sm_waw)) { indep_p = false; break; @@ -2858,6 +2899,10 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref) if (!MEM_ANALYZABLE (ref)) return false; + /* Can't hoist/sink aggregate copies. */ + if (ref->mem.ref == error_mark_node) + return false; + /* It should be movable. */ if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref)) || TREE_THIS_VOLATILE (ref->mem.ref) -- cgit v1.1 From 00dcc88a0ed7bd148ea86d900b6c93574a2e1f26 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 11:14:19 -0600 Subject: Adjust by-value function vec arguments to by-reference. gcc/c-family/ChangeLog: * c-common.c (c_build_shufflevector): Adjust by-value argument to by-const-reference. * c-common.h (c_build_shufflevector): Same. gcc/c/ChangeLog: * c-tree.h (c_build_function_call_vec): Adjust by-value argument to by-const-reference. * c-typeck.c (c_build_function_call_vec): Same. gcc/ChangeLog: * cfgloop.h (single_likely_exit): Adjust by-value argument to by-const-reference. * cfgloopanal.c (single_likely_exit): Same. * cgraph.h (struct cgraph_node): Same. * cgraphclones.c (cgraph_node::create_virtual_clone): Same. * genautomata.c (merge_states): Same. * genextract.c (VEC_char_to_string): Same. * genmatch.c (dt_node::gen_kids_1): Same. (walk_captures): Adjust by-value argument to by-reference. * gimple-ssa-store-merging.c (check_no_overlap): Adjust by-value argument to by-const-reference. * gimple.c (gimple_build_call_vec): Same. 
(gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * gimple.h (gimple_build_call_vec): Adjust by-value argument to by-const-reference. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * haifa-sched.c (calc_priorities): Adjust by-value argument to by-const-reference. (sched_init_luids): Same. (haifa_init_h_i_d): Same. * ipa-cp.c (ipa_get_indirect_edge_target_1): Same. (adjust_callers_for_value_intersection): Adjust by-value argument to by-reference. (find_more_scalar_values_for_callers_subset): Adjust by-value argument to by-const-reference. (find_more_contexts_for_caller_subset): Same. (find_aggregate_values_for_callers_subset): Same. (copy_useful_known_contexts): Same. * ipa-fnsummary.c (remap_edge_summaries): Same. (remap_freqcounting_predicate): Same. * ipa-inline.c (add_new_edges_to_heap): Adjust by-value argument to by-reference. * ipa-predicate.c (predicate::remap_after_inlining): Adjust by-value argument to by-const-reference. * ipa-predicate.h (predicate::remap_after_inlining): Same. * ipa-prop.c (ipa_find_agg_cst_for_param): Same. * ipa-prop.h (ipa_find_agg_cst_for_param): Same. * ira-build.c (ira_loop_tree_body_rev_postorder): Same. * read-rtl.c (add_overload_instance): Same. * rtl.h (native_decode_rtx): Same. (native_decode_vector_rtx): Same. * sched-int.h (sched_init_luids): Same. (haifa_init_h_i_d): Same. * simplify-rtx.c (native_decode_vector_rtx): Same. (native_decode_rtx): Same. * tree-call-cdce.c (gen_shrink_wrap_conditions): Same. (shrink_wrap_one_built_in_call_with_conds): Same. (shrink_wrap_conditional_dead_built_in_calls): Same. * tree-data-ref.c (create_runtime_alias_checks): Same. (compute_all_dependences): Same. * tree-data-ref.h (compute_all_dependences): Same. (create_runtime_alias_checks): Same. (index_in_loop_nest): Same. * tree-if-conv.c (mask_exists): Same. * tree-loop-distribution.c (class loop_distribution): Same. (loop_distribution::create_rdg_vertices): Same. (dump_rdg_partitions): Same. (debug_rdg_partitions): Same. (partition_contains_all_rw): Same. (loop_distribution::distribute_loop): Same. * tree-parloops.c (oacc_entry_exit_ok_1): Same. (oacc_entry_exit_single_gang): Same. * tree-ssa-loop-im.c (hoist_memory_references): Same. (loop_suitable_for_sm): Same. * tree-ssa-loop-niter.c (bound_index): Same. * tree-ssa-reassoc.c (update_ops): Same. (swap_ops_for_binary_stmt): Same. (rewrite_expr_tree): Same. (rewrite_expr_tree_parallel): Same. * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Same. * tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Same. * tree-ssa-structalias.c (process_all_all_constraints): Same. (make_constraints_to): Same. (handle_lhs_call): Same. (find_func_aliases_for_builtin_call): Same. (sort_fieldstack): Same. (check_for_overlaps): Same. * tree-vect-loop-manip.c (vect_create_cond_for_align_checks): Same. (vect_create_cond_for_unequal_addrs): Same. (vect_create_cond_for_lower_bounds): Same. (vect_create_cond_for_alias_checks): Same. * tree-vect-slp-patterns.c (vect_validate_multiplication): Same. * tree-vect-slp.c (vect_analyze_slp_instance): Same. (vect_make_slp_decision): Same. (vect_slp_bbs): Same. (duplicate_and_interleave): Same. (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. * tree-vectorizer.h (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. 
(duplicate_and_interleave): Same. * tree.c (build_vector_from_ctor): Same. (build_vector): Same. (check_vector_cst): Same. (check_vector_cst_duplicate): Same. (check_vector_cst_fill): Same. (check_vector_cst_stepped): Same. * tree.h (build_vector_from_ctor): Same. --- gcc/tree-ssa-loop-im.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 81b4ec2..dfb3984 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2557,7 +2557,7 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, static void hoist_memory_references (class loop *loop, bitmap mem_refs, - vec exits) + const vec &exits) { im_mem_ref *ref; unsigned i; @@ -2970,7 +2970,7 @@ find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm) static bool loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED, - vec exits) + const vec &exits) { unsigned i; edge ex; -- cgit v1.1 From e41ba804ba5f5ca433e09238d561b1b4c8b10985 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 29 Jul 2021 22:26:25 -0500 Subject: Use range-based for loops for traversing loops This patch follows Martin's suggestion here[1], to support range based loop for iterating loops, analogously to the patch for vec[2]. For example, use below range-based for loop for (auto loop : loops_list (cfun, 0)) to replace the previous macro FOR_EACH_LOOP FOR_EACH_LOOP (loop, 0) [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573424.html [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html gcc/ChangeLog: * cfgloop.h (as_const): New function. (class loop_iterator): Rename to ... (class loops_list): ... this. (loop_iterator::next): Rename to ... (loops_list::Iter::fill_curr_loop): ... this and adjust. (loop_iterator::loop_iterator): Rename to ... (loops_list::loops_list): ... this and adjust. (loops_list::Iter): New class. (loops_list::iterator): New type. (loops_list::const_iterator): New type. (loops_list::begin): New function. (loops_list::end): Likewise. (loops_list::begin const): Likewise. (loops_list::end const): Likewise. (FOR_EACH_LOOP): Remove. (FOR_EACH_LOOP_FN): Remove. * cfgloop.c (flow_loops_dump): Adjust FOR_EACH_LOOP* with range-based for loop with loops_list instance. (sort_sibling_loops): Likewise. (disambiguate_loops_with_multiple_latches): Likewise. (verify_loop_structure): Likewise. * cfgloopmanip.c (create_preheaders): Likewise. (force_single_succ_latches): Likewise. * config/aarch64/falkor-tag-collision-avoidance.c (execute_tag_collision_avoidance): Likewise. * config/mn10300/mn10300.c (mn10300_scan_for_setlb_lcc): Likewise. * config/s390/s390.c (s390_adjust_loops): Likewise. * doc/loop.texi: Likewise. * gimple-loop-interchange.cc (pass_linterchange::execute): Likewise. * gimple-loop-jam.c (tree_loop_unroll_and_jam): Likewise. * gimple-loop-versioning.cc (loop_versioning::analyze_blocks): Likewise. (loop_versioning::make_versioning_decisions): Likewise. * gimple-ssa-split-paths.c (split_paths): Likewise. * graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl): Likewise. * graphite.c (canonicalize_loop_form): Likewise. (graphite_transform_loops): Likewise. * ipa-fnsummary.c (analyze_function_body): Likewise. * ipa-pure-const.c (analyze_function): Likewise. * loop-doloop.c (doloop_optimize_loops): Likewise. * loop-init.c (loop_optimizer_finalize): Likewise. (fix_loop_structure): Likewise. * loop-invariant.c (calculate_loop_reg_pressure): Likewise. (move_loop_invariants): Likewise. 
* loop-unroll.c (decide_unrolling): Likewise. (unroll_loops): Likewise. * modulo-sched.c (sms_schedule): Likewise. * predict.c (predict_loops): Likewise. (pass_profile::execute): Likewise. * profile.c (branch_prob): Likewise. * sel-sched-ir.c (sel_finish_pipelining): Likewise. (sel_find_rgns): Likewise. * tree-cfg.c (replace_loop_annotate): Likewise. (replace_uses_by): Likewise. (move_sese_region_to_fn): Likewise. * tree-if-conv.c (pass_if_conversion::execute): Likewise. * tree-loop-distribution.c (loop_distribution::execute): Likewise. * tree-parloops.c (parallelize_loops): Likewise. * tree-predcom.c (tree_predictive_commoning): Likewise. * tree-scalar-evolution.c (scev_initialize): Likewise. (scev_reset): Likewise. * tree-ssa-dce.c (find_obviously_necessary_stmts): Likewise. * tree-ssa-live.c (remove_unused_locals): Likewise. * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. * tree-ssa-loop-im.c (analyze_memory_references): Likewise. (tree_ssa_lim_initialize): Likewise. * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Likewise. * tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize): Likewise. * tree-ssa-loop-manip.c (get_loops_exits): Likewise. * tree-ssa-loop-niter.c (estimate_numbers_of_iterations): Likewise. (free_numbers_of_iterations_estimates): Likewise. * tree-ssa-loop-prefetch.c (tree_ssa_prefetch_arrays): Likewise. * tree-ssa-loop-split.c (tree_ssa_split_loops): Likewise. * tree-ssa-loop-unswitch.c (tree_ssa_unswitch_loops): Likewise. * tree-ssa-loop.c (gate_oacc_kernels): Likewise. (pass_scev_cprop::execute): Likewise. * tree-ssa-propagate.c (clean_up_loop_closed_phi): Likewise. * tree-ssa-sccvn.c (do_rpo_vn): Likewise. * tree-ssa-threadupdate.c (jump_thread_path_registry::thread_through_all_blocks): Likewise. * tree-vectorizer.c (vectorize_loops): Likewise. * tree-vrp.c (vrp_asserts::find_assert_locations): Likewise. --- gcc/tree-ssa-loop-im.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index dfb3984..d9f75d5 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -1662,7 +1662,7 @@ analyze_memory_references (bool store_motion) { gimple_stmt_iterator bsi; basic_block bb, *bbs; - class loop *loop, *outer; + class loop *outer; unsigned i, n; /* Collect all basic-blocks in loops and sort them after their @@ -1706,7 +1706,7 @@ analyze_memory_references (bool store_motion) /* Propagate the information about accessed memory references up the loop hierarchy. */ - FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) { /* Finalize the overall touched references (including subloops). */ bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num], @@ -3133,7 +3133,6 @@ fill_always_executed_in (void) static void tree_ssa_lim_initialize (bool store_motion) { - class loop *loop; unsigned i; bitmap_obstack_initialize (&lim_bitmap_obstack); @@ -3177,7 +3176,7 @@ tree_ssa_lim_initialize (bool store_motion) its postorder index. 
*/ i = 0; bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun)); - FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) bb_loop_postorder[loop->num] = i++; } -- cgit v1.1 From f482bf2af86990329b4df660f8c1eb9e094de9f9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 1 Sep 2021 09:51:45 +0200 Subject: tree-optimization/102155 - fix LIM fill_always_executed_in CFG walk This fixes the CFG walk order of fill_always_executed_in to use RPO oder rather than the dominator based order computed by get_loop_body_in_dom_order. That fixes correctness issues with unordered dominator children. The RPO order computed by rev_post_order_and_mark_dfs_back_seme in its for-iteration mode is a good match for the algorithm. 2021-09-01 Richard Biener PR tree-optimization/102155 * tree-ssa-loop-im.c (fill_always_executed_in_1): Iterate over a part of the RPO array and do not recurse here. Dump blocks marked as always executed. (fill_always_executed_in): Walk over the RPO array and process loops whose header we run into. (loop_invariant_motion_in_fun): Compute the first RPO using rev_post_order_and_mark_dfs_back_seme in iteration order and pass that to fill_always_executed_in. --- gcc/tree-ssa-loop-im.c | 136 ++++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 63 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index d9f75d5..f3706dc 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3025,77 +3025,74 @@ do_store_motion (void) /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. for each such basic block bb records the outermost loop for that execution of its header implies execution of bb. CONTAINS_CALL is the bitmap of - blocks that contain a nonpure call. */ + blocks that contain a nonpure call. The blocks of LOOP start at index + START of the RPO array of size N. */ static void -fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) +fill_always_executed_in_1 (function *fun, class loop *loop, + int *rpo, int start, int n, sbitmap contains_call) { - basic_block bb = NULL, *bbs, last = NULL; - unsigned i; - edge e; + basic_block last = NULL; class loop *inn_loop = loop; - if (ALWAYS_EXECUTED_IN (loop->header) == NULL) + for (int i = start; i < n; i++) { - bbs = get_loop_body_in_dom_order (loop); - - for (i = 0; i < loop->num_nodes; i++) - { - edge_iterator ei; - bb = bbs[i]; - - if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) - last = bb; + basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]); + /* Stop when we iterated over all blocks in this loop. */ + if (!flow_bb_inside_loop_p (loop, bb)) + break; - if (bitmap_bit_p (contains_call, bb->index)) - break; + if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + last = bb; - FOR_EACH_EDGE (e, ei, bb->succs) - { - /* If there is an exit from this BB. */ - if (!flow_bb_inside_loop_p (loop, e->dest)) - break; - /* Or we enter a possibly non-finite loop. */ - if (flow_loop_nested_p (bb->loop_father, - e->dest->loop_father) - && ! finite_loop_p (e->dest->loop_father)) - break; - } - if (e) - break; + if (bitmap_bit_p (contains_call, bb->index)) + break; - /* A loop might be infinite (TODO use simple loop analysis - to disprove this if possible). */ - if (bb->flags & BB_IRREDUCIBLE_LOOP) + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, bb->succs) + { + /* If there is an exit from this BB. 
*/ + if (!flow_bb_inside_loop_p (loop, e->dest)) break; - - if (!flow_bb_inside_loop_p (inn_loop, bb)) + /* Or we enter a possibly non-finite loop. */ + if (flow_loop_nested_p (bb->loop_father, + e->dest->loop_father) + && ! finite_loop_p (e->dest->loop_father)) break; + } + if (e) + break; - if (bb->loop_father->header == bb) - { - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) - break; + /* A loop might be infinite (TODO use simple loop analysis + to disprove this if possible). */ + if (bb->flags & BB_IRREDUCIBLE_LOOP) + break; - /* In a loop that is always entered we may proceed anyway. - But record that we entered it and stop once we leave it. */ - inn_loop = bb->loop_father; - } - } + if (!flow_bb_inside_loop_p (inn_loop, bb)) + break; - while (1) + if (bb->loop_father->header == bb) { - SET_ALWAYS_EXECUTED_IN (last, loop); - if (last == loop->header) + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) break; - last = get_immediate_dominator (CDI_DOMINATORS, last); - } - free (bbs); + /* In a loop that is always entered we may proceed anyway. + But record that we entered it and stop once we leave it. */ + inn_loop = bb->loop_father; + } } - for (loop = loop->inner; loop; loop = loop->next) - fill_always_executed_in_1 (loop, contains_call); + while (1) + { + SET_ALWAYS_EXECUTED_IN (last, loop); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "bb %d is always executed in loop %d\n", + last->index, loop->num); + if (last == loop->header) + break; + last = get_immediate_dominator (CDI_DOMINATORS, last); + } } /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e. @@ -3103,14 +3100,13 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) of its header implies execution of bb. */ static void -fill_always_executed_in (void) +fill_always_executed_in (function *fun, int *rpo, int n) { basic_block bb; - class loop *loop; - auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); + auto_sbitmap contains_call (last_basic_block_for_fn (fun)); bitmap_clear (contains_call); - FOR_EACH_BB_FN (bb, cfun) + FOR_EACH_BB_FN (bb, fun) { gimple_stmt_iterator gsi; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -3123,8 +3119,18 @@ fill_always_executed_in (void) bitmap_set_bit (contains_call, bb->index); } - for (loop = current_loops->tree_root->inner; loop; loop = loop->next) - fill_always_executed_in_1 (loop, contains_call); + /* The RPO order we iterate over is one that visits all blocks of a CFG + cycle before leaving it. That means we can visit a loop once we + run into its header and we can skip it if it was determined as always + entering when proccessing the containing loop. */ + for (int i = 0; i < n; ++i) + { + basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]); + if (bb->loop_father->header == bb + && !ALWAYS_EXECUTED_IN (bb)) + fill_always_executed_in_1 (fun, bb->loop_father, + rpo, i, n, contains_call); + } } @@ -3227,23 +3233,27 @@ loop_invariant_motion_in_fun (function *fun, bool store_motion) /* Gathers information about memory accesses in the loops. */ analyze_memory_references (store_motion); - /* Fills ALWAYS_EXECUTED_IN information for basic blocks. 
*/ - fill_always_executed_in (); + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS); + auto_bitmap exit_bbs; + bitmap_set_bit (exit_bbs, EXIT_BLOCK); + edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun)); + int n = rev_post_order_and_mark_dfs_back_seme (fun, entry, exit_bbs, true, + rpo, NULL); - int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); - int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); + /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ + fill_always_executed_in (fun, rpo, n); /* For each statement determine the outermost loop in that it is invariant and cost for computing the invariant. */ for (int i = 0; i < n; ++i) compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i])); + free (rpo); /* Execute store motion. Force the necessary invariants to be moved out of the loops as well. */ if (store_motion) do_store_motion (); - free (rpo); rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); -- cgit v1.1 From 1e6267b335262eb6244c86a7102f00b26e57af4d Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 2 Sep 2021 09:58:39 +0200 Subject: Revert "tree-optimization/102155 - fix LIM fill_always_executed_in CFG walk" This reverts commit f482bf2af86990329b4df660f8c1eb9e094de9f9. --- gcc/tree-ssa-loop-im.c | 136 +++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 73 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index f3706dc..d9f75d5 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3025,74 +3025,77 @@ do_store_motion (void) /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. for each such basic block bb records the outermost loop for that execution of its header implies execution of bb. CONTAINS_CALL is the bitmap of - blocks that contain a nonpure call. The blocks of LOOP start at index - START of the RPO array of size N. */ + blocks that contain a nonpure call. */ static void -fill_always_executed_in_1 (function *fun, class loop *loop, - int *rpo, int start, int n, sbitmap contains_call) +fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) { - basic_block last = NULL; + basic_block bb = NULL, *bbs, last = NULL; + unsigned i; + edge e; class loop *inn_loop = loop; - for (int i = start; i < n; i++) + if (ALWAYS_EXECUTED_IN (loop->header) == NULL) { - basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]); - /* Stop when we iterated over all blocks in this loop. */ - if (!flow_bb_inside_loop_p (loop, bb)) - break; + bbs = get_loop_body_in_dom_order (loop); - if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) - last = bb; + for (i = 0; i < loop->num_nodes; i++) + { + edge_iterator ei; + bb = bbs[i]; - if (bitmap_bit_p (contains_call, bb->index)) - break; + if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + last = bb; - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, bb->succs) - { - /* If there is an exit from this BB. */ - if (!flow_bb_inside_loop_p (loop, e->dest)) + if (bitmap_bit_p (contains_call, bb->index)) break; - /* Or we enter a possibly non-finite loop. */ - if (flow_loop_nested_p (bb->loop_father, - e->dest->loop_father) - && ! finite_loop_p (e->dest->loop_father)) + + FOR_EACH_EDGE (e, ei, bb->succs) + { + /* If there is an exit from this BB. */ + if (!flow_bb_inside_loop_p (loop, e->dest)) + break; + /* Or we enter a possibly non-finite loop. 
*/ + if (flow_loop_nested_p (bb->loop_father, + e->dest->loop_father) + && ! finite_loop_p (e->dest->loop_father)) + break; + } + if (e) break; - } - if (e) - break; - /* A loop might be infinite (TODO use simple loop analysis - to disprove this if possible). */ - if (bb->flags & BB_IRREDUCIBLE_LOOP) - break; + /* A loop might be infinite (TODO use simple loop analysis + to disprove this if possible). */ + if (bb->flags & BB_IRREDUCIBLE_LOOP) + break; - if (!flow_bb_inside_loop_p (inn_loop, bb)) - break; + if (!flow_bb_inside_loop_p (inn_loop, bb)) + break; + + if (bb->loop_father->header == bb) + { + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + break; + + /* In a loop that is always entered we may proceed anyway. + But record that we entered it and stop once we leave it. */ + inn_loop = bb->loop_father; + } + } - if (bb->loop_father->header == bb) + while (1) { - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + SET_ALWAYS_EXECUTED_IN (last, loop); + if (last == loop->header) break; - - /* In a loop that is always entered we may proceed anyway. - But record that we entered it and stop once we leave it. */ - inn_loop = bb->loop_father; + last = get_immediate_dominator (CDI_DOMINATORS, last); } - } - while (1) - { - SET_ALWAYS_EXECUTED_IN (last, loop); - if (dump_enabled_p ()) - dump_printf (MSG_NOTE, "bb %d is always executed in loop %d\n", - last->index, loop->num); - if (last == loop->header) - break; - last = get_immediate_dominator (CDI_DOMINATORS, last); + free (bbs); } + + for (loop = loop->inner; loop; loop = loop->next) + fill_always_executed_in_1 (loop, contains_call); } /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e. @@ -3100,13 +3103,14 @@ fill_always_executed_in_1 (function *fun, class loop *loop, of its header implies execution of bb. */ static void -fill_always_executed_in (function *fun, int *rpo, int n) +fill_always_executed_in (void) { basic_block bb; + class loop *loop; - auto_sbitmap contains_call (last_basic_block_for_fn (fun)); + auto_sbitmap contains_call (last_basic_block_for_fn (cfun)); bitmap_clear (contains_call); - FOR_EACH_BB_FN (bb, fun) + FOR_EACH_BB_FN (bb, cfun) { gimple_stmt_iterator gsi; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -3119,18 +3123,8 @@ fill_always_executed_in (function *fun, int *rpo, int n) bitmap_set_bit (contains_call, bb->index); } - /* The RPO order we iterate over is one that visits all blocks of a CFG - cycle before leaving it. That means we can visit a loop once we - run into its header and we can skip it if it was determined as always - entering when proccessing the containing loop. */ - for (int i = 0; i < n; ++i) - { - basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]); - if (bb->loop_father->header == bb - && !ALWAYS_EXECUTED_IN (bb)) - fill_always_executed_in_1 (fun, bb->loop_father, - rpo, i, n, contains_call); - } + for (loop = current_loops->tree_root->inner; loop; loop = loop->next) + fill_always_executed_in_1 (loop, contains_call); } @@ -3233,27 +3227,23 @@ loop_invariant_motion_in_fun (function *fun, bool store_motion) /* Gathers information about memory accesses in the loops. 
*/ analyze_memory_references (store_motion); - int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS); - auto_bitmap exit_bbs; - bitmap_set_bit (exit_bbs, EXIT_BLOCK); - edge entry = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun)); - int n = rev_post_order_and_mark_dfs_back_seme (fun, entry, exit_bbs, true, - rpo, NULL); - /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ - fill_always_executed_in (fun, rpo, n); + fill_always_executed_in (); + + int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); + int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); /* For each statement determine the outermost loop in that it is invariant and cost for computing the invariant. */ for (int i = 0; i < n; ++i) compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i])); - free (rpo); /* Execute store motion. Force the necessary invariants to be moved out of the loops as well. */ if (store_motion) do_store_motion (); + free (rpo); rpo = XNEWVEC (int, last_basic_block_for_fn (fun)); n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false); -- cgit v1.1 From 483e400870601f650c80f867ec781cd5f83507d6 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 2 Sep 2021 10:47:35 +0200 Subject: Refine fix for PR78185, improve LIM for code after inner loops This refines the fix for PR78185 after understanding that the code regarding to the comment 'In a loop that is always entered we may proceed anyway. But record that we entered it and stop once we leave it.' was supposed to protect us from leaving possibly infinite inner loops. The simpler fix of moving the misplaced stopping code can then be refined to continue processing when the exited inner loop is finite, improving invariant motion for cases like in the added testcase. 2021-09-02 Richard Biener * tree-ssa-loop-im.c (fill_always_executed_in_1): Refine fix for PR78185 and continue processing when leaving finite inner loops. * gcc.dg/tree-ssa/ssa-lim-16.c: New testcase. --- gcc/tree-ssa-loop-im.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index d9f75d5..01f3fc2 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3044,23 +3044,27 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) edge_iterator ei; bb = bbs[i]; + if (!flow_bb_inside_loop_p (inn_loop, bb)) + { + /* When we are leaving a possibly infinite inner loop + we have to stop processing. */ + if (!finite_loop_p (inn_loop)) + break; + /* If the loop was finite we can continue with processing + the loop we exited to. */ + inn_loop = bb->loop_father; + } + if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) last = bb; if (bitmap_bit_p (contains_call, bb->index)) break; + /* If LOOP exits from this BB stop processing. */ FOR_EACH_EDGE (e, ei, bb->succs) - { - /* If there is an exit from this BB. */ - if (!flow_bb_inside_loop_p (loop, e->dest)) - break; - /* Or we enter a possibly non-finite loop. */ - if (flow_loop_nested_p (bb->loop_father, - e->dest->loop_father) - && ! 
finite_loop_p (e->dest->loop_father)) - break; - } + if (!flow_bb_inside_loop_p (loop, e->dest)) + break; if (e) break; @@ -3069,22 +3073,23 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) if (bb->flags & BB_IRREDUCIBLE_LOOP) break; - if (!flow_bb_inside_loop_p (inn_loop, bb)) - break; - if (bb->loop_father->header == bb) { if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) break; /* In a loop that is always entered we may proceed anyway. - But record that we entered it and stop once we leave it. */ + But record that we entered it and stop once we leave it + since it might not be finite. */ inn_loop = bb->loop_father; } } while (1) { + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n", + last->index, loop->num); SET_ALWAYS_EXECUTED_IN (last, loop); if (last == loop->header) break; -- cgit v1.1 From 6e27bc2b885207d51500b2c42f949ca5073dbe72 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 9 Sep 2021 10:52:12 +0200 Subject: Avoid full DOM walk in LIM fill_always_executed_in This avoids a full DOM walk via get_loop_body_in_dom_order in the loop body walk of fill_always_executed_in which is often terminating the walk of a loop body early by integrating the DOM walk of get_loop_body_in_dom_order with the actual processing done by fill_always_executed_in. This trades the fully populated loop body array with a worklist allocation of the same size and thus should be a strict improvement over the recursive approach of get_loop_body_in_dom_order. 2021-09-09 Richard Biener * tree-ssa-loop-im.c (fill_always_executed_in_1): Integrate DOM walk from get_loop_body_in_dom_order using a worklist approach. --- gcc/tree-ssa-loop-im.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 01f3fc2..5d68454 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3030,19 +3030,19 @@ do_store_motion (void) static void fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) { - basic_block bb = NULL, *bbs, last = NULL; - unsigned i; + basic_block bb = NULL, last = NULL; edge e; class loop *inn_loop = loop; if (ALWAYS_EXECUTED_IN (loop->header) == NULL) { - bbs = get_loop_body_in_dom_order (loop); - - for (i = 0; i < loop->num_nodes; i++) + auto_vec worklist; + worklist.reserve_exact (loop->num_nodes); + worklist.quick_push (loop->header); + do { edge_iterator ei; - bb = bbs[i]; + bb = worklist.pop (); if (!flow_bb_inside_loop_p (inn_loop, bb)) { @@ -3083,7 +3083,32 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) since it might not be finite. */ inn_loop = bb->loop_father; } + + /* Walk the body of LOOP sorted by dominance relation. Additionally, + if a basic block S dominates the latch, then only blocks dominated + by S are after it. + This is get_loop_body_in_dom_order using a worklist algorithm and + stopping once we are no longer interested in visiting further + blocks. 
*/ + unsigned old_len = worklist.length (); + unsigned postpone = 0; + for (basic_block son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + if (!flow_bb_inside_loop_p (loop, son)) + continue; + if (dominated_by_p (CDI_DOMINATORS, loop->latch, son)) + postpone = worklist.length (); + worklist.quick_push (son); + } + if (postpone) + /* Postponing the block that dominates the latch means + processing it last and thus putting it earliest in the + worklist. */ + std::swap (worklist[old_len], worklist[postpone]); } + while (!worklist.is_empty ()); while (1) { @@ -3095,8 +3120,6 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) break; last = get_immediate_dominator (CDI_DOMINATORS, last); } - - free (bbs); } for (loop = loop->inner; loop; loop = loop->next) -- cgit v1.1 From 013cfc648405a8a118d07436f103e4d70224fe00 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 9 Sep 2021 11:50:20 +0200 Subject: Improve LIM fill_always_executed_in computation Currently the DOM walk over a loop body does not walk into not always executed subloops to avoid scalability issues since doing so makes the walk quadratic in the loop depth. It turns out this is not an issue in practice and even with a loop depth of 1800 this function is way off the radar. So the following patch removes the limitation, replacing it with a comment. 2021-09-09 Richard Biener * tree-ssa-loop-im.c (fill_always_executed_in_1): Walk into all subloops. * gcc.dg/tree-ssa/ssa-lim-17.c: New testcase. --- gcc/tree-ssa-loop-im.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'gcc/tree-ssa-loop-im.c') diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 5d68454..4b187c2 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -3074,15 +3074,13 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call) break; if (bb->loop_father->header == bb) - { - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) - break; - - /* In a loop that is always entered we may proceed anyway. - But record that we entered it and stop once we leave it - since it might not be finite. */ - inn_loop = bb->loop_father; - } + /* Record that we enter into a subloop since it might not + be finite. */ + /* ??? Entering into a not always executed subloop makes + fill_always_executed_in quadratic in loop depth since + we walk those loops N times. This is not a problem + in practice though, see PR102253 for a worst-case testcase. */ + inn_loop = bb->loop_father; /* Walk the body of LOOP sorted by dominance relation. Additionally, if a basic block S dominates the latch, then only blocks dominated -- cgit v1.1
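As a companion to the last two patches, here is a minimal standalone sketch of the worklist-based dominance-order walk with early stopping that fill_always_executed_in_1 switches to. The types below (struct block, struct loop_info) and the stop_here flag are simplified stand-ins for GCC's basic_block, class loop and the various stop conditions (calls, loop exits, irreducible regions); they are not the real interfaces, only an illustration of the traversal order.

#include <cstdio>
#include <utility>
#include <vector>

struct loop_info;

struct block
{
  int index;
  loop_info *loop;                    /* innermost containing loop */
  std::vector<block *> dom_children;  /* children in the dominator tree */
  bool dominates_latch;               /* block dominates the loop latch */
  bool stop_here;                     /* e.g. contains a call or exits the loop */
};

struct loop_info
{
  block *header;
  unsigned num_nodes;
  bool contains (const block *bb) const { return bb->loop == this; }
};

/* Visit the blocks of LOOP in dominance order using an explicit worklist
   (a stack), stopping at the first block that is not always executed.
   The dominator child that dominates the latch is deliberately processed
   last, mirroring the shape of fill_always_executed_in_1 after the
   worklist rewrite.  */
static void
walk_always_executed (loop_info *loop)
{
  std::vector<block *> worklist;
  worklist.reserve (loop->num_nodes);
  worklist.push_back (loop->header);
  do
    {
      block *bb = worklist.back ();
      worklist.pop_back ();

      std::printf ("visiting bb %d\n", bb->index);
      if (bb->stop_here)
        break;

      size_t old_len = worklist.size ();
      size_t postpone = old_len;
      for (block *son : bb->dom_children)
        {
          if (!loop->contains (son))
            continue;
          if (son->dominates_latch)
            postpone = worklist.size ();
          worklist.push_back (son);
        }
      /* Move the latch-dominating child to the bottom of the group we
         just pushed so that the stack pops it last.  */
      if (postpone != old_len)
        std::swap (worklist[old_len], worklist[postpone]);
    }
  while (!worklist.empty ());
}

int
main ()
{
  /* Tiny made-up loop: header b0 dominates b1 and b2; b2 dominates the
     latch and is listed last among b0's children, so the swap above makes
     sure b1 is still visited before b2.  */
  loop_info loop;
  block b0 = { 0, &loop, {}, true, false };
  block b1 = { 1, &loop, {}, false, false };
  block b2 = { 2, &loop, {}, true, false };
  b0.dom_children = { &b1, &b2 };
  loop.header = &b0;
  loop.num_nodes = 3;
  walk_always_executed (&loop);   /* prints bb 0, 1, 2 */
  return 0;
}

The payoff over get_loop_body_in_dom_order is that nothing beyond the already-visited blocks is ever materialized, so when the walk stops early (at a call, a loop exit, or an irreducible region) the rest of the loop body is never touched.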