author | Ian Lance Taylor <iant@golang.org> | 2021-09-13 10:37:49 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2021-09-13 10:37:49 -0700 |
commit | e252b51ccde010cbd2a146485d8045103cd99533 (patch) | |
tree | e060f101cdc32bf5e520de8e5275db9d4236b74c /gcc/tree-ssa-loop-im.c | |
parent | f10c7c4596dda99d2ee872c995ae4aeda65adbdf (diff) | |
parent | 104c05c5284b7822d770ee51a7d91946c7e56d50 (diff) | |
download | gcc-e252b51ccde010cbd2a146485d8045103cd99533.zip gcc-e252b51ccde010cbd2a146485d8045103cd99533.tar.gz gcc-e252b51ccde010cbd2a146485d8045103cd99533.tar.bz2 |
Merge from trunk revision 104c05c5284b7822d770ee51a7d91946c7e56d50.
Diffstat (limited to 'gcc/tree-ssa-loop-im.c')
-rw-r--r-- | gcc/tree-ssa-loop-im.c | 240 |
1 file changed, 187 insertions, 53 deletions
```diff
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 8034cf6..4b187c2 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -122,7 +122,9 @@ public:
   hashval_t hash;		/* Its hash value.  */
 
   /* The memory access itself and associated caching of alias-oracle
-     query meta-data.  */
+     query meta-data.  We are using mem.ref == error_mark_node for the
+     case the reference is represented by its single access stmt
+     in accesses_in_loop[0].  */
   ao_ref mem;
 
   bitmap stored;		/* The set of loops in that this memory location
@@ -130,8 +132,7 @@ public:
   bitmap loaded;		/* The set of loops in that this memory location
 				   is loaded from.  */
   vec<mem_ref_loc>		accesses_in_loop;
-				/* The locations of the accesses.  Vector
-				   indexed by the loop number.  */
+				/* The locations of the accesses.  */
 
   /* The following set is computed on demand.  */
   bitmap_head dep_loop;		/* The set of loops in that the memory
@@ -194,8 +195,14 @@ mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
 {
   if (obj2->max_size_known_p ())
     return (mem1->ref_decomposed
-	    && operand_equal_p (mem1->mem.base, obj2->base, 0)
-	    && known_eq (mem1->mem.offset, obj2->offset)
+	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
+		 && TREE_CODE (obj2->base) == MEM_REF
+		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
+				     TREE_OPERAND (obj2->base, 0), 0)
+		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
+			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
+		|| (operand_equal_p (mem1->mem.base, obj2->base, 0)
+		    && known_eq (mem1->mem.offset, obj2->offset)))
 	    && known_eq (mem1->mem.size, obj2->size)
 	    && known_eq (mem1->mem.max_size, obj2->max_size)
 	    && mem1->mem.volatile_p == obj2->volatile_p
@@ -1459,7 +1466,22 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
     return;
 
   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
-  if (!mem)
+  if (!mem && is_gimple_assign (stmt))
+    {
+      /* For aggregate copies record distinct references but use them
+	 only for disambiguation purposes.  */
+      id = memory_accesses.refs_list.length ();
+      ref = mem_ref_alloc (NULL, 0, id);
+      memory_accesses.refs_list.safe_push (ref);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
+	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	}
+      record_mem_ref_loc (ref, stmt, mem);
+      is_stored = gimple_vdef (stmt);
+    }
+  else if (!mem)
     {
       /* We use the shared mem_ref for all unanalyzable refs.  */
       id = UNANALYZABLE_MEM_ID;
@@ -1500,8 +1522,21 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
 	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
 	{
 	  ref_decomposed = true;
-	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
-	  hash = iterative_hash_host_wide_int (offset, hash);
+	  tree base = ao_ref_base (&aor);
+	  poly_int64 moffset;
+	  HOST_WIDE_INT mcoffset;
+	  if (TREE_CODE (base) == MEM_REF
+	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (&moffset)
+	      && moffset.is_constant (&mcoffset))
+	    {
+	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), 0);
+	      hash = iterative_hash_host_wide_int (mcoffset, hash);
+	    }
+	  else
+	    {
+	      hash = iterative_hash_expr (base, 0);
+	      hash = iterative_hash_host_wide_int (offset, hash);
+	    }
 	  hash = iterative_hash_host_wide_int (size, hash);
 	}
       else
@@ -1576,7 +1611,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
       mark_ref_stored (ref, loop);
     }
   /* A not simple memory op is also a read when it is a write.  */
-  if (!is_stored || id == UNANALYZABLE_MEM_ID)
+  if (!is_stored || id == UNANALYZABLE_MEM_ID
+      || ref->mem.ref == error_mark_node)
     {
       bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num],
		      ref->id);
       mark_ref_loaded (ref, loop);
     }
@@ -1626,7 +1662,7 @@ analyze_memory_references (bool store_motion)
 {
   gimple_stmt_iterator bsi;
   basic_block bb, *bbs;
-  class loop *loop, *outer;
+  class loop *outer;
   unsigned i, n;
 
   /* Collect all basic-blocks in loops and sort them after their
@@ -1670,7 +1706,7 @@ analyze_memory_references (bool store_motion)
 
   /* Propagate the information about accessed memory references up
      the loop hierarchy.  */
-  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
     {
       /* Finalize the overall touched references (including subloops).  */
       bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
@@ -1695,6 +1731,9 @@ mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
 		      hash_map<tree, name_expansion *> **ttae_cache,
 		      bool tbaa_p)
 {
+  gcc_checking_assert (mem1->mem.ref != error_mark_node
+		       && mem2->mem.ref != error_mark_node);
+
   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
      object and their offset differ in such a way that the locations cannot
      overlap, then they cannot alias.  */
@@ -2143,7 +2182,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
       /* If not emitting a load mark the uninitialized state on the
	 loop entry as not to be warned for.  */
       tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
-      TREE_NO_WARNING (uninit) = 1;
+      suppress_warning (uninit, OPT_Wuninitialized);
       load = gimple_build_assign (aux->tmp_var, uninit);
     }
   lim_data = init_lim_data (load);
@@ -2169,6 +2208,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
 enum sm_kind { sm_ord, sm_unord, sm_other };
 struct seq_entry
 {
+  seq_entry () {}
   seq_entry (unsigned f, sm_kind k, tree fr = NULL)
     : first (f), second (k), from (fr) {}
   unsigned first;
@@ -2339,7 +2379,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
 	  tree vuse = gimple_phi_arg_def (phi, i);
 	  edge e = gimple_phi_arg_edge (phi, i);
 	  auto_vec<seq_entry> edge_seq;
-	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
+	  bitmap_and_compl (tem_refs_not_in_seq,
+			    refs_not_in_seq, refs_not_supported);
+	  /* If we've marked all refs we search for as unsupported
+	     we can stop processing and use the sequence as before
+	     the PHI.  */
+	  if (bitmap_empty_p (tem_refs_not_in_seq))
+	    return 1;
 	  eret = sm_seq_valid_bb (loop, e->src, vuse, edge_seq,
 				  tem_refs_not_in_seq, refs_not_supported,
 				  true, fully_visited);
@@ -2352,6 +2398,8 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
 	  unsigned min_len = MIN(first_edge_seq.length (),
 				 edge_seq.length ());
 	  /* Incrementally merge seqs into first_edge_seq.  */
+	  int first_uneq = -1;
+	  auto_vec<seq_entry, 2> extra_refs;
 	  for (unsigned int i = 0; i < min_len; ++i)
 	    {
 	      /* ??? We can more intelligently merge when we face different
@@ -2367,13 +2415,18 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
 		  bitmap_set_bit (refs_not_supported, edge_seq[i].first);
 		  first_edge_seq[i].second = sm_other;
 		  first_edge_seq[i].from = NULL_TREE;
+		  /* Record the dropped refs for later processing.  */
+		  if (first_uneq == -1)
+		    first_uneq = i;
+		  extra_refs.safe_push (seq_entry (edge_seq[i].first,
+						   sm_other, NULL_TREE));
 		}
 	      /* sm_other prevails.  */
 	      else if (first_edge_seq[i].second != edge_seq[i].second)
 		{
-		  /* This is just an optimization.  */
-		  gcc_assert (bitmap_bit_p (refs_not_supported,
-					    first_edge_seq[i].first));
+		  /* Make sure the ref is marked as not supported.  */
+		  bitmap_set_bit (refs_not_supported,
+				  first_edge_seq[i].first);
 		  first_edge_seq[i].second = sm_other;
 		  first_edge_seq[i].from = NULL_TREE;
 		}
@@ -2399,10 +2452,36 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
 	    }
 	  else if (edge_seq.length () > first_edge_seq.length ())
 	    {
+	      if (first_uneq == -1)
+		first_uneq = first_edge_seq.length ();
 	      for (unsigned i = first_edge_seq.length ();
 		   i < edge_seq.length (); ++i)
-		if (edge_seq[i].second == sm_ord)
-		  bitmap_set_bit (refs_not_supported, edge_seq[i].first);
+		{
+		  if (edge_seq[i].second == sm_ord)
+		    bitmap_set_bit (refs_not_supported, edge_seq[i].first);
+		  extra_refs.safe_push (seq_entry (edge_seq[i].first,
+						   sm_other, NULL_TREE));
+		}
+	    }
+	  /* Put unmerged refs at first_uneq to force dependence checking
+	     on them.  */
+	  if (first_uneq != -1)
+	    {
+	      /* Missing ordered_splice_at.  */
+	      if ((unsigned)first_uneq == first_edge_seq.length ())
+		first_edge_seq.safe_splice (extra_refs);
+	      else
+		{
+		  unsigned fes_length = first_edge_seq.length ();
+		  first_edge_seq.safe_grow (fes_length
+					    + extra_refs.length ());
+		  memmove (&first_edge_seq[first_uneq + extra_refs.length ()],
+			   &first_edge_seq[first_uneq],
+			   (fes_length - first_uneq) * sizeof (seq_entry));
+		  memcpy (&first_edge_seq[first_uneq],
+			  extra_refs.address (),
+			  extra_refs.length () * sizeof (seq_entry));
+		}
 	    }
 	}
       /* Use the sequence from the first edge and push SMs down.  */
@@ -2431,6 +2510,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
       gcc_assert (data);
       if (data->ref == UNANALYZABLE_MEM_ID)
 	return -1;
+      /* Stop at memory references which we can't move.  */
+      else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
+	{
+	  /* Mark refs_not_in_seq as unsupported.  */
+	  bitmap_ior_into (refs_not_supported, refs_not_in_seq);
+	  return 1;
+	}
       /* One of the stores we want to apply SM to and we've not yet seen.  */
       else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
 	{
@@ -2471,7 +2557,7 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
 
 static void
 hoist_memory_references (class loop *loop, bitmap mem_refs,
-			 vec<edge> exits)
+			 const vec<edge> &exits)
 {
   im_mem_ref *ref;
   unsigned i;
@@ -2499,7 +2585,12 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
       vec<seq_entry> seq;
       seq.create (4);
       auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
-      bitmap_copy (refs_not_in_seq, mem_refs);
+      bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
+      if (bitmap_empty_p (refs_not_in_seq))
+	{
+	  seq.release ();
+	  break;
+	}
       auto_bitmap fully_visited;
       int res = sm_seq_valid_bb (loop, e->src, NULL_TREE,
 				 seq, refs_not_in_seq,
@@ -2734,7 +2825,8 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
   else
     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
 
-  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
+  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
+      || ref->mem.ref == error_mark_node)
     indep_p = false;
   else
     {
@@ -2761,7 +2853,20 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
       EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
 	{
 	  im_mem_ref *aref = memory_accesses.refs_list[i];
-	  if (!refs_independent_p (ref, aref, kind != sm_waw))
+	  if (aref->mem.ref == error_mark_node)
+	    {
+	      gimple *stmt = aref->accesses_in_loop[0].stmt;
+	      if ((kind == sm_war
+		   && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
+						kind != sm_waw))
+		  || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
+					       kind != sm_waw))
+		{
+		  indep_p = false;
+		  break;
+		}
+	    }
+	  else if (!refs_independent_p (ref, aref, kind != sm_waw))
 	    {
 	      indep_p = false;
 	      break;
 	    }
@@ -2794,6 +2899,10 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref)
   if (!MEM_ANALYZABLE (ref))
     return false;
 
+  /* Can't hoist/sink aggregate copies.  */
+  if (ref->mem.ref == error_mark_node)
+    return false;
+
   /* It should be movable.  */
   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
       || TREE_THIS_VOLATILE (ref->mem.ref)
@@ -2861,7 +2970,7 @@ find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
 
 static bool
 loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
-		      vec<edge> exits)
+		      const vec<edge> &exits)
 {
   unsigned i;
   edge ex;
@@ -2921,19 +3030,30 @@ do_store_motion (void)
 static void
 fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
 {
-  basic_block bb = NULL, *bbs, last = NULL;
-  unsigned i;
+  basic_block bb = NULL, last = NULL;
   edge e;
   class loop *inn_loop = loop;
 
   if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
     {
-      bbs = get_loop_body_in_dom_order (loop);
-
-      for (i = 0; i < loop->num_nodes; i++)
+      auto_vec<basic_block, 64> worklist;
+      worklist.reserve_exact (loop->num_nodes);
+      worklist.quick_push (loop->header);
+      do
 	{
 	  edge_iterator ei;
-	  bb = bbs[i];
+	  bb = worklist.pop ();
+
+	  if (!flow_bb_inside_loop_p (inn_loop, bb))
+	    {
+	      /* When we are leaving a possibly infinite inner loop
+		 we have to stop processing.  */
+	      if (!finite_loop_p (inn_loop))
+		break;
+	      /* If the loop was finite we can continue with processing
+		 the loop we exited to.  */
+	      inn_loop = bb->loop_father;
+	    }
 
 	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
 	    last = bb;
@@ -2941,17 +3061,10 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
 	  if (bitmap_bit_p (contains_call, bb->index))
 	    break;
 
+	  /* If LOOP exits from this BB stop processing.  */
 	  FOR_EACH_EDGE (e, ei, bb->succs)
-	    {
-	      /* If there is an exit from this BB.  */
-	      if (!flow_bb_inside_loop_p (loop, e->dest))
-		break;
-	      /* Or we enter a possibly non-finite loop.  */
-	      if (flow_loop_nested_p (bb->loop_father,
-				      e->dest->loop_father)
-		  && ! finite_loop_p (e->dest->loop_father))
-		break;
-	    }
+	    if (!flow_bb_inside_loop_p (loop, e->dest))
+	      break;
 	  if (e)
 	    break;
 
@@ -2960,29 +3073,51 @@ fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
 	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
 	    break;
 
-	  if (!flow_bb_inside_loop_p (inn_loop, bb))
-	    break;
-
-	  if (bb->loop_father->header == bb)
+	  /* Record that we enter into a subloop since it might not
+	     be finite.  */
+	  /* ??? Entering into a not always executed subloop makes
+	     fill_always_executed_in quadratic in loop depth since
+	     we walk those loops N times.  This is not a problem
+	     in practice though, see PR102253 for a worst-case testcase.  */
+	  inn_loop = bb->loop_father;
+
+	  /* Walk the body of LOOP sorted by dominance relation.  Additionally,
+	     if a basic block S dominates the latch, then only blocks dominated
+	     by S are after it.
+	     This is get_loop_body_in_dom_order using a worklist algorithm and
+	     stopping once we are no longer interested in visiting further
+	     blocks.  */
+	  unsigned old_len = worklist.length ();
+	  unsigned postpone = 0;
+	  for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
+	       son;
+	       son = next_dom_son (CDI_DOMINATORS, son))
 	    {
-	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
-		break;
-
-	      /* In a loop that is always entered we may proceed anyway.
-		 But record that we entered it and stop once we leave it.  */
-	      inn_loop = bb->loop_father;
+	      if (!flow_bb_inside_loop_p (loop, son))
+		continue;
+	      if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
+		postpone = worklist.length ();
+	      worklist.quick_push (son);
 	    }
+	  if (postpone)
+	    /* Postponing the block that dominates the latch means
+	       processing it last and thus putting it earliest in the
+	       worklist.  */
+	    std::swap (worklist[old_len], worklist[postpone]);
 	}
+      while (!worklist.is_empty ());
 
       while (1)
 	{
+	  if (dump_enabled_p ())
+	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
+			 last->index, loop->num);
 	  SET_ALWAYS_EXECUTED_IN (last, loop);
 	  if (last == loop->header)
 	    break;
 	  last = get_immediate_dominator (CDI_DOMINATORS, last);
 	}
-
-      free (bbs);
     }
 
   for (loop = loop->inner; loop; loop = loop->next)
@@ -3024,7 +3159,6 @@ fill_always_executed_in (void)
 static void
 tree_ssa_lim_initialize (bool store_motion)
 {
-  class loop *loop;
   unsigned i;
 
   bitmap_obstack_initialize (&lim_bitmap_obstack);
@@ -3068,7 +3202,7 @@ tree_ssa_lim_initialize (bool store_motion)
      its postorder index.  */
   i = 0;
   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
-  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
     bb_loop_postorder[loop->num] = i++;
 }
 
@@ -3194,7 +3328,7 @@ pass_lim::execute (function *fun)
   if (number_of_loops (fun) <= 1)
     return 0;
 
-  unsigned int todo = loop_invariant_motion_in_fun (fun, true);
+  unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
 
   if (!in_loop_pipeline)
     loop_optimizer_finalize ();
```