author     Richard Biener <rguenther@suse.de>    2021-11-18 13:40:32 +0100
committer  Richard Biener <rguenther@suse.de>    2021-11-19 09:35:21 +0100
commit     0fc859f5efcb4624a8b4ffdbf34d63972af179a8 (patch)
tree       edca5ac944ef0ed42fce6f17db03cb5f7b0c9558 /gcc/tree-ssa-loop-im.c
parent     09d462146b3107c665265b11ad925c61a91c6efb (diff)
tree-optimization/102436 - restore loop store motion
This restores a case of conditional store motion that we failed to
handle after the rewrite.  We can recognize the special case in which
all stores in a loop happen in a single conditionally executed block,
which ensures the stores are not re-ordered by executing them in
different loop iterations.  Separating out this case avoids
complicating the already complex main path.  A minimal sketch of the
kind of loop this covers follows the ChangeLog below.
2021-11-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/102436
* tree-ssa-loop-im.c (execute_sm_if_changed): Add mode
to just create the if structure and return the then block.
(execute_sm): Add flag to indicate the var will re-use
another flag var.
(hoist_memory_references): Support a single conditional
block with all stores as special case.
* gcc.dg/torture/20211118-1.c: New testcase.
* gcc.dg/tree-ssa/ssa-lim-18.c: Likewise.
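
For orientation, the kind of loop the new special case recognizes looks
roughly like the following.  This is only an illustrative sketch in the
spirit of the new testcases; it is not the content of
gcc.dg/torture/20211118-1.c or gcc.dg/tree-ssa/ssa-lim-18.c, and the
function and variable names are made up.

    int g1, g2;

    void
    foo (int *a, int n)
    {
      for (int i = 0; i < n; ++i)
        if (a[i])            /* the single conditionally executed block ...  */
          {
            g1 = a[i];       /* ... containing every store in the loop */
            g2 = a[i] + 1;
          }
    }

Because g1 and g2 are only ever stored inside the one guarded block, the
stores cannot be re-ordered across iterations and can be materialized
once, in order, on the loop exits.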
Diffstat (limited to 'gcc/tree-ssa-loop-im.c')
-rw-r--r--   gcc/tree-ssa-loop-im.c   162
1 file changed, 153 insertions, 9 deletions
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 8a81aca..682406d 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1911,10 +1911,13 @@ first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
       }
     }
   if (lsm_flag)      <--
-    MEM = lsm;       <--
+    MEM = lsm;       <--  (X)
+
+  In case MEM and TMP_VAR are NULL the function will return the then
+  block so the caller can insert (X) and other related stmts.  */
 
-static void
+static basic_block
 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
                        edge preheader, hash_set <basic_block> *flag_bbs,
                        edge &append_cond_position, edge &last_cond_fallthru)
@@ -2009,10 +2012,13 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
                             NULL_TREE, NULL_TREE);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
-  gsi = gsi_start_bb (then_bb);
   /* Insert actual store.  */
-  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (mem)
+    {
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
 
   edge e1 = single_succ_edge (new_bb);
   edge e2 = make_edge (new_bb, then_bb,
@@ -2060,6 +2066,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
           update_stmt (phi);
         }
     }
+
+  return then_bb;
 }
 
 /* When REF is set on the location, set flag indicating the store.  */
@@ -2117,7 +2125,8 @@ struct sm_aux
 
 static void
 execute_sm (class loop *loop, im_mem_ref *ref,
-            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
+            hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
+            bool use_other_flag_var)
 {
   gassign *load;
   struct fmt_data fmt_data;
@@ -2146,7 +2155,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
           || (! flag_store_data_races && ! always_stored)))
     multi_threaded_model_p = true;
 
-  if (multi_threaded_model_p)
+  if (multi_threaded_model_p && !use_other_flag_var)
     aux->store_flag
       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
   else
@@ -2182,7 +2191,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   lim_data->tgt_loop = loop;
   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
 
-  if (multi_threaded_model_p)
+  if (aux->store_flag)
     {
       load = gimple_build_assign (aux->store_flag, boolean_false_node);
       lim_data = init_lim_data (load);
@@ -2555,6 +2564,140 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   unsigned i;
   bitmap_iterator bi;
 
+  /* There's a special case we can use ordered re-materialization for
+     conditionally excuted stores which is when all stores in the loop
+     happen in the same basic-block.  In that case we know we'll reach
+     all stores and thus can simply process that BB and emit a single
+     conditional block of ordered materializations.  See PR102436.  */
+  basic_block single_store_bb = NULL;
+  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
+                            0, i, bi)
+    {
+      bool fail = false;
+      ref = memory_accesses.refs_list[i];
+      for (auto loc : ref->accesses_in_loop)
+        if (!gimple_vdef (loc.stmt))
+          ;
+        else if (!single_store_bb)
+          {
+            single_store_bb = gimple_bb (loc.stmt);
+            bool conditional = false;
+            for (edge e : exits)
+              if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
+                {
+                  /* Conditional as seen from e.  */
+                  conditional = true;
+                  break;
+                }
+            if (!conditional)
+              {
+                fail = true;
+                break;
+              }
+          }
+        else if (single_store_bb != gimple_bb (loc.stmt))
+          {
+            fail = true;
+            break;
+          }
+      if (fail)
+        {
+          single_store_bb = NULL;
+          break;
+        }
+    }
+  if (single_store_bb)
+    {
+      /* Analyze the single block with stores.  */
+      auto_bitmap fully_visited;
+      auto_bitmap refs_not_supported;
+      auto_bitmap refs_not_in_seq;
+      auto_vec<seq_entry> seq;
+      bitmap_copy (refs_not_in_seq, mem_refs);
+      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
+                                 seq, refs_not_in_seq, refs_not_supported,
+                                 false, fully_visited);
+      if (res != 1)
+        {
+          /* Unhandled refs can still fail this.  */
+          bitmap_clear (mem_refs);
+          return;
+        }
+
+      /* We cannot handle sm_other since we neither remember the
+         stored location nor the value at the point we execute them.  */
+      for (unsigned i = 0; i < seq.length (); ++i)
+        {
+          unsigned new_i;
+          if (seq[i].second == sm_other
+              && seq[i].from != NULL_TREE)
+            seq[i].from = NULL_TREE;
+          else if ((seq[i].second == sm_ord
+                    || (seq[i].second == sm_other
+                        && seq[i].from != NULL_TREE))
+                   && !sm_seq_push_down (seq, i, &new_i))
+            {
+              bitmap_set_bit (refs_not_supported, seq[new_i].first);
+              seq[new_i].second = sm_other;
+              seq[new_i].from = NULL_TREE;
+            }
+        }
+      bitmap_and_compl_into (mem_refs, refs_not_supported);
+      if (bitmap_empty_p (mem_refs))
+        return;
+
+      /* Prune seq.  */
+      while (seq.last ().second == sm_other
+             && seq.last ().from == NULL_TREE)
+        seq.pop ();
+
+      hash_map<im_mem_ref *, sm_aux *> aux_map;
+
+      /* Execute SM but delay the store materialization for ordered
+         sequences on exit.  */
+      bool first_p = true;
+      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
+        {
+          ref = memory_accesses.refs_list[i];
+          execute_sm (loop, ref, aux_map, true, !first_p);
+          first_p = false;
+        }
+
+      /* Get at the single flag variable we eventually produced.  */
+      im_mem_ref *ref
+        = memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
+      sm_aux *aux = *aux_map.get (ref);
+
+      /* Materialize ordered store sequences on exits.  */
+      edge e;
+      FOR_EACH_VEC_ELT (exits, i, e)
+        {
+          edge append_cond_position = NULL;
+          edge last_cond_fallthru = NULL;
+          edge insert_e = e;
+          /* Construct the single flag variable control flow and insert
+             the ordered seq of stores in the then block.  With
+             -fstore-data-races we can do the stores unconditionally.  */
+          if (aux->store_flag)
+            insert_e
+              = single_pred_edge
+                  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
+                                          aux->store_flag,
+                                          loop_preheader_edge (loop),
+                                          &aux->flag_bbs, append_cond_position,
+                                          last_cond_fallthru));
+          execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
+                           append_cond_position, last_cond_fallthru);
+          gsi_commit_one_edge_insert (insert_e, NULL);
+        }
+
+      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
+           iter != aux_map.end (); ++iter)
+        delete (*iter).second;
+
+      return;
+    }
+
   /* To address PR57359 before actually applying store-motion check
      the candidates found for validity with regards to reordering
      relative to other stores which we until here disambiguated using
@@ -2693,7 +2836,8 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
     {
      ref = memory_accesses.refs_list[i];
-      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
+      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
+                  false);
     }
 
   /* Materialize ordered store sequences on exits.  */
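
Conceptually, the restored transformation rewrites the loop sketched
above the diffstat along the lines of the lsm/lsm_flag scheme described
in the comment updated by the first hunk.  The following hand-written C
sketch is illustrative only; it is not literal GIMPLE produced by the
pass, and the lsm_* names merely follow the naming convention of that
comment.

    int g1, g2;

    void
    foo (int *a, int n)
    {
      int lsm_g1 = g1, lsm_g2 = g2;  /* loads hoisted to the preheader */
      _Bool lsm_flag = 0;            /* single flag shared by both refs */
      for (int i = 0; i < n; ++i)
        if (a[i])
          {
            lsm_g1 = a[i];           /* stores redirected to temporaries */
            lsm_g2 = a[i] + 1;
            lsm_flag = 1;
          }
      if (lsm_flag)                  /* then block built by execute_sm_if_changed */
        {
          g1 = lsm_g1;               /* ordered stores emitted by execute_sm_exit */
          g2 = lsm_g2;
        }
    }

As noted in the comment inside the new FOR_EACH_VEC_ELT loop, with
-fstore-data-races the exit stores could be emitted unconditionally
instead of under the flag.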