aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2020-07-24 13:44:09 +0200
committerRichard Biener <rguenther@suse.de>2020-07-31 08:22:45 +0200
commit1212cfad09378bc85860a7de22dde0cf7a19fd01 (patch)
treedb2fc19d73115779ac2b55366eef77bb86fa5977
parent3e61a2056335ca7d4e2009823efae4ee2dc950ee (diff)
downloadgcc-1212cfad09378bc85860a7de22dde0cf7a19fd01.zip
gcc-1212cfad09378bc85860a7de22dde0cf7a19fd01.tar.gz
gcc-1212cfad09378bc85860a7de22dde0cf7a19fd01.tar.bz2
Improve var-tracking dataflow iteration order
This builds upon the rev_post_order_and_mark_dfs_back_seme improvements and makes vt_find_locations iterate over the dataflow problems for each toplevel SCC separately, improving memory locality and avoiding to process nodes after the SCC before the SCC itself stabilized. On the asan_interceptors.cc testcase this for example reduces the number of visited blocks from 3751 to 2867. For stage3-gcc this reduces the number of visited blocks by ~4%. 2020-07-28 Richard Biener <rguenther@suse.de> PR debug/78288 * var-tracking.c (vt_find_locations): Use rev_post_order_and_mark_dfs_back_seme and separately iterate over toplevel SCCs.
-rw-r--r--gcc/var-tracking.c274
1 files changed, 151 insertions, 123 deletions
diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c
index a345fb4..743f5dc 100644
--- a/gcc/var-tracking.c
+++ b/gcc/var-tracking.c
@@ -7084,164 +7084,191 @@ vt_find_locations (void)
so that the data-flow runs faster. */
rc_order = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
bb_order = XNEWVEC (int, last_basic_block_for_fn (cfun));
- pre_and_rev_post_order_compute (NULL, rc_order, false);
- for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
+ auto_bitmap exit_bbs;
+ bitmap_set_bit (exit_bbs, EXIT_BLOCK);
+ auto_vec<std::pair<int, int> > toplevel_scc_extents;
+ int n = rev_post_order_and_mark_dfs_back_seme
+ (cfun, single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), exit_bbs, true,
+ rc_order, &toplevel_scc_extents);
+ for (i = 0; i < n; i++)
bb_order[rc_order[i]] = i;
- free (rc_order);
in_worklist = sbitmap_alloc (last_basic_block_for_fn (cfun));
in_pending = sbitmap_alloc (last_basic_block_for_fn (cfun));
bitmap_clear (in_worklist);
- FOR_EACH_BB_FN (bb, cfun)
- pending->insert (bb_order[bb->index], bb);
- bitmap_ones (in_pending);
-
- while (success && !pending->empty ())
- {
- std::swap (worklist, pending);
- std::swap (in_worklist, in_pending);
+ /* We're performing the dataflow iteration independently over the
+ toplevel SCCs plus leading non-cyclic entry blocks and separately
+ over the tail. That ensures best memory locality and the least
+ number of visited blocks. */
+ unsigned extent = 0;
+ int curr_start = -1;
+ int curr_end = -1;
+ do
+ {
+ curr_start = curr_end + 1;
+ if (toplevel_scc_extents.length () <= extent)
+ curr_end = n - 1;
+ else
+ curr_end = toplevel_scc_extents[extent++].second;
- while (!worklist->empty ())
+ for (int i = curr_start; i <= curr_end; ++i)
{
- bool changed;
- edge_iterator ei;
- int oldinsz, oldoutsz;
+ pending->insert (i, BASIC_BLOCK_FOR_FN (cfun, rc_order[i]));
+ bitmap_set_bit (in_pending, rc_order[i]);
+ }
- bb = worklist->extract_min ();
- bitmap_clear_bit (in_worklist, bb->index);
+ while (success && !pending->empty ())
+ {
+ std::swap (worklist, pending);
+ std::swap (in_worklist, in_pending);
- if (VTI (bb)->in.vars)
+ while (!worklist->empty ())
{
- htabsz -= (shared_hash_htab (VTI (bb)->in.vars)->size ()
- + shared_hash_htab (VTI (bb)->out.vars)->size ());
- oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements ();
- oldoutsz = shared_hash_htab (VTI (bb)->out.vars)->elements ();
- }
- else
- oldinsz = oldoutsz = 0;
+ bool changed;
+ edge_iterator ei;
+ int oldinsz, oldoutsz;
- if (MAY_HAVE_DEBUG_BIND_INSNS)
- {
- dataflow_set *in = &VTI (bb)->in, *first_out = NULL;
- bool first = true, adjust = false;
+ bb = worklist->extract_min ();
+ bitmap_clear_bit (in_worklist, bb->index);
- /* Calculate the IN set as the intersection of
- predecessor OUT sets. */
+ if (VTI (bb)->in.vars)
+ {
+ htabsz -= (shared_hash_htab (VTI (bb)->in.vars)->size ()
+ + shared_hash_htab (VTI (bb)->out.vars)->size ());
+ oldinsz = shared_hash_htab (VTI (bb)->in.vars)->elements ();
+ oldoutsz = shared_hash_htab (VTI (bb)->out.vars)->elements ();
+ }
+ else
+ oldinsz = oldoutsz = 0;
- dataflow_set_clear (in);
- dst_can_be_shared = true;
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ {
+ dataflow_set *in = &VTI (bb)->in, *first_out = NULL;
+ bool first = true, adjust = false;
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (!VTI (e->src)->flooded)
- gcc_assert (bb_order[bb->index]
- <= bb_order[e->src->index]);
- else if (first)
- {
- dataflow_set_copy (in, &VTI (e->src)->out);
- first_out = &VTI (e->src)->out;
- first = false;
- }
- else
- {
- dataflow_set_merge (in, &VTI (e->src)->out);
- adjust = true;
- }
+ /* Calculate the IN set as the intersection of
+ predecessor OUT sets. */
- if (adjust)
- {
- dataflow_post_merge_adjust (in, &VTI (bb)->permp);
+ dataflow_set_clear (in);
+ dst_can_be_shared = true;
- if (flag_checking)
- /* Merge and merge_adjust should keep entries in
- canonical order. */
- shared_hash_htab (in->vars)
- ->traverse <dataflow_set *,
- canonicalize_loc_order_check> (in);
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (!VTI (e->src)->flooded)
+ gcc_assert (bb_order[bb->index]
+ <= bb_order[e->src->index]);
+ else if (first)
+ {
+ dataflow_set_copy (in, &VTI (e->src)->out);
+ first_out = &VTI (e->src)->out;
+ first = false;
+ }
+ else
+ {
+ dataflow_set_merge (in, &VTI (e->src)->out);
+ adjust = true;
+ }
- if (dst_can_be_shared)
+ if (adjust)
{
- shared_hash_destroy (in->vars);
- in->vars = shared_hash_copy (first_out->vars);
- }
- }
+ dataflow_post_merge_adjust (in, &VTI (bb)->permp);
- VTI (bb)->flooded = true;
- }
- else
- {
- /* Calculate the IN set as union of predecessor OUT sets. */
- dataflow_set_clear (&VTI (bb)->in);
- FOR_EACH_EDGE (e, ei, bb->preds)
- dataflow_set_union (&VTI (bb)->in, &VTI (e->src)->out);
- }
+ if (flag_checking)
+ /* Merge and merge_adjust should keep entries in
+ canonical order. */
+ shared_hash_htab (in->vars)
+ ->traverse <dataflow_set *,
+ canonicalize_loc_order_check> (in);
- changed = compute_bb_dataflow (bb);
- n_blocks_processed++;
- htabsz += (shared_hash_htab (VTI (bb)->in.vars)->size ()
- + shared_hash_htab (VTI (bb)->out.vars)->size ());
+ if (dst_can_be_shared)
+ {
+ shared_hash_destroy (in->vars);
+ in->vars = shared_hash_copy (first_out->vars);
+ }
+ }
- if (htabmax && htabsz > htabmax)
- {
- if (MAY_HAVE_DEBUG_BIND_INSNS)
- inform (DECL_SOURCE_LOCATION (cfun->decl),
- "variable tracking size limit exceeded with "
- "%<-fvar-tracking-assignments%>, retrying without");
+ VTI (bb)->flooded = true;
+ }
else
- inform (DECL_SOURCE_LOCATION (cfun->decl),
- "variable tracking size limit exceeded");
- success = false;
- break;
- }
+ {
+ /* Calculate the IN set as union of predecessor OUT sets. */
+ dataflow_set_clear (&VTI (bb)->in);
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ dataflow_set_union (&VTI (bb)->in, &VTI (e->src)->out);
+ }
- if (changed)
- {
- FOR_EACH_EDGE (e, ei, bb->succs)
+ changed = compute_bb_dataflow (bb);
+ n_blocks_processed++;
+ htabsz += (shared_hash_htab (VTI (bb)->in.vars)->size ()
+ + shared_hash_htab (VTI (bb)->out.vars)->size ());
+
+ if (htabmax && htabsz > htabmax)
{
- if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
- continue;
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ inform (DECL_SOURCE_LOCATION (cfun->decl),
+ "variable tracking size limit exceeded with "
+ "%<-fvar-tracking-assignments%>, retrying without");
+ else
+ inform (DECL_SOURCE_LOCATION (cfun->decl),
+ "variable tracking size limit exceeded");
+ success = false;
+ break;
+ }
- /* Iterate to an earlier block in RPO in the next
- round, iterate to the same block immediately. */
- if (bb_order[e->dest->index] < bb_order[bb->index])
+ if (changed)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
{
- if (!bitmap_bit_p (in_pending, e->dest->index))
+ if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
+ continue;
+
+ /* Iterate to an earlier block in RPO in the next
+ round, iterate to the same block immediately. */
+ if (bb_order[e->dest->index] < bb_order[bb->index])
{
- /* Send E->DEST to next round. */
- bitmap_set_bit (in_pending, e->dest->index);
- pending->insert (bb_order[e->dest->index],
- e->dest);
+ gcc_assert (bb_order[e->dest->index] >= curr_start);
+ if (!bitmap_bit_p (in_pending, e->dest->index))
+ {
+ /* Send E->DEST to next round. */
+ bitmap_set_bit (in_pending, e->dest->index);
+ pending->insert (bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ else if (bb_order[e->dest->index] <= curr_end
+ && !bitmap_bit_p (in_worklist, e->dest->index))
+ {
+ /* Add E->DEST to current round or delay
+ processing if it is in the next SCC. */
+ bitmap_set_bit (in_worklist, e->dest->index);
+ worklist->insert (bb_order[e->dest->index],
+ e->dest);
}
- }
- else if (!bitmap_bit_p (in_worklist, e->dest->index))
- {
- /* Add E->DEST to current round. */
- bitmap_set_bit (in_worklist, e->dest->index);
- worklist->insert (bb_order[e->dest->index],
- e->dest);
}
}
- }
- if (dump_file)
- fprintf (dump_file,
- "BB %i: in %i (was %i), out %i (was %i), rem %i + %i, tsz %i\n",
- bb->index,
- (int)shared_hash_htab (VTI (bb)->in.vars)->size (),
- oldinsz,
- (int)shared_hash_htab (VTI (bb)->out.vars)->size (),
- oldoutsz,
- (int)worklist->nodes (), (int)pending->nodes (), htabsz);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "BB %i IN:\n", bb->index);
- dump_dataflow_set (&VTI (bb)->in);
- fprintf (dump_file, "BB %i OUT:\n", bb->index);
- dump_dataflow_set (&VTI (bb)->out);
+ if (dump_file)
+ fprintf (dump_file,
+ "BB %i: in %i (was %i), out %i (was %i), rem %i + %i, "
+ "tsz %i\n", bb->index,
+ (int)shared_hash_htab (VTI (bb)->in.vars)->size (),
+ oldinsz,
+ (int)shared_hash_htab (VTI (bb)->out.vars)->size (),
+ oldoutsz,
+ (int)worklist->nodes (), (int)pending->nodes (),
+ htabsz);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "BB %i IN:\n", bb->index);
+ dump_dataflow_set (&VTI (bb)->in);
+ fprintf (dump_file, "BB %i OUT:\n", bb->index);
+ dump_dataflow_set (&VTI (bb)->out);
+ }
}
}
}
+ while (curr_end != n - 1);
statistics_counter_event (cfun, "compute_bb_dataflow times",
n_blocks_processed);
@@ -7250,6 +7277,7 @@ vt_find_locations (void)
FOR_EACH_BB_FN (bb, cfun)
gcc_assert (VTI (bb)->flooded);
+ free (rc_order);
free (bb_order);
delete worklist;
delete pending;