aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2025-08-20 11:06:53 +0200
committerRichard Biener <rguenth@gcc.gnu.org>2025-08-20 13:34:11 +0200
commitfc23b539caa16a108bd16bcfcb86fe261a9aa174 (patch)
tree0826f1878c492b16fd9021df0a4dd145effaae0c
parent0f15ff7b511493e9197e6153b794081c1557ba02 (diff)
downloadgcc-fc23b539caa16a108bd16bcfcb86fe261a9aa174.zip
gcc-fc23b539caa16a108bd16bcfcb86fe261a9aa174.tar.gz
gcc-fc23b539caa16a108bd16bcfcb86fe261a9aa174.tar.bz2
tree-optimization/114480 - speedup IDF compute
The testcase in the PR shows that it's worth splitting the processing of the initial workset, which is def_blocks from the main iteration. This reduces SSA incremental update time from 44.7s to 32.9s. Further changing the workset bitmap of the main iteration to a vector speeds up things further to 23.5s for an overall nearly halving of the SSA incremental update compile-time and an overall 12% compile-time saving at -O1. Using bitmap_ior in the first loop or avoiding (immediate) re-processing of blocks in def_blocks does not make a measurable difference for the testcase so I left this as-is. PR tree-optimization/114480 * cfganal.cc (compute_idf): Split processing of the initial workset from the main iteration. Use a vector for the workset of the main iteration.
-rw-r--r--gcc/cfganal.cc44
1 files changed, 25 insertions, 19 deletions
diff --git a/gcc/cfganal.cc b/gcc/cfganal.cc
index 7903575..3537e79 100644
--- a/gcc/cfganal.cc
+++ b/gcc/cfganal.cc
@@ -1679,30 +1679,17 @@ compute_dominance_frontiers (bitmap_head *frontiers)
bitmap
compute_idf (bitmap def_blocks, bitmap_head *dfs)
{
- bitmap_iterator bi;
+ bitmap_iterator bi, bi2;
unsigned bb_index, i;
bitmap phi_insertion_points;
phi_insertion_points = BITMAP_ALLOC (NULL);
- /* Seed the work set with all the blocks in DEF_BLOCKS. */
- auto_bitmap work_set;
- bitmap_copy (work_set, def_blocks);
- bitmap_tree_view (work_set);
-
- /* Pop a block off the workset, add every block that appears in
- the original block's DF that we have not already processed to
- the workset. Iterate until the workset is empty. Blocks
- which are added to the workset are potential sites for
- PHI nodes. */
- while (!bitmap_empty_p (work_set))
+ /* The initial workset is the DEF_BLOCKs, process that first,
+ seeding phi_insertion_points as the start of the worklist
+ for the iteration which then also serves as a visited set. */
+ EXECUTE_IF_SET_IN_BITMAP (def_blocks, 0, bb_index, bi2)
{
- /* The dominance frontier of a block is blocks after it so iterating
- on earlier blocks first is better.
- ??? Basic blocks are by no means guaranteed to be ordered in
- optimal order for this iteration. */
- bb_index = bitmap_clear_first_set_bit (work_set);
-
/* Since the registration of NEW -> OLD name mappings is done
separately from the call to update_ssa, when updating the SSA
form, the basic blocks where new and/or old names are defined
@@ -1717,8 +1704,27 @@ compute_idf (bitmap def_blocks, bitmap_head *dfs)
as well. That makes iterating over the DFS bitmap preferential
to whole bitmap operations involving also phi_insertion_points. */
EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
+ bitmap_set_bit (phi_insertion_points, i);
+ }
+
+ /* Seed the work set with the initial phi_insertion_points. */
+ auto_vec<int, 64> work_set (n_basic_blocks_for_fn (cfun));
+ EXECUTE_IF_SET_IN_BITMAP (phi_insertion_points, 0, i, bi)
+ work_set.quick_push (i);
+
+ /* Pop a block off the workset, add every block that appears in
+ the original block's DF that we have not already processed to
+ the workset. Iterate until the workset is empty. Blocks
+ which are added to the workset are potential sites for
+ PHI nodes. */
+ while (!work_set.is_empty ())
+ {
+ bb_index = work_set.pop ();
+ gcc_checking_assert (bb_index
+ < (unsigned) last_basic_block_for_fn (cfun));
+ EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
if (bitmap_set_bit (phi_insertion_points, i))
- bitmap_set_bit (work_set, i);
+ work_set.quick_push (i);
}
return phi_insertion_points;