aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-cfg.cc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2023-07-01 13:05:34 +0200
committerJan Hubicka <jh@suse.cz>2023-07-01 13:05:34 +0200
commit7e904d6c7f252ee947c237ed32dd43b2c248384d (patch)
treea92c54653e9fd592f0d8d03d28a8a7b6838c1895 /gcc/tree-cfg.cc
parent620a35b24a2b6edb67720ec42864b571a972fa45 (diff)
downloadgcc-7e904d6c7f252ee947c237ed32dd43b2c248384d.zip
gcc-7e904d6c7f252ee947c237ed32dd43b2c248384d.tar.gz
gcc-7e904d6c7f252ee947c237ed32dd43b2c248384d.tar.bz2
Fix profile update in copy-header
The most common source of profile mismatches is now the copy-header pass. The reason is that in the common case the duplicated header condition will become constant true, and that requires changes to the loop exit condition probability. While this could be done by jump threading, it is not, since jump threading gives up on loops. The copy-header pass now has logic to prove that the first exit will become true, so this patch adds the necessary plumbing to the profile updating. This is done in gimple_duplicate_sese_region in a way that is specific to this particular case. I think the general case is kind of unsolvable, and loop-ch is the only user of the infrastructure. If we later invent some new users, maybe we can export the region and region_copy arrays and let the user do the update.

With the patch we now get:

Pass dump id and name   |static mismat|dynamic mismatch
                        |in count     |in count
107t cunrolli           |      3    +3|     19237     +19237
127t ch                 |     13   +10|     19237
131t dom                |     39   +26|     19237
133t isolate-paths      |     47    +8|     19237
134t reassoc            |     49    +2|     19237
136t forwprop           |     53    +4|    226943    +207706
159t cddce              |     61    +8|    242222     +15279
161t ldist              |     62    +1|    242222
172t ifcvt              |     66    +4|    415472    +173250
173t vect               |    143   +77|  10859784  +10444312
176t cunroll            |    294  +151| 150357763 +139497979
183t loopdone           |    291    -3| 150289533     -68230
194t tracer             |    322   +31| 153230990   +2941457
195t fre                |    317    -5| 153230990
197t dom                |    286   -31| 154448079   +1217089
199t threadfull         |    293    +7| 154724763    +276684
200t vrp                |    297    +4| 155042448    +317685
204t dce                |    294    -3| 155017073     -25375
206t sink               |    292    -2| 155017073
211t cddce              |    298    +6| 155018657      +1584
255t optimized          |    296    -2| 155018657
256r expand             |    273   -23| 154592622    -426035
258r into_cfglayout     |    268    -5| 154592661        +39
275r loop2_unroll       |    272    +4| 159701866   +5109205
291r ce2                |    270    -2| 159723509
312r pro_and_epilogue   |    290   +20| 159792505     +68996
315r jump2              |    296    +6| 164234016   +4441511
323r bbro               |    294    -2| 159385430   -4848586

So ch introduces 10 new mismatches, while originally it introduced 308. At bbro the number of mismatches dropped from 432 to 294.
The biggest offender is now the cunroll pass. I think it is the case where a loop has multiple exits and one of the exits becomes false in all but the last peeled iteration. This is another case where a non-trivial loop update is needed.

Honza

gcc/ChangeLog:

	* tree-cfg.cc (gimple_duplicate_sese_region): Add eliminated_edge
	parameter; update profile.
	* tree-cfg.h (gimple_duplicate_sese_region): Update prototype.
	* tree-ssa-loop-ch.cc (entry_loop_condition_is_static): Rename to ...
	(static_loop_exit): ... this; return the edge to be eliminated.
	(ch_base::copy_headers): Handle profile updating for eliminated exits.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ifc-20040816-1.c: Reduce number of mismatches
	from 2 to 1.
	* gcc.dg/tree-ssa/loop-ch-profile-1.c: New test.
	* gcc.dg/tree-ssa/loop-ch-profile-2.c: New test.
Diffstat (limited to 'gcc/tree-cfg.cc')
-rw-r--r--gcc/tree-cfg.cc103
1 files changed, 94 insertions, 9 deletions
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index 30f26af..4989906 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -6658,13 +6658,17 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region,
blocks are stored to REGION_COPY in the same order as they had in REGION,
provided that REGION_COPY is not NULL.
The function returns false if it is unable to copy the region,
- true otherwise. */
+ true otherwise.
+
+ ELIMINATED_EDGE is an edge that is known to be removed in the duplicated
+ region. */
bool
gimple_duplicate_sese_region (edge entry, edge exit,
- basic_block *region, unsigned n_region,
- basic_block *region_copy,
- bool update_dominance)
+ basic_block *region, unsigned n_region,
+ basic_block *region_copy,
+ bool update_dominance,
+ edge eliminated_edge)
{
unsigned i;
bool free_region_copy = false, copying_header = false;
@@ -6743,11 +6747,92 @@ gimple_duplicate_sese_region (edge entry, edge exit,
split_edge_bb_loc (entry), update_dominance);
if (total_count.initialized_p () && entry_count.initialized_p ())
{
- scale_bbs_frequencies_profile_count (region, n_region,
- total_count - entry_count,
- total_count);
- scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count,
- total_count);
+ if (!eliminated_edge)
+ {
+ scale_bbs_frequencies_profile_count (region, n_region,
+ total_count - entry_count,
+ total_count);
+ scale_bbs_frequencies_profile_count (region_copy, n_region,
+ entry_count, total_count);
+ }
+ else
+ {
+ /* We only support the case where there is a single eliminated_edge and
+ it exits the first BB. We also assume that the duplicated region is
+ acyclic. So we expect the following:
+
+ // region_copy_start entry will be scaled to entry_count
+ if (cond1) <- this condition will become false
+ and we update probabilities
+ goto loop_exit;
+ if (cond2)
+ goto loop_exit;
+ goto loop_header <- this will be redirected to loop.
+ // region_copy_end
+ loop:
+ <body>
+ // region start
+ loop_header:
+ if (cond1) <- we need to update probability here
+ goto loop_exit;
+ if (cond2) <- and determine scaling factor here.
+ goto loop_exit;
+ else
+ goto loop;
+ // region end
+
+ Adding support for more exits can be done similarly,
+ but only consumer so far is tree-ssa-loop-ch and it uses only this
+ to handle the common case of peeling headers which have
+ conditionals known to be always true upon entry. */
+ gcc_assert (eliminated_edge->src == region[0]
+ && EDGE_COUNT (region[0]->succs) == 2
+ && copying_header);
+
+ edge e, e_copy, eliminated_edge_copy;
+ if (EDGE_SUCC (region[0], 0) == eliminated_edge)
+ {
+ e = EDGE_SUCC (region[0], 1);
+ e_copy = EDGE_SUCC (region_copy[0], 1);
+ eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0);
+ }
+ else
+ {
+ e = EDGE_SUCC (region[0], 0);
+ e_copy = EDGE_SUCC (region_copy[0], 0);
+ eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1);
+ }
+ gcc_checking_assert (e != e_copy
+ && eliminated_edge_copy != eliminated_edge
+ && eliminated_edge_copy->dest
+ == eliminated_edge->dest);
+
+
+ /* Handle first basic block in duplicated region as in the
+ non-eliminating case. */
+ scale_bbs_frequencies_profile_count (region_copy, n_region,
+ entry_count, total_count);
+ /* Now update redirecting eliminated edge to the other edge.
+ Actual CFG update is done by caller. */
+ e_copy->probability = profile_probability::always ();
+ eliminated_edge_copy->probability = profile_probability::never ();
+ /* Header copying is a special case of jump threading, so use
+ common code to update loop body exit condition. */
+ update_bb_profile_for_threading (region[0], e_copy->count (), e);
+ /* If we duplicated more conditionals first scale the profile of
+ rest of the preheader. Then work out the probability of
+ entering the loop and scale rest of the loop. */
+ if (n_region > 1)
+ {
+ scale_bbs_frequencies_profile_count (region_copy + 1,
+ n_region - 1,
+ e_copy->count (),
+ region_copy[1]->count);
+ scale_bbs_frequencies_profile_count (region + 1, n_region - 1,
+ e->count (),
+ region[1]->count);
+ }
+ }
}
if (copying_header)