diff options
author | Jan Hubicka <hubicka@ucw.cz> | 2019-12-05 19:12:51 +0100 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2019-12-05 18:12:51 +0000 |
commit | 34fbe3f0946f88828765184ed6581bda62cdf49f (patch) | |
tree | 5a3bf82ae6bb92203c6e4922a5d694198595edc6 /gcc/cgraphclones.c | |
parent | 8575d5925226a8f92ee644d6d59a2b1b93840d94 (diff) | |
download | gcc-34fbe3f0946f88828765184ed6581bda62cdf49f.zip gcc-34fbe3f0946f88828765184ed6581bda62cdf49f.tar.gz gcc-34fbe3f0946f88828765184ed6581bda62cdf49f.tar.bz2 |
cgraphclones.c (localize_profile): New function.
* cgraphclones.c (localize_profile): New function.
(cgraph_node::create_clone): Use it for partial profiles.
* common.opt (fprofile-partial-training): New flag.
* doc/invoke.texi (-fprofile-partial-training): Document.
* ipa-cp.c (update_profiling_info): For partial profiles do not
set function profile to zero.
* profile.c (compute_branch_probabilities): With partial profile
watch if edge count is zero and turn all probabilities to guessed.
(compute_branch_probabilities): For partial profiles do not apply
profile when entry count is zero.
* tree-profile.c (tree_profiling): Only do value_profile_transformations
when profile is read.
From-SVN: r279013
Diffstat (limited to 'gcc/cgraphclones.c')
-rw-r--r-- | gcc/cgraphclones.c | 26 |
1 file changed, 26 insertions, 0 deletions
```diff
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index 81c5dfd..f2dfb4e 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -307,6 +307,22 @@ dump_callgraph_transformation (const cgraph_node *original,
     }
 }
 
+/* Turn profile of N to local profile. */
+
+static void
+localize_profile (cgraph_node *n)
+{
+  n->count = n->count.guessed_local ();
+  for (cgraph_edge *e = n->callees; e; e=e->next_callee)
+    {
+      e->count = e->count.guessed_local ();
+      if (!e->inline_failed)
+        localize_profile (e->callee);
+    }
+  for (cgraph_edge *e = n->indirect_calls; e; e=e->next_callee)
+    e->count = e->count.guessed_local ();
+}
+
 /* Create node representing clone of N executed COUNT times. Decrease
    the execution counts from original node too.
    The new clone will have decl set to DECL that may or may not be the same
@@ -340,6 +356,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
   cgraph_edge *e;
   unsigned i;
   profile_count old_count = count;
+  bool nonzero = count.ipa ().nonzero_p ();
 
   if (new_inlined_to)
     dump_callgraph_transformation (this, new_inlined_to, "inlining to");
@@ -426,6 +443,15 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
 
   if (call_duplication_hook)
     symtab->call_cgraph_duplication_hooks (this, new_node);
+  /* With partial train run we do not want to assume that original's
+     count is zero whenever we redurect all executed edges to clone.
+     Simply drop profile to local one in this case. */
+  if (update_original
+      && opt_for_fn (decl, flag_profile_partial_training)
+      && nonzero
+      && count.ipa_p ()
+      && !count.ipa ().nonzero_p ())
+    localize_profile (this);
 
   if (!new_inlined_to)
     dump_callgraph_transformation (this, new_node, suffix);
```