diff options
-rw-r--r-- | gcc/doc/invoke.texi | 5 | ||||
-rw-r--r-- | gcc/ipa-cp.c | 82 | ||||
-rw-r--r-- | gcc/params.opt | 4 |
3 files changed, 83 insertions, 8 deletions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 71992b8..b64ec18 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14246,6 +14246,11 @@ Maximum depth of recursive cloning for self-recursive function. Recursive cloning only when the probability of call being executed exceeds the parameter. +@item ipa-cp-profile-count-base +When using @option{-fprofile-use} option, IPA-CP will consider the measured +execution count of a call graph edge at this percentage position in their +histogram as the basis for its heuristics calculation. + @item ipa-cp-recursive-freq-factor The number of times interprocedural copy propagation expects recursive functions to call themselves. diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index b254b9b..4d07a6d0a 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -400,9 +400,9 @@ object_allocator<ipcp_value_source<tree> > ipcp_sources_pool object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool ("IPA_CP aggregate lattices"); -/* Maximal count found in program. */ +/* Base count to use in heuristics when using profile feedback. */ -static profile_count max_count; +static profile_count base_count; /* Original overall size of the program. */ @@ -809,7 +809,8 @@ ipcp_cloning_candidate_p (struct cgraph_node *node) /* When profile is available and function is hot, propagate into it even if calls seems cold; constant propagation can improve function's speed significantly. */ - if (max_count > profile_count::zero ()) + if (stats.count_sum > profile_count::zero () + && node->count.ipa ().initialized_p ()) { if (stats.count_sum > node->count.ipa ().apply_scale (90, 100)) { @@ -3310,10 +3311,10 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit, ipa_node_params *info = ipa_node_params_sum->get (node); int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold); - if (max_count > profile_count::zero ()) + if (base_count > profile_count::zero ()) { - sreal factor = count_sum.probability_in (max_count).to_sreal (); + sreal factor = count_sum.probability_in (base_count).to_sreal (); sreal evaluation = (time_benefit * factor) / size_cost; evaluation = incorporate_penalties (node, info, evaluation); evaluation *= 1000; @@ -3950,6 +3951,21 @@ value_topo_info<valtype>::propagate_effects () } } +/* Callback for qsort to sort counts of all edges. */ + +static int +compare_edge_profile_counts (const void *a, const void *b) +{ + const profile_count *cnt1 = (const profile_count *) a; + const profile_count *cnt2 = (const profile_count *) b; + + if (*cnt1 < *cnt2) + return 1; + if (*cnt1 > *cnt2) + return -1; + return 0; +} + /* Propagate constants, polymorphic contexts and their effects from the summaries interprocedurally. */ @@ -3962,8 +3978,10 @@ ipcp_propagate_stage (class ipa_topo_info *topo) if (dump_file) fprintf (dump_file, "\n Propagating constants:\n\n"); - max_count = profile_count::uninitialized (); + base_count = profile_count::uninitialized (); + bool compute_count_base = false; + unsigned base_count_pos_percent = 0; FOR_EACH_DEFINED_FUNCTION (node) { if (node->has_gimple_body_p () @@ -3981,9 +3999,57 @@ ipcp_propagate_stage (class ipa_topo_info *topo) ipa_size_summary *s = ipa_size_summaries->get (node); if (node->definition && !node->alias && s != NULL) overall_size += s->self_size; - max_count = max_count.max (node->count.ipa ()); + if (node->count.ipa ().initialized_p ()) + { + compute_count_base = true; + unsigned pos_percent = opt_for_fn (node->decl, + param_ipa_cp_profile_count_base); + base_count_pos_percent = MAX (base_count_pos_percent, pos_percent); + } } + if (compute_count_base) + { + auto_vec<profile_count> all_edge_counts; + all_edge_counts.reserve_exact (symtab->edges_count); + FOR_EACH_DEFINED_FUNCTION (node) + for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) + { + profile_count count = cs->count.ipa (); + if (!(count > profile_count::zero ())) + continue; + + enum availability avail; + cgraph_node *tgt + = cs->callee->function_or_virtual_thunk_symbol (&avail); + ipa_node_params *info = ipa_node_params_sum->get (tgt); + if (info && info->versionable) + all_edge_counts.quick_push (count); + } + + if (!all_edge_counts.is_empty ()) + { + gcc_assert (base_count_pos_percent <= 100); + all_edge_counts.qsort (compare_edge_profile_counts); + + unsigned base_count_pos + = ((all_edge_counts.length () * (base_count_pos_percent)) / 100); + base_count = all_edge_counts[base_count_pos]; + + if (dump_file) + { + fprintf (dump_file, "\nSelected base_count from %u edges at " + "position %u, arriving at: ", all_edge_counts.length (), + base_count_pos); + base_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } + else if (dump_file) + fprintf (dump_file, "\nNo candidates with non-zero call count found, " + "continuing as if without profile feedback.\n"); + } + orig_overall_size = overall_size; if (dump_file) @@ -6567,7 +6633,7 @@ make_pass_ipa_cp (gcc::context *ctxt) void ipa_cp_c_finalize (void) { - max_count = profile_count::uninitialized (); + base_count = profile_count::uninitialized (); overall_size = 0; orig_overall_size = 0; ipcp_free_transformation_sum (); diff --git a/gcc/params.opt b/gcc/params.opt index 6eb3e15..7ee7820 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -273,6 +273,10 @@ The size of translation unit that IPA-CP pass considers large. Common Joined UInteger Var(param_ipa_cp_value_list_size) Init(8) Param Optimization Maximum size of a list of values associated with each parameter for interprocedural constant propagation. +-param=ipa-cp-profile-count-base= +Common Joined UInteger Var(param_ipa_cp_profile_count_base) Init(10) IntegerRange(0, 100) Param Optimization +When using profile feedback, use the edge at this percentage position in frequncy histogram as the bases for IPA-CP heuristics. + -param=ipa-jump-function-lookups= Common Joined UInteger Var(param_ipa_jump_function_lookups) Init(8) Param Optimization Maximum number of statements visited during jump function offset discovery. |