Diffstat (limited to 'gcc/ipa-cp.cc')
-rw-r--r-- | gcc/ipa-cp.cc | 388
1 file changed, 162 insertions, 226 deletions
diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 806c2bd..480cf48 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -147,10 +147,6 @@ object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
   ("IPA_CP aggregate lattices");
 
-/* Base count to use in heuristics when using profile feedback.  */
-
-static profile_count base_count;
-
 /* Original overall size of the program.  */
 
 static long overall_size, orig_overall_size;
@@ -292,7 +288,7 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
       else
         fprintf (f, " [scc: %i, from:", val->scc_no);
       for (s = val->sources; s; s = s->next)
-        fprintf (f, " %i(%f)", s->cs->caller->order,
+        fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
                  s->cs->sreal_frequency ().to_double ());
       fprintf (f, "]");
     }
@@ -505,14 +501,16 @@ struct caller_statistics
   profile_count count_sum;
   /* Sum of all frequencies for all calls.  */
   sreal freq_sum;
-  /* Number of calls and hot calls respectively.  */
-  int n_calls, n_hot_calls;
+  /* Number of calls and calls considered interesting respectively.  */
+  int n_calls, n_interesting_calls;
   /* If itself is set up, also count the number of non-self-recursive
      calls.  */
   int n_nonrec_calls;
   /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
   cgraph_node *itself;
+  /* True if there is a caller that has no IPA profile.  */
+  bool called_without_ipa_profile;
 };
 
 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
@@ -524,10 +522,42 @@ init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
   stats->rec_count_sum = profile_count::zero ();
   stats->count_sum = profile_count::zero ();
   stats->n_calls = 0;
-  stats->n_hot_calls = 0;
+  stats->n_interesting_calls = 0;
   stats->n_nonrec_calls = 0;
   stats->freq_sum = 0;
   stats->itself = itself;
+  stats->called_without_ipa_profile = false;
+}
+
+/* We want to propagate across edges that may be executed; however,
+   we do not want to check maybe_hot, since the call itself may be cold
+   while the callee contains some heavy loop which still makes propagation
+   relevant.
+
+   In particular, even an edge called once may lead to significant
+   improvement.  */
+
+static bool
+cs_interesting_for_ipcp_p (cgraph_edge *e)
+{
+  /* If the profile says the edge is executed, we want to optimize.  */
+  if (e->count.ipa ().nonzero_p ())
+    return true;
+  /* If the local (possibly guessed or adjusted 0) profile claims the edge
+     is not executed, do not propagate.
+     Do not trust AFDO, since a branch needs to be executed multiple
+     times to count, while we want to propagate even a call called
+     once during the train run if the callee is important.  */
+  if (e->count.initialized_p () && !e->count.nonzero_p ()
+      && e->count.quality () != AFDO)
+    return false;
+  /* If we have a zero IPA profile, still consider the edge for cloning
+     in case we do partial training.  */
+  if (e->count.ipa ().initialized_p ()
+      && e->count.ipa ().quality () != AFDO
+      && !opt_for_fn (e->callee->decl, flag_profile_partial_training))
+    return false;
+  return true;
 }
 
 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
@@ -553,13 +583,18 @@ gather_caller_stats (struct cgraph_node *node, void *data)
         else
           stats->count_sum += cs->count.ipa ();
       }
+      else
+        stats->called_without_ipa_profile = true;
       stats->freq_sum += cs->sreal_frequency ();
       stats->n_calls++;
       if (stats->itself && stats->itself != cs->caller)
         stats->n_nonrec_calls++;
 
-      if (cs->maybe_hot_p ())
-        stats->n_hot_calls ++;
+      /* If the profile is known to be zero, we do not want to clone for
+         performance.  However, if the call is cold, the called function may
+         still contain important hot loops.  */
+      if (cs_interesting_for_ipcp_p (cs))
+        stats->n_interesting_calls++;
     }
 
   return false;
@@ -583,7 +618,9 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
       return false;
     }
 
-  if (node->optimize_for_size_p ())
+  /* Do not use the profile here, since a cold wrapper may wrap
+     a hot function.  */
+  if (opt_for_fn (node->decl, optimize_size))
     {
       if (dump_file)
         fprintf (dump_file, "Not considering %s for cloning; "
@@ -602,26 +639,11 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
                node->dump_name ());
       return true;
     }
-
-  /* When profile is available and function is hot, propagate into it even if
-     calls seems cold; constant propagation can improve function's speed
-     significantly.  */
-  if (stats.count_sum > profile_count::zero ()
-      && node->count.ipa ().initialized_p ())
-    {
-      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
-        {
-          if (dump_file)
-            fprintf (dump_file, "Considering %s for cloning; "
-                     "usually called directly.\n",
-                     node->dump_name ());
-          return true;
-        }
-    }
-  if (!stats.n_hot_calls)
+  if (!stats.n_interesting_calls)
     {
       if (dump_file)
-        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
+        fprintf (dump_file, "Not considering %s for cloning; "
+                 "no calls considered interesting by profile.\n",
                  node->dump_name ());
       return false;
     }
@@ -3322,10 +3344,10 @@ devirtualization_time_bonus (struct cgraph_node *node,
 
 /* Return time bonus incurred because of hints stored in ESTIMATES.  */
 
-static int
+static sreal
 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
 {
-  int result = 0;
+  sreal result = 0;
   ipa_hints hints = estimates.hints;
   if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
     result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
@@ -3333,10 +3355,10 @@ hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
   sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
 
   if (hints & INLINE_HINT_loop_iterations)
-    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
+    result += estimates.loops_with_known_iterations * bonus_for_one;
 
   if (hints & INLINE_HINT_loop_stride)
-    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
+    result += estimates.loops_with_known_strides * bonus_for_one;
 
   return result;
 }
@@ -3369,24 +3391,30 @@ incorporate_penalties (cgraph_node *node, ipa_node_params *info,
 static bool
 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
                             sreal freq_sum, profile_count count_sum,
-                            int size_cost)
+                            int size_cost, bool called_without_ipa_profile)
 {
+  gcc_assert (count_sum.ipa () == count_sum);
+  if (count_sum.quality () == AFDO)
+    count_sum = count_sum.force_nonzero ();
   if (time_benefit == 0
       || !opt_for_fn (node->decl, flag_ipa_cp_clone)
-      || node->optimize_for_size_p ())
+      /* If there is no call that was executed during profiling and no call
+         with a missing profile, we do not want to clone.  */
+      || (!called_without_ipa_profile && !count_sum.nonzero_p ()))
     return false;
 
   gcc_assert (size_cost > 0);
 
   ipa_node_params *info = ipa_node_params_sum->get (node);
   int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
+  /* If we know the IPA execution counts, we can estimate the overall
+     speedup of the program.  */
   if (count_sum.nonzero_p ())
     {
-      gcc_assert (base_count.nonzero_p ());
-      sreal factor = count_sum.probability_in (base_count).to_sreal ();
-      sreal evaluation = (time_benefit * factor) / size_cost;
+      profile_count saved_time = count_sum * time_benefit;
+      sreal evaluation = saved_time.to_sreal_scale (profile_count::one ())
+                         / size_cost;
       evaluation = incorporate_penalties (node, info, evaluation);
-      evaluation *= 1000;
 
       if (dump_file && (dump_flags & TDF_DETAILS))
         {
@@ -3394,33 +3422,46 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
                    "size: %i, count_sum: ", time_benefit.to_double (),
                    size_cost);
           count_sum.dump (dump_file);
+          fprintf (dump_file, ", overall time saved: ");
+          saved_time.dump (dump_file);
           fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
                    info->node_within_scc
                    ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
                    info->node_calling_single_call ? ", single_call" : "",
                    evaluation.to_double (), eval_threshold);
         }
-
-      return evaluation.to_int () >= eval_threshold;
+      gcc_checking_assert (saved_time == saved_time.ipa ());
+      if (!maybe_hot_count_p (NULL, saved_time))
+        {
+          if (dump_file && (dump_flags & TDF_DETAILS))
+            fprintf (dump_file, " not cloning: time saved is not hot\n");
+        }
+      /* Evaluation approximately corresponds to time saved per instruction
+         introduced.  This is likely almost always going to be true, since we
+         already checked that time saved is large enough to be considered
+         hot.  */
+      else if (evaluation >= (sreal)eval_threshold)
+        return true;
+      /* If all call sites have a known profile, we know we do not want to
+         clone.  If there are calls with unknown profile, try local
+         heuristics.  */
+      if (!called_without_ipa_profile)
+        return false;
     }
-  else
-    {
-      sreal evaluation = (time_benefit * freq_sum) / size_cost;
-      evaluation = incorporate_penalties (node, info, evaluation);
-      evaluation *= 1000;
+  sreal evaluation = (time_benefit * freq_sum) / size_cost;
+  evaluation = incorporate_penalties (node, info, evaluation);
+  evaluation *= 1000;
 
-      if (dump_file && (dump_flags & TDF_DETAILS))
-        fprintf (dump_file, "  good_cloning_opportunity_p (time: %g, "
-                 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
-                 "threshold: %i\n",
-                 time_benefit.to_double (), size_cost, freq_sum.to_double (),
-                 info->node_within_scc
-                 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
-                 info->node_calling_single_call ? ", single_call" : "",
-                 evaluation.to_double (), eval_threshold);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "  good_cloning_opportunity_p (time: %g, "
+             "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
+             "threshold: %i\n",
+             time_benefit.to_double (), size_cost, freq_sum.to_double (),
+             info->node_within_scc
+             ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
+             info->node_calling_single_call ? ", single_call" : "",
+             evaluation.to_double (), eval_threshold);
 
-      return evaluation.to_int () >= eval_threshold;
-    }
+  return evaluation >= eval_threshold;
 }
 
 /* Grow vectors in AVALS and fill them with information about values of
@@ -3506,8 +3547,8 @@ perform_estimation_of_a_value (cgraph_node *node,
     time_benefit = 0;
   else
     time_benefit = (estimates.nonspecialized_time - estimates.time)
+      + hint_time_bonus (node, estimates)
       + (devirtualization_time_bonus (node, avals)
-         + hint_time_bonus (node, estimates)
          + removable_params_cost + est_move_cost);
 
   int size = estimates.size;
@@ -3613,7 +3654,8 @@ estimate_local_effects (struct cgraph_node *node)
                  "known contexts, code not going to grow.\n");
     }
   else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
-                                       stats.count_sum, size))
+                                       stats.count_sum, size,
+                                       stats.called_without_ipa_profile))
     {
       if (size + overall_size <= get_max_overall_size (node))
         {
@@ -3979,7 +4021,7 @@ value_topo_info<valtype>::propagate_effects ()
              processed_srcvals.empty ();
              for (src = val->sources; src; src = src->next)
                if (src->val
-                   && src->cs->maybe_hot_p ())
+                   && cs_interesting_for_ipcp_p (src->cs))
                  {
                    if (!processed_srcvals.add (src->val))
                      {
@@ -4024,21 +4066,6 @@ value_topo_info<valtype>::propagate_effects ()
        }
     }
 }
 
-/* Callback for qsort to sort counts of all edges.  */
-
-static int
-compare_edge_profile_counts (const void *a, const void *b)
-{
-  const profile_count *cnt1 = (const profile_count *) a;
-  const profile_count *cnt2 = (const profile_count *) b;
-
-  if (*cnt1 < *cnt2)
-    return 1;
-  if (*cnt1 > *cnt2)
-    return -1;
-  return 0;
-}
-
 /* Propagate constants, polymorphic contexts and their effects from the
    summaries interprocedurally.  */
@@ -4051,10 +4078,6 @@ ipcp_propagate_stage (class ipa_topo_info *topo)
   if (dump_file)
     fprintf (dump_file, "\n Propagating constants:\n\n");
 
-  base_count = profile_count::uninitialized ();
-
-  bool compute_count_base = false;
-  unsigned base_count_pos_percent = 0;
   FOR_EACH_DEFINED_FUNCTION (node)
     {
       if (node->has_gimple_body_p ()
@@ -4071,57 +4094,8 @@ ipcp_propagate_stage (class ipa_topo_info *topo)
       ipa_size_summary *s = ipa_size_summaries->get (node);
       if (node->definition && !node->alias && s != NULL)
         overall_size += s->self_size;
-      if (node->count.ipa ().initialized_p ())
-        {
-          compute_count_base = true;
-          unsigned pos_percent = opt_for_fn (node->decl,
-                                             param_ipa_cp_profile_count_base);
-          base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
-        }
     }
 
-  if (compute_count_base)
-    {
-      auto_vec<profile_count> all_edge_counts;
-      all_edge_counts.reserve_exact (symtab->edges_count);
-      FOR_EACH_DEFINED_FUNCTION (node)
-        for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
-          {
-            profile_count count = cs->count.ipa ();
-            if (!count.nonzero_p ())
-              continue;
-
-            enum availability avail;
-            cgraph_node *tgt
-              = cs->callee->function_or_virtual_thunk_symbol (&avail);
-            ipa_node_params *info = ipa_node_params_sum->get (tgt);
-            if (info && info->versionable)
-              all_edge_counts.quick_push (count);
-          }
-
-      if (!all_edge_counts.is_empty ())
-        {
-          gcc_assert (base_count_pos_percent <= 100);
-          all_edge_counts.qsort (compare_edge_profile_counts);
-
-          unsigned base_count_pos
-            = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
-          base_count = all_edge_counts[base_count_pos];
-
-          if (dump_file)
-            {
-              fprintf (dump_file, "\nSelected base_count from %u edges at "
                       "position %u, arriving at: ", all_edge_counts.length (),
-                       base_count_pos);
-              base_count.dump (dump_file);
-              fprintf (dump_file, "\n");
-            }
-        }
-      else if (dump_file)
-        fprintf (dump_file, "\nNo candidates with non-zero call count found, "
-                 "continuing as if without profile feedback.\n");
-    }
-
   orig_overall_size = overall_size;
 
   if (dump_file)
@@ -4383,15 +4357,17 @@
 static bool
 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
                                 sreal *freq_sum, int *caller_count,
                                 profile_count *rec_count_sum,
-                                profile_count *nonrec_count_sum)
+                                profile_count *nonrec_count_sum,
+                                bool *called_without_ipa_profile)
 {
   ipcp_value_source<valtype> *src;
   sreal freq = 0;
   int count = 0;
   profile_count rec_cnt = profile_count::zero ();
   profile_count nonrec_cnt = profile_count::zero ();
-  bool hot = false;
+  bool interesting = false;
   bool non_self_recursive = false;
+  *called_without_ipa_profile = false;
 
   for (src = val->sources; src; src = src->next)
     {
@@ -4402,15 +4378,19 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
        {
          count++;
          freq += cs->sreal_frequency ();
-         hot |= cs->maybe_hot_p ();
+         interesting |= cs_interesting_for_ipcp_p (cs);
          if (cs->caller != dest)
            {
              non_self_recursive = true;
              if (cs->count.ipa ().initialized_p ())
                rec_cnt += cs->count.ipa ();
+             else
+               *called_without_ipa_profile = true;
            }
          else if (cs->count.ipa ().initialized_p ())
            nonrec_cnt += cs->count.ipa ();
+         else
+           *called_without_ipa_profile = true;
        }
       cs = get_next_cgraph_edge_clone (cs);
     }
@@ -4426,19 +4406,7 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
   *rec_count_sum = rec_cnt;
   *nonrec_count_sum = nonrec_cnt;
 
-  if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
-    {
-      struct cgraph_edge *cs;
-
-      /* Cold non-SCC source edge could trigger hot recursive execution of
-         function.  Consider the case as hot and rely on following cost model
-         computation to further select right one.  */
-      for (cs = dest->callers; cs; cs = cs->next_caller)
-        if (cs->caller == dest && cs->maybe_hot_p ())
-          return true;
-    }
-
-  return hot;
+  return interesting;
 }
 
 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
@@ -4564,7 +4532,7 @@ lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
   if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
-    remainder = remainder.guessed_local ();
+    remainder = orig_node->count.guessed_local ();
 
   return remainder;
 }
@@ -4677,7 +4645,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
                                   const vec<cgraph_node *> &self_gen_clones)
 {
   profile_count redist_sum = orig_node->count.ipa ();
-  if (!(redist_sum > profile_count::zero ()))
+  if (!redist_sum.nonzero_p ())
    return;
 
   if (dump_file)
@@ -4702,19 +4670,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
   unsigned i = 0;
   for (cgraph_node *n : self_gen_clones)
     {
-      profile_count orig_count = n->count;
       profile_count new_count
        = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
       new_count = lenient_count_portion_handling (new_count, orig_node);
-      n->count = new_count;
-      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
+      n->scale_profile_to (new_count);
      for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
-       {
-         cs->count = cs->count.apply_scale (new_count, orig_count);
-         processed_edges.add (cs);
-       }
-      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (new_count, orig_count);
+       processed_edges.add (cs);
 
       i++;
     }
@@ -4748,7 +4709,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
      it.  */
   for (cgraph_node *n : self_gen_clones)
     {
-      if (!(n->count.ipa () > profile_count::zero ()))
+      if (!n->count.ipa ().nonzero_p ())
        continue;
 
      desc_incoming_count_struct desc;
@@ -4794,7 +4755,7 @@ update_profiling_info (struct cgraph_node *orig_node,
   profile_count new_sum;
   profile_count remainder, orig_node_count = orig_node->count.ipa ();
 
-  if (!(orig_node_count > profile_count::zero ()))
+  if (!orig_node_count.nonzero_p ())
    return;
 
   if (dump_file)
@@ -4812,16 +4773,14 @@
   bool orig_edges_processed = false;
   if (new_sum > orig_node_count)
     {
-      /* TODO: Profile has alreay gone astray, keep what we have but lower it
-        to global0 category.  */
-      remainder = orig_node->count.global0 ();
-
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-       cs->count = cs->count.global0 ();
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-          cs;
-          cs = cs->next_callee)
-       cs->count = cs->count.global0 ();
+      /* Profile has already gone astray, keep what we have but lower it
+        to global0adjusted or to local if we have partial training.  */
+      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+       orig_node->make_profile_local ();
+      if (new_sum.quality () == AFDO)
+       orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
+      else
+       orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
      orig_edges_processed = true;
     }
   else if (stats.rec_count_sum.nonzero_p ())
@@ -4847,20 +4806,12 @@
          /* The NEW_NODE count and counts of all its outgoing edges
             are still unmodified copies of ORIG_NODE's.  Just clear
             the latter and bail out.  */
-         profile_count zero;
          if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
-           zero = profile_count::zero ().guessed_local ();
+           orig_node->make_profile_local ();
+         else if (orig_nonrec_call_count.quality () == AFDO)
+           orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
          else
-           zero = profile_count::adjusted_zero ();
-         orig_node->count = zero;
-         for (cgraph_edge *cs = orig_node->callees;
-              cs;
-              cs = cs->next_callee)
-           cs->count = zero;
-         for (cgraph_edge *cs = orig_node->indirect_calls;
-              cs;
-              cs = cs->next_callee)
-           cs->count = zero;
+           orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
          return;
        }
     }
@@ -4887,11 +4838,12 @@
      profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
 
      int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
-      profile_count new_part
-       = MAX(MIN (unexp.apply_scale (new_sum,
-                                     new_sum + orig_nonrec_call_count),
-                  unexp.apply_scale (limit_den - 1, limit_den)),
-             unexp.apply_scale (new_nonrec_calls, limit_den));
+      profile_count new_part = unexp.apply_scale (limit_den - 1, limit_den);
+      profile_count den = new_sum + orig_nonrec_call_count;
+      if (den.nonzero_p ())
+       new_part = MIN (unexp.apply_scale (new_sum, den), new_part);
+      new_part = MAX (new_part,
+                      unexp.apply_scale (new_nonrec_calls, limit_den));
      if (dump_file)
        {
          fprintf (dump_file, "    Claiming ");
@@ -4909,27 +4861,10 @@
   remainder = lenient_count_portion_handling (orig_node_count - new_sum,
                                              orig_node);
 
-  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
-  new_node->count = new_sum;
-  orig_node->count = remainder;
-
-  profile_count orig_new_node_count = orig_node_count;
-  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
-  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
-  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
+  new_node->scale_profile_to (new_sum);
 
   if (!orig_edges_processed)
-    {
-      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-          cs;
-          cs = cs->next_callee)
-       cs->count = cs->count.apply_scale (remainder, orig_node_count);
-    }
+    orig_node->scale_profile_to (remainder);
 
   if (dump_file)
     {
@@ -4947,35 +4882,23 @@ update_specialized_profile (struct cgraph_node *new_node,
                            struct cgraph_node *orig_node,
                            profile_count redirected_sum)
 {
-  struct cgraph_edge *cs;
-  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
-
   if (dump_file)
     {
      fprintf (dump_file, "    the sum of counts of redirected edges is ");
      redirected_sum.dump (dump_file);
      fprintf (dump_file, "\n    old ipa count of the original node is ");
-      orig_node_count.dump (dump_file);
+      orig_node->count.dump (dump_file);
      fprintf (dump_file, "\n");
     }
 
-  if (!(orig_node_count > profile_count::zero ()))
+  if (!orig_node->count.ipa ().nonzero_p ()
+      || !redirected_sum.nonzero_p ())
    return;
 
-  new_node_count = new_node->count;
-  new_node->count += redirected_sum;
-  orig_node->count
-    = lenient_count_portion_handling (orig_node->count - redirected_sum,
-                                      orig_node);
-
-  for (cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
+  orig_node->scale_profile_to
+    (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
+                                     orig_node));
 
-  for (cs = orig_node->callees; cs; cs = cs->next_callee)
-    {
-      profile_count dec = cs->count.apply_scale (redirected_sum,
-                                                 orig_node_count);
-      cs->count -= dec;
-    }
+  new_node->scale_profile_to (new_node->count.ipa () + redirected_sum);
 
   if (dump_file)
     {
@@ -5928,6 +5851,7 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
   sreal freq_sum;
   profile_count count_sum, rec_count_sum;
   vec<cgraph_edge *> callers;
+  bool called_without_ipa_profile;
 
   if (val->spec_node)
     {
@@ -5943,7 +5867,8 @@
      return false;
    }
   else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
-                                           &rec_count_sum, &count_sum))
+                                           &rec_count_sum, &count_sum,
+                                           &called_without_ipa_profile))
    return false;
 
   if (!dbg_cnt (ipa_cp_values))
@@ -5980,9 +5905,11 @@
   if (!good_cloning_opportunity_p (node, val->local_time_benefit,
                                   freq_sum, count_sum,
-                                  val->local_size_cost)
+                                  val->local_size_cost,
+                                  called_without_ipa_profile)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
-                                     freq_sum, count_sum, val->prop_size_cost))
+                                     freq_sum, count_sum, val->prop_size_cost,
+                                     called_without_ipa_profile))
    return false;
 
   if (dump_file)
@@ -6398,6 +6325,11 @@ ipcp_store_vr_results (void)
                                                TYPE_PRECISION (type),
                                                TYPE_SIGN (type)));
              tmp.update_bitmask (bm);
+             // Reflecting the bitmask on the ranges can sometimes
+             // produce an UNDEFINED value if the bitmask update
+             // was previously deferred.  See PR 120048.
+             if (tmp.undefined_p ())
+               tmp.set_varying (type);
              ipa_vr vr (tmp);
              ts->m_vr->quick_push (vr);
            }
@@ -6419,6 +6351,11 @@
                                                TYPE_PRECISION (type),
                                                TYPE_SIGN (type)));
              tmp.update_bitmask (bm);
+             // Reflecting the bitmask on the ranges can sometimes
+             // produce an UNDEFINED value if the bitmask update
+             // was previously deferred.  See PR 120048.
+             if (tmp.undefined_p ())
+               tmp.set_varying (type);
              ipa_vr vr (tmp);
              ts->m_vr->quick_push (vr);
            }
@@ -6564,7 +6501,6 @@ make_pass_ipa_cp (gcc::context *ctxt)
 void
 ipa_cp_cc_finalize (void)
 {
-  base_count = profile_count::uninitialized ();
   overall_size = 0;
   orig_overall_size = 0;
   ipcp_free_transformation_sum ();
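
To make the patch's heuristics concrete, here is a minimal self-contained C++ sketch. It is not GCC code: profile_count, sreal and cgraph_edge are replaced by plain doubles and a hypothetical edge_info struct, and eval_threshold is a plain parameter. It models the two decisions the patch introduces: cs_interesting_for_ipcp_p trusts nonzero IPA counts, distrusts zero counts of AFDO quality, and keeps zero-count edges alive under -fprofile-partial-training; and the profile-driven branch of good_cloning_opportunity_p rates a clone by total time saved (count_sum * time_benefit) per unit of code growth.

  /* Illustrative sketch only -- not GCC code.  Simplified model of the
     heuristics added by this patch.  */
  #include <cstdio>

  enum profile_quality { UNINITIALIZED, AFDO_Q, GUESSED, PRECISE };

  struct edge_info
  {
    bool local_initialized;   /* Local (per-function) profile present.  */
    double local_count;       /* Local execution count estimate.  */
    bool ipa_initialized;     /* Whole-program (IPA) profile present.  */
    double ipa_count;         /* IPA execution count.  */
    profile_quality quality;  /* How trustworthy the count is.  */
    bool partial_training;    /* -fprofile-partial-training in effect.  */
  };

  /* Mirrors cs_interesting_for_ipcp_p: optimize edges the profile says
     run; distrust zero counts coming from AFDO; with partial training,
     keep considering edges whose IPA count is zero.  */
  static bool
  interesting_for_ipcp (const edge_info &e)
  {
    if (e.ipa_initialized && e.ipa_count > 0)
      return true;
    if (e.local_initialized && e.local_count == 0 && e.quality != AFDO_Q)
      return false;
    if (e.ipa_initialized && e.quality != AFDO_Q && !e.partial_training)
      return false;
    return true;
  }

  /* Mirrors the profile-driven branch of good_cloning_opportunity_p:
     total time saved is the sum of caller counts times the
     per-invocation benefit, and the evaluation is time saved per unit
     of code growth.  */
  static bool
  good_cloning_opportunity (double count_sum, double time_benefit,
                            int size_cost, int eval_threshold)
  {
    double saved_time = count_sum * time_benefit;
    double evaluation = saved_time / size_cost;
    return evaluation >= eval_threshold;
  }

  int
  main ()
  {
    /* An AFDO edge with a zero count: still interesting, because AFDO
       zeros are not trusted.  */
    edge_info cold_afdo = { true, 0, true, 0, AFDO_Q, false };
    printf ("AFDO zero-count edge interesting: %d\n",
            (int) interesting_for_ipcp (cold_afdo));
    /* 1e6 calls saving 2 time units each over 4000 size units of growth
       gives an evaluation of 500, exactly at a threshold of 500.  */
    printf ("clone profitable: %d\n",
            (int) good_cloning_opportunity (1e6, 2.0, 4000, 500));
    return 0;
  }

With these inputs the sketch reports the AFDO zero-count edge as still interesting and the clone as profitable at exactly the threshold, mirroring how the patch errs toward propagation when the profile data is untrustworthy.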