Diffstat (limited to 'gcc/ipa-cp.cc')
-rw-r--r-- | gcc/ipa-cp.cc | 161
1 file changed, 66 insertions, 95 deletions
diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index abde64b..480cf48 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -288,7 +288,7 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
 	  else
 	    fprintf (f, " [scc: %i, from:", val->scc_no);
 	  for (s = val->sources; s; s = s->next)
-	    fprintf (f, " %i(%f)", s->cs->caller->order,
+	    fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
 		     s->cs->sreal_frequency ().to_double ());
 	  fprintf (f, "]");
 	}
@@ -544,15 +544,18 @@ cs_interesting_for_ipcp_p (cgraph_edge *e)
   if (e->count.ipa ().nonzero_p ())
     return true;
   /* If local (possibly guseed or adjusted 0 profile) claims edge is
-     not executed, do not propagate.  */
-  if (!e->count.nonzero_p ())
+     not executed, do not propagate.
+     Do not trust AFDO since a branch needs to be executed multiple
+     times to be counted, while we want to propagate even a call executed
+     once during the train run if the callee is important.  */
+  if (e->count.initialized_p () && !e->count.nonzero_p ()
+      && e->count.quality () != AFDO)
     return false;
-  /* If IPA profile says edge is executed zero times, but zero
-     is quality is ADJUSTED, still consider it for cloning in
-     case we have partial training.  */
+  /* If we have zero IPA profile, still consider edge for cloning
+     in case we do partial training.  */
   if (e->count.ipa ().initialized_p ()
-      && opt_for_fn (e->callee->decl,flag_profile_partial_training)
-      && e->count.nonzero_p ())
+      && e->count.ipa ().quality () != AFDO
+      && !opt_for_fn (e->callee->decl,flag_profile_partial_training))
     return false;
   return true;
 }
@@ -615,7 +618,9 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
       return false;
     }

-  if (node->optimize_for_size_p ())
+  /* Do not use profile here since cold wrappers may
+     wrap hot functions.  */
+  if (opt_for_fn (node->decl, optimize_size))
     {
       if (dump_file)
 	fprintf (dump_file, "Not considering %s for cloning; "
@@ -3339,10 +3344,10 @@ devirtualization_time_bonus (struct cgraph_node *node,

 /* Return time bonus incurred because of hints stored in ESTIMATES.  */

-static int
+static sreal
 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
 {
-  int result = 0;
+  sreal result = 0;
   ipa_hints hints = estimates.hints;
   if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
     result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
@@ -3350,10 +3355,10 @@ hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
   sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);

   if (hints & INLINE_HINT_loop_iterations)
-    result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
+    result += estimates.loops_with_known_iterations * bonus_for_one;

   if (hints & INLINE_HINT_loop_stride)
-    result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
+    result += estimates.loops_with_known_strides * bonus_for_one;

   return result;
 }
@@ -3389,9 +3394,10 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
 			    int size_cost, bool called_without_ipa_profile)
 {
   gcc_assert (count_sum.ipa () == count_sum);
+  if (count_sum.quality () == AFDO)
+    count_sum = count_sum.force_nonzero ();
   if (time_benefit == 0
       || !opt_for_fn (node->decl, flag_ipa_cp_clone)
-      || node->optimize_for_size_p ()
       /* If there is no call which was executed in profiling or where
 	 profile is missing, we do not want to clone.  */
       || (!called_without_ipa_profile && !count_sum.nonzero_p ()))
@@ -3434,7 +3440,7 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
 	 introduced.
 	 This is likely almost always going to be true, since we already
 	 checked that time saved is large enough to be considered hot.  */
-      else if (evaluation.to_int () >= eval_threshold)
+      else if (evaluation >= (sreal)eval_threshold)
 	return true;
       /* If all call sites have profile known; we know we do not want t clone.
 	 If there are calls with unknown profile; try local heuristics.  */
@@ -3455,7 +3461,7 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
 	     info->node_calling_single_call ? ", single_call" : "",
 	     evaluation.to_double (), eval_threshold);

-  return evaluation.to_int () >= eval_threshold;
+  return evaluation >= eval_threshold;
 }

 /* Grow vectors in AVALS and fill them with information about values of
@@ -3541,8 +3547,8 @@ perform_estimation_of_a_value (cgraph_node *node,
     time_benefit = 0;
   else
     time_benefit = (estimates.nonspecialized_time - estimates.time)
+      + hint_time_bonus (node, estimates)
       + (devirtualization_time_bonus (node, avals)
-	 + hint_time_bonus (node, estimates)
 	 + removable_params_cost + est_move_cost);

   int size = estimates.size;
@@ -4526,7 +4532,7 @@ lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
   if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
      && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
      && opt_for_fn (orig_node->decl, flag_profile_partial_training))
-    remainder = remainder.guessed_local ();
+    remainder = orig_node->count.guessed_local ();

   return remainder;
 }
@@ -4639,7 +4645,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
 				   const vec<cgraph_node *> &self_gen_clones)
 {
   profile_count redist_sum = orig_node->count.ipa ();
-  if (!(redist_sum > profile_count::zero ()))
+  if (!redist_sum.nonzero_p ())
     return;

   if (dump_file)
@@ -4664,19 +4670,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
   unsigned i = 0;
   for (cgraph_node *n : self_gen_clones)
     {
-      profile_count orig_count = n->count;
       profile_count new_count = (redist_sum / self_gen_clones.length ()
 				 + other_edges_count[i]);
       new_count = lenient_count_portion_handling (new_count, orig_node);
-      n->count = new_count;
-      profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
+      n->scale_profile_to (new_count);
       for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
-	{
-	  cs->count = cs->count.apply_scale (new_count, orig_count);
-	  processed_edges.add (cs);
-	}
-      for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
-	cs->count = cs->count.apply_scale (new_count, orig_count);
+	processed_edges.add (cs);
       i++;
     }

@@ -4710,7 +4709,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
      it.  */
   for (cgraph_node *n : self_gen_clones)
     {
-      if (!(n->count.ipa () > profile_count::zero ()))
+      if (!n->count.ipa ().nonzero_p ())
 	continue;

       desc_incoming_count_struct desc;
@@ -4756,7 +4755,7 @@ update_profiling_info (struct cgraph_node *orig_node,
   profile_count new_sum;
   profile_count remainder, orig_node_count = orig_node->count.ipa ();

-  if (!(orig_node_count > profile_count::zero ()))
+  if (!orig_node_count.nonzero_p ())
     return;

   if (dump_file)
@@ -4774,16 +4773,14 @@ update_profiling_info (struct cgraph_node *orig_node,
   bool orig_edges_processed = false;
   if (new_sum > orig_node_count)
     {
-      /* TODO: Profile has alreay gone astray, keep what we have but lower it
-	 to global0 category.  */
-      remainder = orig_node->count.global0 ();
-
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-	cs->count = cs->count.global0 ();
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-	   cs;
-	   cs = cs->next_callee)
-	cs->count = cs->count.global0 ();
+      /* Profile has already gone astray, keep what we have but lower it
+	 to global0adjusted or to local if we have partial training.  */
+      if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+	orig_node->make_profile_local ();
+      if (new_sum.quality () == AFDO)
+	orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
+      else
+	orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
       orig_edges_processed = true;
     }
   else if (stats.rec_count_sum.nonzero_p ())
@@ -4809,20 +4806,12 @@
 	  /* The NEW_NODE count and counts of all its outgoing edges
 	     are still unmodified copies of ORIG_NODE's.  Just clear
 	     the latter and bail out.  */
-	  profile_count zero;
 	  if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
-	    zero = profile_count::zero ().guessed_local ();
+	    orig_node->make_profile_local ();
+	  else if (orig_nonrec_call_count.quality () == AFDO)
+	    orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
 	  else
-	    zero = profile_count::adjusted_zero ();
-	  orig_node->count = zero;
-	  for (cgraph_edge *cs = orig_node->callees;
-	       cs;
-	       cs = cs->next_callee)
-	    cs->count = zero;
-	  for (cgraph_edge *cs = orig_node->indirect_calls;
-	       cs;
-	       cs = cs->next_callee)
-	    cs->count = zero;
+	    orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
 	  return;
 	}
     }
@@ -4849,11 +4838,12 @@ update_profiling_info (struct cgraph_node *orig_node,
       profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;

       int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
-      profile_count new_part
-	= MAX(MIN (unexp.apply_scale (new_sum,
-				      new_sum + orig_nonrec_call_count),
-		   unexp.apply_scale (limit_den - 1, limit_den)),
-	      unexp.apply_scale (new_nonrec_calls, limit_den));
+      profile_count new_part = unexp.apply_scale (limit_den - 1, limit_den);
+      profile_count den = new_sum + orig_nonrec_call_count;
+      if (den.nonzero_p ())
+	new_part = MIN (unexp.apply_scale (new_sum, den), new_part);
+      new_part = MAX (new_part,
+		      unexp.apply_scale (new_nonrec_calls, limit_den));
       if (dump_file)
 	{
 	  fprintf (dump_file, "    Claiming ");
@@ -4871,27 +4861,10 @@ update_profiling_info (struct cgraph_node *orig_node,
   remainder = lenient_count_portion_handling (orig_node_count - new_sum,
 					      orig_node);

-  new_sum = orig_node_count.combine_with_ipa_count (new_sum);
-  new_node->count = new_sum;
-  orig_node->count = remainder;
-
-  profile_count orig_new_node_count = orig_node_count;
-  profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
-  for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
-  for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
-    cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
+  new_node->scale_profile_to (new_sum);

   if (!orig_edges_processed)
-    {
-      profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
-      for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
-	cs->count = cs->count.apply_scale (remainder, orig_node_count);
-      for (cgraph_edge *cs = orig_node->indirect_calls;
-	   cs;
-	   cs = cs->next_callee)
-	cs->count = cs->count.apply_scale (remainder, orig_node_count);
-    }
+    orig_node->scale_profile_to (remainder);

   if (dump_file)
     {
@@ -4909,35 +4882,23 @@ update_specialized_profile (struct cgraph_node *new_node,
 			    struct cgraph_node *orig_node,
 			    profile_count redirected_sum)
 {
-  struct cgraph_edge *cs;
-  profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
-
   if (dump_file)
     {
       fprintf (dump_file, "    the sum of counts of redirected edges is ");
       redirected_sum.dump (dump_file);
       fprintf (dump_file, "\n    old ipa count of the original node is ");
-      orig_node_count.dump (dump_file);
+      orig_node->count.dump (dump_file);
       fprintf (dump_file, "\n");
     }

-  if (!(orig_node_count > profile_count::zero ()))
+  if (!orig_node->count.ipa ().nonzero_p ()
+      || !redirected_sum.nonzero_p ())
     return;
-  new_node_count = new_node->count;
-  new_node->count += redirected_sum;
-  orig_node->count
-    = lenient_count_portion_handling (orig_node->count - redirected_sum,
-				      orig_node);
+  orig_node->scale_profile_to
+    (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
+				     orig_node));

-  for (cs = new_node->callees; cs; cs = cs->next_callee)
-    cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
-
-  for (cs = orig_node->callees; cs; cs = cs->next_callee)
-    {
-      profile_count dec = cs->count.apply_scale (redirected_sum,
-						 orig_node_count);
-      cs->count -= dec;
-    }
+  new_node->scale_profile_to (new_node->count.ipa () + redirected_sum);

   if (dump_file)
     {
@@ -6364,6 +6325,11 @@ ipcp_store_vr_results (void)
 						   TYPE_PRECISION (type),
 						   TYPE_SIGN (type)));
 		tmp.update_bitmask (bm);
+		// Reflecting the bitmask on the ranges can sometimes
+		// produce an UNDEFINED value if the bitmask update
+		// was previously deferred.  See PR 120048.
+		if (tmp.undefined_p ())
+		  tmp.set_varying (type);
 		ipa_vr vr (tmp);
 		ts->m_vr->quick_push (vr);
 	      }
@@ -6385,6 +6351,11 @@
 						   TYPE_PRECISION (type),
 						   TYPE_SIGN (type)));
 		tmp.update_bitmask (bm);
+		// Reflecting the bitmask on the ranges can sometimes
+		// produce an UNDEFINED value if the bitmask update
+		// was previously deferred.  See PR 120048.
+		if (tmp.undefined_p ())
+		  tmp.set_varying (type);
 		ipa_vr vr (tmp);
 		ts->m_vr->quick_push (vr);
 	      }
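
As an illustration (not part of the patch): the reworked cs_interesting_for_ipcp_p treats a nonzero IPA count as decisive, lets a zero local count block propagation only when it does not come from AFDO (a call executed just once in the train run can legitimately show a zero AFDO count), and keeps a zero IPA count only for AFDO profiles or under -fprofile-partial-training. Below is a minimal standalone sketch of that decision order, using made-up stub types and names (edge_count, interesting_for_ipcp) rather than GCC's profile_count API.

// Standalone sketch (stub types, not the GCC API) of the decision order the
// patched cs_interesting_for_ipcp_p follows.  All names here are illustrative.
#include <cstdint>

enum count_quality { QUALITY_AFDO, QUALITY_ADJUSTED, QUALITY_PRECISE };

struct edge_count
{
  bool initialized;		// do we have any local count at all?
  uint64_t local;		// local (per-function) execution count
  bool ipa_initialized;		// do we have an interprocedural count?
  uint64_t ipa;			// interprocedural execution count
  count_quality quality;	// how trustworthy the counts are
};

static bool
interesting_for_ipcp (const edge_count &c, bool partial_training)
{
  // A nonzero IPA count always makes the edge interesting.
  if (c.ipa_initialized && c.ipa > 0)
    return true;
  // A local zero count suppresses propagation, unless it comes from AFDO,
  // where a call executed only once in the train run may be recorded as zero.
  if (c.initialized && c.local == 0 && c.quality != QUALITY_AFDO)
    return false;
  // A zero IPA count is kept only for AFDO profiles or with partial training.
  if (c.ipa_initialized && c.quality != QUALITY_AFDO && !partial_training)
    return false;
  return true;
}

int main ()
{
  // An AFDO edge with zero counts is still considered interesting.
  edge_count afdo_zero = { true, 0, true, 0, QUALITY_AFDO };
  return interesting_for_ipcp (afdo_zero, false) ? 0 : 1;
}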
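Likewise for the profile-updating changes: the open-coded apply_scale loops removed from update_counts_for_self_gen_clones, update_profiling_info and update_specialized_profile all rescale a node's outgoing edge counts by the ratio of the new node count to the old one, which the scale_profile_to calls now centralize. A rough standalone sketch of that arithmetic follows, again with hypothetical stub types and none of the careful quality tracking GCC's profile_count performs.

// Rough sketch of the proportional rescaling the removed apply_scale loops
// performed and the new scale_profile_to calls centralize.  Stub types only.
#include <cstdint>
#include <vector>

struct node
{
  uint64_t count;			// execution count of the node
  std::vector<uint64_t> edge_counts;	// counts of its outgoing edges

  // Set the node count to NEW_COUNT and scale every outgoing edge by the
  // same ratio, guarding against a zero original count.
  void scale_profile_to (uint64_t new_count)
  {
    uint64_t old_count = count;
    count = new_count;
    if (old_count == 0)
      return;
    for (uint64_t &e : edge_counts)
      e = e * new_count / old_count;	// simplified; ignores overflow
  }
};

int main ()
{
  node n = { 100, { 40, 60 } };
  n.scale_profile_to (50);	// edges become 20 and 30
  return n.edge_counts[0] == 20 && n.edge_counts[1] == 30 ? 0 : 1;
}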