Diffstat (limited to 'gcc/ipa-cp.cc')
-rw-r--r--  gcc/ipa-cp.cc  161
1 file changed, 66 insertions, 95 deletions
diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index abde64b..480cf48 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -288,7 +288,7 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
else
fprintf (f, " [scc: %i, from:", val->scc_no);
for (s = val->sources; s; s = s->next)
- fprintf (f, " %i(%f)", s->cs->caller->order,
+ fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
s->cs->sreal_frequency ().to_double ());
fprintf (f, "]");
}
@@ -544,15 +544,18 @@ cs_interesting_for_ipcp_p (cgraph_edge *e)
if (e->count.ipa ().nonzero_p ())
return true;
/* If local (possibly guessed or adjusted 0 profile) claims edge is
- not executed, do not propagate. */
- if (!e->count.nonzero_p ())
+ not executed, do not propagate.
+ Do not trust AFDO since a branch needs to be executed multiple
+ times to be counted, while we want to propagate even a call executed
+ only once during the train run if the callee is important. */
+ if (e->count.initialized_p () && !e->count.nonzero_p ()
+ && e->count.quality () != AFDO)
return false;
- /* If IPA profile says edge is executed zero times, but zero
- is quality is ADJUSTED, still consider it for cloning in
- case we have partial training. */
+ /* If we have zero IPA profile, still consider edge for cloning
+ in case we do partial training. */
if (e->count.ipa ().initialized_p ()
- && opt_for_fn (e->callee->decl,flag_profile_partial_training)
- && e->count.nonzero_p ())
+ && e->count.ipa ().quality () != AFDO
+ && !opt_for_fn (e->callee->decl,flag_profile_partial_training))
return false;
return true;
}
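
The reworked check above treats AFDO counts specially: auto-FDO only records a branch once it has been sampled enough times, so a zero AFDO count is not taken as proof that the edge is dead. A minimal stand-alone sketch of the same decision order, using hypothetical bool/enum parameters in place of GCC's profile_count API (local and IPA count quality are collapsed into one parameter for brevity):

  #include <cstdio>

  /* Hypothetical stand-in for the profile quality values the check cares
     about.  */
  enum count_quality { Q_AFDO, Q_OTHER };

  /* Mirrors the order of checks in the updated cs_interesting_for_ipcp_p:
     a non-zero IPA count always wins; a zero local count only disqualifies
     the edge when it is initialized and not AFDO-sampled; a zero IPA count
     is still tolerated for AFDO profiles or with
     -fprofile-partial-training.  */
  static bool
  edge_interesting_p (bool ipa_nonzero, bool local_initialized,
                      bool local_nonzero, count_quality quality,
                      bool ipa_initialized, bool partial_training)
  {
    if (ipa_nonzero)
      return true;
    if (local_initialized && !local_nonzero && quality != Q_AFDO)
      return false;
    if (ipa_initialized && quality != Q_AFDO && !partial_training)
      return false;
    return true;
  }

  int
  main ()
  {
    /* An AFDO edge whose sampled count is zero is still interesting.  */
    printf ("%d\n", edge_interesting_p (false, true, false, Q_AFDO,
                                        true, false));
    return 0;
  }
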
@@ -615,7 +618,9 @@ ipcp_cloning_candidate_p (struct cgraph_node *node)
return false;
}
- if (node->optimize_for_size_p ())
+ /* Do not use profile here since cold wrappers wrap
+ hot functions. */
+ if (opt_for_fn (node->decl, optimize_size))
{
if (dump_file)
fprintf (dump_file, "Not considering %s for cloning; "
@@ -3339,10 +3344,10 @@ devirtualization_time_bonus (struct cgraph_node *node,
/* Return time bonus incurred because of hints stored in ESTIMATES. */
-static int
+static sreal
hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
{
- int result = 0;
+ sreal result = 0;
ipa_hints hints = estimates.hints;
if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
@@ -3350,10 +3355,10 @@ hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
if (hints & INLINE_HINT_loop_iterations)
- result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
+ result += estimates.loops_with_known_iterations * bonus_for_one;
if (hints & INLINE_HINT_loop_stride)
- result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
+ result += estimates.loops_with_known_strides * bonus_for_one;
return result;
}
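
Returning sreal rather than int means the per-loop bonuses are no longer rounded to whole integers before they are folded into the overall time benefit. A tiny illustration of the truncation the old to_int () conversion could introduce, using double and made-up numbers as a hypothetical stand-in for GCC's sreal arithmetic:

  #include <cstdio>

  int
  main ()
  {
    /* Hypothetical values: a fractional per-loop bonus accumulated over
       three loops with known iteration counts.  */
    double bonus_for_one = 0.4;
    int loops_with_known_iterations = 3;

    /* Old behaviour: the product was converted to int before being added.  */
    int truncated = (int) (loops_with_known_iterations * bonus_for_one);
    /* New behaviour: the value stays in a real-valued type until the final
       comparison against the evaluation threshold.  */
    double exact = loops_with_known_iterations * bonus_for_one;

    printf ("truncated=%d exact=%g\n", truncated, exact);
    return 0;
  }
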
@@ -3389,9 +3394,10 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
int size_cost, bool called_without_ipa_profile)
{
gcc_assert (count_sum.ipa () == count_sum);
+ if (count_sum.quality () == AFDO)
+ count_sum = count_sum.force_nonzero ();
if (time_benefit == 0
|| !opt_for_fn (node->decl, flag_ipa_cp_clone)
- || node->optimize_for_size_p ()
/* If there is no call which was executed in profiling or where
profile is missing, we do not want to clone. */
|| (!called_without_ipa_profile && !count_sum.nonzero_p ()))
@@ -3434,7 +3440,7 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
introduced. This is likely almost always going to be true, since we
already checked that time saved is large enough to be considered
hot. */
- else if (evaluation.to_int () >= eval_threshold)
+ else if (evaluation >= (sreal)eval_threshold)
return true;
/* If all call sites have known profile, we know we do not want to clone.
If there are calls with unknown profile, try local heuristics. */
@@ -3455,7 +3461,7 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
info->node_calling_single_call ? ", single_call" : "",
evaluation.to_double (), eval_threshold);
- return evaluation.to_int () >= eval_threshold;
+ return evaluation >= eval_threshold;
}
/* Grow vectors in AVALS and fill them with information about values of
@@ -3541,8 +3547,8 @@ perform_estimation_of_a_value (cgraph_node *node,
time_benefit = 0;
else
time_benefit = (estimates.nonspecialized_time - estimates.time)
+ + hint_time_bonus (node, estimates)
+ (devirtualization_time_bonus (node, avals)
- + hint_time_bonus (node, estimates)
+ removable_params_cost + est_move_cost);
int size = estimates.size;
@@ -4526,7 +4532,7 @@ lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
&& orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
&& opt_for_fn (orig_node->decl, flag_profile_partial_training))
- remainder = remainder.guessed_local ();
+ remainder = orig_node->count.guessed_local ();
return remainder;
}
@@ -4639,7 +4645,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
const vec<cgraph_node *> &self_gen_clones)
{
profile_count redist_sum = orig_node->count.ipa ();
- if (!(redist_sum > profile_count::zero ()))
+ if (!redist_sum.nonzero_p ())
return;
if (dump_file)
@@ -4664,19 +4670,12 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
unsigned i = 0;
for (cgraph_node *n : self_gen_clones)
{
- profile_count orig_count = n->count;
profile_count new_count
= (redist_sum / self_gen_clones.length () + other_edges_count[i]);
new_count = lenient_count_portion_handling (new_count, orig_node);
- n->count = new_count;
- profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
+ n->scale_profile_to (new_count);
for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
- {
- cs->count = cs->count.apply_scale (new_count, orig_count);
- processed_edges.add (cs);
- }
- for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
- cs->count = cs->count.apply_scale (new_count, orig_count);
+ processed_edges.add (cs);
i++;
}
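
Instead of saving the old count and rescaling every outgoing edge with apply_scale by hand, the loop now calls cgraph_node::scale_profile_to, which rescales the node count and all of its call and indirect-call edges in one place (only the direct callees still need to be added to processed_edges). A rough sketch of what such a helper does, using a simplified hypothetical node/edge structure rather than GCC's cgraph:

  #include <cstdint>
  #include <vector>

  /* Hypothetical simplified stand-ins for cgraph_node and cgraph_edge.  */
  struct edge { int64_t count; };

  struct node
  {
    int64_t count;
    std::vector<edge> callees;

    /* Scale the node profile to NEW_COUNT and rescale every outgoing edge
       proportionally, roughly what the old adjust_for_ipa_scaling plus
       apply_scale loops did by hand.  */
    void
    scale_profile_to (int64_t new_count)
    {
      if (count > 0)
        for (edge &e : callees)
          e.count = e.count * new_count / count;
      count = new_count;
    }
  };

  int
  main ()
  {
    node n = { 100, { { 40 }, { 60 } } };
    n.scale_profile_to (50);  /* Edge counts become 20 and 30.  */
    return 0;
  }
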
@@ -4710,7 +4709,7 @@ update_counts_for_self_gen_clones (cgraph_node *orig_node,
it. */
for (cgraph_node *n : self_gen_clones)
{
- if (!(n->count.ipa () > profile_count::zero ()))
+ if (!n->count.ipa ().nonzero_p ())
continue;
desc_incoming_count_struct desc;
@@ -4756,7 +4755,7 @@ update_profiling_info (struct cgraph_node *orig_node,
profile_count new_sum;
profile_count remainder, orig_node_count = orig_node->count.ipa ();
- if (!(orig_node_count > profile_count::zero ()))
+ if (!orig_node_count.nonzero_p ())
return;
if (dump_file)
@@ -4774,16 +4773,14 @@ update_profiling_info (struct cgraph_node *orig_node,
bool orig_edges_processed = false;
if (new_sum > orig_node_count)
{
- /* TODO: Profile has alreay gone astray, keep what we have but lower it
- to global0 category. */
- remainder = orig_node->count.global0 ();
-
- for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
- cs->count = cs->count.global0 ();
- for (cgraph_edge *cs = orig_node->indirect_calls;
- cs;
- cs = cs->next_callee)
- cs->count = cs->count.global0 ();
+ /* Profile has already gone astray, keep what we have but lower it
+ to global0adjusted or to local if we have partial training. */
+ if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
+ orig_node->make_profile_local ();
+ if (new_sum.quality () == AFDO)
+ orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
+ else
+ orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
orig_edges_processed = true;
}
else if (stats.rec_count_sum.nonzero_p ())
@@ -4809,20 +4806,12 @@ update_profiling_info (struct cgraph_node *orig_node,
/* The NEW_NODE count and counts of all its outgoing edges
are still unmodified copies of ORIG_NODE's. Just clear
the latter and bail out. */
- profile_count zero;
if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
- zero = profile_count::zero ().guessed_local ();
+ orig_node->make_profile_local ();
+ else if (orig_nonrec_call_count.quality () == AFDO)
+ orig_node->make_profile_global0 (GUESSED_GLOBAL0_AFDO);
else
- zero = profile_count::adjusted_zero ();
- orig_node->count = zero;
- for (cgraph_edge *cs = orig_node->callees;
- cs;
- cs = cs->next_callee)
- cs->count = zero;
- for (cgraph_edge *cs = orig_node->indirect_calls;
- cs;
- cs = cs->next_callee)
- cs->count = zero;
+ orig_node->make_profile_global0 (GUESSED_GLOBAL0_ADJUSTED);
return;
}
}
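
Both bail-out paths above now route through the node-level helpers (make_profile_local and make_profile_global0), which replace the old loops that cleared each callee and indirect-call count by hand. In the second path the fallback category is picked by a simple cascade; a trivial sketch of that choice, with hypothetical names standing in for the GCC flags and enumerators:

  /* Hypothetical enum mirroring the fallback categories used above.  */
  enum profile_fallback { FALLBACK_LOCAL_GUESSED, FALLBACK_GLOBAL0_AFDO,
                          FALLBACK_GLOBAL0_ADJUSTED };

  /* Order of the checks in the second path above: partial training keeps a
     local guessed profile, AFDO counts drop to the AFDO flavour of global0,
     everything else to the adjusted flavour.  */
  profile_fallback
  pick_profile_fallback (bool partial_training, bool counts_from_afdo)
  {
    if (partial_training)
      return FALLBACK_LOCAL_GUESSED;
    if (counts_from_afdo)
      return FALLBACK_GLOBAL0_AFDO;
    return FALLBACK_GLOBAL0_ADJUSTED;
  }
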
@@ -4849,11 +4838,12 @@ update_profiling_info (struct cgraph_node *orig_node,
profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
- profile_count new_part
- = MAX(MIN (unexp.apply_scale (new_sum,
- new_sum + orig_nonrec_call_count),
- unexp.apply_scale (limit_den - 1, limit_den)),
- unexp.apply_scale (new_nonrec_calls, limit_den));
+ profile_count new_part = unexp.apply_scale (limit_den - 1, limit_den);
+ profile_count den = new_sum + orig_nonrec_call_count;
+ if (den.nonzero_p ())
+ new_part = MIN (unexp.apply_scale (new_sum, den), new_part);
+ new_part = MAX (new_part,
+ unexp.apply_scale (new_nonrec_calls, limit_den));
if (dump_file)
{
fprintf (dump_file, " Claiming ");
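
The rewritten clamp keeps the same bounds as the old nested MAX (MIN (...)) expression but only divides by new_sum + orig_nonrec_call_count when that denominator is non-zero. With hypothetical example counts, the computation looks like this:

  #include <algorithm>
  #include <cstdio>

  int
  main ()
  {
    /* Hypothetical counts: the unexpected count left to distribute, the sum
       of the new clones' counts, the original non-recursive call count and
       the numbers of calls on each side.  */
    long long unexp = 1000, new_sum = 300, orig_nonrec_call_count = 100;
    long long new_nonrec_calls = 2, orig_nonrec_calls = 3;
    long long limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);

    /* Start from the upper bound, lower it by the proportional estimate only
       when the denominator is non-zero, then enforce the lower bound.  */
    long long new_part = unexp * (limit_den - 1) / limit_den;
    long long den = new_sum + orig_nonrec_call_count;
    if (den > 0)
      new_part = std::min (unexp * new_sum / den, new_part);
    new_part = std::max (new_part, unexp * new_nonrec_calls / limit_den);

    printf ("new_part = %lld\n", new_part);  /* 750 for these numbers.  */
    return 0;
  }
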
@@ -4871,27 +4861,10 @@ update_profiling_info (struct cgraph_node *orig_node,
remainder = lenient_count_portion_handling (orig_node_count - new_sum,
orig_node);
- new_sum = orig_node_count.combine_with_ipa_count (new_sum);
- new_node->count = new_sum;
- orig_node->count = remainder;
-
- profile_count orig_new_node_count = orig_node_count;
- profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
- for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
- cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
- for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
- cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
+ new_node->scale_profile_to (new_sum);
if (!orig_edges_processed)
- {
- profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
- for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
- cs->count = cs->count.apply_scale (remainder, orig_node_count);
- for (cgraph_edge *cs = orig_node->indirect_calls;
- cs;
- cs = cs->next_callee)
- cs->count = cs->count.apply_scale (remainder, orig_node_count);
- }
+ orig_node->scale_profile_to (remainder);
if (dump_file)
{
@@ -4909,35 +4882,23 @@ update_specialized_profile (struct cgraph_node *new_node,
struct cgraph_node *orig_node,
profile_count redirected_sum)
{
- struct cgraph_edge *cs;
- profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
-
if (dump_file)
{
fprintf (dump_file, " the sum of counts of redirected edges is ");
redirected_sum.dump (dump_file);
fprintf (dump_file, "\n old ipa count of the original node is ");
- orig_node_count.dump (dump_file);
+ orig_node->count.dump (dump_file);
fprintf (dump_file, "\n");
}
- if (!(orig_node_count > profile_count::zero ()))
+ if (!orig_node->count.ipa ().nonzero_p ()
+ || !redirected_sum.nonzero_p ())
return;
- new_node_count = new_node->count;
- new_node->count += redirected_sum;
- orig_node->count
- = lenient_count_portion_handling (orig_node->count - redirected_sum,
- orig_node);
+ orig_node->scale_profile_to
+ (lenient_count_portion_handling (orig_node->count.ipa () - redirected_sum,
+ orig_node));
- for (cs = new_node->callees; cs; cs = cs->next_callee)
- cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
-
- for (cs = orig_node->callees; cs; cs = cs->next_callee)
- {
- profile_count dec = cs->count.apply_scale (redirected_sum,
- orig_node_count);
- cs->count -= dec;
- }
+ new_node->scale_profile_to (new_node->count.ipa () + redirected_sum);
if (dump_file)
{
@@ -6364,6 +6325,11 @@ ipcp_store_vr_results (void)
TYPE_PRECISION (type),
TYPE_SIGN (type)));
tmp.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred. See PR 120048.
+ if (tmp.undefined_p ())
+ tmp.set_varying (type);
ipa_vr vr (tmp);
ts->m_vr->quick_push (vr);
}
@@ -6385,6 +6351,11 @@ ipcp_store_vr_results (void)
TYPE_PRECISION (type),
TYPE_SIGN (type)));
tmp.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred. See PR 120048.
+ if (tmp.undefined_p ())
+ tmp.set_varying (type);
ipa_vr vr (tmp);
ts->m_vr->quick_push (vr);
}
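
The added guard handles the case where folding a deferred bitmask into the range leaves it empty: storing an UNDEFINED range in the IPA summary would be wrong, so the range is widened back to VARYING (see PR 120048). A minimal illustration of the same guard pattern on a hypothetical toy interval type, not the value_range API:

  #include <algorithm>
  #include <cstdio>

  /* Hypothetical toy interval standing in for a value_range over a type
     whose full span is [type_min, type_max].  */
  struct interval
  {
    long lo, hi;
    bool undefined_p () const { return lo > hi; }
  };

  int
  main ()
  {
    const long type_min = 0, type_max = 255;

    /* A range of [4, 7] intersected with values forced into [16, 31] by a
       (hypothetical) bitmask becomes empty ...  */
    interval tmp = { std::max (4L, 16L), std::min (7L, 31L) };

    /* ... so fall back to the full (VARYING) span instead of storing an
       UNDEFINED range, as the guard above does.  */
    if (tmp.undefined_p ())
      tmp = { type_min, type_max };

    printf ("[%ld, %ld]\n", tmp.lo, tmp.hi);
    return 0;
  }
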