aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2020-01-16 23:55:44 +0100
committerJan Hubicka <jh@suse.cz>2020-01-16 23:55:44 +0100
commitf5b25e15165adde1356af42e9066ab75c5b37a19 (patch)
treec42ddcfe8702d66b51bde341c1b2d6fc27ec67c1
parent801f5b96775288e55193a66a746caab1ddd56f4a (diff)
downloadgcc-f5b25e15165adde1356af42e9066ab75c5b37a19.zip
gcc-f5b25e15165adde1356af42e9066ab75c5b37a19.tar.gz
gcc-f5b25e15165adde1356af42e9066ab75c5b37a19.tar.bz2
Make profile estimation more precise
While analyzing code size regression in SPEC2k GCC binary I noticed that we perform some inline decisions because we think that number of executions are very high. In particular there was inline decision inlining gen_rtx_fmt_ee to find_reloads believing that it is called 4 billion times. This turned out to be cummulation of roundoff errors in propagate_freq which was bit mechanically updated from original sreals to C++ sreals and later to new probabilities. This led us to estimate that a loopback edge is reached with probability 2.3 which was capped to 1-1/10000 and since this happened in nested loop it quickly escalated to large values. Originally capping to REG_BR_PROB_BASE avoided such problems but now we have much higher range. This patch avoids going from probabilites to REG_BR_PROB_BASE so precision is kept. In addition it makes the propagation to not estimate more than param-max-predicted-loop-iterations. The first change makes the cap to not be triggered on the gcc build, but it is still better to be safe than sorry. * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of dump. * params.opt: (max-predicted-iterations): Set bounds. * predict.c (real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove. (propagate_freq): Add max_cyclic_prob parameter; cap cyclic probabilities; do not truncate to reg_br_prob_bases. (estimate_loops_at_level): Pass max_cyclic_prob. (estimate_loops): Compute max_cyclic_prob. (estimate_bb_frequencies): Do not initialize real_*; update calculation of back edge prob. * profile-count.c (profile_probability::to_sreal): New. * profile-count.h (class sreal): Move up in file. (profile_probability::to_sreal): Declare.
-rw-r--r--gcc/ChangeLog17
-rw-r--r--gcc/ipa-fnsummary.c2
-rw-r--r--gcc/params.opt2
-rw-r--r--gcc/predict.c101
-rw-r--r--gcc/profile-count.c9
-rw-r--r--gcc/profile-count.h5
6 files changed, 76 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b98ead8..907d0f7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2020-01-16 Jan Hubicka <hubicka@ucw.cz>
+
+ * ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
+ dump.
+ * params.opt: (max-predicted-iterations): Set bounds.
+ * predict.c (real_almost_one, real_br_prob_base,
+ real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
+ (propagate_freq): Add max_cyclic_prob parameter; cap cyclic
+ probabilities; do not truncate to reg_br_prob_bases.
+ (estimate_loops_at_level): Pass max_cyclic_prob.
+ (estimate_loops): Compute max_cyclic_prob.
+ (estimate_bb_frequencies): Do not initialize real_*; update calculation
+ of back edge prob.
+ * profile-count.c (profile_probability::to_sreal): New.
+ * profile-count.h (class sreal): Move up in file.
+ (profile_probability::to_sreal): Declare.
+
2020-01-16 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* config/arm/arm.c
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 4e1c587..dbd53f1 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
gcc_assert (*size == old_size);
if (time && (*time - old_time > 1 || *time - old_time < -1)
&& dump_file)
- fprintf (dump_file, "Time mismatch in call summary %f!=%f",
+ fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
old_time.to_double (),
time->to_double ());
}
diff --git a/gcc/params.opt b/gcc/params.opt
index 31cc200..f02c769 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)
Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant.
-param=max-predicted-iterations=
-Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization
+Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization
The maximum number of loop iterations we predict statically.
-param=max-reload-search-insns=
diff --git a/gcc/predict.c b/gcc/predict.c
index 02c5fe0..c3aed9e 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -76,10 +76,6 @@ enum predictor_reason
static const char *reason_messages[] = {"", " (ignored)",
" (single edge duplicate)", " (edge pair duplicate)"};
-/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
- 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
-static sreal real_almost_one, real_br_prob_base,
- real_inv_br_prob_base, real_one_half, real_bb_freq_max;
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
@@ -3266,7 +3262,8 @@ public:
TOVISIT, starting in HEAD. */
static void
-propagate_freq (basic_block head, bitmap tovisit)
+propagate_freq (basic_block head, bitmap tovisit,
+ sreal max_cyclic_prob)
{
basic_block bb;
basic_block last;
@@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
FOR_EACH_EDGE (e, ei, bb->preds)
if (EDGE_INFO (e)->back_edge)
- {
- cyclic_probability += EDGE_INFO (e)->back_edge_prob;
- }
+ cyclic_probability += EDGE_INFO (e)->back_edge_prob;
else if (!(e->flags & EDGE_DFS_BACK))
{
- /* frequency += (e->probability
- * BLOCK_INFO (e->src)->frequency /
- REG_BR_PROB_BASE); */
-
/* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ?
- e->probability.to_reg_br_prob_base () : 0;
- tmp *= BLOCK_INFO (e->src)->frequency;
- tmp *= real_inv_br_prob_base;
- frequency += tmp;
+ e->probability.to_sreal () : 0;
+ frequency += tmp * BLOCK_INFO (e->src)->frequency;
}
if (cyclic_probability == 0)
@@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
}
else
{
- if (cyclic_probability > real_almost_one)
- cyclic_probability = real_almost_one;
-
- /* BLOCK_INFO (bb)->frequency = frequency
- / (1 - cyclic_probability) */
-
- cyclic_probability = sreal (1) - cyclic_probability;
- BLOCK_INFO (bb)->frequency = frequency / cyclic_probability;
+ if (cyclic_probability > max_cyclic_prob)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "cyclic probability of bb %i is %f (capped to %f)"
+ "; turning freq %f",
+ bb->index, cyclic_probability.to_double (),
+ max_cyclic_prob.to_double (),
+ frequency.to_double ());
+
+ cyclic_probability = max_cyclic_prob;
+ }
+ else if (dump_file)
+ fprintf (dump_file,
+ "cyclic probability of bb %i is %f; turning freq %f",
+ bb->index, cyclic_probability.to_double (),
+ frequency.to_double ());
+
+ BLOCK_INFO (bb)->frequency = frequency
+ / (sreal (1) - cyclic_probability);
+ if (dump_file)
+ fprintf (dump_file, " to %f\n",
+ BLOCK_INFO (bb)->frequency.to_double ());
}
}
@@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
e = find_edge (bb, head);
if (e)
{
- /* EDGE_INFO (e)->back_edge_prob
- = ((e->probability * BLOCK_INFO (bb)->frequency)
- / REG_BR_PROB_BASE); */
-
/* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ?
- e->probability.to_reg_br_prob_base () : 0;
- tmp *= BLOCK_INFO (bb)->frequency;
- EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
+ e->probability.to_sreal () : 0;
+ EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
}
/* Propagate to successor blocks. */
@@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit)
/* Estimate frequencies in loops at same nest level. */
static void
-estimate_loops_at_level (class loop *first_loop)
+estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
{
class loop *loop;
@@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
unsigned i;
auto_bitmap tovisit;
- estimate_loops_at_level (loop->inner);
+ estimate_loops_at_level (loop->inner, max_cyclic_prob);
/* Find current loop back edge and mark it. */
e = loop_latch_edge (loop);
@@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
for (i = 0; i < loop->num_nodes; i++)
bitmap_set_bit (tovisit, bbs[i]->index);
free (bbs);
- propagate_freq (loop->header, tovisit);
+ propagate_freq (loop->header, tovisit, max_cyclic_prob);
}
}
@@ -3428,17 +3427,18 @@ estimate_loops (void)
{
auto_bitmap tovisit;
basic_block bb;
+ sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations;
/* Start by estimating the frequencies in the loops. */
if (number_of_loops (cfun) > 1)
- estimate_loops_at_level (current_loops->tree_root->inner);
+ estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
/* Now propagate the frequencies through all the blocks. */
FOR_ALL_BB_FN (bb, cfun)
{
bitmap_set_bit (tovisit, bb->index);
}
- propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
+ propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
}
/* Drop the profile for NODE to guessed, and update its frequency based on
@@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
if (force || profile_status_for_fn (cfun) != PROFILE_READ
|| !update_max_bb_count ())
{
- static int real_values_initialized = 0;
-
- if (!real_values_initialized)
- {
- real_values_initialized = 1;
- real_br_prob_base = REG_BR_PROB_BASE;
- /* Scaling frequencies up to maximal profile count may result in
- frequent overflows especially when inlining loops.
- Small scalling results in unnecesary precision loss. Stay in
- the half of the (exponential) range. */
- real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
- real_one_half = sreal (1, -1);
- real_inv_br_prob_base = sreal (1) / real_br_prob_base;
- real_almost_one = sreal (1) - real_inv_br_prob_base;
- }
mark_dfs_back_edges ();
@@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
this is fixed, drop this. */
if (e->probability.initialized_p ())
EDGE_INFO (e)->back_edge_prob
- = e->probability.to_reg_br_prob_base ();
+ = e->probability.to_sreal ();
else
- EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2;
- EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base;
+ /* back_edge_prob = 0.5 */
+ EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
}
}
@@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
if (freq_max < BLOCK_INFO (bb)->frequency)
freq_max = BLOCK_INFO (bb)->frequency;
- freq_max = real_bb_freq_max / freq_max;
+ /* Scaling frequencies up to maximal profile count may result in
+ frequent overflows especially when inlining loops.
+ Small scalling results in unnecesary precision loss. Stay in
+ the half of the (exponential) range. */
+ freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
if (freq_max < 16)
freq_max = 16;
profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{
- sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half;
+ sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
profile_count count = profile_count::from_gcov_type (tmp.to_int ());
/* If we have profile feedback in which this function was never
diff --git a/gcc/profile-count.c b/gcc/profile-count.c
index d463ddd..0c79229 100644
--- a/gcc/profile-count.c
+++ b/gcc/profile-count.c
@@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1,
else
return *this * even () + other * even ();
}
+
+/* Return probability as sreal in range [0, 1]. */
+
+sreal
+profile_probability::to_sreal () const
+{
+ gcc_checking_assert (initialized_p ());
+ return ((sreal)m_val) >> (n_bits - 2);
+}
diff --git a/gcc/profile-count.h b/gcc/profile-count.h
index 987d3ce..09217a8 100644
--- a/gcc/profile-count.h
+++ b/gcc/profile-count.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
struct function;
struct profile_count;
+class sreal;
/* Quality of the profile count. Because gengtype does not support enums
inside of classes, this is in global namespace. */
@@ -614,6 +615,8 @@ public:
profile_probability other,
profile_count count2) const;
+ /* Return probability as sreal. */
+ sreal to_sreal () const;
/* LTO streaming support. */
static profile_probability stream_in (class lto_input_block *);
void stream_out (struct output_block *);
@@ -674,8 +677,6 @@ public:
*/
-class sreal;
-
struct GTY(()) profile_count
{
public: