aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2020-10-21 20:00:22 +0200
committerJan Hubicka <jh@suse.cz>2020-10-21 20:00:22 +0200
commitcaaa218f912ccf932fdb79243ded68bb462bbe63 (patch)
tree759306f33556c3c19d797f001e97e016717d3296 /gcc
parent6bd2f2d080c52b73edac7dcb9fa9dcb58e6170bb (diff)
downloadgcc-caaa218f912ccf932fdb79243ded68bb462bbe63.zip
gcc-caaa218f912ccf932fdb79243ded68bb462bbe63.tar.gz
gcc-caaa218f912ccf932fdb79243ded68bb462bbe63.tar.bz2
Inline functions with builtin_constant_p more aggressively.
This patch implements a heuristic that increases inline limits (by the hints mechanism) for inline functions that use builtin_constant_p on a parameter. Those are very likely intended to be always inlined and to simplify after inlining. The PR is about a function that we used to inline with --param inline-insns-single=200, but with the new default of 70 for -O2 we no longer do so. Hints are currently configured to bump the bound up twice, so we get a limit of 140, which is still not enough to inline the particular testcase, but it should help in general. I can implement a stronger bump if that seems useful (maybe it is). The example is a bit operation written as a decision chain with 64 conditions. This blows up the limit on the number of conditions we track per function (which is 30) and thus the size/time estimates are not working that well. gcc/ChangeLog: PR ipa/97445 * ipa-fnsummary.c (ipa_dump_hints): Add INLINE_HINT_builtin_constant_p. (ipa_fn_summary::~ipa_fn_summary): Free builtin_constant_p_parms. (ipa_fn_summary_t::duplicate): Duplicate builtin_constant_p_parms. (ipa_dump_fn_summary): Dump builtin_constant_p_parms. (add_builtin_constant_p_parm): New function. (set_cond_stmt_execution_predicate): Update builtin_constant_p_parms. (ipa_call_context::estimate_size_and_time): Set INLINE_HINT_builtin_constant_p. (ipa_merge_fn_summary_after_inlining): Merge builtin_constant_p_parms. (inline_read_section): Read builtin_constant_p_parms. (ipa_fn_summary_write): Write builtin_constant_p_parms. * ipa-fnsummary.h (enum ipa_hints_vals): Add INLINE_HINT_builtin_constant_p. * ipa-inline.c (want_inline_small_function_p): Use INLINE_HINT_builtin_constant_p. (edge_badness): Use INLINE_HINT_builtin_constant_p. gcc/testsuite/ChangeLog: PR ipa/97445 * gcc.dg/ipa/inlinehint-5.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ipa-fnsummary.c69
-rw-r--r--gcc/ipa-fnsummary.h12
-rw-r--r--gcc/ipa-inline.c5
-rw-r--r--gcc/testsuite/gcc.dg/ipa/inlinehint-5.c36
4 files changed, 117 insertions, 5 deletions
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 9e3eda4..f680e42 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -141,6 +141,11 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
hints &= ~INLINE_HINT_known_hot;
fprintf (f, " known_hot");
}
+ if (hints & INLINE_HINT_builtin_constant_p)
+ {
+ hints &= ~INLINE_HINT_builtin_constant_p;
+ fprintf (f, " builtin_constant_p");
+ }
gcc_assert (!hints);
}
@@ -751,6 +756,7 @@ ipa_fn_summary::~ipa_fn_summary ()
vec_free (call_size_time_table);
vec_free (loop_iterations);
vec_free (loop_strides);
+ builtin_constant_p_parms.release ();
}
void
@@ -899,7 +905,8 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
new_predicate = es->predicate->remap_after_duplication
(possible_truths);
if (new_predicate == false && *es->predicate != false)
- optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
+ optimized_out_size
+ += es->call_stmt_size * ipa_fn_summary::size_scale;
edge_set_predicate (edge, &new_predicate);
}
info->loop_iterations
@@ -908,6 +915,15 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
info->loop_strides
= remap_freqcounting_preds_after_dup (info->loop_strides,
possible_truths);
+ if (info->builtin_constant_p_parms.length())
+ {
+ vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
+ int ip;
+ info->builtin_constant_p_parms = vNULL;
+ for (i = 0; parms.iterate (i, &ip); i++)
+ if (!avals.m_known_vals[ip])
+ info->builtin_constant_p_parms.safe_push (ip);
+ }
/* If inliner or someone after inliner will ever start producing
non-trivial clones, we will get trouble with lack of information
@@ -921,6 +937,9 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
info->loop_iterations = vec_safe_copy (info->loop_iterations);
info->loop_strides = vec_safe_copy (info->loop_strides);
+ info->builtin_constant_p_parms
+ = info->builtin_constant_p_parms.copy ();
+
ipa_freqcounting_predicate *f;
for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
{
@@ -1066,6 +1085,13 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
fprintf (f, " inlinable");
if (s->fp_expressions)
fprintf (f, " fp_expression");
+ if (s->builtin_constant_p_parms.length ())
+ {
+ fprintf (f, " builtin_constant_p_parms");
+ for (unsigned int i = 0;
+ i < s->builtin_constant_p_parms.length (); i++)
+ fprintf (f, " %i", s->builtin_constant_p_parms[i]);
+ }
fprintf (f, "\n global time: %f\n", s->time.to_double ());
fprintf (f, " self size: %i\n", ss->self_size);
fprintf (f, " global size: %i\n", ss->size);
@@ -1517,6 +1543,21 @@ fail:
return false;
}
+/* Record in SUMMARY that parameter index PARM is tested by builtin_constant_p. */
+
+static void
+add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
+{
+ int ip;
+
+ /* Avoid duplicates: return without pushing if PARM is already recorded. */
+ for (unsigned int i = 0;
+ summary->builtin_constant_p_parms.iterate (i, &ip); i++)
+ if (ip == parm)
+ return;
+ summary->builtin_constant_p_parms.safe_push (parm);
+}
+
/* If BB ends by a conditional we can turn into predicates, attach corresponding
predicates to the CFG edges. */
@@ -1598,6 +1639,8 @@ set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
op2 = gimple_call_arg (set_stmt, 0);
if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
return;
+ if (!aggpos.by_ref)
+ add_builtin_constant_p_parm (summary, index);
FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
{
predicate p = add_condition (summary, params_summary, index,
@@ -3717,6 +3760,9 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
hints |= INLINE_HINT_in_scc;
if (DECL_DECLARED_INLINE_P (m_node->decl))
hints |= INLINE_HINT_declared_inline;
+ if (info->builtin_constant_p_parms.length ()
+ && DECL_DECLARED_INLINE_P (m_node->decl))
+ hints |= INLINE_HINT_builtin_constant_p;
ipa_freqcounting_predicate *fcp;
for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
@@ -4044,8 +4090,13 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
operand_map[i] = map;
gcc_assert (map < ipa_get_param_count (params_summary));
}
+
+ int ip;
+ for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
+ if (ip < count && operand_map[ip] >= 0)
+ add_builtin_constant_p_parm (info, operand_map[ip]);
}
- sreal freq = edge->sreal_frequency ();
+ sreal freq = edge->sreal_frequency ();
for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
{
predicate p;
@@ -4443,6 +4494,15 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data,
vec_safe_push (info->loop_strides, fcp);
}
}
+ count2 = streamer_read_uhwi (&ib);
+ if (info && count2)
+ info->builtin_constant_p_parms.reserve_exact (count2);
+ for (j = 0; j < count2; j++)
+ {
+ int parm = streamer_read_uhwi (&ib);
+ if (info)
+ info->builtin_constant_p_parms.quick_push (parm);
+ }
for (e = node->callees; e; e = e->next_callee)
read_ipa_call_summary (&ib, e, info != NULL);
for (e = node->indirect_calls; e; e = e->next_callee)
@@ -4618,6 +4678,11 @@ ipa_fn_summary_write (void)
fcp->predicate->stream_out (ob);
fcp->freq.stream_out (ob);
}
+ streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
+ int ip;
+ for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
+ i++)
+ streamer_write_uhwi (ob, ip);
for (edge = cnode->callees; edge; edge = edge->next_callee)
write_ipa_call_summary (ob, edge);
for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
index f4dd5b8..3ecedb5 100644
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -49,7 +49,10 @@ enum ipa_hints_vals {
Set by simple_edge_hints in ipa-inline-analysis.c. */
INLINE_HINT_cross_module = 64,
/* We know that the callee is hot by profile. */
- INLINE_HINT_known_hot = 128
+ INLINE_HINT_known_hot = 128,
+ /* There is builtin_constant_p dependent on parameter which is usually
+ a strong hint to inline. */
+ INLINE_HINT_builtin_constant_p = 256
};
typedef int ipa_hints;
@@ -123,10 +126,12 @@ public:
ipa_fn_summary ()
: min_size (0),
inlinable (false), single_caller (false),
- fp_expressions (false), estimated_stack_size (false),
+ fp_expressions (false),
+ estimated_stack_size (false),
time (0), conds (NULL),
size_time_table (NULL), call_size_time_table (NULL),
loop_iterations (NULL), loop_strides (NULL),
+ builtin_constant_p_parms (vNULL),
growth (0), scc_no (0)
{
}
@@ -140,6 +145,7 @@ public:
time (s.time), conds (s.conds), size_time_table (s.size_time_table),
call_size_time_table (NULL),
loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
+ builtin_constant_p_parms (s.builtin_constant_p_parms),
growth (s.growth), scc_no (s.scc_no)
{}
@@ -182,6 +188,8 @@ public:
vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
/* Predicates on when some loops in the function can have known strides. */
vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
+ /* Parameters tested by builtin_constant_p. */
+ vec<int, va_heap, vl_ptr> GTY((skip)) builtin_constant_p_parms;
/* Estimated growth for inlining all copies of the function before start
of small functions inlining.
This value will get out of date as the callers are duplicated, but
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 225a014..bc846ea 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -878,7 +878,8 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
bool apply_hints = (hints & (INLINE_HINT_indirect_call
| INLINE_HINT_known_hot
| INLINE_HINT_loop_iterations
- | INLINE_HINT_loop_stride));
+ | INLINE_HINT_loop_stride
+ | INLINE_HINT_builtin_constant_p));
if (growth <= opt_for_fn (to->decl,
param_max_inline_insns_size))
@@ -1317,6 +1318,8 @@ edge_badness (struct cgraph_edge *edge, bool dump)
| INLINE_HINT_loop_stride))
|| callee_info->growth <= 0)
badness = badness.shift (badness > 0 ? -2 : 2);
+ if (hints & INLINE_HINT_builtin_constant_p)
+ badness = badness.shift (badness > 0 ? -4 : 4);
if (hints & (INLINE_HINT_same_scc))
badness = badness.shift (badness > 0 ? 3 : -3);
else if (hints & (INLINE_HINT_in_scc))
diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
new file mode 100644
index 0000000..218f805
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-5.c
@@ -0,0 +1,36 @@
+/* { dg-options "-O2 -fdump-ipa-inline-details -fno-early-inlining " } */
+/* { dg-add-options bind_pic_locally } */
+int j,k,l;
+int test3(int);
+int test4(int);
+
+static inline int
+test2(int i)
+{
+ if (__builtin_constant_p (i))
+ {
+ switch (i)
+ {
+ case 1: return j;
+ case 2: return k;
+ case 3: return l;
+ }
+ }
+ else return test3(i)+test4(i);
+}
+
+static inline int
+test (int i)
+{
+ return test2(i) + test2(i+1) + test3 (i) + test3(i) + test3(i) + test3 (i);
+}
+
+int
+run (int i)
+{
+ return test (i) + test (i);
+}
+/* The test should work by first inlining test2 into test and then test into run.
+ Both are called twice, so 4 hints (the second makes sure that we propagate
+ to callers). */
+/* { dg-final { scan-ipa-dump-times "hints: declared_inline builtin_constant_p" 4 "inline" } } */