From b58b11577a908528c37111404a161f744ca3a694 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 12 Jul 2003 03:07:40 +0200 Subject: cgraph.c (cgraph_max_uid): New global variable. * cgraph.c (cgraph_max_uid): New global variable. (cgraph_node): Set uid field. (create_edge): Keep inline flags consistent. (dump_cgraph): Dump more info. * cgraph.h (struct cgraph_local_info): Remove inline_many and can_inline_once; add inlinable, disgread_inline_limits, and self_insn (struct cgraph_global_info): Add insns, calls, cloned_times, will_be_output. (struct cgraph_node): Add uid. (struct cgraph_edge): Add inline_call. (cgraph_max_uid, cgraph_inline_p): Declare. * cgraph.c: Include params.h and fibheap.h (cgraph_mark_functions_to_inline_once): Kill. (INSNS_PER_CALL): New constant. (ncalls_inlined, nfunctions_inlined, initial_insns, overall_insns): New static variables. (cgraph_finalize_function): Do not analyze inlining. (cgraph_finalize_compilation_unit): Set inlining attributes. (cgraph_mark_functions_to_output): More consistency checks. (cgraph_optimize_function): Set current_function_decl to NULL. (cgraph_expand_function): Use new inline flags. (cgraph_postorder): Expand from cgraph_expand_functions. (INLINED_TIMES, SET_INLINED_TIMES): New macros. (cgraph_inlined_into, cgraph_inlined_callees, cgraph_estimate_size_after_inlining, cgraph_estimate_growth, cgraph_mark_inline, cgraph_check_inline_limits, cgraph_default_inline_p, cgraph_decide_inling_of_small_functions, cgraph_decide_inlining, cgraph_inline_p): New functions. * params.def (PARAM_LARGE_FUNCTION_INSNS, PARAM_LARGE_FUNCTION_GROWTH, PARAM_INLINE_UNIT_GROWTH): New parameters. * tree-inline.c (struct inline_data): New field current_decl. (expand_call_inline): Avoid forward declarations; use inlinable_function_p. (optimize_inline_calls): Set id.current_decl. Co-Authored-By: Gerald Pfeifer From-SVN: r69262 --- gcc/ChangeLog | 38 ++ gcc/cgraph.c | 42 +- gcc/cgraph.h | 31 +- gcc/cgraphunit.c | 844 +++++++++++++++++++++++++++++----- gcc/doc/invoke.texi | 20 + gcc/params.def | 13 + gcc/testsuite/g++.dg/warn/Winline-1.C | 2 +- gcc/tree-inline.c | 19 +- 8 files changed, 884 insertions(+), 125 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 899ba10..bdb8c28 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +Sat Jul 12 03:06:01 CEST 2003 Jan Hubicka + Gerald Pfeifer + + * cgraph.c (cgraph_max_uid): New global variable. + (cgraph_node): Set uid field. + (create_edge): Keep inline flags consistent. + (dump_cgraph): Dump more info. + * cgraph.h (struct cgraph_local_info): Remove inline_many and + can_inline_once; add inlinable, disgread_inline_limits, and self_insn + (struct cgraph_global_info): Add insns, calls, cloned_times, + will_be_output. + (struct cgraph_node): Add uid. + (struct cgraph_edge): Add inline_call. + (cgraph_max_uid, cgraph_inline_p): Declare. + * cgraph.c: Include params.h and fibheap.h + (cgraph_mark_functions_to_inline_once): Kill. + (INSNS_PER_CALL): New constant. + (ncalls_inlined, nfunctions_inlined, initial_insns, overall_insns): New + static variables. + (cgraph_finalize_function): Do not analyze inlining. + (cgraph_finalize_compilation_unit): Set inlining attributes. + (cgraph_mark_functions_to_output): More consistency checks. + (cgraph_optimize_function): Set current_function_decl to NULL. + (cgraph_expand_function): Use new inline flags. + (cgraph_postorder): Expand from cgraph_expand_functions. + (INLINED_TIMES, SET_INLINED_TIMES): New macros. + (cgraph_inlined_into, cgraph_inlined_callees, + cgraph_estimate_size_after_inlining, cgraph_estimate_growth, + cgraph_mark_inline, cgraph_check_inline_limits, + cgraph_default_inline_p, cgraph_decide_inling_of_small_functions, + cgraph_decide_inlining, cgraph_inline_p): New functions. + * params.def (PARAM_LARGE_FUNCTION_INSNS, PARAM_LARGE_FUNCTION_GROWTH, + PARAM_INLINE_UNIT_GROWTH): New parameters. + * tree-inline.c (struct inline_data): New field current_decl. + (expand_call_inline): Avoid forward declarations; use + inlinable_function_p. + (optimize_inline_calls): Set id.current_decl. + 2003-07-11 Andrew Pinski * configure.in: Remove wrongly added definition of diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 7bc065b..c191381 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -48,6 +48,9 @@ struct cgraph_node *cgraph_nodes_queue; /* Number of nodes in existence. */ int cgraph_n_nodes; +/* Maximal uid used in cgraph nodes. */ +int cgraph_max_uid; + /* Set when whole unit has been analyzed so we can access global info. */ bool cgraph_global_info_ready = false; @@ -114,6 +117,7 @@ cgraph_node (decl) node = ggc_alloc_cleared (sizeof (*node)); node->decl = decl; node->next = cgraph_nodes; + node->uid = cgraph_max_uid++; if (cgraph_nodes) cgraph_nodes->previous = node; node->previous = NULL; @@ -157,6 +161,19 @@ create_edge (caller, callee) struct cgraph_node *caller, *callee; { struct cgraph_edge *edge = ggc_alloc (sizeof (struct cgraph_edge)); + struct cgraph_edge *edge2; + + edge->inline_call = false; + /* At the moment we don't associate calls with specific CALL_EXPRs + as we probably ought to, so we must preserve inline_call flags to + be the same in all copies of the same edge. */ + if (cgraph_global_info_ready) + for (edge2 = caller->callees; edge2; edge2 = edge2->next_caller) + if (edge2->callee == callee) + { + edge->inline_call = edge2->inline_call; + break; + } edge->caller = caller; edge->callee = callee; @@ -337,6 +354,8 @@ dump_cgraph (f) { struct cgraph_edge *edge; fprintf (f, "%s", cgraph_node_name (node)); + if (node->local.self_insns) + fprintf (f, " %i insns", node->local.self_insns); if (node->origin) fprintf (f, " nested in: %s", cgraph_node_name (node->origin)); if (node->needed) @@ -346,13 +365,32 @@ dump_cgraph (f) if (DECL_SAVED_TREE (node->decl)) fprintf (f, " tree"); + if (node->local.disgread_inline_limits) + fprintf (f, " always_inline"); + else if (node->local.inlinable) + fprintf (f, " inlinable"); + if (node->global.insns && node->global.insns != node->local.self_insns) + fprintf (f, " %i insns after inlining", node->global.insns); + if (node->global.cloned_times > 1) + fprintf (f, " cloned %ix", node->global.cloned_times); + if (node->global.calls) + fprintf (f, " %i calls", node->global.calls); + fprintf (f, "\n called by :"); for (edge = node->callers; edge; edge = edge->next_caller) - fprintf (f, "%s ", cgraph_node_name (edge->caller)); + { + fprintf (f, "%s ", cgraph_node_name (edge->caller)); + if (edge->inline_call) + fprintf(f, "(inlined) "); + } fprintf (f, "\n calls: "); for (edge = node->callees; edge; edge = edge->next_callee) - fprintf (f, "%s ", cgraph_node_name (edge->callee)); + { + fprintf (f, "%s ", cgraph_node_name (edge->callee)); + if (edge->inline_call) + fprintf(f, "(inlined) "); + } fprintf (f, "\n"); } } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 7650326..0a8a2d0 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -30,13 +30,15 @@ struct cgraph_local_info GTY(()) /* Set when function function is visible in current compilation unit only and it's address is never taken. */ bool local; - /* Set when function is small enough to be inlinable many times. */ - bool inline_many; - /* Set when function can be inlined once (false only for functions calling - alloca, using varargs and so on). */ - bool can_inline_once; /* Set once it has been finalized so we consider it to be output. */ bool finalized; + + /* False when there is something making inlining impossible (such as va_arg) */ + bool inlinable; + /* True when function should be inlined independently on it's size. */ + bool disgread_inline_limits; + /* Size of the function before inlining. */ + int self_insns; }; /* Information about the function that needs to be computed globally @@ -46,6 +48,20 @@ struct cgraph_global_info GTY(()) { /* Set when the function will be inlined exactly once. */ bool inline_once; + + /* Estimated size of the function after inlining. */ + int insns; + + /* Number of direct calls not inlined into the function body. */ + int calls; + + /* Number of times given function will be cloned during output. */ + int cloned_times; + + /* Set to true for all reachable functions before inlining is decided. + Once we inline all calls to the function and the function is local, + it is set to false. */ + bool will_be_output; }; /* Information about the function that is propagated by the RTL backend. @@ -77,6 +93,8 @@ struct cgraph_node GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) struct cgraph_node *next_nested; /* Pointer to the next function in cgraph_nodes_queue. */ struct cgraph_node *next_needed; + /* Unique id of the node. */ + int uid; PTR GTY ((skip (""))) aux; /* Set when function must be output - it is externally visible @@ -102,6 +120,7 @@ struct cgraph_edge GTY(()) struct cgraph_node *callee; struct cgraph_edge *next_caller; struct cgraph_edge *next_callee; + bool inline_call; }; /* The cgraph_varpool data strutcture. @@ -124,6 +143,7 @@ struct cgraph_varpool_node GTY(()) extern GTY(()) struct cgraph_node *cgraph_nodes; extern GTY(()) int cgraph_n_nodes; +extern GTY(()) int cgraph_max_uid; extern bool cgraph_global_info_ready; extern GTY(()) struct cgraph_node *cgraph_nodes_queue; extern FILE *cgraph_dump_file; @@ -157,5 +177,6 @@ void cgraph_finalize_compilation_unit PARAMS ((void)); void cgraph_create_edges PARAMS ((tree, tree)); void cgraph_optimize PARAMS ((void)); void cgraph_mark_needed_node PARAMS ((struct cgraph_node *, int)); +bool cgraph_inline_p PARAMS ((tree, tree)); #endif /* GCC_CGRAPH_H */ diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 0a3ae48..df188d5 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1,4 +1,4 @@ -/* Callgraph handling code. +/* Callgraph based intraprocedural optimizations. Copyright (C) 2003 Free Software Foundation, Inc. Contributed by Jan Hubicka @@ -35,15 +35,25 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cgraph.h" #include "diagnostic.h" #include "timevar.h" +#include "params.h" +#include "fibheap.h" +#include "c-common.h" + +#define INSNS_PER_CALL 10 static void cgraph_expand_functions PARAMS ((void)); static void cgraph_mark_functions_to_output PARAMS ((void)); static void cgraph_expand_function PARAMS ((struct cgraph_node *)); static tree record_call_1 PARAMS ((tree *, int *, void *)); static void cgraph_mark_local_functions PARAMS ((void)); -static void cgraph_mark_functions_to_inline_once PARAMS ((void)); static void cgraph_optimize_function PARAMS ((struct cgraph_node *)); +/* Statistics we collect about inlining algorithm. */ +static int ncalls_inlined; +static int nfunctions_inlined; +static int initial_insns; +static int overall_insns; + /* Analyze function once it is parsed. Set up the local information available - create cgraph edges for function calls via BODY. */ @@ -141,6 +151,8 @@ cgraph_finalize_compilation_unit () struct cgraph_edge *edge; cgraph_varpool_assemble_pending_decls (); + if (!quiet_flag) + fprintf (stderr, "\nAnalyzing compilation unit\n"); timevar_push (TV_CGRAPH); if (cgraph_dump_file) @@ -170,14 +182,6 @@ cgraph_finalize_compilation_unit () (*lang_hooks.callgraph.lower_function) (decl); current_function_decl = node->decl; - if (!node->needed && !DECL_COMDAT (node->decl)) - node->local.can_inline_once = tree_inlinable_function_p (decl, 1); - else - node->local.can_inline_once = 0; - if (flag_inline_trees) - node->local.inline_many = tree_inlinable_function_p (decl, 0); - else - node->local.inline_many = 0; /* At the moment frontend automatically emits all nested functions. */ if (node->nested) @@ -189,9 +193,18 @@ cgraph_finalize_compilation_unit () cgraph_mark_needed_node (node2, 0); } - /* First kill forward declaration so reverse inling works properly. */ + /* First kill forward declaration so reverse inlining works properly. */ cgraph_create_edges (decl, DECL_SAVED_TREE (decl)); + node->local.inlinable = tree_inlinable_function_p (decl, 1); + if (!DECL_ESTIMATED_INSNS (decl)) + DECL_ESTIMATED_INSNS (decl) + = (*lang_hooks.tree_inlining.estimate_num_insns) (decl); + node->local.self_insns = DECL_ESTIMATED_INSNS (decl); + if (node->local.inlinable) + node->local.disgread_inline_limits + = (*lang_hooks.tree_inlining.disregard_inline_limits) (decl); + for (edge = node->callees; edge; edge = edge->next_callee) { if (!edge->callee->reachable) @@ -241,16 +254,20 @@ cgraph_mark_functions_to_output () for (node = cgraph_nodes; node; node = node->next) { tree decl = node->decl; + struct cgraph_edge *e; + if (node->output) + abort (); + + for (e = node->callers; e; e = e->next_caller) + if (!e->inline_call) + break; /* We need to output all local functions that are used and not always inlined, as well as those that are reachable from outside the current compilation unit. */ if (DECL_SAVED_TREE (decl) && (node->needed - || (!node->local.inline_many && !node->global.inline_once - && node->reachable) - || (DECL_ASSEMBLER_NAME_SET_P (decl) - && TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))) + || (e && node->reachable)) && !TREE_ASM_WRITTEN (decl) && !node->origin && !DECL_EXTERNAL (decl)) node->output = 1; @@ -266,6 +283,8 @@ cgraph_optimize_function (node) tree decl = node->decl; timevar_push (TV_INTEGRATION); + /* optimize_inline_calls avoids inlining of current_function_decl. */ + current_function_decl = 0; if (flag_inline_trees) optimize_inline_calls (decl); if (node->nested) @@ -283,6 +302,7 @@ cgraph_expand_function (node) struct cgraph_node *node; { tree decl = node->decl; + struct cgraph_edge *e; announce_function (decl); @@ -292,41 +312,26 @@ cgraph_expand_function (node) via lang_expand_decl_stmt. */ (*lang_hooks.callgraph.expand_function) (decl); - /* When we decided to inline the function once, we never ever should - need to output it separately. */ - if (node->global.inline_once) - abort (); - if (!node->local.inline_many - || !node->callers) + for (e = node->callers; e; e = e->next_caller) + if (e->inline_call) + break; + if (!e) DECL_SAVED_TREE (decl) = NULL; current_function_decl = NULL; } - -/* Expand all functions that must be output. - - Attempt to topologically sort the nodes so function is output when - all called functions are already assembled to allow data to be - propagated across the callgraph. Use a stack to get smaller distance - between a function and it's callees (later we may choose to use a more - sophisticated algorithm for function reordering; we will likely want - to use subsections to make the output functions appear in top-down - order. */ - -static void -cgraph_expand_functions () +/* Fill array order with all nodes with output flag set in the reverse + topological order. */ +static int +cgraph_postorder (struct cgraph_node **order) { struct cgraph_node *node, *node2; - struct cgraph_node **stack = - xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); - struct cgraph_node **order = - xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); int stack_size = 0; int order_pos = 0; struct cgraph_edge *edge, last; - int i; - cgraph_mark_functions_to_output (); + struct cgraph_node **stack = + xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); /* We have to deal with cycles nicely, so use a depth first traversal output algorithm. Ignore the fact that some functions won't need @@ -372,97 +377,739 @@ cgraph_expand_functions () } } } - for (i = order_pos - 1; i >= 0; i--) + free (stack); + return order_pos; +} + +#define INLINED_TIMES(node) ((size_t)(node)->aux) +#define SET_INLINED_TIMES(node,times) ((node)->aux = (void *)(times)) + +/* Return list of nodes we decided to inline NODE into, set their output + flag and compute INLINED_TIMES. + + We do simple backtracing to get INLINED_TIMES right. This should not be + expensive as we limit the amount of inlining. Alternatively we may first + discover set of nodes, topologically sort these and propagate + INLINED_TIMES */ + +static int +cgraph_inlined_into (struct cgraph_node *node, struct cgraph_node **array) +{ + int nfound = 0; + struct cgraph_edge **stack; + struct cgraph_edge *e, *e1; + int sp; + int i; + + /* Fast path: since we traverse in mostly topological order, we will likely + find no edges. */ + for (e = node->callers; e; e = e->next_caller) + if (e->inline_call) + break; + + if (!e) + return 0; + + /* Allocate stack for back-tracking up callgraph. */ + stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge)); + sp = 0; + + /* Push the first edge on to the stack. */ + stack[sp++] = e; + + while (sp) { - node = order[i]; - if (node->output) + struct cgraph_node *caller; + + /* Look at the edge on the top of the stack. */ + e = stack[sp - 1]; + caller = e->caller; + + /* Check if the caller destination has been visited yet. */ + if (!caller->output) { - if (!node->reachable) - abort (); - node->output = 0; - cgraph_expand_function (node); + array[nfound++] = e->caller; + /* Mark that we have visited the destination. */ + caller->output = true; + SET_INLINED_TIMES (caller, 0); + } + SET_INLINED_TIMES (caller, INLINED_TIMES (caller) + 1); + + for (e1 = caller->callers; e1; e1 = e1->next_caller) + if (e1->inline_call) + break; + if (e1) + stack[sp++] = e1; + else + { + while (true) + { + for (e1 = e->next_caller; e1; e1 = e1->next_caller) + if (e1->inline_call) + break; + + if (e1) + { + stack[sp - 1] = e1; + break; + } + else + { + sp--; + if (!sp) + break; + e = stack[sp - 1]; + } + } } } + free (stack); - free (order); + + + if (cgraph_dump_file) + { + fprintf (cgraph_dump_file, "Found inline predecesors of %s:", + cgraph_node_name (node)); + for (i = 0; i < nfound; i++) + { + fprintf (cgraph_dump_file, " %s", cgraph_node_name (array[i])); + if (INLINED_TIMES (array[i]) != 1) + fprintf (cgraph_dump_file, " (%i times)", + INLINED_TIMES (array[i])); + } + fprintf (cgraph_dump_file, "\n"); + } + + return nfound; } -/* Mark all local functions. - We can not use node->needed directly as it is modified during - execution of cgraph_optimize. */ +/* Return list of nodes we decided to inline into NODE, set their output + flag and compute INLINED_TIMES. + + This function is identical to cgraph_inlined_into with callers and callees + nodes swapped. */ + +static int +cgraph_inlined_callees (struct cgraph_node *node, struct cgraph_node **array) +{ + int nfound = 0; + struct cgraph_edge **stack; + struct cgraph_edge *e, *e1; + int sp; + int i; + + /* Fast path: since we traverse in mostly topological order, we will likely + find no edges. */ + for (e = node->callees; e; e = e->next_callee) + if (e->inline_call) + break; + + if (!e) + return 0; + + /* Allocate stack for back-tracking up callgraph. */ + stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge)); + sp = 0; + + /* Push the first edge on to the stack. */ + stack[sp++] = e; + + while (sp) + { + struct cgraph_node *callee; + + /* Look at the edge on the top of the stack. */ + e = stack[sp - 1]; + callee = e->callee; + + /* Check if the callee destination has been visited yet. */ + if (!callee->output) + { + array[nfound++] = e->callee; + /* Mark that we have visited the destination. */ + callee->output = true; + SET_INLINED_TIMES (callee, 0); + } + SET_INLINED_TIMES (callee, INLINED_TIMES (callee) + 1); + + for (e1 = callee->callees; e1; e1 = e1->next_callee) + if (e1->inline_call) + break; + if (e1) + stack[sp++] = e1; + else + { + while (true) + { + for (e1 = e->next_callee; e1; e1 = e1->next_callee) + if (e1->inline_call) + break; + + if (e1) + { + stack[sp - 1] = e1; + break; + } + else + { + sp--; + if (!sp) + break; + e = stack[sp - 1]; + } + } + } + } + + free (stack); + + if (cgraph_dump_file) + { + fprintf (cgraph_dump_file, "Found inline successors of %s:", + cgraph_node_name (node)); + for (i = 0; i < nfound; i++) + { + fprintf (cgraph_dump_file, " %s", cgraph_node_name (array[i])); + if (INLINED_TIMES (array[i]) != 1) + fprintf (cgraph_dump_file, " (%i times)", + INLINED_TIMES (array[i])); + } + fprintf (cgraph_dump_file, "\n"); + } + + return nfound; +} + +/* Estimate size of the function after inlining WHAT into TO. */ + +static int +cgraph_estimate_size_after_inlining (int times, + struct cgraph_node *to, + struct cgraph_node *what) +{ + return (what->global.insns - INSNS_PER_CALL) *times + to->global.insns; +} + +/* Estimate the growth caused by inlining NODE into all callees. */ + +static int +cgraph_estimate_growth (struct cgraph_node *node) +{ + int growth = 0; + int calls_saved = 0; + int clones_added = 0; + struct cgraph_edge *e; + + for (e = node->callers; e; e = e->next_caller) + if (!e->inline_call) + { + growth += ((cgraph_estimate_size_after_inlining (1, e->caller, node) + - + e->caller->global.insns) *e->caller->global.cloned_times); + calls_saved += e->caller->global.cloned_times; + clones_added += e->caller->global.cloned_times; + } + + /* ??? Wrong for self recursive functions or cases where we decide to not + inline for different reasons, but it is not big deal as in that case + we will keep the body around, but we will also avoid some inlining. */ + if (!node->needed && !node->origin && !DECL_EXTERNAL (node->decl)) + growth -= node->global.insns, clones_added--; + + if (!calls_saved) + calls_saved = 1; + + return growth; +} + +/* Update insn sizes after inlining WHAT into TO that is already inlined into + all nodes in INLINED array. */ static void -cgraph_mark_local_functions () +cgraph_mark_inline (struct cgraph_node *to, + struct cgraph_node *what, + struct cgraph_node **inlined, int ninlined, + struct cgraph_node **inlined_callees, + int ninlined_callees) +{ + int i; + int times = 0; + int clones = 0; + struct cgraph_edge *e; + bool called = false; + int new_insns; + + for (e = what->callers; e; e = e->next_caller) + { + if (e->caller == to) + { + if (e->inline_call) + abort (); + e->inline_call = true; + times++; + clones += e->caller->global.cloned_times; + } + else if (!e->inline_call) + called = true; + } + if (!times) + abort (); + ncalls_inlined += times; + + new_insns = cgraph_estimate_size_after_inlining (times, to, what); + if (to->global.will_be_output) + overall_insns += new_insns - to->global.insns; + to->global.insns = new_insns; + + to->global.calls += (what->global.calls - 1) *times; + if (!called && !what->needed && !what->origin + && !DECL_EXTERNAL (what->decl)) + { + if (!what->global.will_be_output) + abort (); + clones--; + nfunctions_inlined++; + what->global.will_be_output = 0; + overall_insns -= what->global.insns; + } + what->global.cloned_times += clones; + if (to->global.calls < 0) + abort (); + for (i = 0; i < ninlined; i++) + { + new_insns = + cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) * + times, inlined[i], what); + if (inlined[i]->global.will_be_output) + overall_insns += new_insns - inlined[i]->global.insns; + inlined[i]->global.insns = new_insns; + inlined[i]->global.calls += + (what->global.calls - 1) *INLINED_TIMES (inlined[i]) * times; + if (inlined[i]->global.calls < 0) + abort (); + } + for (i = 0; i < ninlined_callees; i++) + { + inlined_callees[i]->global.cloned_times += + INLINED_TIMES (inlined_callees[i]) * clones; + } +} + +/* Return false when inlining WHAT into TO is not good idea as it would cause + too large growth of function bodies. */ + +static bool +cgraph_check_inline_limits (struct cgraph_node *to, + struct cgraph_node *what, + struct cgraph_node **inlined, int ninlined) { + int i; + int times = 0; + struct cgraph_edge *e; + int newsize; + int limit; + + for (e = to->callees; e; e = e->next_callee) + if (e->callee == what) + times++; + + /* When inlining large function body called once into small function, + take the inlined function as base for limiting the growth. */ + if (to->local.self_insns > what->local.self_insns) + limit = to->local.self_insns; + else + limit = what->local.self_insns; + + limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; + + newsize = cgraph_estimate_size_after_inlining (times, to, what); + if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) + && newsize > limit) + return false; + for (i = 0; i < ninlined; i++) + { + newsize = + cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) * + times, inlined[i], what); + if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) + && newsize > + inlined[i]->local.self_insns * + (100 + PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH)) / 100) + return false; + } + return true; +} + +/* Return true when function N is small enought to be inlined. */ + +static bool +cgraph_default_inline_p (struct cgraph_node *n) +{ + if (!DECL_INLINE (n->decl) || !DECL_SAVED_TREE (n->decl)) + return false; + if (DID_INLINE_FUNC (n->decl)) + return n->global.insns < MAX_INLINE_INSNS_AUTO; + else + return n->global.insns < MAX_INLINE_INSNS_SINGLE; +} + +/* We use greedy algorithm for inlining of small functions: + All inline candidates are put into prioritized heap based on estimated + growth of the overall number of instructions and then update the estimates. + + INLINED and INLINED_CALEES are just pointers to arrays large enought + to be passed to cgraph_inlined_into and cgraph_inlined_callees. */ + +static void +cgraph_decide_inlining_of_small_functions (struct cgraph_node **inlined, + struct cgraph_node + **inlined_callees) +{ + int i; struct cgraph_node *node; + fibheap_t heap = fibheap_new (); + struct fibnode **heap_node = + xcalloc (sizeof (struct fibnode *), cgraph_max_uid); + int ninlined, ninlined_callees; + int max_insns = ((HOST_WIDEST_INT) initial_insns + * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100); - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Marking local functions:"); + /* Put all inline candidates into the heap. */ - /* Figure out functions we want to assemble. */ for (node = cgraph_nodes; node; node = node->next) { - node->local.local = (!node->needed - && DECL_SAVED_TREE (node->decl) - && !DECL_COMDAT (node->decl) - && !TREE_PUBLIC (node->decl)); - if (cgraph_dump_file && node->local.local) - fprintf (cgraph_dump_file, " %s", cgraph_node_name (node)); + struct cgraph_edge *e; + + if (!node->local.inlinable || !node->callers + || !cgraph_default_inline_p (node)) + continue; + + /* Rule out always_inline functions we dealt with earler. */ + for (e = node->callers; e; e = e->next_caller) + if (e->inline_call) + break; + if (e) + continue; + heap_node[node->uid] = + fibheap_insert (heap, cgraph_estimate_growth (node), node); } + if (cgraph_dump_file) - fprintf (cgraph_dump_file, "\n"); + fprintf (cgraph_dump_file, "\n\nDeciding on inlining: "); + while ((node = fibheap_extract_min (heap)) && overall_insns <= max_insns) + { + struct cgraph_edge *e; + int old_insns = overall_insns; + + heap_node[node->uid] = NULL; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Considering %s %i insns, growth %i.\n", + cgraph_node_name (node), node->global.insns, + cgraph_estimate_growth (node)); + if (!cgraph_default_inline_p (node)) + { + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Function too large.\n"); + continue; + } + ninlined_callees = cgraph_inlined_callees (node, inlined_callees); + for (e = node->callers; e; e = e->next_caller) + if (!e->inline_call && e->caller != node) + { + ninlined = cgraph_inlined_into (e->caller, inlined); + if (e->callee->output + || !cgraph_check_inline_limits (e->caller, node, inlined, + ninlined)) + { + for (i = 0; i < ninlined; i++) + inlined[i]->output = 0, node->aux = 0; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Not inlining into %s\n", + cgraph_node_name (e->caller)); + continue; + } + cgraph_mark_inline (e->caller, node, inlined, ninlined, + inlined_callees, ninlined_callees); + if (heap_node[e->caller->uid]) + fibheap_replace_key (heap, heap_node[e->caller->uid], + cgraph_estimate_growth (e->caller)); + + /* Size of the functions we updated into has changed, so update + the keys. */ + for (i = 0; i < ninlined; i++) + { + inlined[i]->output = 0, node->aux = 0; + if (heap_node[inlined[i]->uid]) + fibheap_replace_key (heap, heap_node[inlined[i]->uid], + cgraph_estimate_growth (inlined[i])); + } + } + + /* Similarly all functions called by function we just inlined + are now called more times; update keys. */ + + for (e = node->callees; e; e = e->next_callee) + if (!e->inline_call && heap_node[e->callee->uid]) + fibheap_replace_key (heap, heap_node[e->callee->uid], + cgraph_estimate_growth (e->callee)); + + for (i = 0; i < ninlined_callees; i++) + { + struct cgraph_edge *e; + + for (e = inlined_callees[i]->callees; e; e = e->next_callee) + if (!e->inline_call && heap_node[e->callee->uid]) + fibheap_replace_key (heap, heap_node[e->callee->uid], + cgraph_estimate_growth (e->callee)); + + inlined_callees[i]->output = 0, node->aux = 0; + } + if (cgraph_dump_file) + fprintf (cgraph_dump_file, + "Created %i clones, Num insns:%i (%+i), %.2f%%.\n\n", + node->global.cloned_times - 1, + overall_insns, overall_insns - old_insns, + overall_insns * 100.0 / initial_insns); + } + if (cgraph_dump_file && !fibheap_empty (heap)) + fprintf (cgraph_dump_file, "inline-unit-growth limit reached.\n"); + fibheap_delete (heap); + free (heap_node); } -/* Decide what function should be inlined because they are invoked once - (so inlining won't result in duplication of the code). */ +/* Decide on the inlining. We do so in the topological order to avoid + expenses on updating datastructures. */ static void -cgraph_mark_functions_to_inline_once () +cgraph_decide_inlining (void) { - struct cgraph_node *node, *node1; + struct cgraph_node *node; + int nnodes; + struct cgraph_node **order = + xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); + struct cgraph_node **inlined = + xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); + struct cgraph_node **inlined_callees = + xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); + int ninlined; + int ninlined_callees; + int i, y; - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "\n\nMarking functions to inline once:"); + for (node = cgraph_nodes; node; node = node->next) + { + int ncalls = 0; + struct cgraph_edge *e; + + node->global.insns = node->local.self_insns; + for (e = node->callees; e; e = e->next_callee) + ncalls++; + node->global.calls = ncalls; + if (!DECL_EXTERNAL (node->decl)) + { + node->global.cloned_times = 1; + initial_insns += node->local.self_insns; + node->global.will_be_output = true; + } + } + overall_insns = initial_insns; + + nnodes = cgraph_postorder (order); - /* Now look for function called only once and mark them to inline. - From this point number of calls to given function won't grow. */ for (node = cgraph_nodes; node; node = node->next) + node->aux = 0; + + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "\n\nDeciding on always_inline functions:\n"); + + /* In the first pass mark all always_inline edges. Do this with a priority + so no our decisions makes this impossible. */ + for (i = nnodes - 1; i >= 0; i--) + { + struct cgraph_edge *e; + + node = order[i]; + + for (e = node->callees; e; e = e->next_callee) + if (e->callee->local.disgread_inline_limits) + break; + if (!e) + continue; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, + "Considering %s %i insns (always inline)\n", + cgraph_node_name (node), node->global.insns); + ninlined = cgraph_inlined_into (order[i], inlined); + for (; e; e = e->next_callee) + { + if (e->inline_call || !e->callee->local.disgread_inline_limits) + continue; + if (e->callee->output || e->callee == node) + continue; + ninlined_callees = + cgraph_inlined_callees (e->callee, inlined_callees); + cgraph_mark_inline (node, e->callee, inlined, ninlined, + inlined_callees, ninlined_callees); + for (y = 0; y < ninlined_callees; y++) + inlined_callees[y]->output = 0, node->aux = 0; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Inlined %i times. Now %i insns\n\n", + node->global.cloned_times, overall_insns); + } + for (y = 0; y < ninlined; y++) + inlined[y]->output = 0, node->aux = 0; + } + + cgraph_decide_inlining_of_small_functions (inlined, inlined_callees); + + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "\n\nFunctions to inline once:\n"); + + /* And finally decide what functions are called once. */ + + for (i = nnodes - 1; i >= 0; i--) { + node = order[i]; + if (node->callers && !node->callers->next_caller && !node->needed - && node->local.can_inline_once) + && node->local.inlinable && !node->callers->inline_call + && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl)) { bool ok = true; + struct cgraph_node *node1; /* Verify that we won't duplicate the caller. */ for (node1 = node->callers->caller; - node1->local.inline_many - && node1->callers - && ok; - node1 = node1->callers->caller) + node1->callers && node1->callers->inline_call + && ok; node1 = node1->callers->caller) if (node1->callers->next_caller || node1->needed) ok = false; if (ok) { - node->global.inline_once = true; if (cgraph_dump_file) - fprintf (cgraph_dump_file, " %s", cgraph_node_name (node)); + fprintf (cgraph_dump_file, + "Considering %s %i insns (called once)\n", + cgraph_node_name (node), node->global.insns); + ninlined = cgraph_inlined_into (node->callers->caller, inlined); + if (cgraph_check_inline_limits + (node->callers->caller, node, inlined, ninlined)) + { + ninlined_callees = + cgraph_inlined_callees (node, inlined_callees); + cgraph_mark_inline (node->callers->caller, node, inlined, + ninlined, inlined_callees, + ninlined_callees); + for (y = 0; y < ninlined_callees; y++) + inlined_callees[y]->output = 0, node->aux = 0; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Inlined. Now %i insns\n\n", overall_insns); + } + for (y = 0; y < ninlined; y++) + inlined[y]->output = 0, node->aux = 0; } } } + if (cgraph_dump_file) - fprintf (cgraph_dump_file, "\n"); + fprintf (cgraph_dump_file, + "\nInlined %i calls, elliminated %i functions, %i insns turned to %i insns.\n", + ncalls_inlined, nfunctions_inlined, initial_insns, + overall_insns); + free (order); + free (inlined); + free (inlined_callees); } +/* Return true when CALLER_DECL should be inlined into CALLEE_DECL. */ + +bool +cgraph_inline_p (tree caller_decl, tree callee_decl) +{ + struct cgraph_node *caller = cgraph_node (caller_decl); + struct cgraph_node *callee = cgraph_node (callee_decl); + struct cgraph_edge *e; + + for (e = caller->callees; e; e = e->next_callee) + if (e->callee == callee) + return e->inline_call; + /* We do not record builtins in the callgraph. Perhaps it would make more + sense to do so and then prune out those not overwriten by explicit + function body. */ + return false; +} +/* Expand all functions that must be output. + + Attempt to topologically sort the nodes so function is output when + all called functions are already assembled to allow data to be + propagated accross the callgraph. Use a stack to get smaller distance + between a function and it's callees (later we may choose to use a more + sophisticated algorithm for function reordering; we will likely want + to use subsections to make the output functions appear in top-down + order). */ + +static void +cgraph_expand_functions () +{ + struct cgraph_node *node; + struct cgraph_node **order = + xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes); + int order_pos = 0; + int i; + + cgraph_mark_functions_to_output (); + + order_pos = cgraph_postorder (order); + + for (i = order_pos - 1; i >= 0; i--) + { + node = order[i]; + if (node->output) + { + if (!node->reachable) + abort (); + node->output = 0; + cgraph_expand_function (node); + } + } + free (order); +} + +/* Mark all local functions. + + Local function is function whose calls can occur only in the + current compilation unit so we change it's calling convetion. + We simply mark all static functions whose address is not taken + as local. */ + +static void +cgraph_mark_local_functions () +{ + struct cgraph_node *node; + + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Marking local functions:"); + + /* Figure out functions we want to assemble. */ + for (node = cgraph_nodes; node; node = node->next) + { + node->local.local = (!node->needed + && DECL_SAVED_TREE (node->decl) + && !TREE_PUBLIC (node->decl)); + if (cgraph_dump_file && node->local.local) + fprintf (cgraph_dump_file, " %s", cgraph_node_name (node)); + } + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "\n"); +} /* Perform simple optimizations based on callgraph. */ void cgraph_optimize () { - struct cgraph_node *node; - bool changed = true; - timevar_push (TV_CGRAPHOPT); + if (!quiet_flag) + fprintf (stderr, "Performing intraprocedural optimizations\n"); if (cgraph_dump_file) { fprintf (cgraph_dump_file, "Initial callgraph:"); @@ -470,7 +1117,7 @@ cgraph_optimize () } cgraph_mark_local_functions (); - cgraph_mark_functions_to_inline_once (); + cgraph_decide_inlining (); cgraph_global_info_ready = true; if (cgraph_dump_file) @@ -480,39 +1127,10 @@ cgraph_optimize () } timevar_pop (TV_CGRAPHOPT); if (!quiet_flag) - fprintf (stderr, "\n\nAssembling functions:"); + fprintf (stderr, "Assembling functions:"); - /* Output everything. - ??? Our inline heuristic may decide to not inline functions previously - marked as inlinable thus adding new function bodies that must be output. - Later we should move all inlining decisions to callgraph code to make - this impossible. */ + /* Output everything. */ cgraph_expand_functions (); - if (!quiet_flag) - fprintf (stderr, "\n\nAssembling functions that failed to inline:"); - while (changed && !errorcount && !sorrycount) - { - changed = false; - for (node = cgraph_nodes; node; node = node->next) - { - tree decl = node->decl; - if (!node->origin - && !TREE_ASM_WRITTEN (decl) - && DECL_SAVED_TREE (decl) - && !DECL_EXTERNAL (decl)) - { - struct cgraph_edge *edge; - - for (edge = node->callers; edge; edge = edge->next_caller) - if (TREE_ASM_WRITTEN (edge->caller->decl)) - { - changed = true; - cgraph_expand_function (node); - break; - } - } - } - } if (cgraph_dump_file) { fprintf (cgraph_dump_file, "Final callgraph:"); diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0220504..0fc713e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -4572,6 +4572,7 @@ After exceeding the maximum number of inlined instructions by repeated inlining, a linear function is used to decrease the allowable size for single functions. The slope of that function is the negative reciprocal of the number specified here. +This parameter is ignored when @option{-funit-at-a-time} is used. The default value is 32. @item min-inline-insns @@ -4579,8 +4580,27 @@ The repeated inlining is throttled more and more by the linear function after exceeding the limit. To avoid too much throttling, a minimum for this function is specified here to allow repeated inlining for very small functions even when a lot of repeated inlining already has been done. +This parameter is ignored when @option{-funit-at-a-time} is used. The default value is 10. +@item large-function-insns +The limit specifying really large functions. For functions greater than this +limit inlining is constrained by @option{--param large-function-growth}. +This parameter is usefull primarily to avoid extreme compilation time caused by non-linear +algorithms used by the backend. +This parameter is ignored when @option{-funit-at-a-time} is not used. +The default value is 30000. + +@item large-function-growth +Specifies maximal growth of large functtion caused by inlining in percents. +This parameter is ignored when @option{-funit-at-a-time} is not used. +The default value is 200. + +@item inline-unit-growth +Specifies maximal overall growth of the compilation unit caused by inlining. +This parameter is ignored when @option{-funit-at-a-time} is not used. +The default value is 150. + @item max-inline-insns-rtl For languages that use the RTL inliner (this happens at a later stage than tree inlining), you can set the maximum allowable size (counted diff --git a/gcc/params.def b/gcc/params.def index 3f1d94b..cb527f0 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -152,6 +152,19 @@ DEFPARAM(PARAM_MAX_PENDING_LIST_LENGTH, "The maximum length of scheduling's pending operations list", 32) +DEFPARAM(PARAM_LARGE_FUNCTION_INSNS, + "large-function-insns", + "The size of function body to be considered large", + 10000) +DEFPARAM(PARAM_LARGE_FUNCTION_GROWTH, + "large-function-growth", + "Maximal growth due to inlining of large function (in percents)", + 100) +DEFPARAM(PARAM_INLINE_UNIT_GROWTH, + "inline-unit-growth", + "how much can given compilation unit grow because of the inlining (in percents)", + 50) + /* The GCSE optimization will be disabled if it would require significantly more memory than this value. */ DEFPARAM(PARAM_MAX_GCSE_MEMORY, diff --git a/gcc/testsuite/g++.dg/warn/Winline-1.C b/gcc/testsuite/g++.dg/warn/Winline-1.C index 2f8b39c..f9ee465 100644 --- a/gcc/testsuite/g++.dg/warn/Winline-1.C +++ b/gcc/testsuite/g++.dg/warn/Winline-1.C @@ -1,4 +1,4 @@ -// { dg-options "-Winline -O2" } +// { dg-options "-Winline -O2 -fno-unit-at-a-time" } static inline int foo(int x); // { dg-warning "" } diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 3e68983f..3cfe702 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -106,6 +106,7 @@ typedef struct inline_data htab_t tree_pruner; /* Decl of function we are inlining into. */ tree decl; + tree current_decl; } inline_data; /* Prototypes. */ @@ -1145,6 +1146,10 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data) if (!fn) return NULL_TREE; + /* Turn forward declarations into real ones. */ + if (flag_unit_at_a_time) + fn = cgraph_node (fn)->decl; + /* If fn is a declaration of a function in a nested scope that was globally declared inline, we don't set its DECL_INITIAL. However, we can't blindly follow DECL_ABSTRACT_ORIGIN because the @@ -1159,9 +1164,9 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data) /* Don't try to inline functions that are not well-suited to inlining. */ - if ((!flag_unit_at_a_time || !DECL_SAVED_TREE (fn) - || !cgraph_global_info (fn)->inline_once) - && !inlinable_function_p (fn, id, 0)) + if (!DECL_SAVED_TREE (fn) + || (flag_unit_at_a_time && !cgraph_inline_p (id->current_decl, fn)) + || (!flag_unit_at_a_time && !inlinable_function_p (fn, id, 0))) { if (warn_inline && DECL_INLINE (fn) && !DID_INLINE_FUNC (fn) && !DECL_IN_SYSTEM_HEADER (fn)) @@ -1403,7 +1408,12 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data) } /* Recurse into the body of the just inlined function. */ - expand_calls_inline (inlined_body, id); + { + tree old_decl = id->current_decl; + id->current_decl = fn; + expand_calls_inline (inlined_body, id); + id->current_decl = old_decl; + } VARRAY_POP (id->fns); /* If we've returned to the top level, clear out the record of how @@ -1446,6 +1456,7 @@ optimize_inline_calls (tree fn) memset (&id, 0, sizeof (id)); id.decl = fn; + id.current_decl = fn; /* Don't allow recursion into FN. */ VARRAY_TREE_INIT (id.fns, 32, "fns"); VARRAY_PUSH_TREE (id.fns, fn); -- cgit v1.1