 gcc/ChangeLog                    |  21
 gcc/cfgexpand.c                  | 203
 gcc/cgraph.c                     |   4
 gcc/cgraph.h                     |   8
 gcc/cgraphunit.c                 |   3
 gcc/doc/invoke.texi              |   9
 gcc/ipa-inline.c                 |  22
 gcc/params.def                   |   8
 gcc/testsuite/ChangeLog          |   4
 gcc/testsuite/gcc.dg/winline-9.c |  25
 gcc/tree-inline.h                |   2
 11 files changed, 280 insertions(+), 29 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9bc5908..19d2f44 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2006-11-28  Jan Hubicka  <jh@suse.cz>
+
+	* doc/invoke.texi (large-stack-frame, large-stack-frame-growth): New params.
+	* cgraph.c (dump_cgraph_node): Dump stack usage.
+	* cgraph.h (cgraph_local_info): Add estimated_self_stack_size.
+	(cgraph_global_info): Add estimated_stack_size and stack_frame_offset.
+	* cgraphunit.c (cgraph_analyze_function): Analyze stack sizes.
+	* ipa-inline.c (cgraph_clone_inlined_nodes): Propagate stack usage.
+	(cgraph_check_inline_limits): Limit stack growth.
+	* cfgexpand.c: Include tree-inline.h.
+	(account_stack_vars): New function.
+	(expand_one_var): New param to just account the stack; return estimated
+	size.
+	(expand_used_vars_for_block): Update call of expand_one_var.
+	(account_used_vars_for_block): New function.
+	(estimated_stack_frame_size): Likewise.
+	(init_vars_expansion, fini_vars_expansion): Break out from ...
+	(expand_used_vars): ... here.
+	* tree-inline.h (estimated_stack_frame_size): Declare.
+	* params.def (PARAM_LARGE_STACK_FRAME, PARAM_STACK_FRAME_GROWTH): New.
+
 2006-11-28  Richard Guenther  <rguenther@suse.de>
 
 	* ggc-page.c (ggc_print_statistics): Use %ul and a cast to
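The heart of the cfgexpand.c changes that follow is a measure-only mode for the variable-expansion walk: the new really_expand flag lets the same code path either allocate variables or merely total up the stack bytes they would need. A minimal standalone sketch of that two-pass pattern, with invented names and sizes rather than GCC's own:

    #include <stdio.h>

    /* Simplified stand-in for a local variable awaiting layout.  */
    struct var { const char *name; long size; };

    /* Walk the variables.  When really_expand is nonzero, "allocate"
       them; either way return the stack bytes the walk would use.  */
    static long
    expand_vars (const struct var *vars, int n, int really_expand)
    {
      long size = 0;
      int i;
      for (i = 0; i < n; i++)
        {
          if (really_expand)
            printf ("allocating %s\n", vars[i].name);
          size += vars[i].size;
        }
      return size;
    }

    int
    main (void)
    {
      const struct var vars[2] = { { "buf", 64 }, { "tmp", 16 } };
      /* Estimation pass: nothing is allocated.  */
      printf ("estimated frame: %ld bytes\n", expand_vars (vars, 2, 0));
      /* Real expansion pass.  */
      expand_vars (vars, 2, 1);
      return 0;
    }

Reusing one walk for both passes keeps the estimate and the real layout from drifting apart.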
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index d0a560e..b472adb2 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -39,6 +39,7 @@ Boston, MA 02110-1301, USA.  */
 #include "toplev.h"
 #include "debug.h"
 #include "params.h"
+#include "tree-inline.h"
 
 /* Verify that there is exactly single jump instruction since last and attach
    REG_BR_PROB note specifying probability.
@@ -583,6 +584,28 @@ expand_stack_vars (bool (*pred) (tree))
     }
 }
 
+/* Take into account all sizes of partitions and reset DECL_RTLs.  */
+static HOST_WIDE_INT
+account_stack_vars (void)
+{
+  size_t si, j, i, n = stack_vars_num;
+  HOST_WIDE_INT size = 0;
+
+  for (si = 0; si < n; ++si)
+    {
+      i = stack_vars_sorted[si];
+
+      /* Skip variables that aren't partition representatives, for now.  */
+      if (stack_vars[i].representative != i)
+	continue;
+
+      size += stack_vars[i].size;
+      for (j = i; j != EOC; j = stack_vars[j].next)
+	SET_DECL_RTL (stack_vars[j].decl, NULL);
+    }
+  return size;
+}
+
 /* A subroutine of expand_one_var.  Called to immediately assign rtl
    to a variable to be allocated in the stack frame.  */
@@ -721,31 +744,53 @@ defer_stack_allocation (tree var, bool toplevel)
 
 /* A subroutine of expand_used_vars.  Expand one variable according to its
    flavor.  Variables to be placed on the stack are not actually
-   expanded yet, merely recorded.  */
+   expanded yet, merely recorded.
+   When REALLY_EXPAND is false, only record the stack variables to be
+   allocated and return the estimated stack usage of this variable.  */
 
-static void
-expand_one_var (tree var, bool toplevel)
+static HOST_WIDE_INT
+expand_one_var (tree var, bool toplevel, bool really_expand)
 {
   if (TREE_CODE (var) != VAR_DECL)
-    lang_hooks.expand_decl (var);
+    {
+      if (really_expand)
+	lang_hooks.expand_decl (var);
+    }
   else if (DECL_EXTERNAL (var))
     ;
   else if (DECL_HAS_VALUE_EXPR_P (var))
     ;
   else if (TREE_STATIC (var))
-    expand_one_static_var (var);
+    {
+      if (really_expand)
+	expand_one_static_var (var);
+    }
   else if (DECL_RTL_SET_P (var))
     ;
   else if (TREE_TYPE (var) == error_mark_node)
-    expand_one_error_var (var);
+    {
+      if (really_expand)
+	expand_one_error_var (var);
+    }
   else if (DECL_HARD_REGISTER (var))
-    expand_one_hard_reg_var (var);
+    {
+      if (really_expand)
+	expand_one_hard_reg_var (var);
+    }
   else if (use_register_for_decl (var))
-    expand_one_register_var (var);
+    {
+      if (really_expand)
+	expand_one_register_var (var);
+    }
   else if (defer_stack_allocation (var, toplevel))
     add_stack_var (var);
   else
-    expand_one_stack_var (var);
+    {
+      if (really_expand)
+	expand_one_stack_var (var);
+      return tree_low_cst (DECL_SIZE_UNIT (var), 1);
+    }
+  return 0;
 }
 
 /* A subroutine of expand_used_vars.  Walk down through the BLOCK tree
@@ -770,7 +815,7 @@ expand_used_vars_for_block (tree block, bool toplevel)
 	 care of this.  */
       || (!flag_unit_at_a_time && TREE_STATIC (t)
	  && DECL_PRESERVE_P (t)))
-    expand_one_var (t, toplevel);
+    expand_one_var (t, toplevel, true);
 
   this_sv_num = stack_vars_num;
@@ -947,6 +992,122 @@ create_stack_guard (void)
   cfun->stack_protect_guard = guard;
 }
 
+/* A subroutine of expand_used_vars.  Walk down through the BLOCK tree,
+   accounting the stack usage of the variables that would be expanded;
+   nothing is actually allocated.
+
+   TOPLEVEL is true if this is the outermost BLOCK.  */
+
+static HOST_WIDE_INT
+account_used_vars_for_block (tree block, bool toplevel)
+{
+  size_t i, j, old_sv_num, this_sv_num, new_sv_num;
+  tree t;
+  HOST_WIDE_INT size = 0;
+
+  old_sv_num = toplevel ? 0 : stack_vars_num;
+
+  /* Account all variables at this level.  */
+  for (t = BLOCK_VARS (block); t ; t = TREE_CHAIN (t))
+    if (TREE_USED (t))
+      size += expand_one_var (t, toplevel, false);
+
+  this_sv_num = stack_vars_num;
+
+  /* Account all variables at containing levels.  */
+  for (t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t))
+    size += account_used_vars_for_block (t, false);
+
+  /* Since we do not track exact variable lifetimes (which is not even
+     possible for variables whose address escapes), we mirror the block
+     tree in the interference graph.  Here we cause all variables at this
+     level, and all sublevels, to conflict.  Do make certain that a
+     variable conflicts with itself.  */
+  if (old_sv_num < this_sv_num)
+    {
+      new_sv_num = stack_vars_num;
+      resize_stack_vars_conflict (new_sv_num);
+
+      for (i = old_sv_num; i < new_sv_num; ++i)
+	for (j = i < this_sv_num ? i+1 : this_sv_num; j-- > old_sv_num ;)
+	  add_stack_var_conflict (i, j);
+    }
+  return size;
+}
+
+/* Prepare for expanding variables.  */
+static void
+init_vars_expansion (void)
+{
+  tree t;
+  /* Set TREE_USED on all variables in the unexpanded_var_list.  */
+  for (t = cfun->unexpanded_var_list; t; t = TREE_CHAIN (t))
+    TREE_USED (TREE_VALUE (t)) = 1;
+
+  /* Clear TREE_USED on all variables associated with a block scope.  */
+  clear_tree_used (DECL_INITIAL (current_function_decl));
+
+  /* Initialize local stack smashing state.  */
+  has_protected_decls = false;
+  has_short_buffer = false;
+}
+
+/* Free up stack variable graph data.  */
+static void
+fini_vars_expansion (void)
+{
+  XDELETEVEC (stack_vars);
+  XDELETEVEC (stack_vars_sorted);
+  XDELETEVEC (stack_vars_conflict);
+  stack_vars = NULL;
+  stack_vars_alloc = stack_vars_num = 0;
+  stack_vars_conflict = NULL;
+  stack_vars_conflict_alloc = 0;
+}
+
+/* Estimate the stack frame size the current function would need if its
+   variables were laid out now.  */
+HOST_WIDE_INT
+estimated_stack_frame_size (void)
+{
+  HOST_WIDE_INT size = 0;
+  tree t, outer_block = DECL_INITIAL (current_function_decl);
+
+  init_vars_expansion ();
+
+  /* At this point all variables on the unexpanded_var_list with TREE_USED
+     set are not associated with any block scope.  Lay them out.  */
+  for (t = cfun->unexpanded_var_list; t; t = TREE_CHAIN (t))
+    {
+      tree var = TREE_VALUE (t);
+
+      if (TREE_USED (var))
+	size += expand_one_var (var, true, false);
+      TREE_USED (var) = 1;
+    }
+  size += account_used_vars_for_block (outer_block, true);
+  if (stack_vars_num > 0)
+    {
+      /* Due to the way alias sets work, no variables with non-conflicting
+	 alias sets may be assigned the same address.  Add conflicts to
+	 reflect this.  */
+      add_alias_set_conflicts ();
+
+      /* If stack protection is enabled, we don't share space between
+	 vulnerable data and non-vulnerable data.  */
+      if (flag_stack_protect)
+	add_stack_protection_conflicts ();
+
+      /* Now that we have collected all stack variables, and have computed a
+	 minimal interference graph, attempt to save some stack space.  */
+      partition_stack_vars ();
+      if (dump_file)
+	dump_stack_var_partition ();
+
+      size += account_stack_vars ();
+      fini_vars_expansion ();
+    }
+  return size;
+}
+
 /* Expand all variables used in the function.  */
 
 static void
@@ -961,16 +1122,7 @@ expand_used_vars (void)
       frame_phase = off ? align - off : 0;
     }
 
-  /* Set TREE_USED on all variables in the unexpanded_var_list.  */
-  for (t = cfun->unexpanded_var_list; t; t = TREE_CHAIN (t))
-    TREE_USED (TREE_VALUE (t)) = 1;
-
-  /* Clear TREE_USED on all variables associated with a block scope.  */
-  clear_tree_used (outer_block);
-
-  /* Initialize local stack smashing state.  */
-  has_protected_decls = false;
-  has_short_buffer = false;
+  init_vars_expansion ();
 
   /* At this point all variables on the unexpanded_var_list with TREE_USED
      set are not associated with any block scope.  Lay them out.  */
@@ -1005,7 +1157,7 @@ expand_used_vars (void)
       TREE_USED (var) = 1;
 
       if (expand_now)
-	expand_one_var (var, true);
+	expand_one_var (var, true, true);
     }
   cfun->unexpanded_var_list = NULL_TREE;
@@ -1059,14 +1211,7 @@ expand_used_vars (void)
 
   expand_stack_vars (NULL);
 
-  /* Free up stack variable graph data.  */
-  XDELETEVEC (stack_vars);
-  XDELETEVEC (stack_vars_sorted);
-  XDELETEVEC (stack_vars_conflict);
-  stack_vars = NULL;
-  stack_vars_alloc = stack_vars_num = 0;
-  stack_vars_conflict = NULL;
-  stack_vars_conflict_alloc = 0;
+  fini_vars_expansion ();
 }
 
 /* If the target requires that FRAME_OFFSET be aligned, do it.  */
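One subtlety in the accounting above: account_stack_vars adds each partition's size only once, through its representative, so variables that partition_stack_vars lets share a stack slot are not double-counted. A rough standalone model of that bookkeeping, with the representative marking simplified and every name invented for illustration:

    #include <stdio.h>

    /* Simplified partition entry: a variable is a representative when
       representative equals its own index; merged variables point at
       their partition's representative instead.  */
    struct stack_var { int representative; long size; };

    /* Sum the sizes of representatives only; the other members of a
       partition share the representative's space.  */
    static long
    account_stack_vars (const struct stack_var *sv, int n)
    {
      long size = 0;
      int i;
      for (i = 0; i < n; i++)
        if (sv[i].representative == i)
          size += sv[i].size;
      return size;
    }

    int
    main (void)
    {
      /* Three 32-byte arrays; the last two do not conflict and were
         merged into partition 1, so the estimate is 64, not 96.  */
      const struct stack_var sv[3] = { { 0, 32 }, { 1, 32 }, { 1, 32 } };
      printf ("estimated: %ld bytes\n", account_stack_vars (sv, 3));
      return 0;
    }

This assumes, as in the pass itself, that a representative's recorded size covers its whole partition.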
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 3e5dab1..6fdd925 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -716,6 +716,10 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
     fprintf (f, " %i insns", node->local.self_insns);
   if (node->global.insns && node->global.insns != node->local.self_insns)
     fprintf (f, " (%i after inlining)", node->global.insns);
+  if (node->local.estimated_self_stack_size)
+    fprintf (f, " %i bytes stack usage", (int) node->local.estimated_self_stack_size);
+  if (node->global.estimated_stack_size != node->local.estimated_self_stack_size)
+    fprintf (f, " %i bytes after inlining", (int) node->global.estimated_stack_size);
   if (node->origin)
     fprintf (f, " nested in: %s", cgraph_node_name (node->origin));
   if (node->needed)
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 668d7a7..b7aa81b 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -51,6 +51,9 @@ enum availability
 
 struct cgraph_local_info GTY(())
 {
+  /* Estimated stack frame consumption by the function.  */
+  HOST_WIDE_INT estimated_self_stack_size;
+
   /* Size of the function before inlining.  */
   int self_insns;
 
@@ -88,6 +91,11 @@ struct cgraph_local_info GTY(())
 
 struct cgraph_global_info GTY(())
 {
+  /* Estimated stack frame consumption by the function.  */
+  HOST_WIDE_INT estimated_stack_size;
+  /* Expected offset of the stack frame of an inlined function.  */
+  HOST_WIDE_INT stack_frame_offset;
+
   /* For inline clones this points to the function they will be
      inlined into.  */
   struct cgraph_node *inlined_to;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 242738a..59a7e0d 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -950,6 +950,9 @@ cgraph_analyze_function (struct cgraph_node *node)
   /* First kill forward declaration so reverse inlining works properly.  */
   cgraph_create_edges (node, decl);
 
+  node->local.estimated_self_stack_size = estimated_stack_frame_size ();
+  node->global.estimated_stack_size = node->local.estimated_self_stack_size;
+  node->global.stack_frame_offset = 0;
   node->local.inlinable = tree_inlinable_function_p (decl);
   if (!flag_unit_at_a_time)
     node->local.self_insns = estimate_num_insns (decl);
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 427b267..e606366 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6033,6 +6033,15 @@
 This parameter is ignored when @option{-funit-at-a-time} is not used.
 The default value is 50 which limits unit growth to 1.5 times the original
 size.
 
+@item large-stack-frame
+The limit specifying which stack frames are considered large.  While inlining,
+the algorithm tries not to grow past this limit too much.  Default is 256 bytes.
+
+@item large-stack-frame-growth
+Specifies the maximal growth of large stack frames caused by inlining, in
+percent.  The default value is 1000, which limits large stack frame growth to
+11 times the original size.
+
 @item max-inline-insns-recursive
 @itemx max-inline-insns-recursive-auto
 Specifies maximum number of instructions out-of-line copy of self recursive inline
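Taken together with the params.def hunk below, the new limits are ordinary --param knobs and can be exercised from the command line; for instance (the flags are the ones this patch adds, the source file name is arbitrary):

    gcc -O2 -Winline --param large-stack-frame=256 \
        --param large-stack-frame-growth=1000 foo.c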
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 638fdef..8e47f36 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -115,6 +115,7 @@ cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
 void
 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
 {
+  HOST_WIDE_INT peak;
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -141,6 +142,11 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
     e->callee->global.inlined_to = e->caller;
+  e->callee->global.stack_frame_offset
+    = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
+  peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
+  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
+    e->callee->global.inlined_to->global.estimated_stack_size = peak;
 
   /* Recursively clone all bodies.  */
   for (e = e->callee->callees; e; e = e->next_callee)
@@ -257,6 +263,7 @@ cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
   struct cgraph_edge *e;
   int newsize;
   int limit;
+  HOST_WIDE_INT stack_size_limit, inlined_stack;
 
   if (one_only)
     times = 1;
@@ -288,6 +295,21 @@ cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
       *reason = N_("--param large-function-growth limit reached");
       return false;
     }
+
+  stack_size_limit = to->local.estimated_self_stack_size;
+
+  stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
+
+  inlined_stack = (to->global.stack_frame_offset
+		   + to->local.estimated_self_stack_size
+		   + what->global.estimated_stack_size);
+  if (inlined_stack > stack_size_limit
+      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
+    {
+      if (reason)
+	*reason = N_("--param large-stack-frame-growth limit reached");
+      return false;
+    }
   return true;
 }
diff --git a/gcc/params.def b/gcc/params.def
index 764b3c4..142b19e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -198,6 +198,14 @@ DEFPARAM(PARAM_INLINE_CALL_COST,
 	 "inline-call-cost",
 	 "expense of call operation relative to ordinary arithmetic operations",
 	 16, 0, 0)
+DEFPARAM(PARAM_LARGE_STACK_FRAME,
+	 "large-stack-frame",
+	 "The size of stack frame to be considered large",
+	 256, 0, 0)
+DEFPARAM(PARAM_STACK_FRAME_GROWTH,
+	 "large-stack-frame-growth",
+	 "Maximal stack frame growth due to inlining (in percent)",
+	 1000, 0, 0)
 
 /* The GCSE optimization will be disabled if it would require
    significantly more memory than this value.  */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index fba7c2a..e2a6284 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2006-11-28  Jan Hubicka  <jh@suse.cz>
+
+	* gcc.dg/winline-9.c: New test.
+
 2006-11-28  Nathan Sidwell  <nathan@codesourcery.com>
 
 	* gcc.dg/pack-test-1.c: Use __alignof__ to determine natural
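The arithmetic of the new check is easiest to see with the parameters of the winline-9.c test that follows: large-stack-frame=10 and large-stack-frame-growth=2, with a caller t() whose own frame is empty. Below is a hypothetical distillation of just that test, the cgraph fields flattened into plain arguments; it models the logic above and is not GCC code:

    #include <stdio.h>

    /* Refuse inlining when the combined frame would exceed both the
       allowed growth of the caller's frame and the large-frame cutoff.  */
    static int
    inline_allowed (long caller_offset, long caller_self, long callee_size,
                    long large_stack_frame, long growth_percent)
    {
      long stack_size_limit = caller_self
                              + caller_self * growth_percent / 100;
      long inlined_stack = caller_offset + caller_self + callee_size;
      return !(inlined_stack > stack_size_limit
               && inlined_stack > large_stack_frame);
    }

    int
    main (void)
    {
      /* Values from winline-9.c: caller t() has no locals of its own.  */
      printf ("aa (10-byte frame): %s\n",
              inline_allowed (0, 0, 10, 10, 2) ? "inlined" : "refused");
      printf ("bb (100-byte frame): %s\n",
              inline_allowed (0, 0, 100, 10, 2) ? "inlined" : "refused");
      return 0;
    }

aa squeaks by because 10 bytes is not strictly greater than the 10-byte large-stack-frame cutoff, while bb trips both conditions; that is exactly the pair of dg-warning markers in the test.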
diff --git a/gcc/testsuite/gcc.dg/winline-9.c b/gcc/testsuite/gcc.dg/winline-9.c
new file mode 100644
index 0000000..f6cd237
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/winline-9.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Winline --param large-stack-frame=10 --param large-stack-frame-growth=2" } */
+
+int a,b;
+void test(char *);
+static inline
+int aa (void)
+{
+  char t[10];
+  test(t);
+}
+static inline
+int bb (void)
+{ /* { dg-warning "large-stack-frame" "" } */
+  char t[100];
+  test(t);
+}
+
+t()
+{
+  if (a)
+    aa();
+  if (b)
+    bb(); /* { dg-warning "called from here" "" } */
+}
diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
index 818329c..4755fbb 100644
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@@ -110,6 +110,8 @@ void tree_function_versioning (tree, tree, varray_type, bool);
 extern tree remap_decl (tree decl, copy_body_data *id);
 extern tree remap_type (tree type, copy_body_data *id);
 
+extern HOST_WIDE_INT estimated_stack_frame_size (void);
+
 /* 0 if we should not perform inlining.
    1 if we should expand functions calls inline at the tree level.
    2 if we should consider *all* functions to be inline
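As a closing illustration of the offset bookkeeping added to cgraph_clone_inlined_nodes earlier: frames of inlined calls are stacked on top of each other, not summed independently. With invented sizes, if A (40-byte frame) inlines B (24 bytes), B's stack_frame_offset becomes 0 + 40 and the running peak 64; when B's body in turn inlines C (16 bytes), C's offset is 40 + 24 = 64 and the peak 80, which becomes A's estimated_stack_size. A small sketch of that propagation with simplified, non-GCC types:

    #include <stdio.h>

    /* Minimal stand-ins for the cgraph fields touched by the patch.  */
    struct node
    {
      long self_size;  /* local.estimated_self_stack_size */
      long offset;     /* global.stack_frame_offset       */
      long total;      /* global.estimated_stack_size     */
    };

    /* Propagate offsets as in cgraph_clone_inlined_nodes: the callee's
       frame sits after the caller's, and the outermost function records
       the peak.  */
    static void
    clone_inlined (struct node *root, struct node *caller, struct node *callee)
    {
      long peak;
      callee->offset = caller->offset + caller->self_size;
      peak = callee->offset + callee->self_size;
      if (root->total < peak)
        root->total = peak;
    }

    int
    main (void)
    {
      struct node a = { 40, 0, 40 }, b = { 24, 0, 24 }, c = { 16, 0, 16 };
      clone_inlined (&a, &a, &b);  /* B inlined into A.            */
      clone_inlined (&a, &b, &c);  /* C inlined into (inlined) B.  */
      printf ("A estimated_stack_size: %ld\n", a.total);  /* 80 */
      return 0;
    }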