From 6d8fd122c4f856e9c6037adc310505f2d65347d9 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 8 Dec 2019 18:02:30 +0100 Subject: Fix overflows in -fprofile-reorder-functions This patch fixes three sissues with -fprofile-reorder-functions: 1) First is that tp_first_run is stored as 32bit integer while it can easily overflow (and does so during Firefox profiling). 2) Second problem is that flag_profile_functions can not be tested w/o function context. The changes to expand_all_functions makes it to work on mixed units by first outputting all functions w/o -fprofile-reorder-function (or with no profile info) and then outputting in first_run order 3) LTO partitioner was mixing up order by tp_first_run and by order. for no_reorder we definitly want to order via first, while for everything else we want to roder by second. I have also merged duplicated comparators since they are bit fragile into tp_first_run_node_cmp. I originaly started to look into this because of undefined symbols with Firefox PGO builds. These symbols went away with fixing these bug but I am not quite sure how. it is possible that there is another problem in lto_blanced_map but even after reading the noreorder code few times carefuly I did not find it. Other explanation would be that our new qsort with broken comparator due to overflow can actualy remove some entries in the array, but that sounds bit crazy. Bootstrapped/regested x86_64-linux. * cgraph.c (cgraph_node::dump): Make tp_first_run 64bit. * cgraph.h (cgrpah_node): Likewise. (tp_first_run_node_cmp): Deeclare. * cgraphunit.c (node_cmp): Rename to ... (tp_first_run_node_cmp): ... this; export; watch for 64bit overflows; clear tp_first_run for no_reorder and !flag_profile_reorder_functions. (expand_all_functions): Collect tp_first_run and normal functions to two vectors so the other functions remain sorted. Do not check for flag_profile_reorder_functions it is function local flag. * profile.c (compute_value_histograms): Update tp_first_run printing. * lto-partition.c (node_cmp): Turn into simple order comparsions. (varpool_node_cmp): Remove. (add_sorted_nodes): Use node_cmp. (lto_balanced_map): Use tp_first_run_node_cmp. From-SVN: r279093 --- gcc/cgraphunit.c | 68 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 17 deletions(-) (limited to 'gcc/cgraphunit.c') diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 1b3d281..0468ccf 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2359,19 +2359,33 @@ cgraph_node::expand (void) /* Node comparator that is responsible for the order that corresponds to time when a function was launched for the first time. */ -static int -node_cmp (const void *pa, const void *pb) +int +tp_first_run_node_cmp (const void *pa, const void *pb) { const cgraph_node *a = *(const cgraph_node * const *) pa; const cgraph_node *b = *(const cgraph_node * const *) pb; + gcov_type tp_first_run_a = a->tp_first_run; + gcov_type tp_first_run_b = b->tp_first_run; + + if (!opt_for_fn (a->decl, flag_profile_reorder_functions) + || a->no_reorder) + tp_first_run_a = 0; + if (!opt_for_fn (b->decl, flag_profile_reorder_functions) + || b->no_reorder) + tp_first_run_b = 0; + + if (tp_first_run_a == tp_first_run_b) + return a->order - b->order; /* Functions with time profile must be before these without profile. */ - if (!a->tp_first_run || !b->tp_first_run) - return a->tp_first_run - b->tp_first_run; + if (!tp_first_run_a || !tp_first_run_b) + return tp_first_run_b ? 1 : -1; - return a->tp_first_run != b->tp_first_run - ? b->tp_first_run - a->tp_first_run - : b->order - a->order; + /* Watch for overlflow - tp_first_run is 64bit. */ + if (tp_first_run_a > tp_first_run_b) + return 1; + else + return -1; } /* Expand all functions that must be output. @@ -2390,8 +2404,10 @@ expand_all_functions (void) cgraph_node *node; cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count); + cgraph_node **tp_first_run_order = XCNEWVEC (cgraph_node *, + symtab->cgraph_count); unsigned int expanded_func_count = 0, profiled_func_count = 0; - int order_pos, new_order_pos = 0; + int order_pos, tp_first_run_order_pos = 0, new_order_pos = 0; int i; order_pos = ipa_reverse_postorder (order); @@ -2401,11 +2417,17 @@ expand_all_functions (void) optimization. So we must be sure to not reference them. */ for (i = 0; i < order_pos; i++) if (order[i]->process) - order[new_order_pos++] = order[i]; - - if (flag_profile_reorder_functions) - qsort (order, new_order_pos, sizeof (cgraph_node *), node_cmp); + { + if (order[i]->tp_first_run + && opt_for_fn (order[i]->decl, flag_profile_reorder_functions)) + tp_first_run_order[tp_first_run_order_pos++] = order[i]; + else + order[new_order_pos++] = order[i]; + } + /* Output functions in RPO so callers get optimized before callees. This + makes ipa-ra and other propagators to work. + FIXME: This is far from optimal code layout. */ for (i = new_order_pos - 1; i >= 0; i--) { node = order[i]; @@ -2413,13 +2435,25 @@ expand_all_functions (void) if (node->process) { expanded_func_count++; - if(node->tp_first_run) - profiled_func_count++; + node->process = 0; + node->expand (); + } + } + qsort (tp_first_run_order, tp_first_run_order_pos, + sizeof (cgraph_node *), tp_first_run_node_cmp); + for (i = 0; i < tp_first_run_order_pos; i++) + { + node = tp_first_run_order[i]; + + if (node->process) + { + expanded_func_count++; + profiled_func_count++; if (symtab->dump_file) fprintf (symtab->dump_file, - "Time profile order in expand_all_functions:%s:%d\n", - node->asm_name (), node->tp_first_run); + "Time profile order in expand_all_functions:%s:%" PRId64 + "\n", node->asm_name (), (int64_t) node->tp_first_run); node->process = 0; node->expand (); } @@ -2429,7 +2463,7 @@ expand_all_functions (void) fprintf (dump_file, "Expanded functions with time profile (%s):%u/%u\n", main_input_filename, profiled_func_count, expanded_func_count); - if (symtab->dump_file && flag_profile_reorder_functions) + if (symtab->dump_file && tp_first_run_order_pos) fprintf (symtab->dump_file, "Expanded functions with time profile:%u/%u\n", profiled_func_count, expanded_func_count); -- cgit v1.1