Diffstat (limited to 'gcc/auto-profile.cc')
-rw-r--r-- | gcc/auto-profile.cc | 2585
1 file changed, 2127 insertions, 458 deletions
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 7e0e8c6..d78f2cb 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "profile.h" #include "langhooks.h" +#include "context.h" +#include "pass_manager.h" #include "cfgloop.h" #include "tree-cfg.h" #include "tree-cfgcleanup.h" @@ -59,7 +61,8 @@ along with GCC; see the file COPYING3. If not see There are three phases in AutoFDO: - Phase 1: Read profile from the profile data file. + Phase 1: At startup. + Read profile from the profile data file. The following info is read from the profile datafile: * string_table: a map between function name and its index. * autofdo_source_profile: a map from function_instance name to @@ -74,23 +77,44 @@ along with GCC; see the file COPYING3. If not see standalone symbol, or a clone of a function that is inlined into another function. - Phase 2: Early inline + value profile transformation. - Early inline uses autofdo_source_profile to find if a callsite is: + Phase 2: In afdo_offline pass. + Remove function instances from other translation units + and offline all cross-translation unit inlining done during train + run compilation. This is necessary to not lose profiles with + LTO train run. + + Phase 3: During early optimization. + AFDO inline + value profile transformation. + This happens during early optimization. + During early inlning AFDO inliner is executed which + uses autofdo_source_profile to find if a callsite is: * inlined in the profiled binary. * callee body is hot in the profiling run. If both condition satisfies, early inline will inline the callsite regardless of the code growth. - Phase 2 is an iterative process. During each iteration, we also check - if an indirect callsite is promoted and inlined in the profiling run. - If yes, vpt will happen to force promote it and in the next iteration, - einline will inline the promoted callsite in the next iteration. - Phase 3: Annotate control flow graph. - AutoFDO uses a separate pass to: + Performing this early has benefit of doing early optimizations + before read IPA passe and getting more "context sensitivity" of + the profile read. Profile of inlined functions may differ + significantly form one inline instance to another and from the + offline version. + + This is controlled by -fauto-profile-inlinig and is independent + of -fearly-inlining. + + Phase 4: In AFDO pass. + Offline all functions that has been inlined in the + train run but were not inlined in early inlining nor AFDO + inline. + + Phase 5: In AFDO pass. + Annotate control flow graph. * Annotate basic block count * Estimate branch probability + * Use earlier static profile to fill in the gaps + if AFDO profile is ambigous - After the above 3 phases, all profile is readily annotated on the GCC IR. + After the above 5 phases, all profile is readily annotated on the GCC IR. AutoFDO tries to reuse all FDO infrastructure as much as possible to make use of the profile. E.g. it uses existing mechanism to calculate the basic block/edge frequency, as well as the cgraph node/edge count. @@ -120,10 +144,17 @@ private: }; /* Represent a source location: (function_decl, lineno). */ -typedef std::pair<tree, unsigned> decl_lineno; +struct decl_lineno +{ + tree decl; + /* Relative locations stored in auto-profile. */ + unsigned int afdo_loc; + /* Actual location afdo_loc was computed from used to output diagnostics. */ + location_t location; +}; /* Represent an inline stack. 
vector[0] is the leaf node. */ -typedef auto_vec<decl_lineno> inline_stack; +typedef auto_vec<decl_lineno, 20> inline_stack; /* String array that stores function names. */ typedef auto_vec<char *> string_vector; @@ -136,6 +167,10 @@ typedef std::map<unsigned, gcov_type> icall_target_map; to direct call. */ typedef std::set<gimple *> stmt_set; +/* Set and map used to translate name indexes. */ +typedef hash_set<int_hash <int, -1, -2>> name_index_set; +typedef hash_map<int_hash <int, -1, -2>, int> name_index_map; + /* Represent count info of an inline stack. */ class count_info { @@ -151,7 +186,6 @@ public: Each inline stack should only be used to annotate IR once. This will be enforced when instruction-level discriminator is supported. */ - bool annotated; }; /* operator< for "const char *". */ @@ -184,6 +218,15 @@ public: /* Read profile, return TRUE on success. */ bool read (); + /* Return number of entries. */ + size_t num_entries () + { + return vector_.length (); + } + + /* Add new name and return its index. */ + int add_name (char *); + private: typedef std::map<const char *, unsigned, string_compare> string_index_map; string_vector vector_; @@ -218,11 +261,18 @@ public: { return name_; } + int + set_name (int index) + { + return name_ = index; + } gcov_type total_count () const { return total_count_; } + + /* Return head count or -1 if unknown. */ gcov_type head_count () const { @@ -230,24 +280,121 @@ public: } /* Traverse callsites of the current function_instance to find one at the - location of LINENO and callee name represented in DECL. */ + location of LINENO and callee name represented in DECL. + LOCATION should match LINENO and is used to output diagnostics. */ function_instance *get_function_instance_by_decl (unsigned lineno, - tree decl) const; + tree decl, + location_t location) const; + + /* Merge profile of clones. Note that cloning hasnt been performed when + we annotate the CFG (at this stage). */ + void merge (function_instance *other, + vec <function_instance *> &new_functions); + + /* Look for inline instancs that was not realized and + remove them while possibly merging them to offline variants. */ + void offline_if_not_realized (vec <function_instance *> &new_functions); + + /* Offline all inlined functions with name in SEEN. + If new toplevel functions are created, add them to NEW_FUNCTIONS. */ + void offline_if_in_set (name_index_set &seen, + vec <function_instance *> &new_functions); + + /* Walk inlined functions and if their name is not in SEEN + remove it. */ + + void remove_external_functions (name_index_set &seen, + name_index_map &to_symbol_name, + vec <function_instance *> &new_functions); /* Store the profile info for LOC in INFO. Return TRUE if profile info is found. */ bool get_count_info (location_t loc, count_info *info) const; - /* Read the inlined indirect call target profile for STMT and store it in - MAP, return the total count for all inlined indirect calls. */ - gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; + /* Read the inlined indirect call target profile for STMT in FN and store it + in MAP, return the total count for all inlined indirect calls. */ + gcov_type find_icall_target_map (tree fn, gcall *stmt, + icall_target_map *map) const; - /* Sum of counts that is used during annotation. */ - gcov_type total_annotated_count () const; + /* Remove inlined indirect call target profile for STMT in FN. */ + void remove_icall_target (tree fn, gcall *stmt); /* Mark LOC as annotated. 
*/ void mark_annotated (location_t loc); + void dump (FILE *f, int indent = 0, bool nested = false) const; + + void dump_inline_stack (FILE *f) const; + + DEBUG_FUNCTION void debug () const; + + /* Mark function as removed from indir target list. */ + void + remove_icall_target () + { + removed_icall_target_ = true; + } + + /* Reutrn true if function is removed from indir target list. */ + bool + removed_icall_target () + { + return removed_icall_target_; + } + + /* Set inlined_to pointer. */ + void + set_inlined_to (function_instance *inlined_to) + { + gcc_checking_assert (inlined_to != this); + inlined_to_ = inlined_to; + } + + /* Return pointer to the function instance this function is inlined + to or NULL if it is outer instance. */ + function_instance * + inlined_to () const + { + return inlined_to_; + } + + /* Mark function as realized. */ + void + set_realized () + { + realized_ = true; + } + + /* Return true if function is realized. */ + bool + realized_p () + { + return realized_; + } + + /* Mark function as in_worklist. */ + void + set_in_worklist () + { + gcc_checking_assert (!inlined_to_ && !in_worklist_p ()); + in_worklist_ = true; + } + + void + clear_in_worklist () + { + gcc_checking_assert (!inlined_to_ && in_worklist_p ()); + in_worklist_ = false; + } + + + /* Return true if function is in_worklist. */ + bool + in_worklist_p () + { + return in_worklist_; + } + private: /* Callsite, represented as (decl_lineno, callee_function_name_index). */ typedef std::pair<unsigned, unsigned> callsite; @@ -256,7 +403,9 @@ private: typedef std::map<callsite, function_instance *> callsite_map; function_instance (unsigned name, gcov_type head_count) - : name_ (name), total_count_ (0), head_count_ (head_count) + : name_ (name), total_count_ (0), head_count_ (head_count), + removed_icall_target_ (false), realized_ (false), + in_worklist_ (false), inlined_to_ (NULL) { } @@ -277,6 +426,25 @@ private: /* Map from source location to count_info. */ position_count_map pos_counts; + + /* True if function was removed from indir target list. */ + bool removed_icall_target_; + + /* True if function exists in IL. I.e. for toplevel instance we + have corresponding symbol and for inline instance we inlined + to it. */ + bool realized_; + + /* Ture if function is in worklist for merging/offlining. */ + bool in_worklist_; + + /* Pointer to outer function instance or NULL if this + is a toplevel one. */ + function_instance *inlined_to_; + + /* Turn inline instance to offline. */ + static bool offline (function_instance *fn, + vec <function_instance *> &new_functions); }; /* Profile for all functions. */ @@ -299,24 +467,37 @@ public: /* For a given DECL, returns the top-level function_instance. */ function_instance *get_function_instance_by_decl (tree decl) const; + /* For a given name index, returns the top-level function_instance. */ + function_instance *get_function_instance_by_name_index (int) const; + + void add_function_instance (function_instance *); + /* Find count_info for a given gimple STMT. If found, store the count_info - in INFO and return true; otherwise return false. */ - bool get_count_info (gimple *stmt, count_info *info) const; + in INFO and return true; otherwise return false. + NODE can be used to specify particular inline clone. */ + bool get_count_info (gimple *stmt, count_info *info, + cgraph_node *node = NULL) const; /* Find count_info for a given gimple location GIMPLE_LOC. If found, - store the count_info in INFO and return true; otherwise return false. 
*/ - bool get_count_info (location_t gimple_loc, count_info *info) const; + store the count_info in INFO and return true; otherwise return false. + NODE can be used to specify particular inline clone. */ + bool get_count_info (location_t gimple_loc, count_info *info, + cgraph_node *node = NULL) const; /* Find total count of the callee of EDGE. */ gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; - /* Update value profile INFO for STMT from the inlined indirect callsite. - Return true if INFO is updated. */ - bool update_inlined_ind_target (gcall *stmt, count_info *info); + /* Update value profile INFO for STMT within NODE from the inlined indirect + callsite. Return true if INFO is updated. */ + bool update_inlined_ind_target (gcall *stmt, count_info *info, + cgraph_node *node); - /* Mark LOC as annotated. */ - void mark_annotated (location_t loc); + void remove_icall_target (cgraph_edge *e); + + /* Offline all functions not defined in the current translation unit. */ + void offline_external_functions (); + void offline_unrealized_inlines (); private: /* Map from function_instance name index (in string_table) to function_instance. */ @@ -333,6 +514,8 @@ private: get_function_instance_by_inline_stack (const inline_stack &stack) const; name_function_instance_map map_; + + auto_vec <function_instance *> duplicate_functions_; }; /* Store the strings read from the profile data file. */ @@ -344,18 +527,59 @@ static autofdo_source_profile *afdo_source_profile; /* gcov_summary structure to store the profile_info. */ static gcov_summary *afdo_profile_info; +/* Scaling factor for afdo data. Compared to normal profile + AFDO profile counts are much lower, depending on sampling + frequency. We scale data up to reudce effects of roundoff + errors. */ + +static gcov_type afdo_count_scale = 1; + /* Helper functions. */ + /* Return the original name of NAME: strip the suffix that starts - with '.' Caller is responsible for freeing RET. */ + with '.' for names that are generetad after auto-profile pass. + This is to match profiled names with the names in the IR at this stage. + Note that we only have to strip sufix and not in the middle. + Caller is responsible for freeing RET. */ static char * -get_original_name (const char *name) +get_original_name (const char *name, bool alloc = true) { - char *ret = xstrdup (name); - char *find = strchr (ret, '.'); - if (find != NULL) - *find = 0; + char *ret = alloc ? xstrdup (name) : const_cast<char *> (name); + char *last_dot = strrchr (ret, '.'); + if (last_dot == NULL) + return ret; + bool only_digits = true; + char *ptr = last_dot; + while (*(++ptr) != 0) + if (*ptr < '0' || *ptr > '9') + { + only_digits = false; + break; + } + if (only_digits) + *last_dot = 0; + char *next_dot = strrchr (ret, '.'); + /* if nested function such as foo.0, return foo.0 */ + if (next_dot == NULL) + { + *last_dot = '.'; + return ret; + } + /* Suffixes of clones that compiler generates after auto-profile. */ + const char *suffixes[] = {"isra", "constprop", "lto_priv", "part", "cold"}; + for (unsigned i = 0; i < sizeof (suffixes); ++i) + { + if (strncmp (next_dot + 1, suffixes[i], strlen (suffixes[i])) == 0) + { + *next_dot = 0; + return get_original_name (ret, false); + } + } + /* Otherwise, it is for clones such as .omp_fn.N that was done before + auto-profile and should be kept as it is. */ + *last_dot = '.'; return ret; } @@ -384,10 +608,39 @@ get_function_decl_from_block (tree block) return BLOCK_ABSTRACT_ORIGIN (block); } +/* Dump LOC to F. 
*/ + +static void +dump_afdo_loc (FILE *f, unsigned loc) +{ + if (loc & 65535) + fprintf (f, "%i.%i", loc >> 16, loc & 65535); + else + fprintf (f, "%i", loc >> 16); +} + +/* Dump STACK to F. */ + +static void +dump_inline_stack (FILE *f, inline_stack *stack) +{ + bool first = true; + for (decl_lineno &p : *stack) + { + fprintf (f, "%s%s:", + first ? "" : "; ", + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (p.decl))); + dump_afdo_loc (f, p.afdo_loc); + first = false; + } + fprintf (f, "\n"); +} + /* Store inline stack for STMT in STACK. */ static void -get_inline_stack (location_t locus, inline_stack *stack) +get_inline_stack (location_t locus, inline_stack *stack, + tree fn = current_function_decl) { if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) return; @@ -405,50 +658,65 @@ get_inline_stack (location_t locus, inline_stack *stack) tree decl = get_function_decl_from_block (block); stack->safe_push ( - std::make_pair (decl, get_combined_location (locus, decl))); + {decl, get_combined_location (locus, decl), locus}); locus = tmp_locus; } } - stack->safe_push ( - std::make_pair (current_function_decl, - get_combined_location (locus, current_function_decl))); + stack->safe_push ({fn, get_combined_location (locus, fn), locus}); } -/* Return STMT's combined location, which is a 32bit integer in which - higher 16 bits stores the line offset of LOC to the start lineno - of DECL, The lower 16 bits stores the discriminator. */ +/* Same as get_inline_stack for a given node which may be + an inline clone. If NODE is NULL, assume current_function_decl. */ +static void +get_inline_stack_in_node (location_t locus, inline_stack *stack, + cgraph_node *node) +{ + if (!node) + return get_inline_stack (locus, stack); + do + { + get_inline_stack (locus, stack, node->decl); + /* If caller is inlined, continue building stack. */ + if (!node->inlined_to) + node = NULL; + else + { + locus = gimple_location (node->callers->call_stmt); + node = node->callers->caller; + } + } + while (node); +} + +/* Return combined location of LOCUS within BLOCK that is in + function FN. + + This is a 32bit integer in which higher 16 bits stores the line offset of + LOC to the start lineno of DECL, The lower 16 bits stores the + discriminator. */ static unsigned -get_relative_location_for_stmt (gimple *stmt) +get_relative_location_for_locus (tree fn, tree block, location_t locus) { - location_t locus = gimple_location (stmt); if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) return UNKNOWN_LOCATION; - for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); + for (; block && (TREE_CODE (block) == BLOCK); block = BLOCK_SUPERCONTEXT (block)) - if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) + if (inlined_function_outer_scope_p (block)) return get_combined_location (locus, - get_function_decl_from_block (block)); - return get_combined_location (locus, current_function_decl); + get_function_decl_from_block (block)); + return get_combined_location (locus, fn); } -/* Return true if BB contains indirect call. */ +/* Return combined location of STMT in function FN. 
*/ -static bool -has_indirect_call (basic_block bb) +static unsigned +get_relative_location_for_stmt (tree fn, gimple *stmt) { - gimple_stmt_iterator gsi; - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) - && (gimple_call_fn (stmt) == NULL - || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) - return true; - } - return false; + return get_relative_location_for_locus + (fn, LOCATION_BLOCK (gimple_location (stmt)), + gimple_location (stmt)); } /* Member functions for string_table. */ @@ -483,13 +751,8 @@ string_table::get_index (const char *name) const int string_table::get_index_by_decl (tree decl) const { - char *name - = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); + const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); int ret = get_index (name); - free (name); - if (ret != -1) - return ret; - ret = get_index (lang_hooks.dwarf_name (decl, 0)); if (ret != -1) return ret; if (DECL_FROM_INLINE (decl)) @@ -507,6 +770,16 @@ string_table::get_name (int index) const return vector_[index]; } +/* Add new name SRRING and return its index. */ + +int +string_table::add_name (char *string) +{ + vector_.safe_push (string); + map_[vector_.last ()] = vector_.length () - 1; + return vector_.length () - 1; +} + /* Read the string table. Return TRUE if reading is successful. */ bool @@ -518,9 +791,10 @@ string_table::read () gcov_read_unsigned (); /* Read in the file name table. */ unsigned string_num = gcov_read_unsigned (); + vector_.reserve (string_num); for (unsigned i = 0; i < string_num; i++) { - vector_.safe_push (get_original_name (gcov_read_string ())); + vector_.quick_push (xstrdup (gcov_read_string ())); map_[vector_.last ()] = i; } return true; @@ -530,6 +804,7 @@ string_table::read () function_instance::~function_instance () { + gcc_assert (!in_worklist_p ()); for (callsite_map::iterator iter = callsites.begin (); iter != callsites.end (); ++iter) delete iter->second; @@ -540,7 +815,8 @@ function_instance::~function_instance () function_instance * function_instance::get_function_instance_by_decl (unsigned lineno, - tree decl) const + tree decl, + location_t location) const { int func_name_idx = afdo_string_table->get_index_by_decl (decl); if (func_name_idx != -1) @@ -550,72 +826,776 @@ function_instance::get_function_instance_by_decl (unsigned lineno, if (ret != callsites.end ()) return ret->second; } - func_name_idx - = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); - if (func_name_idx != -1) + if (DECL_FROM_INLINE (decl)) { - callsite_map::const_iterator ret - = callsites.find (std::make_pair (lineno, func_name_idx)); - if (ret != callsites.end ()) - return ret->second; + function_instance + *ret = get_function_instance_by_decl (lineno, + DECL_ABSTRACT_ORIGIN (decl), + location); + return ret; + } + if (dump_enabled_p ()) + { + for (auto const &iter : callsites) + if (iter.first.first == lineno) + dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, + dump_user_location_t::from_location_t (location), + "auto-profile has mismatched function name %s" + " instaed of %s at loc %i:%i", + afdo_string_table->get_name (iter.first.second), + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), + lineno << 16, + lineno & 65535); } - if (DECL_FROM_INLINE (decl)) - return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); return NULL; } -/* Store the profile info for LOC in INFO. 
Return TRUE if profile info - is found. */ +/* Merge profile of OTHER to THIS. Note that cloning hasnt been performed when + we annotate the CFG (at this stage). */ + +void +function_instance::merge (function_instance *other, + vec <function_instance *> &new_functions) +{ + /* Do not merge to itself and only merge functions of same name. */ + gcc_checking_assert (other != this && other->name () == name ()); + total_count_ += other->total_count_; + if (other->total_count () && total_count () && other->head_count () == -1) + head_count_ = -1; + else if (head_count_ != -1) + head_count_ += other->head_count_; + + bool changed = true; + + while (changed) + { + changed = false; + /* If both function instances agree on particular inlined function, + merge profiles. Otherwise offline the instance. */ + for (callsite_map::const_iterator iter = other->callsites.begin (); + iter != other->callsites.end ();) + if (callsites.count (iter->first) == 0) + { + function_instance *f = iter->second; + if (dump_file) + { + fprintf (dump_file, " Mismatch in inlined functions;" + " offlining in merge source:"); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + /* We already merged outer part of the function acounting + the inlined calll; compensate. */ + for (function_instance *s = this; s; s = s->inlined_to ()) + { + s->total_count_ -= f->total_count (); + gcc_checking_assert (s->total_count_ >= 0); + } + other->callsites.erase (iter); + function_instance::offline (f, new_functions); + /* Start from begining as merging might have offlined + some functions in the case of recursive inlining. */ + iter = other->callsites.begin (); + } + else + ++iter; + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end ();) + if (other->callsites.count (iter->first) == 0) + { + function_instance *f = iter->second; + if (dump_file) + { + fprintf (dump_file, " Mismatch in inlined functions;" + " offlining in merge destination:"); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + callsites.erase (iter); + function_instance::offline (f, new_functions); + iter = callsites.begin (); + changed = true; + } + else + ++iter; + } + for (callsite_map::const_iterator iter = other->callsites.begin (); + iter != other->callsites.end (); ++iter) + { + if (dump_file) + { + fprintf (dump_file, " Merging profile for inlined function\n" + " from: "); + iter->second->dump_inline_stack (dump_file); + fprintf (dump_file, " total:%" PRIu64 "\n to : ", + (int64_t)iter->second->total_count ()); + callsites[iter->first]->dump_inline_stack (dump_file); + fprintf (dump_file, " total:%" PRIu64 "\n", + (int64_t)callsites[iter->first]->total_count ()); + } + + callsites[iter->first]->merge (iter->second, new_functions); + } + + for (position_count_map::const_iterator iter = other->pos_counts.begin (); + iter != other->pos_counts.end (); ++iter) + if (pos_counts.count (iter->first) == 0) + pos_counts[iter->first] = iter->second; + else + { + pos_counts[iter->first].count += iter->second.count; + for (icall_target_map::const_iterator titer + = iter->second.targets.begin (); + titer != iter->second.targets.end (); ++titer) + if (pos_counts[iter->first].targets.count (titer->first) == 0) + pos_counts[iter->first].targets[titer->first] + = titer->second; + else + pos_counts[iter->first].targets[titer->first] + += titer->second; + } +} + +/* Make inline function FN offline. + If tolevel function of same name already exists, then merge profiles. + Otherwise turn FN toplevel. 
Return true if new toplevel function + was introduced. + If new toplevel functions are created and NEW_FUNCTIONS != NULL, + add them to NEW_FUNCTIONS. + + TODO: When offlining indirect call we lose information about the + call target. It should be possible to add it into + targets histogram. */ bool -function_instance::get_count_info (location_t loc, count_info *info) const +function_instance::offline (function_instance *fn, + vec <function_instance *> &new_functions) { - position_count_map::const_iterator iter = pos_counts.find (loc); - if (iter == pos_counts.end ()) - return false; - *info = iter->second; + gcc_checking_assert (fn->inlined_to ()); + for (function_instance *s = fn->inlined_to (); s; s = s->inlined_to ()) + { + s->total_count_ -= fn->total_count (); + gcc_checking_assert (s->total_count_ >= 0); + } + function_instance *to + = afdo_source_profile->get_function_instance_by_name_index (fn->name ()); + fn->set_inlined_to (NULL); + /* If there is offline function of same name, we need to merge profile. + Delay this by adding function to a worklist so we do not run into + problem with recursive inlining. */ + if (to) + { + if (fn->in_worklist_p ()) + return false; + fn->set_in_worklist (); + new_functions.safe_push (fn); + if (dump_file) + { + fprintf (dump_file, " Recoding duplicate: "); + to->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + return true; + } + if (dump_file) + { + fprintf (dump_file, " Added as offline instance: "); + fn->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + if (fn->total_count ()) + fn->head_count_ = -1; + afdo_source_profile->add_function_instance (fn); + fn->set_in_worklist (); + new_functions.safe_push (fn); return true; } -/* Mark LOC as annotated. */ +/* Offline all inlined functions with name in SEEN. + If new toplevel functions are created, add them to NEW_FUNCTIONS. */ void -function_instance::mark_annotated (location_t loc) +function_instance::offline_if_in_set (name_index_set &seen, + vec <function_instance *> &new_functions) { - position_count_map::iterator iter = pos_counts.find (loc); + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end ();) + if (seen.contains (iter->first.second)) + { + function_instance *f = iter->second; + if (dump_file) + { + fprintf (dump_file, "Offlining function inlined to other module: "); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + iter = callsites.erase (iter); + function_instance::offline (f, new_functions); + /* Start from begining as merging might have offlined + some functions in the case of recursive inlining. */ + iter = callsites.begin (); + } + else + { + iter->second->offline_if_in_set (seen, new_functions); + ++iter; + } +} + +/* Walk inlined functions and if their name is not in SEEN + remove it. Also rename function names as given by + to_symbol_name map. 
*/ + +void +function_instance::remove_external_functions + (name_index_set &seen, + name_index_map &to_symbol_name, + vec <function_instance *> &new_functions) +{ + auto_vec <callsite, 20> to_rename; + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end ();) + if (!seen.contains (iter->first.second)) + { + function_instance *f = iter->second; + if (dump_file) + { + fprintf (dump_file, " Removing external inline: "); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + iter = callsites.erase (iter); + f->set_inlined_to (NULL); + f->offline_if_in_set (seen, new_functions); + delete f; + } + else + { + gcc_checking_assert ((int)iter->first.second + == iter->second->name ()); + int *newn = to_symbol_name.get (iter->first.second); + if (newn) + { + gcc_checking_assert (iter->second->inlined_to ()); + to_rename.safe_push (iter->first); + } + iter->second->remove_external_functions + (seen, to_symbol_name, new_functions); + ++iter; + } + for (auto &key : to_rename) + { + auto iter = callsites.find (key); + callsite key2 = key; + key2.second = *to_symbol_name.get (key.second); + callsites[key2] = iter->second; + iter->second->set_name (key2.second); + callsites.erase (iter); + } + auto_vec <int, 20> target_to_rename; + for (auto &iter : pos_counts) + { + for (auto const &titer : iter.second.targets) + { + int *ren = to_symbol_name.get (titer.first); + if (ren) + target_to_rename.safe_push (titer.first); + } + while (target_to_rename.length ()) + { + int key = target_to_rename.pop (); + int key2 = *to_symbol_name.get (key); + auto i = iter.second.targets.find (key); + if (iter.second.targets.count (key2) == 0) + iter.second.targets[key2] = i->second; + else + iter.second.targets[key2] += i->second; + iter.second.targets.erase (i); + } + } +} + +/* Look for inline instances that was not realized and + remove them while possibly merging them to offline variants. */ + +void +function_instance::offline_if_not_realized + (vec <function_instance *> &new_functions) +{ + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end ();) + if (!iter->second->realized_p ()) + { + function_instance *f = iter->second; + if (dump_file) + { + fprintf (dump_file, "Offlining unrealized inline "); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + iter = callsites.erase (iter); + offline (f, new_functions); + } + else + { + iter->second->offline_if_not_realized (new_functions); + ++iter; + } +} + +/* Dump instance to F indented by INDENT. 
*/ + +void +function_instance::dump (FILE *f, int indent, bool nested) const +{ + if (!nested) + fprintf (f, "%*s%s total:%" PRIu64 " head:%" PRId64 "\n", + indent, "", afdo_string_table->get_name (name ()), + (int64_t)total_count (), (int64_t)head_count ()); + else + fprintf (f, " total:%" PRIu64 "\n", (int64_t)total_count ()); + for (auto const &iter : pos_counts) + { + fprintf (f, "%*s", indent + 2, ""); + dump_afdo_loc (f, iter.first); + fprintf (f, ": %" PRIu64, (int64_t)iter.second.count); + + for (auto const &titer : iter.second.targets) + fprintf (f, " %s:%" PRIu64, + afdo_string_table->get_name (titer.first), + (int64_t)titer.second); + fprintf (f,"\n"); + } + for (auto const &iter : callsites) + { + fprintf (f, "%*s", indent + 2, ""); + dump_afdo_loc (f, iter.first.first); + fprintf (f, ": %s", afdo_string_table->get_name (iter.first.second)); + iter.second->dump (f, indent + 2, true); + gcc_checking_assert ((int)iter.first.second == iter.second->name ()); + } +} + +/* Dump inline path. */ + +void +function_instance::dump_inline_stack (FILE *f) const +{ + auto_vec <callsite, 20> stack; + const function_instance *p = this, *s = inlined_to (); + while (s) + { + bool found = false; + for (callsite_map::const_iterator iter = s->callsites.begin (); + iter != s->callsites.end (); ++iter) + if (iter->second == p) + { + gcc_checking_assert (!found + && (int)iter->first.second == p->name ()); + stack.safe_push ({iter->first.first, s->name ()}); + found = true; + } + gcc_checking_assert (found); + p = s; + s = s->inlined_to (); + } + for (callsite &s: stack) + { + fprintf (f, "%s:", afdo_string_table->get_name (s.second)); + dump_afdo_loc (f, s.first); + fprintf (f, " "); + } + fprintf (f, "%s", afdo_string_table->get_name (name ())); +} + +/* Dump instance to stderr. */ + +void +function_instance::debug () const +{ + dump (stderr); +} + +/* Return profile info for LOC in INFO. */ + +bool +function_instance::get_count_info (location_t loc, count_info *info) const +{ + position_count_map::const_iterator iter = pos_counts.find (loc); if (iter == pos_counts.end ()) - return; - iter->second.annotated = true; + return false; + *info = iter->second; + return true; } /* Read the inlined indirect call target profile for STMT and store it in MAP, return the total count for all inlined indirect calls. */ gcov_type -function_instance::find_icall_target_map (gcall *stmt, +function_instance::find_icall_target_map (tree fn, gcall *stmt, icall_target_map *map) const { gcov_type ret = 0; - unsigned stmt_offset = get_relative_location_for_stmt (stmt); + unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt); for (callsite_map::const_iterator iter = callsites.begin (); iter != callsites.end (); ++iter) { unsigned callee = iter->second->name (); /* Check if callsite location match the stmt. */ - if (iter->first.first != stmt_offset) - continue; + if (iter->first.first != stmt_offset + || iter->second->removed_icall_target ()) + continue; struct cgraph_node *node = cgraph_node::get_for_asmname ( get_identifier (afdo_string_table->get_name (callee))); if (node == NULL) continue; - (*map)[callee] = iter->second->total_count (); - ret += iter->second->total_count (); + (*map)[callee] = iter->second->total_count () * afdo_count_scale; + ret += iter->second->total_count () * afdo_count_scale; } return ret; } +/* Remove the inlined indirect call target profile for STMT. 
*/ + +void +function_instance::remove_icall_target (tree fn, gcall *stmt) +{ + unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt); + int n = 0; + + for (auto iter : callsites) + if (iter.first.first == stmt_offset) + { + iter.second->remove_icall_target (); + n++; + } + /* TODO: If we add support for multiple targets, we may want to + remove only those we succesfully inlined. */ + gcc_assert (n); +} + +/* Offline all functions not defined in the current unit. + We will not be able to early inline them. + Doint so early will get VPT decisions more realistic. */ + +void +autofdo_source_profile::offline_external_functions () +{ + /* First check all available definitions and mark their names as + visible. */ + cgraph_node *node; + name_index_set seen; + name_index_map to_symbol_name; + + /* Add renames erasing suffixes produced by late clones, such as + .isra, .ipcp. */ + for (size_t i = 1; i < afdo_string_table->num_entries (); i++) + { + const char *n1 = afdo_string_table->get_name (i); + char *n2 = get_original_name (n1); + if (!strcmp (n1, n2)) + { + free (n2); + /* Watch for duplicate entries. + This seems to happen in practice and may be useful to distingush + multiple static symbols of the same name, but we do not realy + have a way to differentiate them in get_name lookup. */ + int index = afdo_string_table->get_index (n1); + if (index != (int)i) + { + if (dump_file) + fprintf (dump_file, + "string table in auto-profile contains" + " duplicated name %s\n", n1); + to_symbol_name.put (i, index); + } + continue; + } + if (dump_file) + fprintf (dump_file, "Adding rename removing clone suffxes %s -> %s\n", + n1, n2); + int index = afdo_string_table->get_index (n2); + if (index != -1) + free (n2); + else + index = afdo_string_table->add_name (n2); + to_symbol_name.put (i, index); + } + FOR_EACH_DEFINED_FUNCTION (node) + { + const char *name + = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)); + const char *dwarf_name = lang_hooks.dwarf_name (node->decl, 0); + int index = afdo_string_table->get_index (name); + + /* Inline function may be identified by its dwarf names; + rename them to symbol names. With LTO dwarf names are + lost in free_lange_data. */ + if (strcmp (name, dwarf_name)) + { + int index2 = afdo_string_table->get_index (dwarf_name); + if (index2 != -1) + { + if (index == -1) + index = afdo_string_table->add_name (xstrdup (name)); + if (dump_file) + { + fprintf (dump_file, "Adding dwarf->symbol rename %s -> %s\n", + afdo_string_table->get_name (index2), name); + if (to_symbol_name.get (index2)) + fprintf (dump_file, "Dwarf name is not unique"); + } + to_symbol_name.put (index2, index); + seen.add (index2); + } + } + if (index != -1) + { + if (dump_file) + fprintf (dump_file, "%s is defined in node %s\n", + afdo_string_table->get_name (index), + node->dump_name ()); + seen.add (index); + } + else + { + if (dump_file) + { + fprintf (dump_file, + "Node %s not in auto profile (%s neither %s)\n", + node->dump_name (), + name, + dwarf_name); + } + } + } + + for (auto iter : to_symbol_name) + { + /* In case dwarf name was duplicated and later renamed, + handle both. No more than one hop shold be needed. */ + int *newn = to_symbol_name.get (iter.second); + if (newn) + iter.second = *newn; + gcc_checking_assert (!to_symbol_name.get (iter.second)); + if (seen.contains (iter.second)) + seen.add (iter.first); + } + + /* Now process all tolevel (offline) function instances. 
+ + If instance has no definition in this translation unit, + first offline all inlined functions which are defined here + (so we do not lose porfile due to cross-module inlining + done by link-time optimizers). + + If instance has a definition, look into all inlined functions + and remove external ones (result of cross-module inlining). + + TODO: after early-inlining we ought to offline all functions + that were not inlined. */ + vec <function_instance *>&fns = duplicate_functions_; + /* Poppulate worklist with all functions to process. Processing + may introduce new functions by offlining. */ + for (auto const &iter : map_) + { + iter.second->set_in_worklist (); + fns.safe_push (iter.second); + } + while (fns.length ()) + { + function_instance *f = fns.pop (); + int index = f->name (); + gcc_checking_assert (f->in_worklist_p ()); + + /* If map has different function_instance of same name, then + this is a duplicated entry which needs to be merged. */ + if (map_.count (index) && map_[index] != f) + { + if (dump_file) + { + fprintf (dump_file, "Merging duplicate instance: "); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + map_[index]->merge (f, fns); + gcc_checking_assert (!f->inlined_to ()); + f->clear_in_worklist (); + delete f; + } + /* If name was not seen in the symbol table, remove it. */ + else if (!seen.contains (index)) + { + f->offline_if_in_set (seen, fns); + f->clear_in_worklist (); + if (dump_file) + fprintf (dump_file, "Removing external %s\n", + afdo_string_table->get_name (f->name ())); + map_.erase (f->name ()); + delete f; + } + /* If this is offline function instance seen in this + translation unit offline external inlines and possibly + rename from dwarf name. */ + else + { + f->remove_external_functions (seen, to_symbol_name, fns); + f->clear_in_worklist (); + int *newn = to_symbol_name.get (index); + if (newn) + { + gcc_checking_assert (*newn != index); + f->set_name (*newn); + if (map_.count (*newn)) + { + if (dump_file) + fprintf (dump_file, "Merging duplicate symbol %s\n", + afdo_string_table->get_name (f->name ())); + function_instance *to = map_[*newn]; + gcc_checking_assert (!map_.count (index) || map_[index] == f); + if (to != f) + { + to->merge (f, fns); + delete f; + } + if (map_.count (index)) + map_.erase (index); + } + else + { + auto iter = map_.find (index); + map_[*newn] = iter->second; + map_.erase (iter); + } + } + } + } + if (dump_file) + for (auto const &iter : map_) + { + seen.contains (iter.second->name ()); + iter.second->dump (dump_file); + } +} + +/* Walk scope block BLOCK and mark all inlined functions as realized. 
*/ + +static void +walk_block (tree fn, function_instance *s, tree block) +{ + if (inlined_function_outer_scope_p (block)) + { + unsigned loc = get_relative_location_for_locus + (fn, BLOCK_SUPERCONTEXT (block), + BLOCK_SOURCE_LOCATION (block)); + function_instance *ns + = s->get_function_instance_by_decl + (loc, BLOCK_ABSTRACT_ORIGIN (block), + BLOCK_SOURCE_LOCATION (block)); + if (!ns) + { + if (dump_file) + { + fprintf (dump_file, " Failed to find inlined instance:"); + s->dump_inline_stack (dump_file); + fprintf (dump_file, ":"); + dump_afdo_loc (dump_file, loc); + fprintf (dump_file, " %s\n", + IDENTIFIER_POINTER + (DECL_ASSEMBLER_NAME (BLOCK_ABSTRACT_ORIGIN (block)))); + } + return; + } + s = ns; + if (dump_file) + { + fprintf (dump_file, " Marking realized inline: "); + s->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + s->set_realized (); + } + for (tree t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t)) + walk_block (fn, s, t); +} + +/* Offline all inline functions that are not marked as realized. + This will merge their profile into offline versions where available. + Also remove all functions we will no longer use. */ + +void +autofdo_source_profile::offline_unrealized_inlines () +{ + auto_vec <function_instance *>fns; + /* Poppulate worklist with all functions to process. Processing + may introduce new functions by offlining. */ + for (auto const &iter : map_) + { + fns.safe_push (iter.second); + iter.second->set_in_worklist (); + } + while (fns.length ()) + { + function_instance *f = fns.pop (); + int index = f->name (); + bool in_map = map_.count (index); + if (in_map) + for (cgraph_node *n = cgraph_node::get_for_asmname + (get_identifier (afdo_string_table->get_name (index)));n;) + { + if (n->definition) + { + if (dump_file) + fprintf (dump_file, "Marking realized %s\n", + afdo_string_table->get_name (index)); + f->set_realized (); + if (DECL_INITIAL (n->decl) + && DECL_INITIAL (n->decl) != error_mark_node) + walk_block (n->decl, f, DECL_INITIAL (n->decl)); + } + if (n->next_sharing_asm_name) + n = as_a <cgraph_node *>(n->next_sharing_asm_name); + else + break; + } + f->offline_if_not_realized (fns); + gcc_checking_assert ((in_map || !f->realized_p ()) + && f->in_worklist_p ()); + + /* If this is duplicated instance, merge it into one in map. */ + if (in_map && map_[index] != f) + { + if (dump_file) + { + fprintf (dump_file, "Merging duplicate instance: "); + f->dump_inline_stack (dump_file); + fprintf (dump_file, "\n"); + } + map_[index]->merge (f, fns); + f->clear_in_worklist (); + gcc_checking_assert (!f->inlined_to ()); + delete f; + } + /* If function is not in symbol table, remove it. */ + else if (!f->realized_p ()) + { + if (dump_file) + fprintf (dump_file, "Removing optimized out function %s\n", + afdo_string_table->get_name (f->name ())); + map_.erase (index); + f->clear_in_worklist (); + delete f; + } + else + f->clear_in_worklist (); + } + if (dump_file) + for (auto const &iter : map_) + iter.second->dump (dump_file); +} + /* Read the profile and create a function_instance with head count as HEAD_COUNT. Recursively read callsites to create nested function_instances too. STACK is used to track the recursive creation process. 
*/ @@ -655,6 +1635,8 @@ function_instance::read_function_instance (function_instance_stack *stack, unsigned num_pos_counts = gcov_read_unsigned (); unsigned num_callsites = gcov_read_unsigned (); function_instance *s = new function_instance (name, head_count); + if (!stack->is_empty ()) + s->set_inlined_to (stack->last ()); stack->safe_push (s); for (unsigned i = 0; i < num_pos_counts; i++) @@ -663,6 +1645,9 @@ function_instance::read_function_instance (function_instance_stack *stack, unsigned num_targets = gcov_read_unsigned (); gcov_type count = gcov_read_counter (); s->pos_counts[offset].count = count; + afdo_profile_info->sum_max = std::max (afdo_profile_info->sum_max, + count); + for (unsigned j = 0; j < stack->length (); j++) (*stack)[j]->total_count_ += count; for (unsigned j = 0; j < num_targets; j++) @@ -677,7 +1662,7 @@ function_instance::read_function_instance (function_instance_stack *stack, { unsigned offset = gcov_read_unsigned (); function_instance *callee_function_instance - = read_function_instance (stack, 0); + = read_function_instance (stack, -1); s->callsites[std::make_pair (offset, callee_function_instance->name ())] = callee_function_instance; } @@ -685,22 +1670,6 @@ function_instance::read_function_instance (function_instance_stack *stack, return s; } -/* Sum of counts that is used during annotation. */ - -gcov_type -function_instance::total_annotated_count () const -{ - gcov_type ret = 0; - for (callsite_map::const_iterator iter = callsites.begin (); - iter != callsites.end (); ++iter) - ret += iter->second->total_annotated_count (); - for (position_count_map::const_iterator iter = pos_counts.begin (); - iter != pos_counts.end (); ++iter) - if (iter->second.annotated) - ret += iter->second.count; - return ret; -} - /* Member functions for autofdo_source_profile. */ autofdo_source_profile::~autofdo_source_profile () @@ -722,45 +1691,52 @@ autofdo_source_profile::get_function_instance_by_decl (tree decl) const return ret == map_.end () ? NULL : ret->second; } +/* For a given NAME_INDEX, returns the top-level function_instance. */ + +function_instance * +autofdo_source_profile::get_function_instance_by_name_index (int name_index) + const +{ + name_function_instance_map::const_iterator ret = map_.find (name_index); + return ret == map_.end () ? NULL : ret->second; +} + +/* Add function instance FN. */ + +void +autofdo_source_profile::add_function_instance (function_instance *fn) +{ + int index = fn->name (); + gcc_checking_assert (map_.count (index) == 0); + map_[index] = fn; +} + /* Find count_info for a given gimple STMT. If found, store the count_info in INFO and return true; otherwise return false. */ bool -autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const +autofdo_source_profile::get_count_info (gimple *stmt, count_info *info, + cgraph_node *node) const { - return get_count_info (gimple_location (stmt), info); + return get_count_info (gimple_location (stmt), info, node); } bool autofdo_source_profile::get_count_info (location_t gimple_loc, - count_info *info) const + count_info *info, + cgraph_node *node) const { if (LOCATION_LOCUS (gimple_loc) == cfun->function_end_locus) return false; inline_stack stack; - get_inline_stack (gimple_loc, &stack); + get_inline_stack_in_node (gimple_loc, &stack, node); if (stack.length () == 0) return false; function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL) return false; - return s->get_count_info (stack[0].second, info); -} - -/* Mark LOC as annotated. 
*/ - -void -autofdo_source_profile::mark_annotated (location_t loc) -{ - inline_stack stack; - get_inline_stack (loc, &stack); - if (stack.length () == 0) - return; - function_instance *s = get_function_instance_by_inline_stack (stack); - if (s == NULL) - return; - s->mark_annotated (stack[0].second); + return s->get_count_info (stack[0].afdo_loc, info); } /* Update value profile INFO for STMT from the inlined indirect callsite. @@ -768,7 +1744,8 @@ autofdo_source_profile::mark_annotated (location_t loc) bool autofdo_source_profile::update_inlined_ind_target (gcall *stmt, - count_info *info) + count_info *info, + cgraph_node *node) { if (dump_file) { @@ -779,16 +1756,17 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt, if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) { if (dump_file) - fprintf (dump_file, " good locus\n"); + fprintf (dump_file, " bad locus (funciton end)\n"); return false; } count_info old_info; - get_count_info (stmt, &old_info); + get_count_info (stmt, &old_info, node); gcov_type total = 0; for (icall_target_map::const_iterator iter = old_info.targets.begin (); iter != old_info.targets.end (); ++iter) total += iter->second; + total *= afdo_count_scale; /* Program behavior changed, original promoted (and inlined) target is not hot any more. Will avoid promote the original target. @@ -807,7 +1785,7 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt, } inline_stack stack; - get_inline_stack (gimple_location (stmt), &stack); + get_inline_stack_in_node (gimple_location (stmt), &stack, node); if (stack.length () == 0) { if (dump_file) @@ -818,24 +1796,46 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt, if (s == NULL) { if (dump_file) - fprintf (dump_file, " function not found in inline stack\n"); + { + fprintf (dump_file, " function not found in inline stack:"); + dump_inline_stack (dump_file, &stack); + } return false; } icall_target_map map; - if (s->find_icall_target_map (stmt, &map) == 0) + if (s->find_icall_target_map (node ? node->decl + : current_function_decl, + stmt, &map) == 0) { if (dump_file) - fprintf (dump_file, " no target map\n"); + { + fprintf (dump_file, " no target map for stack: "); + dump_inline_stack (dump_file, &stack); + } return false; } for (icall_target_map::const_iterator iter = map.begin (); iter != map.end (); ++iter) info->targets[iter->first] = iter->second; if (dump_file) - fprintf (dump_file, " looks good\n"); + { + fprintf (dump_file, " looks good; stack:"); + dump_inline_stack (dump_file, &stack); + } return true; } +void +autofdo_source_profile::remove_icall_target (cgraph_edge *e) +{ + autofdo::inline_stack stack; + autofdo::get_inline_stack_in_node (gimple_location (e->call_stmt), + &stack, e->caller); + autofdo::function_instance *s + = get_function_instance_by_inline_stack (stack); + s->remove_icall_target (e->caller->decl, e->call_stmt); +} + /* Find total count of the callee of EDGE. 
*/ gcov_type @@ -843,16 +1843,39 @@ autofdo_source_profile::get_callsite_total_count ( struct cgraph_edge *edge) const { inline_stack stack; - stack.safe_push (std::make_pair (edge->callee->decl, 0)); - get_inline_stack (gimple_location (edge->call_stmt), &stack); + stack.safe_push ({edge->callee->decl, 0, UNKNOWN_LOCATION}); + + get_inline_stack_in_node (gimple_location (edge->call_stmt), &stack, + edge->caller); + if (dump_file) + { + if (!edge->caller->inlined_to) + fprintf (dump_file, "Looking up afdo profile for call %s -> %s stack:", + edge->caller->dump_name (), edge->callee->dump_name ()); + else + fprintf (dump_file, "Looking up afdo profile for call %s -> %s transitively %s stack:", + edge->caller->dump_name (), edge->callee->dump_name (), + edge->caller->inlined_to->dump_name ()); + dump_inline_stack (dump_file, &stack); + } function_instance *s = get_function_instance_by_inline_stack (stack); - if (s == NULL - || afdo_string_table->get_index (IDENTIFIER_POINTER ( - DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) - return 0; + if (s == NULL) + { + if (dump_file) + fprintf (dump_file, "No function instance found\n"); + return 0; + } + if (afdo_string_table->get_index_by_decl (edge->callee->decl) != s->name ()) + { + if (dump_file) + fprintf (dump_file, "Mismatched name of callee %s and profile %s\n", + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)), + afdo_string_table->get_name (s->name ())); + return 0; + } - return s->total_count (); + return s->total_count () * afdo_count_scale; } /* Read AutoFDO profile and returns TRUE on success. */ @@ -876,19 +1899,51 @@ autofdo_source_profile::read () return false; } + gcc_checking_assert (!afdo_source_profile); + afdo_source_profile = this; + /* Skip the length of the section. */ gcov_read_unsigned (); /* Read in the function/callsite profile, and store it in local data structure. */ unsigned function_num = gcov_read_unsigned (); + int profile_pass_num + = g->get_passes ()->get_pass_auto_profile ()->static_pass_number; + g->get_dumps ()->dump_start (profile_pass_num, NULL); for (unsigned i = 0; i < function_num; i++) { function_instance::function_instance_stack stack; function_instance *s = function_instance::read_function_instance ( &stack, gcov_read_counter ()); - map_[s->name ()] = s; + int fun_id = s->name (); + /* If function_instace with get_original_name (without the clone + suffix) exixts, merge the function instances. */ + if (map_.count (fun_id) == 0) + map_[fun_id] = s; + else + fatal_error (UNKNOWN_LOCATION, + "auto-profile contains duplicated function instance %s", + afdo_string_table->get_name (s->name ())); } + /* Scale up the profile, but leave some bits in case some counts gets + bigger than sum_max eventually. 
*/ + if (afdo_profile_info->sum_max) + afdo_count_scale + = MAX (((gcov_type)1 << (profile_count::n_bits / 2)) + / afdo_profile_info->sum_max, 1); + if (dump_file) + fprintf (dump_file, "Max count in profile %" PRIu64 "\n" + "Setting scale %" PRIu64 "\n" + "Scaled max count %" PRIu64 "\n" + "Hot count threshold %" PRIu64 "\n\n", + (int64_t)afdo_profile_info->sum_max, + (int64_t)afdo_count_scale, + (int64_t)(afdo_profile_info->sum_max * afdo_count_scale), + (int64_t)(afdo_profile_info->sum_max * afdo_count_scale + / param_hot_bb_count_fraction)); + afdo_profile_info->sum_max *= afdo_count_scale; + g->get_dumps ()->dump_finish (profile_pass_num); return true; } @@ -900,16 +1955,60 @@ autofdo_source_profile::get_function_instance_by_inline_stack ( const inline_stack &stack) const { name_function_instance_map::const_iterator iter = map_.find ( - afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); - if (iter == map_.end()) - return NULL; + afdo_string_table->get_index_by_decl (stack[stack.length () - 1].decl)); + if (iter == map_.end ()) + { + if (dump_file) + fprintf (dump_file, "No offline instance for %s\n", + IDENTIFIER_POINTER + (DECL_ASSEMBLER_NAME (stack[stack.length () - 1].decl))); + return NULL; + } function_instance *s = iter->second; - for (unsigned i = stack.length() - 1; i > 0; i--) + for (unsigned i = stack.length () - 1; i > 0; i--) { - s = s->get_function_instance_by_decl ( - stack[i].second, stack[i - 1].first); + function_instance *os = s; + s = s->get_function_instance_by_decl (stack[i].afdo_loc, + stack[i - 1].decl, + stack[i].location); + /* Try lost discriminator. */ + if (!s) + { + s = os->get_function_instance_by_decl (stack[i].afdo_loc & ~65535, + stack[i - 1].decl, + stack[i].location); + if (s && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, + dump_user_location_t::from_location_t + (stack[i].location), + "auto-profile apparently has a missing " + "discriminator for inlined call " + "of %s at relative loc %i:%i\n", + IDENTIFIER_POINTER + (DECL_ASSEMBLER_NAME (stack[i - 1].decl)), + stack[i].afdo_loc >> 16, + stack[i].afdo_loc & 65535); + } + } if (s == NULL) - return NULL; + { + /* afdo inliner extends the stack by last entry with unknown + location while chekcing if function was inlined during train run. + We do not want to print diagnostics about every function + which is not inlined. */ + if (s && dump_enabled_p () && stack[i].location != UNKNOWN_LOCATION) + dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS, + dump_user_location_t::from_location_t + (stack[i].location), + "auto-profile has no inlined function instance " + "for inlined call of %s at relative loc %i:%i\n", + IDENTIFIER_POINTER + (DECL_ASSEMBLER_NAME (stack[i - 1].decl)), + stack[i].afdo_loc >> 16, + stack[i].afdo_loc & 65535); + return NULL; + } } return s; } @@ -961,7 +2060,7 @@ read_profile (void) /* string_table. */ afdo_string_table = new string_table (); - if (!afdo_string_table->read()) + if (!afdo_string_table->read ()) { error ("cannot read string table from %s", auto_profile_file); return; @@ -988,19 +2087,35 @@ read_profile (void) decide if it needs to promote and inline. 
*/ static bool -afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, - bool transform) +afdo_indirect_call (gcall *stmt, const icall_target_map &map, + bool transform, cgraph_edge *indirect_edge) { - gimple *gs = gsi_stmt (*gsi); tree callee; if (map.size () == 0) - return false; - gcall *stmt = dyn_cast <gcall *> (gs); - if (!stmt - || gimple_call_internal_p (stmt) - || gimple_call_fndecl (stmt) != NULL_TREE) - return false; + { + if (dump_file) + fprintf (dump_file, "No targets found\n"); + return false; + } + if (!stmt) + { + if (dump_file) + fprintf (dump_file, "No call statement\n"); + return false; + } + if (gimple_call_internal_p (stmt)) + { + if (dump_file) + fprintf (dump_file, "Internal call\n"); + return false; + } + if (gimple_call_fndecl (stmt) != NULL_TREE) + { + if (dump_file) + fprintf (dump_file, "Call is already direct\n"); + return false; + } gcov_type total = 0; icall_target_map::const_iterator max_iter = map.end (); @@ -1012,40 +2127,50 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, if (max_iter == map.end () || max_iter->second < iter->second) max_iter = iter; } + total *= afdo_count_scale; struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( get_identifier (afdo_string_table->get_name (max_iter->first))); - if (direct_call == NULL || !direct_call->profile_id) - return false; + if (direct_call == NULL) + { + if (dump_file) + fprintf (dump_file, "Failed to find cgraph node for %s\n", + afdo_string_table->get_name (max_iter->first)); + return false; + } callee = gimple_call_fn (stmt); - histogram_value hist = gimple_alloc_histogram_value ( - cfun, HIST_TYPE_INDIR_CALL, stmt, callee); - hist->n_counters = 4; - hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); - gimple_add_histogram_value (cfun, stmt, hist); - - /* Total counter */ - hist->hvalue.counters[0] = total; - /* Number of value/counter pairs */ - hist->hvalue.counters[1] = 1; - /* Value */ - hist->hvalue.counters[2] = direct_call->profile_id; - /* Counter */ - hist->hvalue.counters[3] = max_iter->second; - if (!transform) - return false; - - cgraph_node* current_function_node = cgraph_node::get (current_function_decl); - - /* If the direct call is a recursive call, don't promote it since - we are not set up to inline recursive calls at this stage. 
*/ - if (direct_call == current_function_node) - return false; - - struct cgraph_edge *indirect_edge - = current_function_node->get_edge (stmt); + { + if (!direct_call->profile_id) + { + if (dump_file) + fprintf (dump_file, "No profile id\n"); + return false; + } + histogram_value hist = gimple_alloc_histogram_value ( + cfun, HIST_TYPE_INDIR_CALL, stmt, callee); + hist->n_counters = 4; + hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); + gimple_add_histogram_value (cfun, stmt, hist); + + /* Total counter */ + hist->hvalue.counters[0] = total; + /* Number of value/counter pairs */ + hist->hvalue.counters[1] = 1; + /* Value */ + hist->hvalue.counters[2] = direct_call->profile_id; + /* Counter */ + hist->hvalue.counters[3] = max_iter->second * afdo_count_scale; + + if (!direct_call->profile_id) + { + if (dump_file) + fprintf (dump_file, "Histogram attached\n"); + return false; + } + return false; + } if (dump_file) { @@ -1055,16 +2180,10 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); } - if (direct_call == NULL) + if (!direct_call->definition) { if (dump_file) - fprintf (dump_file, " not transforming\n"); - return false; - } - if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) - { - if (dump_file) - fprintf (dump_file, " no declaration\n"); + fprintf (dump_file, " no definition available\n"); return false; } @@ -1075,13 +2194,9 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, fprintf (dump_file, "\n"); } - /* FIXME: Count should be initialized. */ - struct cgraph_edge *new_edge - = indirect_edge->make_speculative (direct_call, - profile_count::uninitialized ()); - cgraph_edge::redirect_call_stmt_to_callee (new_edge); - gimple_remove_histogram_value (cfun, stmt, hist); - inline_call (new_edge, true, NULL, NULL, false); + indirect_edge->make_speculative + (direct_call, + gimple_bb (stmt)->count.apply_scale (99, 100)); return true; } @@ -1089,55 +2204,101 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, histograms and adds them to list VALUES. */ static bool -afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, - bool transform) +afdo_vpt (gcall *gs, const icall_target_map &map, + bool transform, cgraph_edge *indirect_edge) { - return afdo_indirect_call (gsi, map, transform); + return afdo_indirect_call (gs, map, transform, indirect_edge); } typedef std::set<basic_block> bb_set; -typedef std::set<edge> edge_set; static bool is_bb_annotated (const basic_block bb, const bb_set &annotated) { - return annotated.find (bb) != annotated.end (); + if (annotated.find (bb) != annotated.end ()) + { + gcc_checking_assert (bb->count.quality () == AFDO + || !bb->count.nonzero_p ()); + return true; + } + gcc_checking_assert (bb->count.quality () != AFDO + || !bb->count.nonzero_p ()); + return false; } static void set_bb_annotated (basic_block bb, bb_set *annotated) { + gcc_checking_assert (bb->count.quality () == AFDO + || !bb->count.nonzero_p ()); annotated->insert (bb); } +/* Update COUNT by known autofdo count C. */ +static void +update_count_by_afdo_count (profile_count *count, gcov_type c) +{ + if (c) + *count = profile_count::from_gcov_type (c).afdo (); + /* In case we have guessed profile which is already zero, preserve + quality info. 
*/ + else if (count->nonzero_p () + || count->quality () == GUESSED + || count->quality () == GUESSED_LOCAL) + *count = profile_count::zero ().afdo (); +} + +/* Update COUNT by known autofdo count C. */ +static void +update_count_by_afdo_count (profile_count *count, profile_count c) +{ + if (c.nonzero_p ()) + *count = c; + /* In case we have guessed profile which is already zero, preserve + quality info. */ + else if (count->nonzero_p () + || count->quality () < c.quality ()) + *count = c; +} + /* For a given BB, set its execution count. Attach value profile if a stmt is not in PROMOTED, because we only want to promote an indirect call once. Return TRUE if BB is annotated. */ static bool -afdo_set_bb_count (basic_block bb, const stmt_set &promoted) +afdo_set_bb_count (basic_block bb, hash_set <basic_block> &zero_bbs) { gimple_stmt_iterator gsi; - edge e; - edge_iterator ei; gcov_type max_count = 0; bool has_annotated = false; + if (dump_file) + fprintf (dump_file, " Looking up AFDO count of bb %i\n", bb->index); for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { count_info info; gimple *stmt = gsi_stmt (gsi); if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) - continue; + continue; if (afdo_source_profile->get_count_info (stmt, &info)) - { - if (info.count > max_count) - max_count = info.count; - has_annotated = true; - if (info.targets.size () > 0 - && promoted.find (stmt) == promoted.end ()) - afdo_vpt (&gsi, info.targets, false); - } + { + if (info.count > max_count) + max_count = info.count; + if (dump_file) + { + fprintf (dump_file, " count %" PRIu64 " in stmt: ", + (int64_t)info.count); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + } + has_annotated = true; + gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi)); + /* TODO; if inlined early and indirect call was not optimized out, + we will end up speculating again. Early inliner should remove + all targets for edges it speculated into safely. 
*/ + if (call + && info.targets.size () > 0) + afdo_vpt (call, info.targets, false, NULL); + } } if (!has_annotated) @@ -1163,6 +2324,13 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) { if (info.count > max_count) max_count = info.count; + if (dump_file && info.count) + { + fprintf (dump_file, + " phi op in BB %i with count %" PRIu64": ", + bb_succ->index, (int64_t)info.count); + print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); + } has_annotated = true; } } @@ -1172,22 +2340,25 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted) return false; } - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); - for (gphi_iterator gpi = gsi_start_phis (bb); - !gsi_end_p (gpi); - gsi_next (&gpi)) + if (max_count) + { + update_count_by_afdo_count (&bb->count, max_count * afdo_count_scale); + if (dump_file) + fprintf (dump_file, + " Annotated bb %i with count %" PRId64 + ", scaled to %" PRId64 "\n", + bb->index, (int64_t)max_count, + (int64_t)(max_count * afdo_count_scale)); + return true; + } + else { - gphi *phi = gpi.phi (); - size_t i; - for (i = 0; i < gimple_phi_num_args (phi); i++) - afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); + if (dump_file) + fprintf (dump_file, + " bb %i has statements with 0 count\n", bb->index); + zero_bbs.add (bb); } - FOR_EACH_EDGE (e, ei, bb->succs) - afdo_source_profile->mark_annotated (e->goto_locus); - - bb->count = profile_count::from_gcov_type (max_count).afdo (); - return true; + return false; } /* BB1 and BB2 are in an equivalent class iff: @@ -1205,7 +2376,7 @@ afdo_find_equiv_class (bb_set *annotated_bb) basic_block bb; FOR_ALL_BB_FN (bb, cfun) - bb->aux = NULL; + bb->aux = NULL; FOR_ALL_BB_FN (bb, cfun) { @@ -1217,9 +2388,20 @@ afdo_find_equiv_class (bb_set *annotated_bb) && bb1->loop_father == bb->loop_father) { bb1->aux = bb; - if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) + if (is_bb_annotated (bb1, *annotated_bb) + && (!is_bb_annotated (bb, *annotated_bb) + || bb1->count > bb->count)) { - bb->count = bb1->count; + if (dump_file) + { + fprintf (dump_file, + " Copying count of bb %i to bb %i; count is:", + bb1->index, + bb->index); + bb1->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + update_count_by_afdo_count (&bb->count, bb1->count); set_bb_annotated (bb, annotated_bb); } } @@ -1229,9 +2411,20 @@ afdo_find_equiv_class (bb_set *annotated_bb) && bb1->loop_father == bb->loop_father) { bb1->aux = bb; - if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) + if (is_bb_annotated (bb1, *annotated_bb) + && (!is_bb_annotated (bb, *annotated_bb) + || bb1->count > bb->count)) { - bb->count = bb1->count; + if (dump_file) + { + fprintf (dump_file, + " Copying count of bb %i to bb %i; count is:", + bb1->index, + bb->index); + bb1->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + update_count_by_afdo_count (&bb->count, bb1->count); set_bb_annotated (bb, annotated_bb); } } @@ -1257,50 +2450,125 @@ afdo_propagate_edge (bool is_succ, bb_set *annotated_bb) { edge e, unknown_edge = NULL; edge_iterator ei; - int num_unknown_edge = 0; - int num_edge = 0; + int num_unknown_edges = 0; + int num_edges = 0; profile_count total_known_count = profile_count::zero ().afdo (); FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) { gcc_assert (AFDO_EINFO (e) != NULL); if (! 
AFDO_EINFO (e)->is_annotated ()) - num_unknown_edge++, unknown_edge = e; + num_unknown_edges++, unknown_edge = e; else total_known_count += AFDO_EINFO (e)->get_count (); - num_edge++; + num_edges++; + } + if (dump_file) + { + fprintf (dump_file, "bb %i %s propagating %s edges %i, " + "unknown edges %i, known count ", + bb->index, + is_bb_annotated (bb, *annotated_bb) ? "(annotated)" : "", + is_succ ? "succesors" : "predecessors", num_edges, + num_unknown_edges); + total_known_count.dump (dump_file); + fprintf (dump_file, " bb count "); + bb->count.dump (dump_file); + fprintf (dump_file, "\n"); } /* Be careful not to annotate block with no successor in special cases. */ - if (num_unknown_edge == 0 && total_known_count > bb->count) + if (num_unknown_edges == 0 && num_edges + && !is_bb_annotated (bb, *annotated_bb)) { + if (dump_file) + { + fprintf (dump_file, " Annotating bb %i with count ", bb->index); + total_known_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + update_count_by_afdo_count (&bb->count, total_known_count); + set_bb_annotated (bb, annotated_bb); + changed = true; + } + else if (is_bb_annotated (bb, *annotated_bb) + && bb->count < total_known_count) + { + if (dump_file) + { + fprintf (dump_file, " Increasing bb %i count from ", + bb->index); + bb->count.dump (dump_file); + fprintf (dump_file, " to "); + total_known_count.dump (dump_file); + fprintf (dump_file, " hoping to mitigate afdo inconsistency\n"); + } bb->count = total_known_count; - if (!is_bb_annotated (bb, *annotated_bb)) - set_bb_annotated (bb, annotated_bb); changed = true; } - else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) + else if (num_unknown_edges == 1 && is_bb_annotated (bb, *annotated_bb)) { if (bb->count > total_known_count) { - profile_count new_count = bb->count - total_known_count; - AFDO_EINFO(unknown_edge)->set_count(new_count); - if (num_edge == 1) - { - basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : unknown_edge->src; - if (new_count > succ_or_pred_bb->count) - { - succ_or_pred_bb->count = new_count; - if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb)) - set_bb_annotated (succ_or_pred_bb, annotated_bb); - } - } - } + profile_count new_count = bb->count - total_known_count; + AFDO_EINFO (unknown_edge)->set_count (new_count); + } else - AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ()); + AFDO_EINFO (unknown_edge)->set_count + (profile_count::zero ().afdo ()); + if (dump_file) + { + fprintf (dump_file, " Annotated edge %i->%i with count ", + unknown_edge->src->index, unknown_edge->dest->index); + AFDO_EINFO (unknown_edge)->get_count ().dump (dump_file); + fprintf (dump_file, "\n"); + } AFDO_EINFO (unknown_edge)->set_annotated (); changed = true; } + else if (num_unknown_edges > 1 + && is_bb_annotated (bb, *annotated_bb) + && (total_known_count >= bb->count || !bb->count.nonzero_p ())) + { + FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) + { + gcc_assert (AFDO_EINFO (e) != NULL); + if (! AFDO_EINFO (e)->is_annotated ()) + { + AFDO_EINFO (e)->set_count + (profile_count::zero ().afdo ()); + AFDO_EINFO (e)->set_annotated (); + if (dump_file) + { + fprintf (dump_file, " Annotated edge %i->%i with count ", + e->src->index, e->dest->index); + AFDO_EINFO (unknown_edge)->get_count ().dump (dump_file); + fprintf (dump_file, "\n"); + } + } + } + } + else if (num_unknown_edges == 0 + && is_bb_annotated (bb, *annotated_bb) + && (is_succ ? single_succ_p (bb) : single_pred_p (bb))) + { + edge e = is_succ ? 
single_succ_edge (bb) : single_pred_edge (bb); + if (AFDO_EINFO (e)->is_annotated () + && AFDO_EINFO (e)->get_count () < bb->count) + { + if (dump_file) + { + fprintf (dump_file, " Increasing edge %i->%i count from ", + e->src->index, e->dest->index); + AFDO_EINFO (e)->get_count ().dump (dump_file); + fprintf (dump_file, " to "); + bb->count.dump (dump_file); + fprintf (dump_file, " hoping to mitigate afdo inconsistency\n"); + } + AFDO_EINFO (e)->set_count (bb->count); + changed = true; + } + } } return changed; } @@ -1413,18 +2681,28 @@ afdo_propagate_circuit (const bb_set &annotated_bb) static void afdo_propagate (bb_set *annotated_bb) { - basic_block bb; bool changed = true; int i = 0; + basic_block bb; FOR_ALL_BB_FN (bb, cfun) - { - bb->count = ((basic_block)bb->aux)->count; - if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb)) - set_bb_annotated (bb, annotated_bb); - } + if (!is_bb_annotated (bb, *annotated_bb) + && is_bb_annotated ((basic_block)bb->aux, *annotated_bb)) + { + update_count_by_afdo_count (&bb->count, ((basic_block)bb->aux)->count); + set_bb_annotated (bb, annotated_bb); + if (dump_file) + { + fprintf (dump_file, + " Copying count of bb %i to bb %i; count is:", + ((basic_block)bb->aux)->index, + bb->index); + bb->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } - while (changed && i++ < 10) + while (changed && i++ < 100) { changed = false; @@ -1434,6 +2712,279 @@ afdo_propagate (bb_set *annotated_bb) changed = true; afdo_propagate_circuit (*annotated_bb); } + if (dump_file) + fprintf (dump_file, "Propagation took %i iterations %s\n", + i, changed ? "; iteration limit reached\n" : ""); +} + +/* qsort comparator of sreals. */ +static int +cmp (const void *a, const void *b) +{ + if (*(const sreal *)a < *(const sreal *)b) + return 1; + if (*(const sreal *)a > *(const sreal *)b) + return -1; + return 0; +} + +/* Add scale ORIG/ANNOTATED to SCALES. */ + +static void +add_scale (vec <sreal> *scales, profile_count annotated, profile_count orig) +{ + if (dump_file) + { + orig.dump (dump_file); + fprintf (dump_file, " should be "); + annotated.dump (dump_file); + fprintf (dump_file, "\n"); + } + if (orig.nonzero_p ()) + { + sreal scale + = annotated.guessed_local () + .to_sreal_scale (orig); + if (dump_file) + fprintf (dump_file, " adding scale %.16f\n", + scale.to_double ()); + scales->safe_push (scale); + } +} + +/* Scale counts of all basic blocks in BBS by SCALE and convert them to + IPA quality. */ + +static void +scale_bbs (const vec <basic_block> &bbs, sreal scale) +{ + if (dump_file) + fprintf (dump_file, " Scaling by %.16f\n", scale.to_double ()); + for (basic_block b : bbs) + if (!(b->count == profile_count::zero ()) + && b->count.initialized_p ()) + { + profile_count o = b->count; + b->count = b->count.force_guessed () * scale; + + /* If we scaled to 0, make it auto-fdo since that is treated + less agressively. */ + if (!b->count.nonzero_p () && o.nonzero_p ()) + b->count = profile_count::zero ().afdo (); + if (dump_file) + { + fprintf (dump_file, " bb %i count updated ", b->index); + o.dump (dump_file); + fprintf (dump_file, " -> "); + b->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } +} + +/* In case given basic block was fully optimized out, AutoFDO + will have no data about it. In this case try to preserve static profile. + Identify connected components (in undirected form of CFG) which has + no annotations at all. Look at thir boundaries and try to determine + scaling factor and scale. 
*/ + +void +afdo_adjust_guessed_profile (bb_set *annotated_bb) +{ + /* Basic blocks of connected component currently processed. */ + auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun)); + /* Scale factors found. */ + auto_vec <sreal, 20> scales; + auto_vec <basic_block, 20> stack (n_basic_blocks_for_fn (cfun)); + + basic_block seed_bb; + unsigned int component_id = 1; + + /* Map from basic block to its component. + 0 is used for univisited BBs, + 1 means that BB is annotated, + >=2 is an id of the component BB belongs to. */ + auto_vec <unsigned int, 20> component; + component.safe_grow (last_basic_block_for_fn (cfun)); + FOR_ALL_BB_FN (seed_bb, cfun) + component[seed_bb->index] + = is_bb_annotated (seed_bb, *annotated_bb) ? 1 : 0; + FOR_ALL_BB_FN (seed_bb, cfun) + if (!component[seed_bb->index]) + { + stack.quick_push (seed_bb); + component_id++; + bbs.truncate (0); + scales.truncate (0); + component[seed_bb->index] = component_id; + profile_count max_count = profile_count::zero (); + + /* Identify connected component starting in BB. */ + if (dump_file) + fprintf (dump_file, "Starting connected component in bb %i\n", + seed_bb->index); + do + { + basic_block b = stack.pop (); + + bbs.quick_push (b); + max_count = max_count.max (b->count); + + for (edge e: b->preds) + if (!component[e->src->index]) + { + stack.quick_push (e->src); + component[e->src->index] = component_id; + } + for (edge e: b->succs) + if (!component[e->dest->index]) + { + stack.quick_push (e->dest); + component[e->dest->index] = component_id; + } + } + while (!stack.is_empty ()); + + /* If all blocks in components has 0 count, we do not need + to scale, only we must convert to IPA quality. */ + if (!max_count.nonzero_p ()) + { + if (dump_file) + fprintf (dump_file, " All counts are 0; scale = 1\n"); + scale_bbs (bbs, 1); + continue; + } + + /* Now visit the component and try to figure out its desired + frequency. */ + for (basic_block b : bbs) + { + if (dump_file) + { + fprintf (dump_file, " visiting bb %i with count ", b->index); + b->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + if (!b->count.nonzero_p ()) + continue; + /* Sum of counts of annotated edges into B. */ + profile_count annotated_count = profile_count::zero (); + /* Sum of counts of edges into B with source in current + component. */ + profile_count current_component_count = profile_count::zero (); + bool boundary = false; + + for (edge e: b->preds) + if (AFDO_EINFO (e)->is_annotated ()) + { + if (dump_file) + { + fprintf (dump_file, " Annotated pred edge to %i " + "with count ", e->src->index); + AFDO_EINFO (e)->get_count ().dump (dump_file); + fprintf (dump_file, "\n"); + } + boundary = true; + annotated_count += AFDO_EINFO (e)->get_count (); + } + /* If source is anotated, combine with static + probability prediction. + TODO: We can do better in case some of edges out are + annotated and distribute only remaining count out of BB. 
*/ + else if (is_bb_annotated (e->src, *annotated_bb)) + { + boundary = true; + if (dump_file) + { + fprintf (dump_file, " Annotated predecesor %i " + "with count ", e->src->index); + e->src->count.dump (dump_file); + fprintf (dump_file, " edge count using static profile "); + e->count ().dump (dump_file); + fprintf (dump_file, "\n"); + } + annotated_count += e->count (); + } + else + { + current_component_count += e->count (); + gcc_checking_assert (component[e->src->index] == component_id); + } + if (boundary && current_component_count.initialized_p ()) + { + if (dump_file) + fprintf (dump_file, " bb %i in count ", b->index); + add_scale (&scales, + annotated_count, + b->count - current_component_count); + } + for (edge e: b->succs) + if (AFDO_EINFO (e)->is_annotated ()) + { + if (dump_file) + fprintf (dump_file, " edge %i->%i count ", + b->index, e->dest->index); + add_scale (&scales, AFDO_EINFO (e)->get_count (), e->count ()); + } + else if (is_bb_annotated (e->dest, *annotated_bb)) + { + profile_count annotated_count = e->dest->count; + profile_count out_count = profile_count::zero (); + bool ok = true; + for (edge e2: e->dest->preds) + if (AFDO_EINFO (e2)->is_annotated ()) + annotated_count -= AFDO_EINFO (e2)->get_count (); + else if (component[e->src->index] == component_id) + out_count += e->count (); + else if (e->probability.nonzero_p ()) + { + ok = false; + break; + } + if (!ok) + continue; + if (dump_file) + fprintf (dump_file, + " edge %i->%i has annotated sucessor; count ", + b->index, e->dest->index); + add_scale (&scales, annotated_count, e->count ()); + } + + } + + /* If we failed to find annotated entry or exit edge, + look for exit edges and scale profile so the dest + BB get all flow it needs. This is inprecise because + the edge is not annotated and thus BB has more than + one such predecessor. */ + if (!scales.length ()) + for (basic_block b : bbs) + if (b->count.nonzero_p ()) + for (edge e: b->succs) + if (is_bb_annotated (e->dest, *annotated_bb)) + { + profile_count annotated_count = e->dest->count; + for (edge e2: e->dest->preds) + if (AFDO_EINFO (e2)->is_annotated ()) + annotated_count -= AFDO_EINFO (e2)->get_count (); + if (dump_file) + fprintf (dump_file, + " edge %i->%i has annotated sucessor;" + " upper bound count ", + b->index, e->dest->index); + add_scale (&scales, annotated_count, e->count ()); + } + if (!scales.length ()) + { + if (dump_file) + fprintf (dump_file, + " Can not determine count from the boundary; giving up"); + continue; + } + gcc_checking_assert (scales.length ()); + scales.qsort (cmp); + scale_bbs (bbs, scales[scales.length () / 2]); + } } /* Propagate counts on control flow graph and calculate branch @@ -1446,10 +2997,6 @@ afdo_calculate_branch_prob (bb_set *annotated_bb) edge_iterator ei; basic_block bb; - calculate_dominance_info (CDI_POST_DOMINATORS); - calculate_dominance_info (CDI_DOMINATORS); - loop_optimizer_init (0); - FOR_ALL_BB_FN (bb, cfun) { gcc_assert (bb->aux == NULL); @@ -1464,25 +3011,50 @@ afdo_calculate_branch_prob (bb_set *annotated_bb) afdo_propagate (annotated_bb); FOR_EACH_BB_FN (bb, cfun) - { - int num_unknown_succ = 0; - profile_count total_count = profile_count::zero ().afdo (); - - FOR_EACH_EDGE (e, ei, bb->succs) - { - gcc_assert (AFDO_EINFO (e) != NULL); - if (! 
AFDO_EINFO (e)->is_annotated ()) - num_unknown_succ++; - else - total_count += AFDO_EINFO (e)->get_count (); - } - if (num_unknown_succ == 0 && total_count.nonzero_p()) + if (is_bb_annotated (bb, *annotated_bb)) { + bool all_known = true; + profile_count total_count = profile_count::zero ().afdo (); + + FOR_EACH_EDGE (e, ei, bb->succs) + { + gcc_assert (AFDO_EINFO (e) != NULL); + if (! AFDO_EINFO (e)->is_annotated ()) + { + /* If by static profile this edge never happens, + still propagate the rest. */ + if (e->probability.nonzero_p ()) + { + all_known = false; + break; + } + } + else + total_count += AFDO_EINFO (e)->get_count (); + } + if (!all_known || !total_count.nonzero_p ()) + continue; + FOR_EACH_EDGE (e, ei, bb->succs) - e->probability - = AFDO_EINFO (e)->get_count ().probability_in (total_count); + if (AFDO_EINFO (e)->is_annotated ()) + { + /* If probability is 1, preserve reliable static prediction + (This is, for example the case of single fallthru edge + or single fallthru plus unlikely EH edge.) */ + if (AFDO_EINFO (e)->get_count () == total_count + && e->probability == profile_probability::always ()) + ; + else if (AFDO_EINFO (e)->get_count ().nonzero_p ()) + e->probability + = AFDO_EINFO (e)->get_count ().probability_in (total_count); + /* If probability is zero, preserve reliable static + prediction. */ + else if (e->probability.nonzero_p () + || e->probability.quality () == GUESSED) + e->probability = profile_probability::never ().afdo (); + } } - } + afdo_adjust_guessed_profile (annotated_bb); FOR_ALL_BB_FN (bb, cfun) { bb->aux = NULL; @@ -1493,82 +3065,12 @@ afdo_calculate_branch_prob (bb_set *annotated_bb) e->aux = NULL; } } - - loop_optimizer_finalize (); - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); -} - -/* Perform value profile transformation using AutoFDO profile. Add the - promoted stmts to PROMOTED_STMTS. Return TRUE if there is any - indirect call promoted. */ - -static bool -afdo_vpt_for_early_inline (stmt_set *promoted_stmts) -{ - basic_block bb; - if (afdo_source_profile->get_function_instance_by_decl ( - current_function_decl) == NULL) - return false; - - compute_fn_summary (cgraph_node::get (current_function_decl), true); - - bool has_vpt = false; - FOR_EACH_BB_FN (bb, cfun) - { - if (!has_indirect_call (bb)) - continue; - gimple_stmt_iterator gsi; - - gcov_type bb_count = 0; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - count_info info; - gimple *stmt = gsi_stmt (gsi); - if (afdo_source_profile->get_count_info (stmt, &info)) - bb_count = MAX (bb_count, info.count); - } - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi)); - /* IC_promotion and early_inline_2 is done in multiple iterations. - No need to promoted the stmt if its in promoted_stmts (means - it is already been promoted in the previous iterations). */ - if ((!stmt) || gimple_call_fn (stmt) == NULL - || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL - || promoted_stmts->find (stmt) != promoted_stmts->end ()) - continue; - - count_info info; - afdo_source_profile->get_count_info (stmt, &info); - info.count = bb_count; - if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) - { - /* Promote the indirect call and update the promoted_stmts. 
*/ - promoted_stmts->insert (stmt); - if (afdo_vpt (&gsi, info.targets, true)) - has_vpt = true; - } - } - } - - if (has_vpt) - { - unsigned todo = optimize_inline_calls (current_function_decl); - if (todo & TODO_update_ssa_any) - update_ssa (TODO_update_ssa); - return true; - } - - return false; } -/* Annotate auto profile to the control flow graph. Do not annotate value - profile for stmts in PROMOTED_STMTS. */ +/* Annotate auto profile to the control flow graph. */ static void -afdo_annotate_cfg (const stmt_set &promoted_stmts) +afdo_annotate_cfg (void) { basic_block bb; bb_set annotated_bb; @@ -1577,47 +3079,140 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) current_function_decl); if (s == NULL) - return; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count - = profile_count::from_gcov_type (s->head_count ()).afdo (); - EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo (); - profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - - FOR_EACH_BB_FN (bb, cfun) { - /* As autoFDO uses sampling approach, we have to assume that all - counters are zero when not seen by autoFDO. */ - bb->count = profile_count::zero ().afdo (); - if (afdo_set_bb_count (bb, promoted_stmts)) - set_bb_annotated (bb, &annotated_bb); - if (bb->count > max_count) - max_count = bb->count; + if (dump_file) + fprintf (dump_file, "No afdo profile for %s\n", + cgraph_node::get (current_function_decl)->dump_name ()); + return; } - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count - > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) + + calculate_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (0); + + if (dump_file) { - ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count - = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); + fprintf (dump_file, "\n\nAnnotating BB profile of %s\n", + cgraph_node::get (current_function_decl)->dump_name ()); + fprintf (dump_file, "\n"); + s->dump (dump_file); + fprintf (dump_file, "\n"); } - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count - > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) + + /* In the first pass only store non-zero counts. */ + gcov_type head_count = s->head_count () * autofdo::afdo_count_scale; + bool profile_found = head_count > 0; + hash_set <basic_block> zero_bbs; + FOR_EACH_BB_FN (bb, cfun) { - EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count - = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); + if (afdo_set_bb_count (bb, zero_bbs)) + { + if (bb->count.quality () == AFDO) + { + gcc_assert (bb->count.nonzero_p ()); + profile_found = true; + } + set_bb_annotated (bb, &annotated_bb); + } } - afdo_source_profile->mark_annotated ( - DECL_SOURCE_LOCATION (current_function_decl)); - afdo_source_profile->mark_annotated (cfun->function_start_locus); - afdo_source_profile->mark_annotated (cfun->function_end_locus); - if (max_count.nonzero_p()) + /* We try to preserve static profile for BBs with 0 + afdo samples, but if even static profile agrees with 0, + consider it final so propagation works better. 
*/ + for (basic_block bb : zero_bbs) + if (!bb->count.nonzero_p ()) + { + update_count_by_afdo_count (&bb->count, 0); + set_bb_annotated (bb, &annotated_bb); + if (dump_file) + { + fprintf (dump_file, " Annotating bb %i with count ", bb->index); + bb->count.dump (dump_file); + fprintf (dump_file, + " (has 0 count in both static and afdo profile)\n"); + } + } + /* Exit without clobbering static profile if there was no + non-zero count. */ + if (!profile_found) { - /* Calculate, propagate count and probability information on CFG. */ - afdo_calculate_branch_prob (&annotated_bb); + /* create_gcov only dumps symbols with some samples in them. + This means that we get nonempty zero_bbs only if some + nonzero counts in profile were not matched with statements. + ??? We can adjust create_gcov to also recordinfo + about function with no samples. Then we can distinguish + between lost profiles which should be kept local and + real functions with 0 samples during train run. */ + if (zero_bbs.is_empty ()) + { + if (dump_file) + fprintf (dump_file, "No afdo samples found" + "; Setting global count to afdo0\n"); + } + else + { + if (dump_file) + fprintf (dump_file, "Setting global count to afdo0\n"); + } + FOR_ALL_BB_FN (bb, cfun) + if (bb->count.quality () == GUESSED_LOCAL) + bb->count = bb->count.global0afdo (); + + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + return; } - cgraph_node::get(current_function_decl)->count - = ENTRY_BLOCK_PTR_FOR_FN(cfun)->count; + + /* Update profile. */ + if (head_count > 0) + { + update_count_by_afdo_count (&ENTRY_BLOCK_PTR_FOR_FN (cfun)->count, + head_count); + set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun), &annotated_bb); + if (!is_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, annotated_bb) + || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) + { + ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count + = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, + &annotated_bb); + } + if (!is_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun), annotated_bb) + || ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) + { + EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count + = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); + } + } + + /* Calculate, propagate count and probability information on CFG. */ + afdo_calculate_branch_prob (&annotated_bb); + + /* If we failed to turn some of original guessed profile to global, + set basic blocks uninitialized. */ + FOR_ALL_BB_FN (bb, cfun) + if (!bb->count.ipa_p ()) + { + /* We skip annotating entry profile if it is 0 + in hope to be able to determine it better from the + static profile. + + Now we know we can not derive it from other info, + so set it since it is better than UNKNOWN. 
*/ + if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + bb->count = profile_count::zero ().afdo (); + else + bb->count = profile_count::uninitialized (); + if (dump_file) + fprintf (dump_file, " Unknown count of bb %i\n", bb->index); + cfun->cfg->full_profile = false; + } + + cgraph_node::get (current_function_decl)->count + = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; update_max_bb_count (); profile_status_for_fn (cfun) = PROFILE_READ; if (flag_value_profile_transformations) @@ -1627,18 +3222,10 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) free_dominance_info (CDI_POST_DOMINATORS); update_ssa (TODO_update_ssa); } -} - -/* Wrapper function to invoke early inliner. */ -static unsigned int -early_inline () -{ - compute_fn_summary (cgraph_node::get (current_function_decl), true); - unsigned int todo = early_inliner (cfun); - if (todo & TODO_update_ssa_any) - update_ssa (TODO_update_ssa); - return todo; + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); } /* Use AutoFDO profile to annoate the control flow graph. @@ -1654,6 +3241,7 @@ auto_profile (void) init_node_map (true); profile_info = autofdo::afdo_profile_info; + afdo_source_profile->offline_unrealized_inlines (); FOR_EACH_FUNCTION (node) { @@ -1666,56 +3254,12 @@ auto_profile (void) push_cfun (DECL_STRUCT_FUNCTION (node->decl)); - /* First do indirect call promotion and early inline to make the - IR match the profiled binary before actual annotation. - - This is needed because an indirect call might have been promoted - and inlined in the profiled binary. If we do not promote and - inline these indirect calls before annotation, the profile for - these promoted functions will be lost. - - e.g. foo() --indirect_call--> bar() - In profiled binary, the callsite is promoted and inlined, making - the profile look like: - - foo: { - loc_foo_1: count_1 - bar@loc_foo_2: { - loc_bar_1: count_2 - loc_bar_2: count_3 - } - } - - Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. - If we perform annotation on it, the profile inside bar@loc_foo2 - will be wasted. - - To avoid this, we promote loc_foo_2 and inline the promoted bar - function before annotation, so the profile inside bar@loc_foo2 - will be useful. */ - autofdo::stmt_set promoted_stmts; - unsigned int todo = 0; - for (int i = 0; i < 10; i++) - { - if (!flag_value_profile_transformations - || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) - break; - todo |= early_inline (); - } - - todo |= early_inline (); - autofdo::afdo_annotate_cfg (promoted_stmts); + autofdo::afdo_annotate_cfg (); compute_function_frequency (); - /* Local pure-const may imply need to fixup the cfg. */ - todo |= execute_fixup_cfg (); - if (todo & TODO_cleanup_cfg) - cleanup_tree_cfg (); - free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); cgraph_edge::rebuild_edges (); - compute_fn_summary (cgraph_node::get (current_function_decl), true); pop_cfun (); } @@ -1766,6 +3310,14 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) temporarily set it to afdo_profile_info to calculate hotness. */ profile_info = autofdo::afdo_profile_info; is_hot = maybe_hot_count_p (NULL, pcount); + if (dump_file) + { + fprintf (dump_file, "Call %s -> %s has %s afdo profile count ", + edge->caller->dump_name (), edge->callee->dump_name (), + is_hot ? 
"hot" : "cold"); + pcount.dump (dump_file); + fprintf (dump_file, "\n"); + } profile_info = saved_profile_info; return is_hot; } @@ -1773,6 +3325,78 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) return false; } +/* Do indirect call promotion during early inlining to make the + IR match the profiled binary before actual annotation. + + This is needed because an indirect call might have been promoted + and inlined in the profiled binary. If we do not promote and + inline these indirect calls before annotation, the profile for + these promoted functions will be lost. + + e.g. foo() --indirect_call--> bar() + In profiled binary, the callsite is promoted and inlined, making + the profile look like: + + foo: { + loc_foo_1: count_1 + bar@loc_foo_2: { + loc_bar_1: count_2 + loc_bar_2: count_3 + } + } + + Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. + If we perform annotation on it, the profile inside bar@loc_foo2 + will be wasted. + + To avoid this, we promote loc_foo_2 and inline the promoted bar + function before annotation, so the profile inside bar@loc_foo2 + will be useful. */ + +bool +afdo_vpt_for_early_inline (cgraph_node *node) +{ + if (!node->indirect_calls) + return false; + bool changed = false; + cgraph_node *outer = node->inlined_to ? node->inlined_to : node; + if (autofdo::afdo_source_profile->get_function_instance_by_decl + (outer->decl) == NULL) + return false; + for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) + { + gcov_type bb_count = 0; + autofdo::count_info info; + basic_block bb = gimple_bb (e->call_stmt); + + /* TODO: This is quadratic; cache the value. */ + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + autofdo::count_info info; + gimple *stmt = gsi_stmt (gsi); + if (autofdo::afdo_source_profile->get_count_info (stmt, &info, node)) + bb_count = MAX (bb_count, info.count); + } + autofdo::afdo_source_profile->get_count_info (e->call_stmt, &info, node); + info.count = bb_count; + if (!autofdo::afdo_source_profile->update_inlined_ind_target + (e->call_stmt, &info, node)) + continue; + changed |= autofdo::afdo_vpt (e->call_stmt, info.targets, true, e); + } + return changed; +} + +/* If speculation used during early inline, remove the target + so we do not speculate the indirect edge again during afdo pass. 
*/
+
+void
+remove_afdo_speculative_target (cgraph_edge *e)
+{
+  autofdo::afdo_source_profile->remove_icall_target (e);
+}
+
 namespace
 {
@@ -1815,3 +3439,48 @@ make_pass_ipa_auto_profile (gcc::context *ctxt)
 {
   return new pass_ipa_auto_profile (ctxt);
 }
+
+namespace
+{
+
+const pass_data pass_data_ipa_auto_profile_offline = {
+  SIMPLE_IPA_PASS, "afdo_offline", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_IPA_AUTOFDO_OFFLINE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_ipa_auto_profile_offline : public simple_ipa_opt_pass
+{
+public:
+  pass_ipa_auto_profile_offline (gcc::context *ctxt)
+    : simple_ipa_opt_pass (pass_data_ipa_auto_profile_offline, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+  bool
+  gate (function *) final override
+  {
+    return flag_auto_profile;
+  }
+  unsigned int
+  execute (function *) final override
+  {
+    if (autofdo::afdo_source_profile)
+      autofdo::afdo_source_profile->offline_external_functions ();
+    return 0;
+  }
+}; // class pass_ipa_auto_profile_offline
+
+} // anon namespace
+
+simple_ipa_opt_pass *
+make_pass_ipa_auto_profile_offline (gcc::context *ctxt)
+{
+  return new pass_ipa_auto_profile_offline (ctxt);
+}
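
The guessed-profile rescaling added in afdo_adjust_guessed_profile above reduces to a simple recipe: for each connected component of basic blocks that received no AFDO annotation, record one candidate scale per boundary edge (annotated flow divided by the statically guessed flow, as add_scale does), sort the candidates, and multiply the whole component by the median. The sketch below is a minimal standalone illustration of only that arithmetic; it is not the GCC implementation. It uses plain STL types instead of profile_count, sreal and auto_vec, and the names boundary_scale and rescale_component are invented for the example.

// Standalone sketch of the median-scale heuristic used to fit a statically
// guessed profile to the AFDO counts observed on its boundary.  Assumes
// simplified inputs; mirrors only the arithmetic, not the GCC data structures.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

/* One candidate scale per boundary edge: annotated count / guessed count.  */
static double
boundary_scale (uint64_t annotated, uint64_t guessed)
{
  return guessed ? (double) annotated / (double) guessed : 0.0;
}

/* Rescale the guessed counts of one connected component by the median of
   the candidate scales gathered on its boundary.  */
static void
rescale_component (std::vector<uint64_t> &component_counts,
		   std::vector<double> scales)
{
  if (scales.empty ())
    return;			/* Nothing known about the boundary; give up.  */
  std::sort (scales.begin (), scales.end ());
  double scale = scales[scales.size () / 2];	/* Middle element = median.  */
  for (uint64_t &c : component_counts)
    c = (uint64_t) (c * scale);
}

int
main ()
{
  /* Guessed counts of three blocks the AFDO profile knows nothing about.  */
  std::vector<uint64_t> counts = {100, 60, 40};
  /* Candidate scales derived from two annotated boundary edges.  */
  std::vector<double> scales = {boundary_scale (900, 100),
				boundary_scale (350, 40)};
  rescale_component (counts, scales);
  for (uint64_t c : counts)
    std::printf ("%llu\n", (unsigned long long) c);
  return 0;
}

In the patch itself the candidate scales are sreals produced by profile_count::to_sreal_scale in add_scale, and the pick is the same middle-element selection (scales.qsort (cmp); scale_bbs (bbs, scales[scales.length () / 2])), presumably because a median tolerates a single inconsistent boundary edge better than an average would.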