aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gprof/ChangeLog31
-rw-r--r--gprof/cg_arcs.c36
-rw-r--r--gprof/cg_arcs.h3
-rw-r--r--gprof/cg_print.c616
-rw-r--r--gprof/core.c146
-rw-r--r--gprof/gprof.c31
-rw-r--r--gprof/gprof.h3
-rw-r--r--gprof/gprof.texi40
-rw-r--r--gprof/symtab.h7
9 files changed, 908 insertions, 5 deletions
diff --git a/gprof/ChangeLog b/gprof/ChangeLog
index 2e2a726..e298702 100644
--- a/gprof/ChangeLog
+++ b/gprof/ChangeLog
@@ -1,3 +1,34 @@
+Sat Dec 30 10:11:03 1995 Jeffrey A Law (law@cygnus.com)
+
+ * gprof.c (long_options): Add "--function-ordering" and
+ "--file-ordering" options.
+ (usage): Add new options to usage message.
+ (main): Handle new options.
+ * gprof.h (STYLE_FUNCTION_ORDER): Define.
+ (STYLE_FILE_ORDER): Define.
+ (function_mapping_file): Declare.
+ * cg_arcs.c (arcs, numarcs): New globals.
+ (arc_add): Put new arcs into the arc array so the function/file
+ ordering code can examine them.
+ * cg_arcs.h (struct arc): New field "has_been_placed".
+ (arcs, numarcs): Declare new globals.
+ * core.c (symbol_map, symbol_map_count): New globals.
+ (read_function_mappings): New function to read in a function
+ to object map file.
+ (core_init): Call read_function_mappings if a function mapping
+ file exists.
+ (core_create_function_syms): Handle function to object file
+ mappings.
+ * symtab.h (struct sym): New fields "mapped", "has_been_placed",
+ "nuses", "prev".
+ * cg_print.c (cmp_arc_count): New function for sorting arcs.
+ (cmp_fun_nuses): Likewise for functions.
+ (cg_print_function_ordering): New function to print a suggested
+ function ordering.
+ (cg_print_file_ordering): Likewise for ordering .o files.
+ (order_and_dump_functions_by_arcs): Helper function for function
+ and object file ordering code.
+
Sun Dec 24 21:32:27 1995 Jeffrey A Law (law@cygnus.com)
* core.c (core_sym_class): Ignore symbols without BSF_FUNCTION
diff --git a/gprof/cg_arcs.c b/gprof/cg_arcs.c
index 8b6184b..c6431e4 100644
--- a/gprof/cg_arcs.c
+++ b/gprof/cg_arcs.c
@@ -27,6 +27,8 @@
Sym *cycle_header;
int num_cycles;
+Arc **arcs;
+int numarcs;
/*
* Return TRUE iff PARENT has an arc to covers the address
@@ -65,7 +67,8 @@ void
DEFUN (arc_add, (parent, child, count),
Sym * parent AND Sym * child AND int count)
{
- Arc *arc;
+ static int maxarcs = 0;
+ Arc *arc, **newarcs;
DBG (TALLYDEBUG, printf ("[arc_add] %d arcs from %s to %s\n",
count, parent->name, child->name));
@@ -85,6 +88,37 @@ DEFUN (arc_add, (parent, child, count),
arc->child = child;
arc->count = count;
+ /* If this isn't an arc for a recursive call to parent, then add it
+ to the array of arcs. */
+ if (parent != child)
+ {
+ /* If we've exhausted space in our current array, get a new one
+ and copy the contents. We might want to throttle the doubling
+ factor one day. */
+ if (numarcs == maxarcs)
+ {
+ /* Determine how much space we want to allocate. */
+ if (maxarcs == 0)
+ maxarcs = 1;
+ maxarcs *= 2;
+
+ /* Allocate the new array. */
+ newarcs = (Arc **)xmalloc(sizeof (Arc *) * maxarcs);
+
+ /* Copy the old array's contents into the new array. */
+ bcopy (arcs, newarcs, numarcs * sizeof (Arc *));
+
+ /* Free up the old array. */
+ free (arcs);
+
+ /* And make the new array be the current array. */
+ arcs = newarcs;
+ }
+
+ /* Place this arc in the arc array. */
+ arcs[numarcs++] = arc;
+ }
+
/* prepend this child to the children of this parent: */
arc->next_child = parent->cg.children;
parent->cg.children = arc;
diff --git a/gprof/cg_arcs.h b/gprof/cg_arcs.h
index 25d5b61..132ee73 100644
--- a/gprof/cg_arcs.h
+++ b/gprof/cg_arcs.h
@@ -20,6 +20,7 @@ typedef struct arc
double child_time; /* child-time inherited along arc */
struct arc *next_parent; /* next parent of CHILD */
struct arc *next_child; /* next child of PARENT */
+ int has_been_placed; /* have this arc's functions been placed? */
}
Arc;
@@ -29,5 +30,7 @@ extern Sym *cycle_header; /* cycle headers */
extern void arc_add PARAMS ((Sym * parent, Sym * child, int count));
extern Arc *arc_lookup PARAMS ((Sym * parent, Sym * child));
extern Sym **cg_assemble PARAMS ((void));
+extern Arc **arcs;
+extern int numarcs;
#endif /* cg_arcs_h */
diff --git a/gprof/cg_print.c b/gprof/cg_print.c
index 460bc04..c2f6ecb 100644
--- a/gprof/cg_print.c
+++ b/gprof/cg_print.c
@@ -11,6 +11,9 @@
#define EQUALTO 0
#define GREATERTHAN 1
+static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
+ int, Arc **,
+ unsigned long *));
/* declarations of automatically generated functions to output blurbs: */
extern void bsd_callg_blurb PARAMS ((FILE * fp));
extern void fsf_callg_blurb PARAMS ((FILE * fp));
@@ -654,3 +657,616 @@ DEFUN_VOID (cg_print_index)
}
free (name_sorted_syms);
}
+
+/* Compare two arcs based on their usage counts. We want to sort
+ in descending order. */
+static int
+DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
+{
+ const Arc **npp1 = (const Arc **) left;
+ const Arc **npp2 = (const Arc **) right;
+
+ if ((*npp1)->count > (*npp2)->count)
+ return -1;
+ else if ((*npp1)->count < (*npp2)->count)
+ return 1;
+ else
+ return 0;
+}
+
+/* Compare two funtions based on their usage counts. We want to sort
+ in descending order. */
+static int
+DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
+{
+ const Sym **npp1 = (const Sym **) left;
+ const Sym **npp2 = (const Sym **) right;
+
+ if ((*npp1)->nuses > (*npp2)->nuses)
+ return -1;
+ else if ((*npp1)->nuses < (*npp2)->nuses)
+ return 1;
+ else
+ return 0;
+}
+
+/* Print a suggested function ordering based on the profiling data.
+
+ We perform 4 major steps when ordering functions:
+
+ * Group unused functions together and place them at the
+ end of the function order.
+
+ * Search the highest use arcs (those which account for 90% of
+ the total arc count) for functions which have several parents.
+
+ Group those with the most call sites together (currently the
+ top 1.25% which have at least five different call sites).
+
+ These are emitted at the start of the function order.
+
+ * Use a greedy placement algorithm to place functions which
+ occur in the top 99% of the arcs in the profile. Some provisions
+ are made to handle high usage arcs where the parent and/or
+ child has already been placed.
+
+ * Run the same greedy placement algorithm on the remaining
+ arcs to place the leftover functions.
+
+
+ The various "magic numbers" should (one day) be tuneable by command
+ line options. They were arrived at by benchmarking a few applications
+ with various values to see which values produced better overall function
+ orderings.
+
+ Of course, profiling errors, machine limitations (PA long calls), and
+ poor cutoff values for the placement algorithm may limit the usefullness
+ of the resulting function order. Improvements would be greatly appreciated.
+
+ Suggestions:
+
+ * Place the functions with many callers near the middle of the
+ list to reduce long calls.
+
+ * Propagate arc usage changes as functions are placed. Ie if
+ func1 and func2 are placed together, arcs to/from those arcs
+ to the same parent/child should be combined, then resort the
+ arcs to choose the next one.
+
+ * Implement some global positioning algorithm to place the
+ chains made by the greedy local positioning algorithm. Probably
+ by examining arcs which haven't been placed yet to tie two
+ chains together.
+
+ * Take a function's size and time into account in the algorithm;
+ size in particular is important on the PA (long calls). Placing
+ many small functions onto their own page may be wise.
+
+ * Use better profiling information; many published algorithms
+ are based on call sequences through time, rather than just
+ arc counts.
+
+ * Prodecure cloning could improve performance when a small number
+ of arcs account for most of the calls to a particular function.
+
+ * Use relocation information to avoid moving unused functions
+ completely out of the code stream; this would avoid severe lossage
+ when the profile data bears little resemblance to actual runs.
+
+ * Propagation of arc usages should also improve .o link line
+ ordering which shares the same arc placement algorithm with
+ the function ordering code (in fact it is a degenerate case
+ of function ordering). */
+
+void
+DEFUN_VOID (cg_print_function_ordering)
+{
+ unsigned long index, used, unused, scratch_index;
+ unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
+#ifdef __GNU_C__
+ unsigned long long total_arcs, tmp_arcs_count;
+#else
+ unsigned long total_arcs, tmp_arcs_count;
+#endif
+ Sym **unused_syms, **used_syms, **scratch_syms;
+ Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
+
+ index = 0;
+ used = 0;
+ unused = 0;
+ scratch_index = 0;
+ unplaced_arc_count = 0;
+ high_arc_count = 0;
+ scratch_arc_count = 0;
+
+ /* First group all the unused functions together. */
+ unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+ scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+ unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+
+ /* Walk through all the functions; mark those which are never
+ called as placed (we'll emit them as a group later). */
+ for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
+ {
+ if (symtab.base[index].ncalls == 0)
+ {
+ /* Filter out gprof generated names. */
+ if (strcmp (symtab.base[index].name, "<locore>")
+ && strcmp (symtab.base[index].name, "<hicore>"))
+ {
+ unused_syms[unused++] = &symtab.base[index];
+ symtab.base[index].has_been_placed = 1;
+ }
+ }
+ else
+ {
+ used_syms[used++] = &symtab.base[index];
+ symtab.base[index].has_been_placed = 0;
+ symtab.base[index].next = 0;
+ symtab.base[index].prev = 0;
+ symtab.base[index].nuses = 0;
+ }
+ }
+
+ /* Sort the arcs from most used to least used. */
+ qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
+
+ /* Compute the total arc count. Also mark arcs as unplaced.
+
+ Note we don't compensate for overflow if that happens!
+ Overflow is much less likely when this file is compiled
+ with GCC as it can double-wide integers via long long. */
+ total_arcs = 0;
+ for (index = 0; index < numarcs; index++)
+ {
+ total_arcs += arcs[index]->count;
+ arcs[index]->has_been_placed = 0;
+ }
+
+ /* We want to pull out those functions which are referenced
+ by many highly used arcs and emit them as a group. This
+ could probably use some tuning. */
+ tmp_arcs_count = 0;
+ for (index = 0; index < numarcs; index++)
+ {
+ tmp_arcs_count += arcs[index]->count;
+
+ /* Count how many times each parent and child are used up
+ to our threshhold of arcs (90%). */
+ if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
+ break;
+
+ arcs[index]->child->nuses++;
+ }
+
+ /* Now sort a temporary symbol table based on the number of
+ times each function was used in the highest used arcs. */
+ bcopy (used_syms, scratch_syms, used * sizeof (Sym *));
+ qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
+
+ /* Now pick out those symbols we're going to emit as
+ a group. We take up to 1.25% of the used symbols. */
+ for (index = 0; index < used / 80; index++)
+ {
+ Sym *sym = scratch_syms[index];
+ Arc *arc;
+
+ /* If we hit symbols that aren't used from many call sites,
+ then we can quit. We choose five as the low limit for
+ no particular reason. */
+ if (sym->nuses == 5)
+ break;
+
+ /* We're going to need the arcs between these functions.
+ Unfortunately, we don't know all these functions
+ until we're done. So we keep track of all the arcs
+ to the functions we care about, then prune out those
+ which are uninteresting.
+
+ An interesting variation would be to quit when we found
+ multi-call site functions which account for some percentage
+ of the arcs. */
+
+ arc = sym->cg.children;
+ while (arc)
+ {
+ if (arc->parent != arc->child)
+ scratch_arcs[scratch_arc_count++] = arc;
+ arc->has_been_placed = 1;
+ arc = arc->next_child;
+ }
+
+ arc = sym->cg.parents;
+ while (arc)
+ {
+ if (arc->parent != arc->child)
+ scratch_arcs[scratch_arc_count++] = arc;
+ arc->has_been_placed = 1;
+ arc = arc->next_parent;
+ }
+
+ /* Keep track of how many symbols we're going to place. */
+ scratch_index = index;
+
+ /* A lie, but it makes identifying these functions easier
+ later. */
+ sym->has_been_placed = 1;
+ }
+
+ /* Now walk through the temporary arcs and copy those we care about
+ into the high arcs array. */
+ for (index = 0; index < scratch_arc_count; index++)
+ {
+ Arc *arc = scratch_arcs[index];
+
+ /* If this arc refers to highly used functions, then
+ then we want to keep it. */
+ if (arc->child->has_been_placed
+ && arc->parent->has_been_placed)
+ {
+ high_arcs[high_arc_count++] = scratch_arcs[index];
+
+ /* We need to turn of has_been_placed since we're going to
+ use the main arc placement algorithm on these arcs. */
+ arc->child->has_been_placed = 0;
+ arc->parent->has_been_placed = 0;
+ }
+ }
+
+ /* Dump the multi-site high usage functions which are not going
+ to be ordered by the main ordering algorithm. */
+ for (index = 0; index < scratch_index; index++)
+ {
+ if (scratch_syms[index]->has_been_placed)
+ printf ("%s\n", scratch_syms[index]->name);
+ }
+
+ /* Now we can order the multi-site high use functions based on the
+ arcs between them. */
+ qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
+ order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
+ unplaced_arcs, &unplaced_arc_count);
+
+ /* Order and dump the high use functions left, these typically
+ have only a few call sites. */
+ order_and_dump_functions_by_arcs (arcs, numarcs, 0,
+ unplaced_arcs, &unplaced_arc_count);
+
+ /* Now place the rarely used functions. */
+ order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
+ scratch_arcs, &scratch_arc_count);
+
+ /* Output any functions not emitted by the order_and_dump calls. */
+ for (index = 0; index < used; index++)
+ if (used_syms[index]->has_been_placed == 0)
+ printf("%s\n", used_syms[index]->name);
+
+ /* Output the unused functions. */
+ for (index = 0; index < unused; index++)
+ printf("%s\n", unused_syms[index]->name);
+
+ unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
+ high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+ scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+ unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+
+ free (unused_syms);
+ free (used_syms);
+ free (scratch_syms);
+ free (high_arcs);
+ free (scratch_arcs);
+ free (unplaced_arcs);
+}
+
+/* Place functions based on the arcs in ARCS with NUMARCS entries;
+ place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
+
+ If ALL is nonzero, then place all functions referenced by ARCS,
+ else only place those referenced in the top 99% of the arcs in ARCS. */
+
+#define MOST 0.99
+static void
+order_and_dump_functions_by_arcs (arcs, numarcs, all,
+ unplaced_arcs, unplaced_arc_count)
+ Arc **arcs;
+ unsigned long numarcs;
+ int all;
+ Arc **unplaced_arcs;
+ unsigned long *unplaced_arc_count;
+{
+#ifdef __GNU_C__
+ unsigned long long tmp_arcs, total_arcs;
+#else
+ unsigned long tmp_arcs, total_arcs;
+#endif
+ unsigned int index;
+
+ /* If needed, compute the total arc count.
+
+ Note we don't compensate for overflow if that happens! */
+ if (! all)
+ {
+ total_arcs = 0;
+ for (index = 0; index < numarcs; index++)
+ total_arcs += arcs[index]->count;
+ }
+ else
+ total_arcs = 0;
+
+ tmp_arcs = 0;
+ for (index = 0; index < numarcs; index++)
+ {
+ Sym *sym1, *sym2;
+ Sym *child, *parent;
+
+ tmp_arcs += arcs[index]->count;
+
+ /* Ignore this arc if it's already been placed. */
+ if (arcs[index]->has_been_placed)
+ continue;
+
+ child = arcs[index]->child;
+ parent = arcs[index]->parent;
+
+ /* If we're not using all arcs, and this is a rarely used
+ arc, then put it on the unplaced_arc list. Similarly
+ if both the parent and child of this arc have been placed. */
+ if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
+ || child->has_been_placed || parent->has_been_placed)
+ {
+ unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
+ continue;
+ }
+
+ /* If all slots in the parent and child are full, then there isn't
+ anything we can do right now. We'll place this arc on the
+ unplaced arc list in the hope that a global positioning
+ algorithm can use it to place function chains. */
+ if (parent->next && parent->prev && child->next && child->prev)
+ {
+ unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
+ continue;
+ }
+
+ /* If the parent is unattached, then find the closest
+ place to attach it onto child's chain. Similarly
+ for the opposite case. */
+ if (!parent->next && !parent->prev)
+ {
+ int next_count = 0;
+ int prev_count = 0;
+ Sym *prev = child;
+ Sym *next = child;
+
+ /* Walk to the beginning and end of the child's chain. */
+ while (next->next)
+ {
+ next = next->next;
+ next_count++;
+ }
+
+ while (prev->prev)
+ {
+ prev = prev->prev;
+ prev_count++;
+ }
+
+ /* Choose the closest. */
+ child = next_count < prev_count ? next : prev;
+ }
+ else if (! child->next && !child->prev)
+ {
+ int next_count = 0;
+ int prev_count = 0;
+ Sym *prev = parent;
+ Sym *next = parent;
+
+ while (next->next)
+ {
+ next = next->next;
+ next_count++;
+ }
+
+ while (prev->prev)
+ {
+ prev = prev->prev;
+ prev_count++;
+ }
+
+ parent = prev_count < next_count ? prev : next;
+ }
+ else
+ {
+ /* Couldn't find anywhere to attach the functions,
+ put the arc on the unplaced arc list. */
+ unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
+ continue;
+ }
+
+ /* Make sure we don't tie two ends together. */
+ sym1 = parent;
+ if (sym1->next)
+ while (sym1->next)
+ sym1 = sym1->next;
+ else
+ while (sym1->prev)
+ sym1 = sym1->prev;
+
+ sym2 = child;
+ if (sym2->next)
+ while (sym2->next)
+ sym2 = sym2->next;
+ else
+ while (sym2->prev)
+ sym2 = sym2->prev;
+
+ if (sym1 == child
+ && sym2 == parent)
+ {
+ /* This would tie two ends together. */
+ unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
+ continue;
+ }
+
+ if (parent->next)
+ {
+ /* Must attach to the parent's prev field. */
+ if (! child->next)
+ {
+ /* parent-prev and child-next */
+ parent->prev = child;
+ child->next = parent;
+ arcs[index]->has_been_placed = 1;
+ }
+ }
+ else if (parent->prev)
+ {
+ /* Must attach to the parent's next field. */
+ if (! child->prev)
+ {
+ /* parent-next and child-prev */
+ parent->next = child;
+ child->prev = parent;
+ arcs[index]->has_been_placed = 1;
+ }
+ }
+ else
+ {
+ /* Can attach to either field in the parent, depends
+ on where we've got space in the child. */
+ if (child->prev)
+ {
+ /* parent-prev and child-next */
+ parent->prev = child;
+ child->next = parent;
+ arcs[index]->has_been_placed = 1;
+ }
+ else
+ {
+ /* parent-next and child-prev */
+ parent->next = child;
+ child->prev = parent;
+ arcs[index]->has_been_placed = 1;
+ }
+ }
+ }
+
+ /* Dump the chains of functions we've made. */
+ for (index = 0; index < numarcs; index++)
+ {
+ Sym *sym;
+ if (arcs[index]->parent->has_been_placed
+ || arcs[index]->child->has_been_placed)
+ continue;
+
+ sym = arcs[index]->parent;
+
+ /* If this symbol isn't attached to any other
+ symbols, then we've got a rarely used arc.
+
+ Skip it for now, we'll deal with them later. */
+ if (sym->next == NULL
+ && sym->prev == NULL)
+ continue;
+
+ /* Get to the start of this chain. */
+ while (sym->prev)
+ sym = sym->prev;
+
+ while (sym)
+ {
+ /* Mark it as placed. */
+ sym->has_been_placed = 1;
+ printf ("%s\n", sym->name);
+ sym = sym->next;
+ }
+ }
+
+ /* If we want to place all the arcs, then output those which weren't
+ placed by the main algorithm. */
+ if (all)
+ for (index = 0; index < numarcs; index++)
+ {
+ Sym *sym;
+ if (arcs[index]->parent->has_been_placed
+ || arcs[index]->child->has_been_placed)
+ continue;
+
+ sym = arcs[index]->parent;
+
+ sym->has_been_placed = 1;
+ printf ("%s\n", sym->name);
+ }
+}
+
+/* Print a suggested .o ordering for files on a link line based
+ on profiling information. This uses the function placement
+ code for the bulk of its work. */
+
+struct function_map {
+ char *function_name;
+ char *file_name;
+};
+
+void
+DEFUN_VOID (cg_print_file_ordering)
+{
+ unsigned long scratch_arc_count, index;
+ Arc **scratch_arcs;
+ extern struct function_map *symbol_map;
+ extern int symbol_map_count;
+ char *last;
+
+ scratch_arc_count = 0;
+
+ scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
+ for (index = 0; index < numarcs; index++)
+ {
+ if (! arcs[index]->parent->mapped
+ || ! arcs[index]->child->mapped)
+ arcs[index]->has_been_placed = 1;
+ }
+
+ order_and_dump_functions_by_arcs (arcs, numarcs, 0,
+ scratch_arcs, &scratch_arc_count);
+
+ /* Output .o's not handled by the main placement algorithm. */
+ for (index = 0; index < symtab.len; index++)
+ {
+ if (symtab.base[index].mapped
+ && ! symtab.base[index].has_been_placed)
+ printf ("%s\n", symtab.base[index].name);
+ }
+
+ /* Now output any .o's that didn't have any text symbols. */
+ last = NULL;
+ for (index = 0; index < symbol_map_count; index++)
+ {
+ int index2;
+
+ /* Don't bother searching if this symbol is the
+ same as the previous one. */
+ if (last && !strcmp (last, symbol_map[index].file_name))
+ continue;
+
+ for (index2 = 0; index2 < symtab.len; index2++)
+ {
+ if (! symtab.base[index2].mapped)
+ continue;
+
+ if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
+ break;
+ }
+
+ /* If we didn't find it in the symbol table, then it must be a .o
+ with no text symbols. Output it last. */
+ if (index2 == symtab.len)
+ printf ("%s\n", symbol_map[index].file_name);
+ last = symbol_map[index].file_name;
+ }
+}
diff --git a/gprof/core.c b/gprof/core.c
index aa6012a..d514178 100644
--- a/gprof/core.c
+++ b/gprof/core.c
@@ -9,6 +9,98 @@ asymbol **core_syms;
asection *core_text_sect;
PTR core_text_space;
+/* For mapping symbols to specific .o files during file ordering. */
+struct function_map {
+ char *function_name;
+ char *file_name;
+};
+
+struct function_map *symbol_map;
+int symbol_map_count;
+
+static void
+DEFUN (read_function_mappings, (filename), const char *filename)
+{
+ FILE *file = fopen (filename, "r");
+ char dummy[1024];
+ int count = 0;
+
+ if (!file)
+ {
+ fprintf (stderr, "%s: could not open %s.\n", whoami, filename);
+ done (1);
+ }
+
+ /* First parse the mapping file so we know how big we need to
+ make our tables. We also do some sanity checks at this
+ time. */
+ while (!feof (file))
+ {
+ int matches;
+
+ matches = fscanf (file, "%[^\n:]", dummy);
+ if (!matches)
+ {
+ fprintf (stderr, "%s: unable to parse mapping file %s.\n",
+ whoami, filename);
+ done (1);
+ }
+
+ /* Just skip messages about files with no symbols. */
+ if (!strncmp (dummy, "No symbols in ", 14))
+ {
+ fscanf (file, "\n");
+ continue;
+ }
+
+ /* Don't care what else is on this line at this point. */
+ fscanf (file, "%[^\n]\n", dummy);
+ count++;
+ }
+
+ /* Now we know how big we need to make our table. */
+ symbol_map = xmalloc (count * sizeof (struct function_map));
+
+ /* Rewind the input file so we can read it again. */
+ rewind (file);
+
+ /* Read each entry and put it into the table. */
+ count = 0;
+ while (!feof (file))
+ {
+ int matches;
+ char *tmp;
+
+ matches = fscanf (file, "%[^\n:]", dummy);
+ if (!matches)
+ {
+ fprintf (stderr, "%s: unable to parse mapping file %s.\n",
+ whoami, filename);
+ done (1);
+ }
+
+ /* Just skip messages about files with no symbols. */
+ if (!strncmp (dummy, "No symbols in ", 14))
+ {
+ fscanf (file, "\n");
+ continue;
+ }
+
+ /* dummy has the filename, go ahead and copy it. */
+ symbol_map[count].file_name = xmalloc (strlen (dummy) + 1);
+ strcpy (symbol_map[count].file_name, dummy);
+
+ /* Now we need the function name. */
+ fscanf (file, "%[^\n]\n", dummy);
+ tmp = strrchr (dummy, ' ') + 1;
+ symbol_map[count].function_name = xmalloc (strlen (tmp) + 1);
+ strcpy (symbol_map[count].function_name, tmp);
+ count++;
+ }
+
+ /* Record the size of the map table for future reference. */
+ symbol_map_count = count;
+}
void
DEFUN (core_init, (a_out_name), const char *a_out_name)
@@ -59,6 +151,9 @@ DEFUN (core_init, (a_out_name), const char *a_out_name)
bfd_errmsg (bfd_get_error ()));
done (1);
}
+
+ if (function_mapping_file)
+ read_function_mappings (function_mapping_file);
}
@@ -232,7 +327,7 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
bfd_vma min_vma = ~0, max_vma = 0;
const char *filename, *func_name;
int class;
- long i;
+ long i, j, found, skip;
/* pass 1 - determine upper bound on number of function names: */
symtab.len = 0;
@@ -242,7 +337,24 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
{
continue;
}
- ++symtab.len;
+
+ /* This should be replaced with a binary search or hashed
+ search. Gross.
+
+ Don't create a symtab entry for a function that has
+ a mapping to a file, unless it's the first function
+ in the file. */
+ skip = 0;
+ for (j = 0; j < symbol_map_count; j++)
+ if (!strcmp (core_syms[i]->name, symbol_map[j].function_name))
+ {
+ if (j > 0 && ! strcmp (symbol_map [j].file_name,
+ symbol_map [j - 1].file_name))
+ skip = 1;
+ break;
+ }
+ if (!skip)
+ ++symtab.len;
}
if (symtab.len == 0)
@@ -267,13 +379,41 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
core_syms[i]->value, core_syms[i]->name));
continue;
}
+ /* This should be replaced with a binary search or hashed
+ search. Gross. */
+
+ skip = 0;
+ found = 0;
+ for (j = 0; j < symbol_map_count; j++)
+ if (!strcmp (core_syms[i]->name, symbol_map[j].function_name))
+ {
+ if (j > 0 && ! strcmp (symbol_map [j].file_name,
+ symbol_map [j - 1].file_name))
+ skip = 1;
+ else
+ found = j;
+ break;
+ }
+
+ if (skip)
+ continue;
sym_init (symtab.limit);
/* symbol offsets are always section-relative: */
symtab.limit->addr = core_syms[i]->value + core_syms[i]->section->vma;
- symtab.limit->name = core_syms[i]->name;
+ if (symbol_map_count
+ && !strcmp (core_syms[i]->name, symbol_map[found].function_name))
+ {
+ symtab.limit->name = symbol_map[found].file_name;
+ symtab.limit->mapped = 1;
+ }
+ else
+ {
+ symtab.limit->name = core_syms[i]->name;
+ symtab.limit->mapped = 0;
+ }
#ifdef __osf__
/*
diff --git a/gprof/gprof.c b/gprof/gprof.c
index 0f9dd85..742f0a6 100644
--- a/gprof/gprof.c
+++ b/gprof/gprof.c
@@ -33,6 +33,7 @@
#define VERSION "2.6"
const char *whoami;
+const char *function_mapping_file;
const char *a_out_name = A_OUTNAME;
long hz = HZ_WRONG;
@@ -89,6 +90,8 @@ static struct option long_options[] =
{"no-graph", optional_argument, 0, 'Q'},
{"exec-counts", optional_argument, 0, 'C'},
{"no-exec-counts", optional_argument, 0, 'Z'},
+ {"function-ordering", no_argument, 0, 'r'},
+ {"file-ordering", required_argument, 0, 'R'},
{"file-info", no_argument, 0, 'i'},
{"sum", no_argument, 0, 's'},
@@ -136,6 +139,7 @@ Usage: %s [-[abcDhilLsTvwxyz]] [-[ACeEfFJnNOpPqQZ][name]] [-I dirs]\n\
[--[no-]annotated-source[=name]] [--[no-]exec-counts[=name]]\n\
[--[no-]flat-profile[=name]] [--[no-]graph[=name]]\n\
[--[no-]time=name] [--all-lines] [--brief] [--debug[=level]]\n\
+ [--function-ordering] [--file-ordering]\n\
[--directory-path=dirs] [--display-unused-functions]\n\
[--file-format=name] [--file-info] [--help] [--line] [--min-count=n]\n\
[--no-static] [--print-path] [--separate-files]\n\
@@ -322,6 +326,15 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
output_style |= STYLE_CALL_GRAPH;
user_specified |= STYLE_CALL_GRAPH;
break;
+ case 'r':
+ output_style |= STYLE_FUNCTION_ORDER;
+ user_specified |= STYLE_FUNCTION_ORDER;
+ break;
+ case 'R':
+ output_style |= STYLE_FILE_ORDER;
+ user_specified |= STYLE_FILE_ORDER;
+ function_mapping_file = optarg;
+ break;
case 'Q':
if (optarg)
{
@@ -391,6 +404,16 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
}
}
+ /* Don't allow both ordering options, they modify the arc data in-place. */
+ if ((user_specified & STYLE_FUNCTION_ORDER)
+ && (user_specified & STYLE_FILE_ORDER))
+ {
+ fprintf (stderr,"\
+%s: Only one of --function-ordering and --file-ordering may be specified.\n",
+ whoami);
+ done (1);
+ }
+
/* append value of GPROF_PATH to source search list if set: */
str = (char *) getenv ("GPROF_PATH");
if (str)
@@ -581,6 +604,14 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
{
print_annotated_source ();
}
+ if (output_style & STYLE_FUNCTION_ORDER)
+ {
+ cg_print_function_ordering ();
+ }
+ if (output_style & STYLE_FILE_ORDER)
+ {
+ cg_print_file_ordering ();
+ }
return 0;
}
diff --git a/gprof/gprof.h b/gprof/gprof.h
index 94fee0d..c61886a 100644
--- a/gprof/gprof.h
+++ b/gprof/gprof.h
@@ -79,6 +79,8 @@
#define STYLE_EXEC_COUNTS (1<<3)
#define STYLE_ANNOTATED_SOURCE (1<<4)
#define STYLE_GMON_INFO (1<<5)
+#define STYLE_FUNCTION_ORDER (1<<6)
+#define STYLE_FILE_ORDER (1<<7)
#define ANYDEBUG (1<<0) /* 1 */
#define DFNDEBUG (1<<1) /* 2 */
@@ -111,6 +113,7 @@ typedef int bool;
typedef unsigned char UNIT[2]; /* unit of profiling */
extern const char *whoami; /* command-name, for error messages */
+extern const char *function_mapping_file; /* file mapping functions to files */
extern const char *a_out_name; /* core filename */
extern long hz; /* ticks per second */
diff --git a/gprof/gprof.texi b/gprof/gprof.texi
index 1bf6315..bb1490d 100644
--- a/gprof/gprof.texi
+++ b/gprof/gprof.texi
@@ -394,6 +394,46 @@ cumulative data in the file @file{gmon.sum}.
@item -T
The @samp{-T} option causes @code{gprof} to print its output in
``traditional'' BSD style.
+
+@item --function-ordering
+The @samp{--function-ordering} option causes @code{gprof} to print a
+suggested function ordering for the program based on profiling data.
+This option suggests an ordering which may improve paging, tlb and
+cache behavior for the program on systems which support arbitrary
+ordering of functions in an executable.
+
+The exact details of how to force the linker to place functions
+in a particular order is system dependent and out of the scope of this
+manual.
+
+@item --file-ordering @var{map_file}
+The @samp{--file-ordering} option causes @code{gprof} to print a
+suggested .o link line ordering for the program based on profiling data.
+This option suggests an ordering which may improve paging, tlb and
+cache behavior for the program on systems which do not support arbitrary
+ordering of functions in an executable.
+
+Use of the @samp{-a} argument is highly recommended with this option.
+
+The @var{map_file} argument is a pathname to a file which provides
+function name to object file mappings. The format of the file is similar to
+the output of the program @code{nm}.
+
+@smallexample
+@group
+c-parse.o:00000000 T yyparse
+c-parse.o:00000004 C yyerrflag
+c-lang.o:00000000 T maybe_objc_method_name
+c-lang.o:00000000 T print_lang_statistics
+c-lang.o:00000000 T recognize_objc_keyword
+c-decl.o:00000000 T print_lang_identifier
+c-decl.o:00000000 T print_lang_type
+@dots{}
+
+@end group
+@end smallexample
+
+GNU @code{nm} @samp{--extern-only} @samp{--defined-only} @samp{-v} @samp{--print-file-name} can be used to create @var{map_file}.
@end table
@node Flat Profile
diff --git a/gprof/symtab.h b/gprof/symtab.h
index 07c7751..304e6b5 100644
--- a/gprof/symtab.h
+++ b/gprof/symtab.h
@@ -36,9 +36,14 @@ typedef struct sym
int line_num; /* source line number */
unsigned int is_func:1, /* is this a function entry point? */
is_static:1, /* is this a local (static) symbol? */
- is_bb_head:1; /* is this the head of a basic-blk? */
+ is_bb_head:1, /* is this the head of a basic-blk? */
+ mapped:1, /* this symbol was mapped to another name */
+ has_been_placed:1; /* have we placed this symbol? */
int ncalls; /* how many times executed */
+ int nuses; /* how many times this symbol appears in
+ a particular context */
struct sym *next; /* for building chains of syms */
+ struct sym *prev; /* for building chains of syms */
/* profile-specific information: */