/* Read and display shared object profiling data. Copyright (C) 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include <argp.h> #include <dlfcn.h> #include <elf.h> #include <error.h> #include <fcntl.h> #include <inttypes.h> #include <libintl.h> #include <locale.h> #include <obstack.h> #include <search.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <elf/ldsodefs.h> #include <sys/gmon.h> #include <sys/gmon_out.h> #include <sys/mman.h> #include <sys/param.h> #include <sys/stat.h> /* Undefine the following line line in the production version. */ /* #define _NDEBUG 1 */ #include <assert.h> /* Get libc version number. */ #include "../version.h" #define PACKAGE _libc_intl_domainname #include <endian.h> #if BYTE_ORDER == BIG_ENDIAN #define byteorder ELFDATA2MSB #define byteorder_name "big-endian" #elif BYTE_ORDER == LITTLE_ENDIAN #define byteorder ELFDATA2LSB #define byteorder_name "little-endian" #else #error "Unknown BYTE_ORDER " BYTE_ORDER #define byteorder ELFDATANONE #endif extern int __profile_frequency __P ((void)); /* Name and version of program. */ static void print_version (FILE *stream, struct argp_state *state); void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; #define OPT_TEST 1 /* Definitions of arguments for argp functions. */ static const struct argp_option options[] = { { NULL, 0, NULL, 0, N_("Output selection:") }, { "call-pairs", 'c', NULL, 0, N_("print list of count paths and their number of use") }, { "flat-profile", 'p', NULL, 0, N_("generate flat profile with counts and ticks") }, { "graph", 'q', NULL, 0, N_("generate call graph") }, { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL }, { NULL, 0, NULL, 0, NULL } }; /* Short description of program. */ static const char doc[] = N_("Read and display shared object profiling data"); /* Strings for arguments in help texts. */ static const char args_doc[] = N_("SHOBJ [PROFDATA]"); /* Prototype for option handler. */ static error_t parse_opt (int key, char *arg, struct argp_state *state); /* Data structure to communicate with argp functions. */ static struct argp argp = { options, parse_opt, args_doc, doc, NULL, NULL }; /* Operation modes. */ static enum { NONE = 0, FLAT_MODE = 1 << 0, CALL_GRAPH_MODE = 1 << 1, CALL_PAIRS = 1 << 2, DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE } mode; /* If nonzero the total number of invocations of a function is emitted. */ int count_total; /* Nozero for testing. */ int do_test; /* Strcuture describing calls. */ struct here_fromstruct { struct here_cg_arc_record volatile *here; uint16_t link; }; /* We define a special type to address the elements of the arc table. This is basically the `gmon_cg_arc_record' format but it includes the room for the tag and it uses real types. */ struct here_cg_arc_record { uintptr_t from_pc; uintptr_t self_pc; uint32_t count; } __attribute__ ((packed)); struct known_symbol; struct arc_list { size_t idx; uintmax_t count; struct arc_list *next; }; static struct obstack ob_list; struct known_symbol { const char *name; uintptr_t addr; size_t size; uintmax_t ticks; uintmax_t calls; struct arc_list *froms; struct arc_list *tos; }; struct shobj { const char *name; /* User-provided name. */ struct link_map *map; const char *dynstrtab; /* Dynamic string table of shared object. */ const char *soname; /* Soname of shared object. */ uintptr_t lowpc; uintptr_t highpc; unsigned long int kcountsize; size_t expected_size; /* Expected size of profiling file. */ size_t tossize; size_t fromssize; size_t fromlimit; unsigned int hashfraction; int s_scale; void *symbol_map; size_t symbol_mapsize; const ElfW(Sym) *symtab; size_t symtab_size; const char *strtab; struct obstack ob_str; struct obstack ob_sym; }; struct profdata { void *addr; off_t size; char *hist; struct gmon_hist_hdr *hist_hdr; uint16_t *kcount; uint32_t narcs; /* Number of arcs in toset. */ struct here_cg_arc_record *data; uint16_t *tos; struct here_fromstruct *froms; }; /* Search tree for symbols. */ void *symroot; static struct known_symbol **sortsym; static size_t symidx; static uintmax_t total_ticks; /* Prototypes for local functions. */ static struct shobj *load_shobj (const char *name); static void unload_shobj (struct shobj *shobj); static struct profdata *load_profdata (const char *name, struct shobj *shobj); static void unload_profdata (struct profdata *profdata); static void count_total_ticks (struct shobj *shobj, struct profdata *profdata); static void count_calls (struct shobj *shobj, struct profdata *profdata); static void read_symbols (struct shobj *shobj); static void add_arcs (struct profdata *profdata); static void generate_flat_profile (struct profdata *profdata); static void generate_call_graph (struct profdata *profdata); static void generate_call_pair_list (struct profdata *profdata); int main (int argc, char *argv[]) { const char *shobj; const char *profdata; struct shobj *shobj_handle; struct profdata *profdata_handle; int remaining; setlocale (LC_ALL, ""); /* Initialize the message catalog. */ textdomain (_libc_intl_domainname); /* Parse and process arguments. */ argp_parse (&argp, argc, argv, 0, &remaining, NULL); if (argc - remaining == 0 || argc - remaining > 2) { /* We need exactly two non-option parameter. */ argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR, program_invocation_short_name); exit (1); } /* Get parameters. */ shobj = argv[remaining]; if (argc - remaining == 2) profdata = argv[remaining + 1]; else /* No filename for the profiling data given. We will determine it from the soname of the shobj, later. */ profdata = NULL; /* First see whether we can load the shared object. */ shobj_handle = load_shobj (shobj); if (shobj_handle == NULL) exit (1); /* We can now determine the filename for the profiling data, if nececessary. */ if (profdata == NULL) { char *newp; if (shobj_handle->soname == NULL) { unload_shobj (shobj_handle); error (EXIT_FAILURE, 0, _("\ no filename for profiling data given and shared object `%s' has no soname"), shobj); } newp = (char *) alloca (strlen (shobj_handle->soname) + sizeof ".profile"); stpcpy (stpcpy (newp, shobj_handle->soname), ".profile"); profdata = newp; } /* Now see whether the profiling data file matches the given object. */ profdata_handle = load_profdata (profdata, shobj_handle); if (profdata_handle == NULL) { unload_shobj (shobj_handle); exit (1); } read_symbols (shobj_handle); /* Count the ticks. */ count_total_ticks (shobj_handle, profdata_handle); /* Count the calls. */ count_calls (shobj_handle, profdata_handle); /* Add the arc information. */ add_arcs (profdata_handle); /* If no mode is specified fall back to the default mode. */ if (mode == NONE) mode = DEFAULT_MODE; /* Do some work. */ if (mode & FLAT_MODE) generate_flat_profile (profdata_handle); if (mode & CALL_GRAPH_MODE) generate_call_graph (profdata_handle); if (mode & CALL_PAIRS) generate_call_pair_list (profdata_handle); /* Free the resources. */ unload_shobj (shobj_handle); unload_profdata (profdata_handle); return 0; } /* Handle program arguments. */ static error_t parse_opt (int key, char *arg, struct argp_state *state) { switch (key) { case 'c': mode |= CALL_PAIRS; break; case 'p': mode |= FLAT_MODE; break; case 'q': mode |= CALL_GRAPH_MODE; break; case OPT_TEST: do_test = 1; break; default: return ARGP_ERR_UNKNOWN; } return 0; } /* Print the version information. */ static void print_version (FILE *stream, struct argp_state *state) { fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION); fprintf (stream, gettext ("\ Copyright (C) %s Free Software Foundation, Inc.\n\ This is free software; see the source for copying conditions. There is NO\n\ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ "), "1997, 1998"); fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); } /* Note that we must not use `dlopen' etc. The shobj object must not be loaded for use. */ static struct shobj * load_shobj (const char *name) { struct link_map *map = NULL; struct shobj *result; ElfW(Addr) mapstart = ~((ElfW(Addr)) 0); ElfW(Addr) mapend = 0; const ElfW(Phdr) *ph; size_t textsize; unsigned int log_hashfraction; ElfW(Ehdr) *ehdr; int fd; ElfW(Shdr) *shdr; void *ptr; size_t pagesize = getpagesize (); const char *shstrtab; int idx; ElfW(Shdr) *symtab_entry; /* Since we use dlopen() we must be prepared to work around the sometimes strange lookup rules for the shared objects. If we have a file foo.so in the current directory and the user specfies foo.so on the command line (without specifying a directory) we should load the file in the current directory even if a normal dlopen() call would read the other file. We do this by adding a directory portion to the name. */ if (strchr (name, '/') == NULL) { char *load_name = (char *) alloca (strlen (name) + 3); stpcpy (stpcpy (load_name, "./"), name); map = (struct link_map *) dlopen (load_name, RTLD_LAZY); } if (map == NULL) { map = (struct link_map *) dlopen (name, RTLD_LAZY); if (map == NULL) { error (0, errno, _("failed to load shared object `%s'"), name); return NULL; } } /* Prepare the result. */ result = (struct shobj *) calloc (1, sizeof (struct shobj)); if (result == NULL) { error (0, errno, _("cannot create internal descriptors")); dlclose (map); return NULL; } result->name = name; result->map = map; /* Compute the size of the sections which contain program code. This must match the code in dl-profile.c (_dl_start_profile). */ for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph) if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X)) { ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1)); ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1) & ~(pagesize - 1)); if (start < mapstart) mapstart = start; if (end > mapend) mapend = end; } result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr), HISTFRACTION * sizeof (HISTCOUNTER)); result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr), HISTFRACTION * sizeof (HISTCOUNTER)); if (do_test) printf ("load addr: %0#*" PRIxPTR "\n" "lower bound PC: %0#*" PRIxPTR "\n" "upper bound PC: %0#*" PRIxPTR "\n", __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr, __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc, __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc); textsize = result->highpc - result->lowpc; result->kcountsize = textsize / HISTFRACTION; result->hashfraction = HASHFRACTION; if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) /* If HASHFRACTION is a power of two, mcount can use shifting instead of integer division. Precompute shift amount. */ log_hashfraction = __builtin_ffs (result->hashfraction * sizeof (struct here_fromstruct)) - 1; else log_hashfraction = -1; if (do_test) printf ("hashfraction = %d\ndivider = %Zu\n", result->hashfraction, result->hashfraction * sizeof (struct here_fromstruct)); result->tossize = textsize / HASHFRACTION; result->fromlimit = textsize * ARCDENSITY / 100; if (result->fromlimit < MINARCS) result->fromlimit = MINARCS; if (result->fromlimit > MAXARCS) result->fromlimit = MAXARCS; result->fromssize = result->fromlimit * sizeof (struct here_fromstruct); result->expected_size = (sizeof (struct gmon_hdr) + 4 + sizeof (struct gmon_hist_hdr) + result->kcountsize + 4 + 4 + (result->fromssize * sizeof (struct here_cg_arc_record))); if (do_test) printf ("expected size: %Zd\n", result->expected_size); #define SCALE_1_TO_1 0x10000L if (result->kcountsize < result->highpc - result->lowpc) { size_t range = result->highpc - result->lowpc; size_t quot = range / result->kcountsize; if (quot >= SCALE_1_TO_1) result->s_scale = 1; else if (quot >= SCALE_1_TO_1 / 256) result->s_scale = SCALE_1_TO_1 / quot; else if (range > ULONG_MAX / 256) result->s_scale = ((SCALE_1_TO_1 * 256) / (range / (result->kcountsize / 256))); else result->s_scale = ((SCALE_1_TO_1 * 256) / ((range * 256) / result->kcountsize)); } else result->s_scale = SCALE_1_TO_1; if (do_test) printf ("s_scale: %d\n", result->s_scale); /* Determine the dynamic string table. */ if (map->l_info[DT_STRTAB] == NULL) result->dynstrtab = NULL; else result->dynstrtab = (const char *) (map->l_addr + map->l_info[DT_STRTAB]->d_un.d_ptr); if (do_test) printf ("string table: %p\n", result->dynstrtab); /* Determine the soname. */ if (map->l_info[DT_SONAME] == NULL) result->soname = NULL; else result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val; if (do_test) printf ("soname: %s\n", result->soname); /* Now we have to load the symbol table. First load the section header table. */ ehdr = (ElfW(Ehdr) *) map->l_addr; /* Make sure we are on the right party. */ if (ehdr->e_shentsize != sizeof (ElfW(Shdr))) abort (); /* And we need the shared object file descriptor again. */ fd = open (map->l_name, O_RDONLY); if (fd == -1) /* Dooh, this really shouldn't happen. We know the file is available. */ error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed")); /* Now map the section header. */ ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr)) + (ehdr->e_shoff & (pagesize - 1))), PROT_READ, MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1)); if (ptr == MAP_FAILED) error (EXIT_FAILURE, errno, _("mapping of section headers failed")); shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1))); /* Get the section header string table. */ ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))), PROT_READ, MAP_SHARED|MAP_FILE, fd, shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1)); if (ptr == MAP_FAILED) error (EXIT_FAILURE, errno, _("mapping of section header string table failed")); shstrtab = ((const char *) ptr + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))); /* Search for the ".symtab" section. */ symtab_entry = NULL; for (idx = 0; idx < ehdr->e_shnum; ++idx) if (shdr[idx].sh_type == SHT_SYMTAB && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0) { symtab_entry = &shdr[idx]; break; } /* We don't need the section header string table anymore. */ munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)))); if (symtab_entry == NULL) { fprintf (stderr, _("\ *** The file `%s' is stripped: no detailed analysis possible\n"), name); result->symtab = NULL; result->strtab = NULL; } else { ElfW(Off) min_offset, max_offset; ElfW(Shdr) *strtab_entry; strtab_entry = &shdr[symtab_entry->sh_link]; /* Find the minimum and maximum offsets that include both the symbol table and the string table. */ if (symtab_entry->sh_offset < strtab_entry->sh_offset) { min_offset = symtab_entry->sh_offset & ~(pagesize - 1); max_offset = strtab_entry->sh_offset + strtab_entry->sh_size; } else { min_offset = strtab_entry->sh_offset & ~(pagesize - 1); max_offset = symtab_entry->sh_offset + symtab_entry->sh_size; } result->symbol_map = mmap (NULL, max_offset - min_offset, PROT_READ, MAP_SHARED|MAP_FILE, fd, min_offset); if (result->symbol_map == NULL) error (EXIT_FAILURE, errno, _("failed to load symbol data")); result->symtab = (const ElfW(Sym) *) ((const char *) result->symbol_map + (symtab_entry->sh_offset - min_offset)); result->symtab_size = symtab_entry->sh_size; result->strtab = ((const char *) result->symbol_map + (strtab_entry->sh_offset - min_offset)); result->symbol_mapsize = max_offset - min_offset; } /* Now we also don't need the section header table anymore. */ munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)), (ehdr->e_phnum * sizeof (ElfW(Shdr)) + (ehdr->e_shoff & (pagesize - 1)))); /* Free the descriptor for the shared object. */ close (fd); return result; } static void unload_shobj (struct shobj *shobj) { munmap (shobj->symbol_map, shobj->symbol_mapsize); dlclose (shobj->map); } static struct profdata * load_profdata (const char *name, struct shobj *shobj) { struct profdata *result; int fd; struct stat st; void *addr; struct gmon_hdr gmon_hdr; struct gmon_hist_hdr hist_hdr; uint32_t *narcsp; size_t fromlimit; struct here_cg_arc_record *data; struct here_fromstruct *froms; uint16_t *tos; size_t fromidx; size_t idx; fd = open (name, O_RDONLY); if (fd == -1) { char *ext_name; if (errno != ENOENT || strchr (name, '/') != NULL) /* The file exists but we are not allowed to read it or the file does not exist and the name includes a path specification.. */ return NULL; /* A file with the given name does not exist in the current directory, try it in the default location where the profiling files are created. */ ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/"); stpcpy (stpcpy (ext_name, "/var/tmp/"), name); name = ext_name; fd = open (ext_name, O_RDONLY); if (fd == -1) { /* Even this file does not exist. */ error (0, errno, _("cannot load profiling data")); return NULL; } } /* We have found the file, now make sure it is the right one for the data file. */ if (fstat (fd, &st) < 0) { error (0, errno, _("while stat'ing profiling data file")); close (fd); return NULL; } if (st.st_size != shobj->expected_size) { error (0, 0, _("profiling data file `%s' does match shared object `%s'"), name, shobj->name); close (fd); return NULL; } /* The data file is most probably the right one for our shared object. Map it now. */ addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0); if (addr == MAP_FAILED) { error (0, errno, _("failed to mmap the profiling data file")); close (fd); return NULL; } /* We don't need the file desriptor anymore. */ if (close (fd) < 0) { error (0, errno, _("error while closing the profiling data file")); munmap (addr, st.st_size); return NULL; } /* Prepare the result. */ result = (struct profdata *) calloc (1, sizeof (struct profdata)); if (result == NULL) { error (0, errno, _("cannot create internal descriptor")); munmap (addr, st.st_size); return NULL; } /* Store the address and size so that we can later free the resources. */ result->addr = addr; result->size = st.st_size; /* Pointer to data after the header. */ result->hist = (char *) ((struct gmon_hdr *) addr + 1); result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist + sizeof (uint32_t)); result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t) + sizeof (struct gmon_hist_hdr)); /* Compute pointer to array of the arc information. */ narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize + sizeof (uint32_t)); result->narcs = *narcsp; result->data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t)); /* Create the gmon_hdr we expect or write. */ memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr)); memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie)); *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION; /* Create the hist_hdr we expect or write. */ *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr; *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr; if (do_test) printf ("low_pc = %p\nhigh_pc = %p\n", *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc); *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER); *(int32_t *) hist_hdr.prof_rate = __profile_frequency (); strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); hist_hdr.dimen_abbrev = 's'; /* Test whether the header of the profiling data is ok. */ if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST || memcmp (result->hist_hdr, &hist_hdr, sizeof (struct gmon_hist_hdr)) != 0 || narcsp[-1] != GMON_TAG_CG_ARC) { free (result); error (0, 0, _("`%s' is no correct profile data file for `%s'"), name, shobj->name); munmap (addr, st.st_size); return NULL; } /* We are pretty sure now that this is a correct input file. Set up the remaining information in the result structure and return. */ result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1); if (result->tos == NULL) { error (0, errno, _("cannot create internal descriptor")); munmap (addr, st.st_size); free (result); return NULL; } result->froms = (struct here_fromstruct *) ((char *) result->tos + shobj->tossize); fromidx = 0; /* Now we have to process all the arc count entries. */ fromlimit = shobj->fromlimit; data = result->data; froms = result->froms; tos = result->tos; for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx) { size_t to_index; size_t newfromidx; to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos))); newfromidx = fromidx++; froms[newfromidx].here = &data[idx]; froms[newfromidx].link = tos[to_index]; tos[to_index] = newfromidx; } return result; } static void unload_profdata (struct profdata *profdata) { free (profdata->tos); munmap (profdata->addr, profdata->size); free (profdata); } static void count_total_ticks (struct shobj *shobj, struct profdata *profdata) { volatile uint16_t *kcount = profdata->kcount; size_t maxkidx = shobj->kcountsize; size_t factor = 2 * (65536 / shobj->s_scale); size_t kidx = 0; size_t sidx = 0; while (sidx < symidx) { uintptr_t start = sortsym[sidx]->addr; uintptr_t end = start + sortsym[sidx]->size; while (kidx < maxkidx && factor * kidx < start) ++kidx; if (kidx == maxkidx) break; while (kidx < maxkidx && factor * kidx < end) sortsym[sidx]->ticks += kcount[kidx++]; if (kidx == maxkidx) break; total_ticks += sortsym[sidx++]->ticks; } } static size_t find_symbol (uintptr_t addr) { size_t sidx = 0; while (sidx < symidx) { uintptr_t start = sortsym[sidx]->addr; uintptr_t end = start + sortsym[sidx]->size; if (addr >= start && addr < end) return sidx; if (addr < start) break; ++sidx; } return (size_t) -1l; } static void count_calls (struct shobj *shobj, struct profdata *profdata) { struct here_cg_arc_record *data = profdata->data; uint32_t narcs = profdata->narcs; uint32_t cnt; for (cnt = 0; cnt < narcs; ++cnt) { uintptr_t here = data[cnt].self_pc; size_t symbol_idx; /* Find the symbol for this address. */ symbol_idx = find_symbol (here); if (symbol_idx != (size_t) -1l) sortsym[symbol_idx]->calls += data[cnt].count; } } static int symorder (const void *o1, const void *o2) { const struct known_symbol *p1 = (const struct known_symbol *) o1; const struct known_symbol *p2 = (const struct known_symbol *) o2; return p1->addr - p2->addr; } static void printsym (const void *node, VISIT value, int level) { if (value == leaf || value == postorder) sortsym[symidx++] = *(struct known_symbol **) node; } static void read_symbols (struct shobj *shobj) { void *load_addr = (void *) shobj->map->l_addr; int n = 0; /* Initialize the obstacks. */ #define obstack_chunk_alloc malloc #define obstack_chunk_free free obstack_init (&shobj->ob_str); obstack_init (&shobj->ob_sym); obstack_init (&ob_list); /* Process the symbols. */ if (shobj->symtab) { const ElfW(Sym) *sym = shobj->symtab; const ElfW(Sym) *sym_end = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size); for (; sym < sym_end; sym++) if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE) && sym->st_size != 0) { struct known_symbol **existp; struct known_symbol *newsym = (struct known_symbol *) obstack_alloc (&shobj->ob_sym, sizeof (*newsym)); if (newsym == NULL) error (EXIT_FAILURE, errno, _("cannot allocate symbol data")); newsym->name = &shobj->strtab[sym->st_name]; newsym->addr = sym->st_value; newsym->size = sym->st_size; newsym->ticks = 0; newsym->calls = 0; existp = tfind (newsym, &symroot, symorder); if (existp == NULL) { /* New function. */ tsearch (newsym, &symroot, symorder); ++n; } else { /* The function is already defined. See whether we have a better name here. */ if ((*existp)->name[0] == '_' && newsym->name[0] != '_') *existp = newsym; else /* We don't need the allocated memory. */ obstack_free (&shobj->ob_sym, newsym); } } } else { /* Blarg, the binary is stripped. We have to rely on the information contained in the dynamic section of the object. */ const ElfW(Sym) *symtab = (load_addr + shobj->map->l_info[DT_SYMTAB]->d_un.d_ptr); const char *strtab = (load_addr + shobj->map->l_info[DT_STRTAB]->d_un.d_ptr); /* We assume that the string table follows the symbol table, because there is no way in ELF to know the size of the dynamic symbol table!! */ while ((void *) symtab < (void *) strtab) { if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE) && symtab->st_size != 0) { struct known_symbol *newsym; struct known_symbol **existp; newsym = (struct known_symbol *) obstack_alloc (&shobj->ob_sym, sizeof (*newsym)); if (newsym == NULL) error (EXIT_FAILURE, errno, _("cannot allocate symbol data")); newsym->name = &strtab[symtab->st_name]; newsym->addr = symtab->st_value; newsym->size = symtab->st_size; newsym->ticks = 0; newsym->froms = NULL; newsym->tos = NULL; existp = tfind (newsym, &symroot, symorder); if (existp == NULL) { /* New function. */ tsearch (newsym, &symroot, symorder); ++n; } else { /* The function is already defined. See whether we have a better name here. */ if ((*existp)->name[0] == '_' && newsym->name[0] != '_') *existp = newsym; else /* We don't need the allocated memory. */ obstack_free (&shobj->ob_sym, newsym); } } } ++symtab; } sortsym = malloc (n * sizeof (struct known_symbol *)); if (sortsym == NULL) abort (); twalk (symroot, printsym); } static void add_arcs (struct profdata *profdata) { uint32_t narcs = profdata->narcs; struct here_cg_arc_record *data = profdata->data; uint32_t cnt; for (cnt = 0; cnt < narcs; ++cnt) { /* First add the incoming arc. */ size_t sym_idx = find_symbol (data[cnt].self_pc); if (sym_idx != (size_t) -1l) { struct known_symbol *sym = sortsym[sym_idx]; struct arc_list *runp = sym->froms; while (runp != NULL && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l) || (data[cnt].from_pc != 0 && (runp->idx == (size_t) -1l || data[cnt].from_pc < sortsym[runp->idx]->addr || (data[cnt].from_pc >= (sortsym[runp->idx]->addr + sortsym[runp->idx]->size)))))) runp = runp->next; if (runp == NULL) { /* We need a new entry. */ struct arc_list *newp = (struct arc_list *) obstack_alloc (&ob_list, sizeof (struct arc_list)); if (data[cnt].from_pc == 0) newp->idx = (size_t) -1l; else newp->idx = find_symbol (data[cnt].from_pc); newp->count = data[cnt].count; newp->next = sym->froms; sym->froms = newp; } else /* Increment the counter for the found entry. */ runp->count += data[cnt].count; } /* Now add it to the appropriate outgoing list. */ sym_idx = find_symbol (data[cnt].from_pc); if (sym_idx != (size_t) -1l) { struct known_symbol *sym = sortsym[sym_idx]; struct arc_list *runp = sym->tos; while (runp != NULL && (runp->idx == (size_t) -1l || data[cnt].self_pc < sortsym[runp->idx]->addr || data[cnt].self_pc >= (sortsym[runp->idx]->addr + sortsym[runp->idx]->size))) runp = runp->next; if (runp == NULL) { /* We need a new entry. */ struct arc_list *newp = (struct arc_list *) obstack_alloc (&ob_list, sizeof (struct arc_list)); newp->idx = find_symbol (data[cnt].self_pc); newp->count = data[cnt].count; newp->next = sym->tos; sym->tos = newp; } else /* Increment the counter for the found entry. */ runp->count += data[cnt].count; } } } static int countorder (const void *p1, const void *p2) { struct known_symbol *s1 = (struct known_symbol *) p1; struct known_symbol *s2 = (struct known_symbol *) p2; if (s1->ticks != s2->ticks) return (int) (s2->ticks - s1->ticks); if (s1->calls != s2->calls) return (int) (s2->calls - s1->calls); return strcmp (s1->name, s2->name); } static double tick_unit; static uintmax_t cumu_ticks; static void printflat (const void *node, VISIT value, int level) { if (value == leaf || value == postorder) { struct known_symbol *s = *(struct known_symbol **) node; cumu_ticks += s->ticks; printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n", total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0, tick_unit * cumu_ticks, tick_unit * s->ticks, s->calls, s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0, /* FIXME: don't know about called functions. */ s->name); } } /* ARGUSED */ static void freenoop (void *p) { } static void generate_flat_profile (struct profdata *profdata) { size_t n; void *data = NULL; tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate; printf ("Flat profile:\n\n" "Each sample counts as %g %s.\n", tick_unit, profdata->hist_hdr->dimen); fputs (" % cumulative self self total\n" " time seconds seconds calls us/call us/call name\n", stdout); for (n = 0; n < symidx; ++n) if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0) tsearch (sortsym[n], &data, countorder); twalk (data, printflat); tdestroy (data, freenoop); } static void generate_call_graph (struct profdata *profdata) { size_t cnt; puts ("\nindex % time self children called name\n"); for (cnt = 0; cnt < symidx; ++cnt) if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL) { struct arc_list *runp; size_t n; /* First print the from-information. */ runp = sortsym[cnt]->froms; while (runp != NULL) { printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s", (runp->idx != (size_t) -1l ? sortsym[runp->idx]->ticks * tick_unit : 0.0), 0.0, /* FIXME: what's time for the children, recursive */ runp->count, sortsym[cnt]->calls, (runp->idx != (size_t) -1l ? sortsym[runp->idx]->name : "<UNKNOWN>")); if (runp->idx != (size_t) -1l) printf (" [%Zd]", runp->idx); putchar_unlocked ('\n'); runp = runp->next; } /* Info abount the function itself. */ n = printf ("[%Zu]", cnt); printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n", 7 - n, " ", total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0, sortsym[cnt]->ticks * tick_unit, 0.0, /* FIXME: what's time for the children, recursive */ sortsym[cnt]->calls, sortsym[cnt]->name, cnt); /* Info about the functions this function calls. */ runp = sortsym[cnt]->tos; while (runp != NULL) { printf (" %8.2f%8.2f%9" PRIdMAX "/", (runp->idx != (size_t) -1l ? sortsym[runp->idx]->ticks * tick_unit : 0.0), 0.0, /* FIXME: what's time for the children, recursive */ runp->count); if (runp->idx != (size_t) -1l) printf ("%-9" PRIdMAX " %s [%Zd]\n", sortsym[runp->idx]->calls, sortsym[runp->idx]->name, runp->idx); else fputs ("??? <UNKNOWN>\n\n", stdout); runp = runp->next; } fputs ("-----------------------------------------------\n", stdout); } } static void generate_call_pair_list (struct profdata *profdata) { size_t cnt; for (cnt = 0; cnt < symidx; ++cnt) if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL) { struct arc_list *runp; /* First print the incoming arcs. */ runp = sortsym[cnt]->froms; while (runp != NULL) { if (runp->idx == (size_t) -1l) printf ("\ <UNKNOWN> %-34s %9" PRIdMAX "\n", sortsym[cnt]->name, runp->count); runp = runp->next; } /* Next the outgoing arcs. */ runp = sortsym[cnt]->tos; while (runp != NULL) { printf ("%-34s %-34s %9" PRIdMAX "\n", sortsym[cnt]->name, (runp->idx != (size_t) -1l ? sortsym[runp->idx]->name : "<UNKNOWN>"), runp->count); runp = runp->next; } } }