From c0fb8a563c1c49e5fbec9bc22deac618910a3ff4 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 7 Jun 1998 14:06:56 +0000 Subject: Update. 1998-06-07 13:32 Ulrich Drepper * libc.map: Add _dl_profile. * elf/dl-reloc.c (_dl_relocate_object): Take extra argument, pass this to ELF_DYNAMIC_RELOCATE. Always allocate array for relocation result if LD_PROFILE is defined. * elf/ldsodefs.h: Adjust prototypes. * elf/dl-open.c (_dl_open): Call relocation function with extra argument. * elf/rtld.c: Likewise. * elf/dl-profile.c (_dl_mcount): Don't mark as internal function. Correct loop condition. * elf/dynamic-link.h: Don't examine _dl_profile variable, pass consider_profile to runtime setup function. * sysdeps/i386/dl-machine.h (elf_machine_runtime_setup): Use _dl_runtime_profile for all shared objects if LD_PROFILE is defined. * elf/dl-support.c: Define __libc_stack_end. * elf/rtld.c: Likewise. * sysdeps/generic/libc-start.c: Store last stack address in __libc_stack_end. * sysdeps/i386/dl-machine.h (_dl_start_user): Store stack address. * sysdeps/i386/elf/start.s: Call __libc_start_main with extra argument. * elf/elf.h: Include <features.h>, not <sys/cdefs.h>. Include <stdint.h>, not <inttypes.h>. * elf/sprof.c: Implement flat profiling. * libio/fgetc.c: Call _IO_cleanup_region_end with 0 and call _IO_funlockfile explicitly. * libio/fileops.c: Likewise. * libio/fputc.c: Likewise. * libio/freopen.c: Likewise. * libio/freopen64.c: Likewise. * libio/fseek.c: Likewise. * libio/fseeko.c: Likewise. * libio/fseeko64.c: Likewise. * libio/ftello.c: Likewise. * libio/ftello64.c: Likewise. * libio/getc.c: Likewise. * libio/getchar.c: Likewise. * libio/iofclose.c: Likewise. * libio/iofflush.c: Likewise. * libio/iofgetpos.c: Likewise. * libio/iofgetpos64.c: Likewise. * libio/iofgets.c: Likewise. * libio/iofputs.c: Likewise. * libio/iofread.c: Likewise. * libio/iofsetpos.c: Likewise. * libio/iofsetpos64.c: Likewise. * libio/ioftell.c: Likewise. * libio/iofwrite.c: Likewise. * libio/iogetdelim.c: Likewise. 
* libio/iogets.c: Likewise. * libio/ioputs.c: Likewise. * libio/ioseekoff.c: Likewise. * libio/ioseekpos.c: Likewise. * libio/iosetbuffer.c: Likewise. * libio/iosetvbuf.c: Likewise. * libio/ioungetc.c: Likewise. * libio/iovsprintf.c: Likewise. * libio/iovsscanf.c: Likewise. * libio/oldfileops.c: Likewise. * libio/oldiofclose.c: Likewise. * libio/peekc.c: Likewise. * libio/putc.c: Likewise. * libio/putchar.c: Likewise. * libio/rewind.c: Likewise. * malloc/mtrace.c: Pretty print. * misc/mntent.h (struct mntent): Make string elements const char *. * nis/nis_printf.c: Optimize I/O a little bit. * signal/Makefile (distribute): Add sigset-cvt-mask.h. * sysdeps/generic/sigset-cvt-mask.h: New file. * sysdeps/unix/sysv/linux/sigset-cvt-mask.h: New file. * sysdeps/unix/sysv/sysv4/sigset-cvt-mask.h: New file. * sysdeps/posix/sigvec.c: Rewrite to use definitions from sigset-cvt-mask.h to do the dirty work. Patches by Joe Keane. * sysdeps/posix/mkstemp.c: Save one precious byte of rodata. * sysdeps/unix/sysv/linux/i386/sysdep.h: Rewrite PSEUDO etc to make syscall_error label in case of PIC anonymous. * sysdeps/unix/sysv/linux/i386/i686/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/i386/clone.S: Adapt for this change. * sysdeps/unix/sysv/linux/i386/mmap.S: Adapt for this change. * sysdeps/unix/sysv/linux/i386/s_pread64.S: Adapt for this change. * sysdeps/unix/sysv/linux/i386/s_pwrite64.S: Adapt for this change. * sysdeps/unix/sysv/linux/i386/socket.S: Adapt for this change. * sysdeps/unix/sysv/linux/i386/syscall.S: Adapt for this change. 
--- elf/dl-open.c | 3 +- elf/dl-profile.c | 6 +- elf/dl-reloc.c | 7 +- elf/dl-support.c | 3 + elf/dynamic-link.h | 8 +- elf/elf.h | 6 +- elf/ldsodefs.h | 6 +- elf/rtld.c | 15 +++- elf/sprof.c | 220 ++++++++++++++++++++++++++++++++++++++++++++--------- 9 files changed, 215 insertions(+), 59 deletions(-) (limited to 'elf') diff --git a/elf/dl-open.c b/elf/dl-open.c index 308175a..4c4c8ab 100644 --- a/elf/dl-open.c +++ b/elf/dl-open.c @@ -102,7 +102,8 @@ _dl_open (const char *file, int mode) asm ("" : "=r" (reloc) : "0" (reloc)); (*reloc) (l, _dl_object_relocation_scope (l), - (mode & RTLD_BINDING_MASK) == RTLD_LAZY); + ((mode & RTLD_BINDING_MASK) == RTLD_LAZY + || _dl_profile != NULL), _dl_profile != NULL); *_dl_global_scope_end = NULL; } diff --git a/elf/dl-profile.c b/elf/dl-profile.c index c7ac360..a9f8dd6 100644 --- a/elf/dl-profile.c +++ b/elf/dl-profile.c @@ -436,7 +436,6 @@ _dl_start_profile (struct link_map *map, const char *output_dir) void -internal_function _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) { uint16_t *topcindex; @@ -479,7 +478,7 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) /* We have to look through the chain of arcs whether there is already an entry for our arc. 
*/ - while (fromp->here->from_pc == frompc) + while (fromp->here->from_pc != frompc) { if (fromp->link != 0) do @@ -523,8 +522,7 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) data[newarc].self_pc = selfpc; data[newarc].count = 0; fromp->link = 0; - - narcs++; + ++narcs; break; } diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c index 898fb48..e0eae3c 100644 --- a/elf/dl-reloc.c +++ b/elf/dl-reloc.c @@ -28,7 +28,8 @@ void internal_function -_dl_relocate_object (struct link_map *l, struct link_map *scope[], int lazy) +_dl_relocate_object (struct link_map *l, struct link_map *scope[], int lazy, + int consider_profiling) { if (l->l_relocated) return; @@ -72,9 +73,9 @@ _dl_relocate_object (struct link_map *l, struct link_map *scope[], int lazy) l->l_name, (flags))) #include "dynamic-link.h" - ELF_DYNAMIC_RELOCATE (l, lazy, 1); + ELF_DYNAMIC_RELOCATE (l, lazy, consider_profiling); - if (_dl_profile_map == l) + if (_dl_profile != NULL) { /* Allocate the array which will contain the already found relocations. */ diff --git a/elf/dl-support.c b/elf/dl-support.c index 9012a0c..85f656c 100644 --- a/elf/dl-support.c +++ b/elf/dl-support.c @@ -58,6 +58,9 @@ struct link_map *_dl_profile_map; /* Names of shared object for which the RPATHs should be ignored. */ const char *_dl_inhibit_rpath; +/* This is the address of the last stack address ever used. */ +void *__libc_stack_end; + static void non_dynamic_init (void) __attribute__ ((unused)); diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h index aedee20..9d7ae3d 100644 --- a/elf/dynamic-link.h +++ b/elf/dynamic-link.h @@ -1,5 +1,5 @@ /* Inline functions for dynamic linking. - Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -161,10 +161,8 @@ elf_get_dynamic_info (ElfW(Dyn) *dyn, to inline functions containing inlines themselves. 
*/ #define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile) \ do { \ - int profile = (consider_profile && _dl_profile != NULL \ - && _dl_name_match_p (_dl_profile, (map))); \ - int edr_lazy = elf_machine_runtime_setup ((map), (lazy) || profile, \ - profile); \ + int edr_lazy = elf_machine_runtime_setup ((map), (lazy), \ + (consider_profile)); \ ELF_DYNAMIC_DO_REL ((map), edr_lazy); \ ELF_DYNAMIC_DO_RELA ((map), edr_lazy); \ } while (0) diff --git a/elf/elf.h b/elf/elf.h index fcc8a53..80f10d6 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -1,5 +1,5 @@ /* This file defines standard ELF types, structures, and macros. - Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. + Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ian Lance Taylor . @@ -21,13 +21,13 @@ #ifndef _ELF_H #define _ELF_H 1 -#include <sys/cdefs.h> +#include <features.h> __BEGIN_DECLS /* Standard ELF types. */ -#include <inttypes.h> +#include <stdint.h> /* Type for a 16-bit quantity. */ typedef uint16_t Elf32_Half; diff --git a/elf/ldsodefs.h b/elf/ldsodefs.h index a2bfc0b..45c2a5e 100644 --- a/elf/ldsodefs.h +++ b/elf/ldsodefs.h @@ -363,7 +363,8 @@ extern struct link_map *_dl_new_object (char *realname, const char *libname, If LAZY is nonzero, don't relocate its PLT. */ extern void _dl_relocate_object (struct link_map *map, struct link_map *scope[], - int lazy) internal_function; + int lazy, int consider_profiling) + internal_function; /* Check the version dependencies of all objects available through MAP. If VERBOSE print some more diagnostics. */ @@ -406,8 +407,7 @@ extern void _dl_start_profile (struct link_map *map, const char *output_dir) internal_function; /* The actual functions used to keep book on the calls. */ -extern void _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc) - internal_function; +extern void _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc); /* Show the members of the auxiliary array passed up from the kernel. 
*/ diff --git a/elf/rtld.c b/elf/rtld.c index fe676f7..58f9da8 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -93,6 +93,9 @@ const char *_dl_inhibit_rpath; /* RPATH values which should be never be called. */ int _dl_starting_up; +/* This variable contains the lowest stack address ever used. */ +void *__libc_stack_end; + static void dl_main (const ElfW(Phdr) *phdr, ElfW(Half) phent, ElfW(Addr) *user_entry); @@ -201,7 +204,7 @@ relocate_doit (void *a) struct relocate_args *args = (struct relocate_args *) a; _dl_relocate_object (args->l, _dl_object_relocation_scope (args->l), - args->lazy); + args->lazy, 0); } static void @@ -852,6 +855,11 @@ of this helper program; chances are you did not intend to run this program.\n\ know that because it is self-contained). */ struct link_map *l; + int consider_profiling = _dl_profile != NULL; + + /* If we are profiling we also must do lazy relocation. */ + lazy |= consider_profiling; + l = _dl_loaded; while (l->l_next) l = l->l_next; @@ -859,7 +867,8 @@ of this helper program; chances are you did not intend to run this program.\n\ { if (l != &_dl_rtld_map) { - _dl_relocate_object (l, _dl_object_relocation_scope (l), lazy); + _dl_relocate_object (l, _dl_object_relocation_scope (l), lazy, + consider_profiling); *_dl_global_scope_end = NULL; } l = l->l_prev; @@ -875,7 +884,7 @@ of this helper program; chances are you did not intend to run this program.\n\ if (_dl_rtld_map.l_opencount > 0) /* There was an explicit ref to the dynamic linker as a shared lib. Re-relocate ourselves with user-controlled symbol definitions. 
*/ - _dl_relocate_object (&_dl_rtld_map, &_dl_default_scope[2], 0); + _dl_relocate_object (&_dl_rtld_map, &_dl_default_scope[2], 0, 0); } { diff --git a/elf/sprof.c b/elf/sprof.c index 95e9664..477d95a 100644 --- a/elf/sprof.c +++ b/elf/sprof.c @@ -68,15 +68,15 @@ extern int __profile_frequency __P ((void)); static void print_version (FILE *stream, struct argp_state *state); void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; -#define OPT_COUNT_TOTAL 1 -#define OPT_TEST 2 +#define OPT_TEST 1 /* Definitions of arguments for argp functions. */ static const struct argp_option options[] = { { NULL, 0, NULL, 0, N_("Output selection:") }, - { "count-total", OPT_COUNT_TOTAL, NULL, 0, - N_("print number of invocations for each function") }, + { "flat-profile", 'p', NULL, 0, + N_("generate flat profile with counts and ticks") }, + { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL }, { NULL, 0, NULL, 0, NULL } }; @@ -101,7 +101,9 @@ static struct argp argp = static enum { NONE = 0, - COUNT_TOTAL + FLAT_MODE = 1 << 0, + + DEFAULT_MODE = FLAT_MODE } mode; /* If nonzero the total number of invocations of a function is emitted. */ @@ -135,6 +137,7 @@ struct known_symbol size_t size; uintmax_t ticks; + uintmax_t calls; }; @@ -173,6 +176,7 @@ struct profdata off_t size; char *hist; + struct gmon_hist_hdr *hist_hdr; uint16_t *kcount; uint32_t narcs; /* Number of arcs in toset. 
*/ struct here_cg_arc_record *data; @@ -192,7 +196,9 @@ static void unload_shobj (struct shobj *shobj); static struct profdata *load_profdata (const char *name, struct shobj *shobj); static void unload_profdata (struct profdata *profdata); static void count_total_ticks (struct shobj *shobj, struct profdata *profdata); +static void count_calls (struct shobj *shobj, struct profdata *profdata); static void read_symbols (struct shobj *shobj); +static void generate_flat_profile (struct profdata *profdata); int @@ -266,26 +272,19 @@ no filename for profiling data given and shared object `%s' has no soname"), read_symbols (shobj_handle); + /* Count the ticks. */ + count_total_ticks (shobj_handle, profdata_handle); + + /* Count the calls. */ + count_calls (shobj_handle, profdata_handle); + + /* If no mode is specified fall back to the default mode. */ + if (mode == NONE) + mode = DEFAULT_MODE; + /* Do some work. */ - switch (mode) - { - case COUNT_TOTAL: - count_total_ticks (shobj_handle, profdata_handle); - { - size_t n; - for (n = 0; n < symidx; ++n) - if (sortsym[n]->ticks != 0) - printf ("Name: %-30s, Ticks: %" PRIdMAX "\n", sortsym[n]->name, - sortsym[n]->ticks); - printf ("Total ticks: %" PRIdMAX "\n", total_ticks); - } - break; - case NONE: - /* Do nothing. */ - break; - default: - assert (! "Internal error"); - } + if (mode & FLAT_MODE) + generate_flat_profile (profdata_handle); /* Free the resources. */ unload_shobj (shobj_handle); @@ -301,9 +300,6 @@ parse_opt (int key, char *arg, struct argp_state *state) { switch (key) { - case OPT_COUNT_TOTAL: - mode = COUNT_TOTAL; - break; case OPT_TEST: do_test = 1; break; @@ -689,6 +685,8 @@ load_profdata (const char *name, struct shobj *shobj) /* Pointer to data after the header. 
*/ result->hist = (char *) ((struct gmon_hdr *) addr + 1); + result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist + + sizeof (uint32_t)); result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t) + sizeof (struct gmon_hist_hdr)); @@ -709,7 +707,7 @@ load_profdata (const char *name, struct shobj *shobj) *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr; if (do_test) printf ("low_pc = %p\nhigh_pc = %p\n", - hist_hdr.low_pc, hist_hdr.high_pc); + *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc); *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER); *(int32_t *) hist_hdr.prof_rate = __profile_frequency (); strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); @@ -718,7 +716,7 @@ load_profdata (const char *name, struct shobj *shobj) /* Test whether the header of the profiling data is ok. */ if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST - || memcmp (result->hist + sizeof (uint32_t), &hist_hdr, + || memcmp (result->hist_hdr, &hist_hdr, sizeof (struct gmon_hist_hdr)) != 0 || narcsp[-1] != GMON_TAG_CG_ARC) { @@ -802,6 +800,49 @@ count_total_ticks (struct shobj *shobj, struct profdata *profdata) } +static struct known_symbol * +find_symbol (uintptr_t addr) +{ + size_t sidx = 0; + + while (sidx < symidx) + { + uintptr_t start = sortsym[sidx]->addr; + uintptr_t end = start + sortsym[sidx]->size; + + if (addr >= start && addr < end) + return sortsym[sidx]; + + if (addr < start) + break; + + ++sidx; + } + + return NULL; +} + + +static void +count_calls (struct shobj *shobj, struct profdata *profdata) +{ + struct here_cg_arc_record *data = profdata->data; + uint32_t narcs = profdata->narcs; + uint32_t cnt; + + for (cnt = 0; cnt < narcs; ++cnt) + { + uintptr_t here = data[cnt].self_pc; + struct known_symbol *symbol; + + /* Find the symbol for this address. 
*/ + symbol = find_symbol (here); + if (symbol != NULL) + symbol->calls += data[cnt].count; + } +} + + static int symorder (const void *o1, const void *o2) { @@ -843,6 +884,7 @@ read_symbols (struct shobj *shobj) || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE) && sym->st_size != 0) { + struct known_symbol **existp; struct known_symbol *newsym = (struct known_symbol *) obstack_alloc (&shobj->ob_sym, sizeof (*newsym)); @@ -853,9 +895,25 @@ read_symbols (struct shobj *shobj) newsym->addr = sym->st_value; newsym->size = sym->st_size; newsym->ticks = 0; - - tsearch (newsym, &symroot, symorder); - ++n; + newsym->calls = 0; + + existp = tfind (newsym, &symroot, symorder); + if (existp == NULL) + { + /* New function. */ + tsearch (newsym, &symroot, symorder); + ++n; + } + else + { + /* The function is already defined. See whether we have + a better name here. */ + if ((*existp)->name[0] == '_' && newsym->name[0] != '_') + *existp = newsym; + else + /* We don't need the allocated memory. */ + obstack_free (&shobj->ob_sym, newsym); + } } } else @@ -872,11 +930,12 @@ read_symbols (struct shobj *shobj) dynamic symbol table!! */ while ((void *) symtab < (void *) strtab) { - if (/*(ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC - || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE) - &&*/ symtab->st_size != 0) + if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC + || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE) + && symtab->st_size != 0) { struct known_symbol *newsym; + struct known_symbol **existp; newsym = (struct known_symbol *) obstack_alloc (&shobj->ob_sym, @@ -889,8 +948,23 @@ read_symbols (struct shobj *shobj) newsym->size = symtab->st_size; newsym->ticks = 0; - tsearch (newsym, &symroot, symorder); - ++n; + existp = tfind (newsym, &symroot, symorder); + if (existp == NULL) + { + /* New function. */ + tsearch (newsym, &symroot, symorder); + ++n; + } + else + { + /* The function is already defined. See whether we have + a better name here. 
*/ + if ((*existp)->name[0] == '_' && newsym->name[0] != '_') + *existp = newsym; + else + /* We don't need the allocated memory. */ + obstack_free (&shobj->ob_sym, newsym); + } } } @@ -903,3 +977,75 @@ read_symbols (struct shobj *shobj) twalk (symroot, printsym); } + + +static int +countorder (const void *p1, const void *p2) +{ + struct known_symbol *s1 = (struct known_symbol *) p1; + struct known_symbol *s2 = (struct known_symbol *) p2; + + if (s1->ticks != s2->ticks) + return (int) (s2->ticks - s1->ticks); + + if (s1->calls != s2->calls) + return (int) (s2->calls - s1->calls); + + return strcmp (s1->name, s2->name); +} + + +static double tick_unit; +static uintmax_t cumu_ticks; + +static void +printflat (const void *node, VISIT value, int level) +{ + if (value == leaf || value == postorder) + { + struct known_symbol *s = *(struct known_symbol **) node; + + cumu_ticks += s->ticks; + + printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f%9.2f %s\n", + total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0, + tick_unit * cumu_ticks, + tick_unit * s->ticks, + s->calls, + s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0, + 0.0, /* FIXME: don't know about called functions. */ + s->name); + } +} + + +/* ARGUSED */ +static void +freenoop (void *p) +{ +} + + +static void +generate_flat_profile (struct profdata *profdata) +{ + size_t n; + void *data = NULL; + + tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate; + + printf ("Flat profile:\n\n" + "Each sample counts as %g %s.\n", + tick_unit, profdata->hist_hdr->dimen); + fputs (" % cumulative self self total\n" + " time seconds seconds calls us/call us/call name\n", + stdout); + + for (n = 0; n < symidx; ++n) + if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0) + tsearch (sortsym[n], &data, countorder); + + twalk (data, printflat); + + tdestroy (data, freenoop); +} -- cgit v1.1