From 1e372ded4f83362509c8672ff501cba871bb1edc Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Thu, 24 Jan 2019 12:46:59 +0000 Subject: Refactor hp-timing rtld usage This patch refactors how hp-timing is used in the loader code for statistics reporting. The HP_TIMING_AVAIL and HP_SMALL_TIMING_AVAIL are removed and HP_TIMING_INLINE is used instead to check for hp-timing availability. For alpha, which only defines HP_SMALL_TIMING_AVAIL, the HP_TIMING_INLINE is set iff IS_IN(rtld). Checked on aarch64-linux-gnu, x86_64-linux-gnu, and i686-linux-gnu. I also checked the builds for all affected ABIs. * benchtests/bench-timing.h: Replace HP_TIMING_AVAIL with HP_TIMING_INLINE. * nptl/descr.h: Likewise. * elf/rtld.c (RLTD_TIMING_DECLARE, RTLD_TIMING_NOW, RTLD_TIMING_DIFF, RTLD_TIMING_ACCUM_NT, RTLD_TIMING_SET): Define. (dl_start_final_info, _dl_start_final, dl_main, print_statistics): Abstract hp-timing usage with RTLD_* macros. * sysdeps/alpha/hp-timing.h (HP_TIMING_INLINE): Define iff IS_IN(rtld). (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Remove. * sysdeps/generic/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL, HP_TIMING_NONAVAIL): Likewise. * sysdeps/ia64/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/powerpc/powerpc32/power4/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/powerpc/powerpc64/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/sparc/sparc32/sparcv9/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/sparc/sparc64/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/x86/hp-timing.h (HP_TIMING_AVAIL, HP_SMALL_TIMING_AVAIL): Likewise. * sysdeps/generic/hp-timing-common.h: Update comment with HP_TIMING_AVAIL removal.
--- elf/rtld.c | 281 ++++++++++++++++++++++++++++--------------------------------- 1 file changed, 130 insertions(+), 151 deletions(-) (limited to 'elf/rtld.c') diff --git a/elf/rtld.c b/elf/rtld.c index 1f20d4a..c9490ff 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -46,6 +46,49 @@ #include +/* Only enables rtld profiling for architectures which provides non generic + hp-timing support. The generic support requires either syscall + (clock_gettime), which will incur in extra overhead on loading time. + Using vDSO is also an option, but it will require extra support on loader + to setup the vDSO pointer before its usage. */ +#if HP_TIMING_INLINE +# define RLTD_TIMING_DECLARE(var, classifier,...) \ + classifier hp_timing_t var __VA_ARGS__ +# define RTLD_TIMING_VAR(var) RLTD_TIMING_DECLARE (var, ) +# define RTLD_TIMING_SET(var, value) (var) = (value) +# define RTLD_TIMING_REF(var) &(var) + +static inline void +rtld_timer_start (hp_timing_t *var) +{ + HP_TIMING_NOW (*var); +} + +static inline void +rtld_timer_stop (hp_timing_t *var, hp_timing_t start) +{ + hp_timing_t stop; + HP_TIMING_NOW (stop); + HP_TIMING_DIFF (*var, start, stop); +} + +static inline void +rtld_timer_accum (hp_timing_t *sum, hp_timing_t start) +{ + hp_timing_t stop; + rtld_timer_stop (&stop, start); + HP_TIMING_ACCUM_NT(*sum, stop); +} +#else +# define RLTD_TIMING_DECLARE(var, classifier...) +# define RTLD_TIMING_SET(var, value) +# define RTLD_TIMING_VAR(var) +# define RTLD_TIMING_REF(var) 0 +# define rtld_timer_start(var) +# define rtld_timer_stop(var, start) +# define rtld_timer_accum(sum, start) +#endif + /* Avoid PLT use for our local calls at startup. */ extern __typeof (__mempcpy) __mempcpy attribute_hidden; @@ -62,7 +105,7 @@ static void print_missing_version (int errcode, const char *objname, const char *errsting); /* Print the various times we collected. 
*/ -static void print_statistics (hp_timing_t *total_timep); +static void print_statistics (const hp_timing_t *total_timep); /* Add audit objects. */ static void process_dl_audit (char *str); @@ -303,11 +346,9 @@ static struct libname_list _dl_rtld_libname; static struct libname_list _dl_rtld_libname2; /* Variable for statistics. */ -#ifndef HP_TIMING_NONAVAIL -static hp_timing_t relocate_time; -static hp_timing_t load_time attribute_relro; -static hp_timing_t start_time attribute_relro; -#endif +RLTD_TIMING_DECLARE (relocate_time, static); +RLTD_TIMING_DECLARE (load_time, static, attribute_relro); +RLTD_TIMING_DECLARE (start_time, static, attribute_relro); /* Additional definitions needed by TLS initialization. */ #ifdef TLS_INIT_HELPER @@ -335,9 +376,7 @@ static ElfW(Addr) _dl_start_final (void *arg); struct dl_start_final_info { struct link_map l; -#if !defined HP_TIMING_NONAVAIL && HP_TIMING_INLINE - hp_timing_t start_time; -#endif + RTLD_TIMING_VAR (start_time); }; static ElfW(Addr) _dl_start_final (void *arg, struct dl_start_final_info *info); @@ -371,16 +410,11 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) { ElfW(Addr) start_addr; - if (HP_SMALL_TIMING_AVAIL) - { - /* If it hasn't happen yet record the startup time. */ - if (! HP_TIMING_INLINE) - HP_TIMING_NOW (start_time); -#if !defined DONT_USE_BOOTSTRAP_MAP && !defined HP_TIMING_NONAVAIL - else - start_time = info->start_time; + /* If it hasn't happen yet record the startup time. */ + rtld_timer_start (&start_time); +#if !defined DONT_USE_BOOTSTRAP_MAP + RTLD_TIMING_SET (start_time, info->start_time); #endif - } /* Transfer data about ourselves to the permanent link_map structure. */ #ifndef DONT_USE_BOOTSTRAP_MAP @@ -412,27 +446,11 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) entry point on the same stack we entered on. 
*/ start_addr = _dl_sysdep_start (arg, &dl_main); -#ifndef HP_TIMING_NONAVAIL - hp_timing_t rtld_total_time; - if (HP_SMALL_TIMING_AVAIL) - { - hp_timing_t end_time; - - /* Get the current time. */ - HP_TIMING_NOW (end_time); - - /* Compute the difference. */ - HP_TIMING_DIFF (rtld_total_time, start_time, end_time); - } -#endif - if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS)) { -#ifndef HP_TIMING_NONAVAIL - print_statistics (&rtld_total_time); -#else - print_statistics (NULL); -#endif + RTLD_TIMING_VAR (rtld_total_time); + rtld_timer_stop (&rtld_total_time, start_time); + print_statistics (RTLD_TIMING_REF(rtld_total_time)); } return start_addr; @@ -457,11 +475,10 @@ _dl_start (void *arg) #define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP #include "dynamic-link.h" - if (HP_TIMING_INLINE && HP_SMALL_TIMING_AVAIL) #ifdef DONT_USE_BOOTSTRAP_MAP - HP_TIMING_NOW (start_time); + rtld_timer_start (&start_time); #else - HP_TIMING_NOW (info.start_time); + rtld_timer_start (&info.start_time); #endif /* Partly clean the `bootstrap_map' structure up. Don't use @@ -1078,11 +1095,6 @@ dl_main (const ElfW(Phdr) *phdr, unsigned int i; bool prelinked = false; bool rtld_is_main = false; -#ifndef HP_TIMING_NONAVAIL - hp_timing_t start; - hp_timing_t stop; - hp_timing_t diff; -#endif void *tcbp = NULL; GL(dl_init_static_tls) = &_dl_nothread_init_static_tls; @@ -1258,12 +1270,11 @@ of this helper program; chances are you did not intend to run this program.\n\ } else { - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); _dl_map_object (NULL, rtld_progname, lt_executable, 0, __RTLD_OPENEXEC, LM_ID_BASE); - HP_TIMING_NOW (stop); - - HP_TIMING_DIFF (load_time, start, stop); + rtld_timer_stop (&load_time, start); } /* Now the map for the main executable is available. 
*/ @@ -1666,20 +1677,18 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); if (__glibc_unlikely (preloadlist != NULL)) { - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); npreloads += handle_preload_list (preloadlist, main_map, "LD_PRELOAD"); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (diff, start, stop); - HP_TIMING_ACCUM_NT (load_time, diff); + rtld_timer_accum (&load_time, start); } if (__glibc_unlikely (preloadarg != NULL)) { - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); npreloads += handle_preload_list (preloadarg, main_map, "--preload"); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (diff, start, stop); - HP_TIMING_ACCUM_NT (load_time, diff); + rtld_timer_accum (&load_time, start); } /* There usually is no ld.so.preload file, it should only be used @@ -1739,7 +1748,8 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); file[file_size - 1] = '\0'; } - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); if (file != problem) { @@ -1757,9 +1767,7 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); npreloads += do_preload (p, main_map, preload_file); } - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (diff, start, stop); - HP_TIMING_ACCUM_NT (load_time, diff); + rtld_timer_accum (&load_time, start); /* We don't need the file anymore. */ __munmap (file, file_size); @@ -1783,11 +1791,12 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); /* Load all the libraries specified by DT_NEEDED entries. If LD_PRELOAD specified some libraries to load, these are inserted before the actual dependencies in the executable's searchlist for symbol resolution. 
*/ - HP_TIMING_NOW (start); - _dl_map_object_deps (main_map, preloads, npreloads, mode == trace, 0); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (diff, start, stop); - HP_TIMING_ACCUM_NT (load_time, diff); + { + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); + _dl_map_object_deps (main_map, preloads, npreloads, mode == trace, 0); + rtld_timer_accum (&load_time, start); + } /* Mark all objects as being in the global scope. */ for (i = main_map->l_searchlist.r_nlist; i > 0; ) @@ -2180,12 +2189,10 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); if (main_map->l_info [ADDRIDX (DT_GNU_CONFLICT)] != NULL) { ElfW(Rela) *conflict, *conflictend; -#ifndef HP_TIMING_NONAVAIL - hp_timing_t start; - hp_timing_t stop; -#endif - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); + assert (main_map->l_info [VALIDX (DT_GNU_CONFLICTSZ)] != NULL); conflict = (ElfW(Rela) *) main_map->l_info [ADDRIDX (DT_GNU_CONFLICT)]->d_un.d_ptr; @@ -2193,8 +2200,8 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); ((char *) conflict + main_map->l_info [VALIDX (DT_GNU_CONFLICTSZ)]->d_un.d_val); _dl_resolve_conflicts (main_map, conflict, conflictend); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (relocate_time, start, stop); + + rtld_timer_stop (&relocate_time, start); } @@ -2222,15 +2229,12 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); know that because it is self-contained). */ int consider_profiling = GLRO(dl_profile) != NULL; -#ifndef HP_TIMING_NONAVAIL - hp_timing_t start; - hp_timing_t stop; -#endif /* If we are profiling we also must do lazy reloaction. 
*/ GLRO(dl_lazy) |= consider_profiling; - HP_TIMING_NOW (start); + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); unsigned i = main_map->l_searchlist.r_nlist; while (i-- > 0) { @@ -2257,9 +2261,7 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); if (l->l_tls_blocksize != 0 && tls_init_tp_called) _dl_add_to_slotinfo (l); } - HP_TIMING_NOW (stop); - - HP_TIMING_DIFF (relocate_time, start, stop); + rtld_timer_stop (&relocate_time, start); /* Now enable profiling if needed. Like the previous call, this has to go here because the calls it makes should use the @@ -2302,19 +2304,14 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]); re-relocation, we might call a user-supplied function (e.g. calloc from _dl_relocate_object) that uses TLS data. */ -#ifndef HP_TIMING_NONAVAIL - hp_timing_t start; - hp_timing_t stop; - hp_timing_t add; -#endif + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); - HP_TIMING_NOW (start); /* Mark the link map as not yet relocated again. */ GL(dl_rtld_map).l_relocated = 0; _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); - HP_TIMING_NOW (stop); - HP_TIMING_DIFF (add, start, stop); - HP_TIMING_ACCUM_NT (relocate_time, add); + + rtld_timer_accum (&relocate_time, start); } /* Do any necessary cleanups for the startup OS interface code. @@ -2746,46 +2743,51 @@ process_envvars (enum mode *modep) } } +#if HP_TIMING_INLINE +static void +print_statistics_item (const char *title, hp_timing_t time, + hp_timing_t total) +{ + char cycles[HP_TIMING_PRINT_SIZE]; + HP_TIMING_PRINT (cycles, sizeof (cycles), time); + + char relative[3 * sizeof (hp_timing_t) + 2]; + char *cp = _itoa ((1000ULL * time) / total, relative + sizeof (relative), + 10, 0); + /* Sets the decimal point. */ + char *wp = relative; + switch (relative + sizeof (relative) - cp) + { + case 3: + *wp++ = *cp++; + /* Fall through. */ + case 2: + *wp++ = *cp++; + /* Fall through. 
*/ + case 1: + *wp++ = '.'; + *wp++ = *cp++; + } + *wp = '\0'; + _dl_debug_printf ("%s: %s cycles (%s%%)\n", title, cycles, relative); +} +#endif /* Print the various times we collected. */ static void __attribute ((noinline)) -print_statistics (hp_timing_t *rtld_total_timep) +print_statistics (const hp_timing_t *rtld_total_timep) { -#ifndef HP_TIMING_NONAVAIL - char buf[200]; - char *cp; - char *wp; - - /* Total time rtld used. */ - if (HP_SMALL_TIMING_AVAIL) - { - HP_TIMING_PRINT (buf, sizeof (buf), *rtld_total_timep); - _dl_debug_printf ("\nruntime linker statistics:\n" - " total startup time in dynamic loader: %s\n", buf); - - /* Print relocation statistics. */ - char pbuf[30]; - HP_TIMING_PRINT (buf, sizeof (buf), relocate_time); - cp = _itoa ((1000ULL * relocate_time) / *rtld_total_timep, - pbuf + sizeof (pbuf), 10, 0); - wp = pbuf; - switch (pbuf + sizeof (pbuf) - cp) - { - case 3: - *wp++ = *cp++; - /* Fall through. */ - case 2: - *wp++ = *cp++; - /* Fall through. */ - case 1: - *wp++ = '.'; - *wp++ = *cp++; - } - *wp = '\0'; - _dl_debug_printf ("\ - time needed for relocation: %s (%s%%)\n", buf, pbuf); - } +#if HP_TIMING_INLINE + { + char cycles[HP_TIMING_PRINT_SIZE]; + HP_TIMING_PRINT (cycles, sizeof (cycles), *rtld_total_timep); + _dl_debug_printf ("\nruntime linker statistics:\n" + " total startup time in dynamic loader: %s cycles\n", + cycles); + print_statistics_item (" time needed for relocation", + relocate_time, *rtld_total_timep); + } #endif unsigned long int num_relative_relocations = 0; @@ -2826,31 +2828,8 @@ print_statistics (hp_timing_t *rtld_total_timep) GL(dl_num_cache_relocations), num_relative_relocations); -#ifndef HP_TIMING_NONAVAIL - /* Time spend while loading the object and the dependencies. 
*/ - if (HP_SMALL_TIMING_AVAIL) - { - char pbuf[30]; - HP_TIMING_PRINT (buf, sizeof (buf), load_time); - cp = _itoa ((1000ULL * load_time) / *rtld_total_timep, - pbuf + sizeof (pbuf), 10, 0); - wp = pbuf; - switch (pbuf + sizeof (pbuf) - cp) - { - case 3: - *wp++ = *cp++; - /* Fall through. */ - case 2: - *wp++ = *cp++; - /* Fall through. */ - case 1: - *wp++ = '.'; - *wp++ = *cp++; - } - *wp = '\0'; - _dl_debug_printf ("\ - time needed to load objects: %s (%s%%)\n", - buf, pbuf); - } +#if HP_TIMING_INLINE + print_statistics_item (" time needed to load objects", + load_time, *rtld_total_timep); #endif } -- cgit v1.1