Fix TAB-completion + .gdb_index slowness (generalize filename_seen_cache)

Tab completion when debugging a program binary that uses GDB index is surprisingly much slower than when GDB uses psymtabs instead. Around 1.5x/3x slower. That's surprising, because the whole point of GDB index is to speed things up... For example, with: set pagination off set $count = 0 while $count < 400 complete b string_prin # matches gdb's string_printf printf "count = %d\n", $count set $count = $count + 1 end $ time ./gdb --batch -q ./gdb-with-index -ex "source script.cmd" real 0m11.042s user 0m10.920s sys 0m0.042s $ time ./gdb --batch -q ./gdb-without-index -ex "source script.cmd" real 0m4.635s user 0m4.590s sys 0m0.037s Same but with: - complete b string_prin + complete b zzzzzz to exercise the no-matches worst case, master currently gets you something like: with index without index real 0m11.971s 0m8.413s user 0m11.912s 0m8.355s sys 0m0.035s 0m0.035s Running gdb under perf shows 80% spent inside maybe_add_partial_symtab_filename, and 20% spent in the lbasename inside that. The problem is that tab completion walks over all compunit symtabs, and for each, walks the contained file symtabs. And there are a huge number of file symtabs (each included system header, etc.) that appear in each compunit symtab's file symtab list. As in, when debugging GDB, I have 367381 symtabs iterated, when of those only 5371 filenames are unique... This was a regression from the earlier (nice) split of symtabs in compunit symtabs + file symtabs. The fix here is to add a cache of unique filenames per objfile so that the walk / uniquing is only done once. There's already an abstraction for this in symtab.c; this patch moves that code out to a separate file and C++ifies it a bit. This makes the worst-case scenario above consistently drop to ~2.5s (1.5s for the "string_prin" hit case), making it over 3.3x faster than psymtabs in this use case (7x in the "string_prin" hit case). 
gdb/ChangeLog: 2017-07-17 Pedro Alves <palves@redhat.com> * Makefile.in (COMMON_OBS): Add filename-seen-cache.o. * dwarf2read.c: Include "filename-seen-cache.h". * dwarf2read.c (dwarf2_per_objfile) <filenames_cache>: New field. (dw2_map_symbol_filenames): Build and use a filename_seen_cache. * filename-seen-cache.c: New file. * filename-seen-cache.h: New file. * symtab.c: Include "filename-seen-cache.h". (struct filename_seen_cache, INITIAL_FILENAME_SEEN_CACHE_SIZE) (create_filename_seen_cache, clear_filename_seen_cache) (delete_filename_seen_cache, filename_seen): Delete, parts moved to filename-seen-cache.h/filename-seen-cache.c. (output_source_filename, sources_info) (maybe_add_partial_symtab_filename) (make_source_files_completion_list): Adjust to use filename_seen_cache.
author: Pedro Alves <palves@redhat.com> 2017-07-17 11:28:33 +0100
committer: Pedro Alves <palves@redhat.com> 2017-07-17 11:38:11 +0100
commit: bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7 (patch)
tree: 765f5cc2487d2784efa817f342217a5c9c5618ac /gdb/dwarf2read.c
parent: 330cdd98910dbd34e969f60d48688fb81c2b374a (diff)
download: fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.zip
fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.tar.gz
fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.tar.bz2
1 files changed, 60 insertions, 45 deletions
diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index b690d0b..bb69f44 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -74,7 +74,7 @@
 #include "common/gdb_optional.h"
 #include "common/underlying.h"
 #include "common/byte-vector.h"
-
+#include "filename-seen-cache.h"
 #include <fcntl.h>
 #include <sys/types.h>
 #include <algorithm>
@@ -349,6 +349,10 @@ public:
 
   /* Table containing line_header indexed by offset and offset_in_dwz.  */
   htab_t line_header_hash {};
+
+  /* Table containing all filenames.  This is an optional because the
+     table is lazily constructed on first access.  */
+  gdb::optional<filename_seen_cache> filenames_cache;
 };
 
 static struct dwarf2_per_objfile *dwarf2_per_objfile;
@@ -4312,64 +4316,75 @@ static void
 dw2_map_symbol_filenames (struct objfile *objfile, symbol_filename_ftype *fun,
 			  void *data, int need_fullname)
 {
-  int i;
-  htab_up visited (htab_create_alloc (10, htab_hash_pointer, htab_eq_pointer,
-				      NULL, xcalloc, xfree));
-
   dw2_setup (objfile);
 
-  /* The rule is CUs specify all the files, including those used by
-     any TU, so there's no need to scan TUs here.
-     We can ignore file names coming from already-expanded CUs.  */
-
-  for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
+  if (!dwarf2_per_objfile->filenames_cache)
     {
-      struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i);
+      dwarf2_per_objfile->filenames_cache.emplace ();
 
-      if (per_cu->v.quick->compunit_symtab)
-	{
-	  void **slot = htab_find_slot (visited.get (),
-					per_cu->v.quick->file_names,
-					INSERT);
+      htab_up visited (htab_create_alloc (10,
+					  htab_hash_pointer, htab_eq_pointer,
+					  NULL, xcalloc, xfree));
 
-	  *slot = per_cu->v.quick->file_names;
-	}
-    }
-
-  for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
-    {
-      int j;
-      struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i);
-      struct quick_file_names *file_data;
-      void **slot;
+      /* The rule is CUs specify all the files, including those used
+	 by any TU, so there's no need to scan TUs here.  We can
+	 ignore file names coming from already-expanded CUs.  */
 
-      /* We only need to look at symtabs not already expanded.  */
-      if (per_cu->v.quick->compunit_symtab)
-	continue;
+      for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
+	{
+	  struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i);
 
-      file_data = dw2_get_file_names (per_cu);
-      if (file_data == NULL)
-	continue;
+	  if (per_cu->v.quick->compunit_symtab)
+	    {
+	      void **slot = htab_find_slot (visited.get (),
+					    per_cu->v.quick->file_names,
+					    INSERT);
 
-      slot = htab_find_slot (visited.get (), file_data, INSERT);
-      if (*slot)
-	{
-	  /* Already visited.  */
-	  continue;
+	      *slot = per_cu->v.quick->file_names;
+	    }
 	}
-      *slot = file_data;
 
-      for (j = 0; j < file_data->num_file_names; ++j)
+      for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
 	{
-	  const char *this_real_name;
+	  int j;
+	  struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i);
+	  struct quick_file_names *file_data;
+	  void **slot;
 
-	  if (need_fullname)
-	    this_real_name = dw2_get_real_path (objfile, file_data, j);
-	  else
-	    this_real_name = NULL;
-	  (*fun) (file_data->file_names[j], this_real_name, data);
+	  /* We only need to look at symtabs not already expanded.  */
+	  if (per_cu->v.quick->compunit_symtab)
+	    continue;
+
+	  file_data = dw2_get_file_names (per_cu);
+	  if (file_data == NULL)
+	    continue;
+
+	  slot = htab_find_slot (visited.get (), file_data, INSERT);
+	  if (*slot)
+	    {
+	      /* Already visited.  */
+	      continue;
+	    }
+	  *slot = file_data;
+
+	  for (int j = 0; j < file_data->num_file_names; ++j)
+	    {
+	      const char *filename = file_data->file_names[j];
+	      dwarf2_per_objfile->filenames_cache->seen (filename);
+	    }
 	}
     }
+
+  dwarf2_per_objfile->filenames_cache->traverse ([&] (const char *filename)
+    {
+      const char *this_real_name;
+
+      if (need_fullname)
+	this_real_name = gdb_realpath (filename);
+      else
+	this_real_name = NULL;
+      (*fun) (filename, this_real_name, data);
+    });
 }
 
 static int
author	Pedro Alves <palves@redhat.com>	2017-07-17 11:28:33 +0100
committer	Pedro Alves <palves@redhat.com>	2017-07-17 11:38:11 +0100
commit	bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7 (patch)
tree	765f5cc2487d2784efa817f342217a5c9c5618ac /gdb/dwarf2read.c
parent	330cdd98910dbd34e969f60d48688fb81c2b374a (diff)
download	fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.zip fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.tar.gz fsf-binutils-gdb-bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7.tar.bz2