Introduce the new DWARF index class

This patch introduces the new DWARF index class. It is called "cooked" to contrast against a "raw" index, which is mapped from disk without extra effort. Nothing constructs a cooked index yet. The essential idea here is that index entries are created via the "add" method; then when all the entries have been read, they are "finalize"d -- name canonicalization is performed and the entries are added to a sorted vector. Entries use the DWARF name (DW_AT_name) or linkage name, not the full name as is done for partial symbols. These two facets -- the short name and the deferred canonicalization -- help improve the performance of this approach. This will become clear in later patches, when parallelization is added. Some special code is needed for Ada, because GNAT only emits mangled ("encoded", in the Ada lingo) names, and so we reconstruct the hierarchical structure after the fact. This is also done in the finalization phase. One other aspect worth noting is that the way the "main" function is found is different in the new code. Currently gdb will notice DW_AT_main_subprogram, but won't recognize "main" during reading -- this is done later, via explicit symbol lookup. This is done differently in the new code so that finalization can be done in the background without then requiring a synchronization to look up the symbol.
author: Tom Tromey <tom@tromey.com> 2021-03-14 11:38:54 -0600
committer: Tom Tromey <tom@tromey.com> 2022-04-12 09:31:16 -0600
commit: 51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2 (patch)
tree: b128706b2b30b9dd0c67efa1e88a500c98245676 /gdb
parent: a2f0ab9310cb2ff235b436e0492fbaa804ce1bc9 (diff)
download: gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.zip
gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.tar.gz
gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.tar.bz2
3 files changed, 532 insertions, 0 deletions
diff --git a/gdb/Makefile.in b/gdb/Makefile.in
index 1374f29..872fbe1 100644
--- a/gdb/Makefile.in
+++ b/gdb/Makefile.in
@@ -1047,6 +1047,7 @@ COMMON_SFILES = \
 	dwarf2/abbrev-cache.c \
 	dwarf2/attribute.c \
 	dwarf2/comp-unit-head.c \
+	dwarf2/cooked-index.c \
 	dwarf2/cu.c \
 	dwarf2/dwz.c \
 	dwarf2/expr.c \
@@ -1294,6 +1295,7 @@ HFILES_NO_SRCDIR = \
 	disasm-flags.h \
 	disasm.h \
 	dummy-frame.h \
+	dwarf2/cooked-index.h \
 	dwarf2/cu.h \
 	dwarf2/frame-tailcall.h \
 	dwarf2/frame.h \
diff --git a/gdb/dwarf2/cooked-index.c b/gdb/dwarf2/cooked-index.c
new file mode 100644
index 0000000..1b7e25d
--- /dev/null
+++ b/gdb/dwarf2/cooked-index.c
@@ -0,0 +1,270 @@
+/* DIE indexing 
+
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+#include "dwarf2/cooked-index.h"
+#include "dwarf2/read.h"
+#include "cp-support.h"
+#include "ada-lang.h"
+#include "split-name.h"
+#include <algorithm>
+
+/* htab hashing callback used for the GNAT entry table.  E is really
+   a cooked_index_entry; the hash is dwarf5_djb_hash applied to the
+   entry's canonical name.  */
+
+static hashval_t
+hash_entry (const void *e)
+{
+  auto *item = static_cast<const cooked_index_entry *> (e);
+  return dwarf5_djb_hash (item->canonical);
+}
+
+/* htab equality callback paired with hash_entry.  A is a stored
+   cooked_index_entry and B is the gdb::string_view being looked up.
+   The two are equal when the entry's canonical name has the same
+   length as the view and matches it ignoring case.  */
+
+static int
+eq_entry (const void *a, const void *b)
+{
+  auto *stored = static_cast<const cooked_index_entry *> (a);
+  auto *key = static_cast<const gdb::string_view *> (b);
+
+  if (strlen (stored->canonical) != key->length ())
+    return 0;
+  return strncasecmp (stored->canonical, key->data (), key->length ()) == 0;
+}
+
+/* See cooked-index.h.  */
+
+const char *
+cooked_index_entry::full_name (struct obstack *storage) const
+{
+  /* A linkage name is used as-is; no scope is prepended.  */
+  if ((flags & IS_LINKAGE) != 0)
+    return canonical;
+
+  /* Pick the scope separator from the CU's language.  Languages not
+     listed here get the canonical name unchanged.  */
+  const char *sep;
+  switch (per_cu->lang)
+    {
+    case language_cplus:
+    case language_rust:
+      sep = "::";
+      break;
+
+    case language_go:
+    case language_d:
+    case language_ada:
+      sep = ".";
+      break;
+
+    default:
+      return canonical;
+    }
+
+  /* Write the enclosing scopes first, then this entry's own name,
+     and finish the object on STORAGE.  */
+  if (parent_entry != nullptr)
+    parent_entry->write_scope (storage, sep);
+  obstack_grow0 (storage, canonical, strlen (canonical));
+  return (const char *) obstack_finish (storage);
+}
+
+/* Append to STORAGE the names of this entry's ancestors and then of
+   this entry itself, each one followed by SEP.  The object being
+   grown on STORAGE is not finished here.  */
+
+void
+cooked_index_entry::write_scope (struct obstack *storage,
+				 const char *sep) const
+{
+  /* Ancestors go first, so the outermost scope ends up leftmost.  */
+  const cooked_index_entry *outer = parent_entry;
+  if (outer != nullptr)
+    outer->write_scope (storage, sep);
+
+  const size_t name_len = strlen (canonical);
+  obstack_grow (storage, canonical, name_len);
+
+  const size_t sep_len = strlen (sep);
+  obstack_grow (storage, sep, sep_len);
+}
+
+/* See cooked-index.h.  */
+
+const cooked_index_entry *
+cooked_index::add (sect_offset die_offset, enum dwarf_tag tag,
+		   cooked_index_flag flags, const char *name,
+		   const cooked_index_entry *parent_entry,
+		   dwarf2_per_cu_data *per_cu)
+{
+  cooked_index_entry *fresh = create (die_offset, tag, flags, name,
+				      parent_entry, per_cu);
+  m_entries.push_back (fresh);
+
+  /* An entry explicitly flagged as the main program always wins,
+     even over one recorded earlier.  Otherwise, outside of Ada, the
+     first entry named "main" is remembered.  */
+  const bool tagged_main = (flags & IS_MAIN) != 0;
+  const bool implicit_main = (!tagged_main
+			      && per_cu->lang != language_ada
+			      && m_main == nullptr
+			      && strcmp (name, "main") == 0);
+  if (tagged_main || implicit_main)
+    m_main = fresh;
+
+  return fresh;
+}
+
+/* See cooked-index.h.  */
+
+cooked_index::range
+cooked_index::find (gdb::string_view name, bool completing)
+{
+  /* Both comparators look only at the first N.length () bytes,
+     ignoring case.  When not completing, a tie on that prefix is
+     broken by comparing lengths, so that longer names fall outside
+     the returned range.  */
+  auto entry_before_name = [=] (const cooked_index_entry *entry,
+				const gdb::string_view &n)
+    {
+      int cmp = strncasecmp (entry->canonical, n.data (), n.length ());
+      if (cmp == 0 && !completing)
+	return strlen (entry->canonical) < n.length ();
+      return cmp < 0;
+    };
+
+  auto name_before_entry = [=] (const gdb::string_view &n,
+				const cooked_index_entry *entry)
+    {
+      int cmp = strncasecmp (n.data (), entry->canonical, n.length ());
+      if (cmp == 0 && !completing)
+	return n.length () < strlen (entry->canonical);
+      return cmp < 0;
+    };
+
+  auto first = std::lower_bound (m_entries.begin (), m_entries.end (),
+				 name, entry_before_name);
+  auto last = std::upper_bound (m_entries.begin (), m_entries.end (),
+				name, name_before_entry);
+  return range (first, last);
+}
+
+/* See cooked-index.h.  */
+
+gdb::unique_xmalloc_ptr<char>
+cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
+					 htab_t gnat_entries)
+{
+  std::string canonical = ada_decode (entry->name, false, false);
+  /* Nothing was decoded; return null so that the caller falls back
+     to the name as it appears in the DWARF.  */
+  if (canonical.empty ())
+    return {};
+  std::vector<gdb::string_view> names = split_name (canonical.c_str (),
+						    split_style::DOT);
+  /* The last component is ENTRY's own name; the components before it
+     name the enclosing scopes.  */
+  gdb::string_view tail = names.back ();
+  names.pop_back ();
+
+  /* Visit the enclosing scopes in the order split_name returned
+     them, reusing or creating an entry for each one so that they can
+     be chained together through parent_entry.  */
+  const cooked_index_entry *parent = nullptr;
+  for (const auto &name : names)
+    {
+      uint32_t hashval = dwarf5_djb_hash (name);
+      void **slot = htab_find_slot_with_hash (gnat_entries, &name,
+					      hashval, INSERT);
+      /* CUs are processed in order, so we only need to check the most
+	 recent entry.  */
+      cooked_index_entry *last = (cooked_index_entry *) *slot;
+      if (last == nullptr || last->per_cu != entry->per_cu)
+	{
+	  /* NAME is a view into CANONICAL, so the synthetic entry
+	     gets its own copy; m_names takes ownership of it
+	     below.  */
+	  gdb::unique_xmalloc_ptr<char> new_name
+	    = make_unique_xstrndup (name.data (), name.length ());
+	  last = create (entry->die_offset, DW_TAG_namespace,
+			 0, new_name.get (), parent,
+			 entry->per_cu);
+	  /* The hash table callbacks read CANONICAL, so set it before
+	     storing the entry in the table.  */
+	  last->canonical = last->name;
+	  m_names.push_back (std::move (new_name));
+	  *slot = last;
+	}
+
+      parent = last;
+    }
+
+  entry->parent_entry = parent;
+  return make_unique_xstrndup (tail.data (), tail.length ());
+}
+
+/* See cooked-index.h.  */
+
+void
+cooked_index::finalize ()
+{
+  /* Hash and equality callbacks for SEEN_NAMES.  Both look only at
+     the NAME pointer of an entry, not at the characters.  */
+  auto hash_name_ptr = [] (const void *p)
+    {
+      const cooked_index_entry *entry = (const cooked_index_entry *) p;
+      return htab_hash_pointer (entry->name);
+    };
+
+  auto eq_name_ptr = [] (const void *a, const void *b) -> int
+    {
+      const cooked_index_entry *ea = (const cooked_index_entry *) a;
+      const cooked_index_entry *eb = (const cooked_index_entry *) b;
+      return ea->name == eb->name;
+    };
+
+  /* C++ entries whose names have already been canonicalized.
+
+     We can use pointer equality here because names come from
+     .debug_str, which will normally be unique-ified by the linker.
+     Also, duplicates are relatively harmless -- they just mean a bit
+     of extra memory is used.  */
+  htab_up seen_names (htab_create_alloc (10, hash_name_ptr, eq_name_ptr,
+					 nullptr, xcalloc, xfree));
+
+  /* The synthetic parent entries made by handle_gnat_encoded_entry,
+     looked up by name.  */
+  htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
+					   nullptr, xcalloc, xfree));
+
+  for (cooked_index_entry *entry : m_entries)
+    {
+      gdb_assert (entry->canonical == nullptr);
+      /* Only C++ and Ada names that are not linkage names need any
+	 work; everything else is already canonical.  */
+      if ((entry->per_cu->lang != language_cplus
+	   && entry->per_cu->lang != language_ada)
+	  || (entry->flags & IS_LINKAGE) != 0)
+	entry->canonical = entry->name;
+      else
+	{
+	  if (entry->per_cu->lang == language_ada)
+	    {
+	      gdb::unique_xmalloc_ptr<char> canon_name
+		= handle_gnat_encoded_entry (entry, gnat_entries.get ());
+	      if (canon_name == nullptr)
+		entry->canonical = entry->name;
+	      else
+		{
+		  /* m_names owns the string for the lifetime of the
+		     index.  */
+		  entry->canonical = canon_name.get ();
+		  m_names.push_back (std::move (canon_name));
+		}
+	    }
+	  else
+	    {
+	      void **slot = htab_find_slot (seen_names.get (), entry,
+					    INSERT);
+	      if (*slot == nullptr)
+		{
+		  gdb::unique_xmalloc_ptr<char> canon_name
+		    = cp_canonicalize_string (entry->name);
+		  if (canon_name == nullptr)
+		    entry->canonical = entry->name;
+		  else
+		    {
+		      entry->canonical = canon_name.get ();
+		      m_names.push_back (std::move (canon_name));
+		    }
+		  /* htab_find_slot only reserves the slot; it must be
+		     filled in here.  Otherwise the table never holds
+		     anything and every entry sharing this NAME
+		     pointer is canonicalized again.  */
+		  *slot = entry;
+		}
+	      else
+		{
+		  /* Same NAME pointer as an earlier entry: share its
+		     canonical name.  */
+		  const cooked_index_entry *other
+		    = (const cooked_index_entry *) *slot;
+		  entry->canonical = other->canonical;
+		}
+	    }
+	}
+    }
+
+  m_names.shrink_to_fit ();
+  m_entries.shrink_to_fit ();
+  /* Sort by canonical name, using the entries' operator<, so that
+     find can use binary search.  */
+  std::sort (m_entries.begin (), m_entries.end (),
+	     [] (const cooked_index_entry *a, const cooked_index_entry *b)
+	     {
+	       return *a < *b;
+	     });
+}
diff --git a/gdb/dwarf2/cooked-index.h b/gdb/dwarf2/cooked-index.h
new file mode 100644
index 0000000..0a38fc8
--- /dev/null
+++ b/gdb/dwarf2/cooked-index.h
@@ -0,0 +1,260 @@
+/* DIE indexing 
+
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef GDB_DWARF2_COOKED_INDEX_H
+#define GDB_DWARF2_COOKED_INDEX_H
+
+#include "dwarf2.h"
+#include "gdbtypes.h"
+#include "symtab.h"
+#include "hashtab.h"
+#include "dwarf2/index-common.h"
+#include "gdbsupport/gdb_string_view.h"
+#include "quick-symbol.h"
+#include "gdbsupport/gdb_obstack.h"
+#include "addrmap.h"
+#include "gdbsupport/iterator-range.h"
+
+struct dwarf2_per_cu_data;
+
+/* Per-entry flag bits used by the cooked index.  */
+enum cooked_index_flag_enum : unsigned char
+{
+  /* Set when the entry is the program's "main".  */
+  IS_MAIN = 1 << 0,
+  /* Set when the entry represents a "static" object.  */
+  IS_STATIC = 1 << 1,
+  /* Set when the entry is an "enum class".  */
+  IS_ENUM_CLASS = 1 << 2,
+  /* Set when the entry's name is the linkage name.  */
+  IS_LINKAGE = 1 << 3,
+};
+DEF_ENUM_FLAGS_TYPE (enum cooked_index_flag_enum, cooked_index_flag);
+
+/* One item in the cooked index.  A single DIE may give rise to two
+   entries: one for its name and, if it has one, another for its
+   linkage name.
+
+   This is an "open" class: all the data members are directly
+   accessible.  Once the index has been fully read and processed,
+   entries are read-only.  */
+struct cooked_index_entry : public allocate_on_obstack
+{
+  cooked_index_entry (sect_offset die_offset_, enum dwarf_tag tag_,
+		      cooked_index_flag flags_, const char *name_,
+		      const cooked_index_entry *parent_entry_,
+		      dwarf2_per_cu_data *per_cu_)
+    : name (name_),
+      tag (tag_),
+      flags (flags_),
+      die_offset (die_offset_),
+      parent_entry (parent_entry_),
+      per_cu (per_cu_)
+  {
+  }
+
+  /* Return true if this entry matches SEARCH_FLAGS: a static entry
+     matches when the static block is searched, and any other entry
+     matches when the global block is searched.  */
+  bool matches (block_search_flags search_flags) const
+  {
+    if ((flags & IS_STATIC) != 0)
+      return (search_flags & SEARCH_STATIC_BLOCK) != 0;
+    return (search_flags & SEARCH_GLOBAL_BLOCK) != 0;
+  }
+
+  /* Return true if this entry matches DOMAIN.  Nothing matches
+     LABEL_DOMAIN; MODULE_DOMAIN and COMMON_BLOCK_DOMAIN each require
+     the corresponding tag; every other domain accepts any entry.  */
+  bool matches (domain_enum domain) const
+  {
+    if (domain == LABEL_DOMAIN)
+      return false;
+    if (domain == MODULE_DOMAIN)
+      return tag == DW_TAG_module;
+    if (domain == COMMON_BLOCK_DOMAIN)
+      return tag == DW_TAG_common_block;
+    return true;
+  }
+
+  /* Return true if this entry matches KIND.  Kinds not listed below
+     accept any entry.  */
+  bool matches (enum search_domain kind) const
+  {
+    switch (kind)
+      {
+      case VARIABLES_DOMAIN:
+	return tag == DW_TAG_variable;
+      case FUNCTIONS_DOMAIN:
+	return tag == DW_TAG_subprogram;
+      case TYPES_DOMAIN:
+	/* NOTE(review): only typedefs and structure types are
+	   accepted here; confirm whether other type tags should
+	   match as well.  */
+	return tag == DW_TAG_typedef || tag == DW_TAG_structure_type;
+      case MODULES_DOMAIN:
+	return tag == DW_TAG_module;
+      default:
+	return true;
+      }
+  }
+
+  /* Build this entry's fully-qualified name and return a pointer to
+     it.  Any allocation that is needed is done on STORAGE.  */
+  const char *full_name (struct obstack *storage) const;
+
+  /* Order two entries by canonical name, ignoring case.  The index
+     keeps its entries sorted this way.  */
+  bool operator< (const cooked_index_entry &other) const
+  {
+    int cmp = strcasecmp (canonical, other.canonical);
+    return cmp < 0;
+  }
+
+  /* The name as it appears in DWARF, normally pointing into one of
+     the mapped DWARF sections.  It may be either the name or the
+     linkage name, since a DIE that has both attributes gets one
+     entry for each.  */
+  const char *name;
+  /* The canonical name; null until the index is finalized.  It may
+     differ from NAME for C++ and Ada entries, and otherwise is equal
+     to NAME.  */
+  const char *canonical = nullptr;
+  /* The DWARF tag.  */
+  enum dwarf_tag tag;
+  /* The flags that apply to this entry.  */
+  cooked_index_flag flags;
+  /* The offset of this entry's DIE.  */
+  sect_offset die_offset;
+  /* The enclosing entry, such as a namespace or class, or NULL for
+     a top-level entry.  */
+  const cooked_index_entry *parent_entry;
+  /* The CU this entry came from.  */
+  dwarf2_per_cu_data *per_cu;
+
+private:
+
+  /* Helper for full_name: append to STORAGE the scope prefix made
+     from this entry and its ancestors, using SEP as separator.  */
+  void write_scope (struct obstack *storage, const char *sep) const;
+};
+
+/* The "cooked" index of interesting DIEs -- as opposed to a mapped
+   .debug_names or .gdb_index, which are "raw".  Each item in the
+   index is a cooked_index_entry.
+
+   The operations offered here were picked so that the symtab
+   "quick" methods are relatively simple to write on top of them.  */
+class cooked_index
+{
+public:
+  cooked_index () = default;
+  explicit cooked_index (cooked_index &&other) = default;
+  DISABLE_COPY_AND_ASSIGN (cooked_index);
+  cooked_index &operator= (cooked_index &&other) = default;
+
+  /* Make a new cooked_index_entry from the arguments and record it
+     in this index, which owns it.  Returns the new entry.  */
+  const cooked_index_entry *add (sect_offset die_offset, enum dwarf_tag tag,
+				 cooked_index_flag flags,
+				 const char *name,
+				 const cooked_index_entry *parent_entry,
+				 dwarf2_per_cu_data *per_cu);
+
+  /* Return the entry thought to be the program's "main", or NULL
+     when no such entry has been seen.  */
+  const cooked_index_entry *get_main () const
+  {
+    return m_main;
+  }
+
+  /* Build a fixed addrmap from the mutable addrmap MAP and install
+     it.  No addrmap may have been installed before.  */
+  void install_addrmap (addrmap *map)
+  {
+    gdb_assert (m_addrmap == nullptr);
+    m_addrmap = addrmap_create_fixed (map, &m_storage);
+  }
+
+  /* Find ADDR in the address map.  Returns the CU it belongs to, or
+     nullptr when the address is not found.  */
+  dwarf2_per_cu_data *lookup (CORE_ADDR addr)
+  {
+    auto found = addrmap_find (m_addrmap, addr);
+    return (dwarf2_per_cu_data *) found;
+  }
+
+  /* Finalize the index.  Call this exactly once, after every entry
+     has been added.  It computes the canonical name of each entry
+     and sorts the entries by it.  */
+  void finalize ();
+
+  /* A simple range over part of m_entries.  */
+  using range = iterator_range<std::vector<cooked_index_entry *>::iterator>;
+
+  /* Find entries by NAME and return the range of all matches.  When
+     COMPLETING is true the range is wider, as is appropriate for
+     completion.  */
+  range find (gdb::string_view name, bool completing);
+
+  /* Return a range covering every entry.  */
+  range all_entries ()
+  {
+    return range (m_entries.begin (), m_entries.end ());
+  }
+
+private:
+
+  /* GNAT emits only mangled ("encoded") names in the DWARF and does
+     not describe the module structure, yet lookups need that
+     structure.  This rebuilds it for the existing entry ENTRY and
+     returns the base name (the last element) of the full decoded
+     name.  */
+  gdb::unique_xmalloc_ptr<char> handle_gnat_encoded_entry
+       (cooked_index_entry *entry, htab_t gnat_entries);
+
+  /* Allocate a new cooked_index_entry on this object's storage and
+     return it.  The entry is not added to m_entries here.  */
+  cooked_index_entry *create (sect_offset die_offset,
+			      enum dwarf_tag tag,
+			      cooked_index_flag flags,
+			      const char *name,
+			      const cooked_index_entry *parent_entry,
+			      dwarf2_per_cu_data *per_cu)
+  {
+    cooked_index_entry *made
+      = new (&m_storage) cooked_index_entry (die_offset, tag, flags, name,
+					     parent_entry, per_cu);
+    return made;
+  }
+
+  /* Obstack on which the entries are allocated.  */
+  auto_obstack m_storage;
+  /* Every entry that was passed to add.  */
+  std::vector<cooked_index_entry *> m_entries;
+  /* The entry named "main" or flagged IS_MAIN, if one was seen.  */
+  cooked_index_entry *m_main = nullptr;
+  /* Owner of the strings allocated for canonical names.  */
+  std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
+  /* The addrmap, which maps address ranges to dwarf2_per_cu_data
+     objects.  */
+  addrmap *m_addrmap = nullptr;
+};
+
+#endif /* GDB_DWARF2_COOKED_INDEX_H */
author	Tom Tromey <tom@tromey.com>	2021-03-14 11:38:54 -0600
committer	Tom Tromey <tom@tromey.com>	2022-04-12 09:31:16 -0600
commit	51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2 (patch)
tree	b128706b2b30b9dd0c67efa1e88a500c98245676 /gdb
parent	a2f0ab9310cb2ff235b436e0492fbaa804ce1bc9 (diff)
download	gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.zip gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.tar.gz gdb-51f5a4b8e9397ae9e93789cd7974fa62aeee6cd2.tar.bz2