gdb/dwarf2/cooked-index-shard.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346

/* Shards for the cooked index

   Copyright (C) 2022-2025 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "dwarf2/cooked-index-shard.h"
#include "dwarf2/tag.h"
#include "dwarf2/index-common.h"
#include "cp-support.h"
#include "c-lang.h"
#include "ada-lang.h"

/* Tell whether an entry literally named "main" may be the main
   program for LANG.  Languages known to find their main program by
   some other mechanism yield false.  */

static bool
language_may_use_plain_main (enum language lang)
{
  switch (lang)
    {
    case language_c:
    case language_objc:
    case language_cplus:
    case language_m2:
    case language_asm:
    case language_opencl:
    case language_minimal:
      return true;

    default:
      /* This also covers "unknown", which needs no special
	 handling.  */
      return false;
    }
}

/* See cooked-index-shard.h.  */

cooked_index_entry *
cooked_index_shard::create (sect_offset die_offset,
			    enum dwarf_tag tag,
			    cooked_index_flag flags,
			    enum language lang,
			    const char *name,
			    cooked_index_entry_ref parent_entry,
			    dwarf2_per_cu *per_cu)
{
  /* Work out whether IS_STATIC has to be cleared no matter what the
     caller passed in FLAGS.  */
  bool clear_static = false;
  switch (tag)
    {
    case DW_TAG_module:
    case DW_TAG_namespace:
      clear_static = true;
      break;

    case DW_TAG_class_type:
    case DW_TAG_interface_type:
    case DW_TAG_structure_type:
    case DW_TAG_union_type:
    case DW_TAG_enumeration_type:
    case DW_TAG_enumerator:
      /* These tags only get this treatment for C++.  */
      clear_static = lang == language_cplus;
      break;

    default:
      break;
    }

  /* Any remaining tag for which tag_is_type holds has IS_STATIC
     forced on; everything else keeps the caller's setting.  */
  if (clear_static)
    flags &= ~IS_STATIC;
  else if (tag_is_type (tag))
    flags |= IS_STATIC;

  /* The entry is built with the placement form of new that takes
     &m_storage.  */
  return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
					      lang, name, parent_entry,
					      per_cu);
}

/* See cooked-index-shard.h.  */

cooked_index_entry *
cooked_index_shard::add (sect_offset die_offset, enum dwarf_tag tag,
			 cooked_index_flag flags, enum language lang,
			 const char *name, cooked_index_entry_ref parent_entry,
			 dwarf2_per_cu *per_cu)
{
  cooked_index_entry *entry = create (die_offset, tag, flags, lang, name,
				      parent_entry, per_cu);
  m_entries.push_back (entry);

  /* An entry explicitly flagged as the main program always takes
     precedence over the implicit "main" discovery below, replacing
     whatever was recorded earlier.  */
  if ((flags & IS_MAIN) != 0)
    {
      m_main = entry;
      return entry;
    }

  /* The implicit discovery only considers entries without a parent.
     The deferred-parent flag is tested first, so the "resolved"
     member is only examined when the parent is not deferred.  */
  const bool has_no_parent
    = ((flags & IS_PARENT_DEFERRED) == 0
       && parent_entry.resolved == nullptr);

  /* The language_unknown test is a workaround for a .gdb_index bug.
     That index does not record languages, yet it may emit an entry
     for "main".  Treating that entry as the main program would be
     wrong -- for example, an Ada program has a C "main", but that is
     not the desired target of the "start" command.  Insisting on a
     known language avoids over-eagerly setting the "main" when using
     .gdb_index.  If .gdb_index is ever removed (PR symtab/31363),
     this test can go as well.

     Strictly speaking the test is redundant, since
     language_may_use_plain_main rejects the unknown language too; it
     is kept as a convenient place for this explanation.  */
  if (m_main == nullptr
      && lang != language_unknown
      && has_no_parent
      && language_may_use_plain_main (lang)
      && strcmp (name, "main") == 0)
    m_main = entry;

  return entry;
}

/* See cooked-index-shard.h.  */

void
cooked_index_shard::handle_gnat_encoded_entry
     (cooked_index_entry *entry,
      htab_t gnat_entries,
      std::vector<cooked_index_entry *> &new_entries)
{
  /* Ada names are decoded in a particular way here: operators and
     wide characters are kept as-is.  This keeps name matching a bit
     simpler, and for wide characters it means the choice of Ada
     source charset has no direct effect on the indexer.  */
  const std::string decoded = ada_decode (entry->name, false, false);
  if (decoded.empty ())
    {
      /* Nothing was decoded; use the name unchanged.  */
      entry->canonical = entry->name;
      return;
    }

  /* Break the decoded name at the dots.  The final component names
     the entry itself; the ones before it name its enclosing
     scopes.  */
  std::vector<std::string_view> components
    = split_name (decoded.c_str (), split_style::DOT_STYLE);
  const std::string_view leaf = components.back ();
  components.pop_back ();

  const cooked_index_entry *enclosing = nullptr;
  for (const std::string_view &component : components)
    {
      const uint32_t hash = dwarf5_djb_hash (component);
      void **slot = htab_find_slot_with_hash (gnat_entries, &component,
					      hash, INSERT);

      /* CUs are processed in order, so only the most recently
	 recorded entry for this component has to be examined.  */
      cooked_index_entry *scope = static_cast<cooked_index_entry *> (*slot);
      const bool reusable
	= scope != nullptr && scope->per_cu == entry->per_cu;

      if (!reusable)
	{
	  /* Synthesize a module entry for this component, hand it to
	     the caller through NEW_ENTRIES, and remember it in the
	     table.  */
	  scope = create (entry->die_offset, DW_TAG_module,
			  IS_SYNTHESIZED, language_ada,
			  m_names.insert (component), enclosing,
			  entry->per_cu);
	  scope->canonical = scope->name;
	  new_entries.push_back (scope);
	  *slot = scope;
	}

      enclosing = scope;
    }

  entry->set_parent (enclosing);
  entry->canonical = m_names.insert (leaf);
}

/* Hash a cooked index entry by name pointer value.

   We can use pointer equality here because names come from .debug_str, which
   will normally be unique-ified by the linker.  Also, duplicates are relatively
   harmless -- they just mean a bit of extra memory is used.  */

struct cooked_index_entry_name_ptr_hash
{
  using is_avalanching = void;

  std::uint64_t operator () (const cooked_index_entry *entry) const noexcept
  {
    return ankerl::unordered_dense::hash<const char *> () (entry->name);
  }
};

/* Compare cooked index entries by name pointer value.  */

struct cooked_index_entry_name_ptr_eq
{
  bool operator () (const cooked_index_entry *a,
		    const cooked_index_entry *b) const noexcept
  {
    return a->name == b->name;
  }
};

/* See cooked-index-shard.h.  */

void
cooked_index_shard::finalize (const parent_map_map *parent_maps)
{
  /* C and C++ entries whose canonical name has already been computed,
     keyed by the address of the name string.  A later entry with the
     same name pointer reuses that canonical name instead of
     canonicalizing again.  */
  gdb::unordered_set<const cooked_index_entry *,
		     cooked_index_entry_name_ptr_hash,
		     cooked_index_entry_name_ptr_eq> seen_names;

  /* Hash callback for GNAT_ENTRIES: hash a stored entry by its
     canonical name.  */
  auto hash_entry = [] (const void *e)
    {
      const cooked_index_entry *entry = (const cooked_index_entry *) e;
      return dwarf5_djb_hash (entry->canonical);
    };

  /* Equality callback for GNAT_ENTRIES.  A is a stored entry, while B
     is the lookup key, a pointer to a std::string_view.  They match
     when the entry's canonical name has the same length as the key
     and compares equal to it ignoring case.  */
  auto eq_entry = [] (const void *a, const void *b) -> int
    {
      const cooked_index_entry *ae = (const cooked_index_entry *) a;
      const std::string_view *sv = (const std::string_view *) b;
      return (strlen (ae->canonical) == sv->length ()
	      && strncasecmp (ae->canonical, sv->data (), sv->length ()) == 0);
    };

  /* Table passed to handle_gnat_encoded_entry for GNAT-encoded Ada
     names, and the entries synthesized while processing Ada entries.
     The latter are only appended to m_entries once the loop below is
     done.  */
  htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
					   nullptr, xcalloc, xfree));
  std::vector<cooked_index_entry *> new_gnat_entries;

  for (cooked_index_entry *entry : m_entries)
    {
      /* Replace a deferred parent with the entry that PARENT_MAPS
	 finds for it.  */
      if ((entry->flags & IS_PARENT_DEFERRED) != 0)
	{
	  const cooked_index_entry *new_parent
	    = parent_maps->find (entry->get_deferred_parent ());
	  entry->resolve_parent (new_parent);
	}

      /* Note that this code must be kept in sync with
	 cooked_index::get_main -- if canonicalization is required
	 here, then a check might be required there.  */
      gdb_assert (entry->canonical == nullptr);

      /* Entries flagged IS_LINKAGE use their name unchanged as the
	 canonical name.  */
      if ((entry->flags & IS_LINKAGE) != 0)
	entry->canonical = entry->name;
      else if (entry->lang == language_ada)
	{
	  /* Newer versions of GNAT emit DW_TAG_module and use a
	     hierarchical structure.  In this case, we don't need to
	     do any extra work.  This can be detected by looking for a
	     GNAT-encoded name.  */
	  if (strstr (entry->name, "__") == nullptr)
	    {
	      entry->canonical = entry->name;

	      /* If the entry does not have a parent, then there's
		 nothing extra to do here -- the entry itself is
		 sufficient.

		 However, if it does have a parent, we have to
		 synthesize an entry with the full name.  This is
		 unfortunate, but it's necessary due to how some of
		 the Ada name-lookup code currently works.  For
		 example, without this, ada_get_tsd_type will
		 fail.

		 Eventually it would be good to change the Ada lookup
		 code, and then remove these entries (and supporting
		 code in cooked_index_entry::full_name).  */
	      if (entry->get_parent () != nullptr)
		{
		  const char *fullname
		    = entry->full_name (&m_storage, FOR_ADA_LINKAGE_NAME);
		  cooked_index_entry *linkage = create (entry->die_offset,
							entry->tag,
							(entry->flags
							 | IS_LINKAGE
							 | IS_SYNTHESIZED),
							language_ada,
							fullname,
							nullptr,
							entry->per_cu);
		  linkage->canonical = fullname;
		  new_gnat_entries.push_back (linkage);
		}
	    }
	  else
	    handle_gnat_encoded_entry (entry, gnat_entries.get (),
				       new_gnat_entries);
	}
      else if (entry->lang == language_cplus || entry->lang == language_c)
	{
	  /* INSERTED is false when an entry with the same name pointer
	     is already in SEEN_NAMES; IT then refers to that earlier
	     entry.  */
	  auto [it, inserted] = seen_names.insert (entry);

	  if (inserted)
	    {
	      /* No entry with that name was present, compute the canonical
		 name.  */
	      gdb::unique_xmalloc_ptr<char> canon_name
		= (entry->lang == language_cplus
		   ? cp_canonicalize_string (entry->name)
		   : c_canonicalize_name (entry->name));
	      /* A null result from the canonicalizer means the name is
		 used unchanged; otherwise the new string is handed
		 over to m_names.  */
	      if (canon_name == nullptr)
		entry->canonical = entry->name;
	      else
		entry->canonical = m_names.insert (std::move (canon_name));
	    }
	  else
	    {
	      /* An entry with that name was present, reuse its canonical
		 name.  */
	      entry->canonical = (*it)->canonical;
	    }
	}
      else
	/* Any other language: the name is used unchanged.  */
	entry->canonical = entry->name;
    }

  /* Make sure any new Ada entries end up in the results.  This isn't
     done when creating these new entries to avoid invalidating the
     m_entries iterator used in the foreach above.  */
  m_entries.insert (m_entries.end (), new_gnat_entries.begin (),
		    new_gnat_entries.end ());

  /* No more entries are added in this function, so request that any
     unused capacity be released, then sort the entries using
     cooked_index_entry's operator<.  cooked_index_shard::find runs
     std::equal_range over m_entries.  */
  m_entries.shrink_to_fit ();
  std::sort (m_entries.begin (), m_entries.end (),
	     [] (const cooked_index_entry *a, const cooked_index_entry *b)
	     {
	       return *a < *b;
	     });
}

/* See cooked-index-shard.h.  */

cooked_index_shard::range
cooked_index_shard::find (const std::string &name, bool completing) const
{
  /* Ordering predicate for std::equal_range, which needs to compare
     an entry against the searched-for string in both argument
     orders.  Both overloads defer to cooked_index_entry::compare on
     the entry's canonical name, using the selected mode.  */
  struct name_order
  {
    cooked_index_entry::comparison_mode mode;

    bool operator() (const cooked_index_entry *lhs,
		     const char *rhs) const noexcept
    {
      const int cmp
	= cooked_index_entry::compare (lhs->canonical, rhs, mode);
      return cmp < 0;
    }

    bool operator() (const char *lhs,
		     const cooked_index_entry *rhs) const noexcept
    {
      const int cmp
	= cooked_index_entry::compare (rhs->canonical, lhs, mode);
      return cmp > 0;
    }
  };

  /* COMPLETING selects the comparison mode.  */
  const cooked_index_entry::comparison_mode mode
    = (completing
       ? cooked_index_entry::COMPLETE
       : cooked_index_entry::MATCH);

  /* The iterator pair from std::equal_range is turned directly into
     the range that is returned.  */
  return std::make_from_tuple<range>
    (std::equal_range (m_entries.cbegin (), m_entries.cend (),
		       name.c_str (), name_order { mode }));
}