/* DWARF indexer Copyright (C) 2022-2025 Free Software Foundation, Inc. This file is part of GDB. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "dwarf2/cooked-indexer.h" #include "dwarf2/cooked-index-storage.h" #include "dwarf2/error.h" /* See cooked-indexer.h. */ cooked_indexer::cooked_indexer (cooked_index_storage *storage, dwarf2_per_cu *per_cu, enum language language) : m_index_storage (storage), m_per_cu (per_cu), m_language (language), m_die_range_map (storage->get_parent_map ()) { } /* See cooked-indexer.h. */ void cooked_indexer::check_bounds (cutu_reader *reader) { dwarf2_cu *cu = reader->cu (); if (cu->per_cu->addresses_seen) return; unrelocated_addr best_lowpc = {}, best_highpc = {}; /* Possibly set the default values of LOWPC and HIGHPC from `DW_AT_ranges'. */ dwarf2_find_base_address (reader->top_level_die (), cu); enum pc_bounds_kind cu_bounds_kind = dwarf2_get_pc_bounds (reader->top_level_die (), &best_lowpc, &best_highpc, cu, m_index_storage->get_addrmap (), cu->per_cu); if (cu_bounds_kind == PC_BOUNDS_HIGH_LOW && best_lowpc < best_highpc) { /* Store the contiguous range if it is not empty; it can be empty for CUs with no code. addrmap requires CORE_ADDR, so we cast here. */ m_index_storage->get_addrmap ()->set_empty ((CORE_ADDR) best_lowpc, (CORE_ADDR) best_highpc - 1, cu->per_cu); cu->per_cu->addresses_seen = true; } } /* Helper function that returns true if TAG can have a linkage name. */ static bool tag_can_have_linkage_name (enum dwarf_tag tag) { switch (tag) { case DW_TAG_variable: case DW_TAG_subprogram: return true; default: return false; } } /* See cooked-indexer.h. */ cutu_reader * cooked_indexer::ensure_cu_exists (cutu_reader *reader, sect_offset sect_off, bool is_dwz, bool for_scanning) { /* Lookups for type unit references are always in the CU, and cross-CU references will crash. */ if (reader->cu ()->per_cu->is_dwz == is_dwz && reader->cu ()->header.offset_in_cu_p (sect_off)) return reader; dwarf2_per_objfile *per_objfile = reader->cu ()->per_objfile; dwarf2_per_cu *per_cu = dwarf2_find_containing_comp_unit (sect_off, is_dwz, per_objfile->per_bfd); /* When scanning, we only want to visit a given CU a single time. Doing this check here avoids self-imports as well. */ if (for_scanning) { bool nope = false; if (!per_cu->scanned.compare_exchange_strong (nope, true)) return nullptr; } cutu_reader *result = m_index_storage->get_reader (per_cu); if (result == nullptr) { cutu_reader new_reader (per_cu, per_objfile, nullptr, nullptr, false, language_minimal, &m_index_storage->get_abbrev_table_cache ()); if (new_reader.is_dummy () || new_reader.top_level_die () == nullptr || !new_reader.top_level_die ()->has_children) return nullptr; auto copy = std::make_unique (std::move (new_reader)); result = m_index_storage->preserve (std::move (copy)); } if (result->is_dummy () || result->top_level_die () == nullptr || !result->top_level_die ()->has_children) return nullptr; if (for_scanning) check_bounds (result); return result; } /* See cooked-indexer.h. */ const gdb_byte * cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, cutu_reader *reader, const gdb_byte *watermark_ptr, const gdb_byte *info_ptr, const abbrev_info *abbrev, const char **name, const char **linkage_name, cooked_index_flag *flags, sect_offset *sibling_offset, const cooked_index_entry **parent_entry, parent_map::addr_type *maybe_defer, bool *is_enum_class, bool for_specification) { bool origin_is_dwz = false; bool is_declaration = false; sect_offset origin_offset {}; std::optional low_pc; std::optional high_pc; bool high_pc_relative = false; for (int i = 0; i < abbrev->num_attrs; ++i) { attribute attr; info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); /* Store the data if it is of an attribute we want to keep in a partial symbol table. */ switch (attr.name) { case DW_AT_name: switch (abbrev->tag) { case DW_TAG_compile_unit: case DW_TAG_partial_unit: case DW_TAG_type_unit: /* Compilation units have a DW_AT_name that is a filename, not a source language identifier. */ break; default: if (*name == nullptr) *name = attr.as_string (); break; } break; case DW_AT_linkage_name: case DW_AT_MIPS_linkage_name: /* Note that both forms of linkage name might appear. We assume they will be the same, and we only store the last one we see. */ if (*linkage_name == nullptr) *linkage_name = attr.as_string (); break; /* DWARF 4 has defined a dedicated DW_AT_main_subprogram attribute to indicate the starting function of the program... */ case DW_AT_main_subprogram: if (attr.as_boolean ()) *flags |= IS_MAIN; break; /* ... however with older versions the DW_CC_program value of the DW_AT_calling_convention attribute was used instead as the only means available. We handle both variants then. */ case DW_AT_calling_convention: { std::optional value = attr.unsigned_constant (); if (value.has_value () && *value == DW_CC_program) *flags |= IS_MAIN; } break; case DW_AT_declaration: is_declaration = attr.as_boolean (); break; case DW_AT_sibling: if (sibling_offset != nullptr) *sibling_offset = attr.get_ref_die_offset (); break; case DW_AT_specification: case DW_AT_abstract_origin: case DW_AT_extension: origin_offset = attr.get_ref_die_offset (); origin_is_dwz = attr.form == DW_FORM_GNU_ref_alt; break; case DW_AT_external: if (attr.as_boolean ()) *flags &= ~IS_STATIC; break; case DW_AT_enum_class: if (attr.as_boolean ()) *is_enum_class = true; break; case DW_AT_low_pc: low_pc = attr.as_address (); break; case DW_AT_high_pc: high_pc = attr.as_address (); if (reader->cu ()->header.version >= 4 && attr.form_is_constant ()) high_pc_relative = true; break; case DW_AT_location: if (!scanning_per_cu->addresses_seen && attr.form_is_block ()) { struct dwarf_block *locdesc = attr.as_block (); CORE_ADDR addr; dwarf2_cu *cu = reader->cu (); if (decode_locdesc (locdesc, cu, &addr) && (addr != 0 || cu->per_objfile->per_bfd->has_section_at_zero)) { low_pc = (unrelocated_addr) addr; /* For variables, we don't want to try decoding the type just to find the size -- for gdb's purposes we only need the address of a variable. */ high_pc = (unrelocated_addr) (addr + 1); high_pc_relative = false; } } break; case DW_AT_ranges: if (!scanning_per_cu->addresses_seen) { /* Offset in the .debug_ranges or .debug_rnglist section (depending on DWARF version). */ ULONGEST ranges_offset = attr.as_unsigned (); /* See dwarf2_cu::gnu_ranges_base's doc for why we might want to add this value. */ ranges_offset += reader->cu ()->gnu_ranges_base; unrelocated_addr lowpc, highpc; dwarf2_ranges_read (ranges_offset, &lowpc, &highpc, reader->cu (), m_index_storage->get_addrmap (), scanning_per_cu, abbrev->tag); } break; } } /* We don't want to examine declarations, but if we found a declaration when handling DW_AT_specification or the like, then that is ok. Similarly, we allow an external variable without a location; those are resolved via minimal symbols. */ if (is_declaration && !for_specification && !(abbrev->tag == DW_TAG_variable && (*flags & IS_STATIC) == 0)) { /* We always want to recurse into some types, but we may not want to treat them as definitions. */ if ((abbrev->tag == DW_TAG_class_type || abbrev->tag == DW_TAG_structure_type || abbrev->tag == DW_TAG_union_type || abbrev->tag == DW_TAG_namespace) && abbrev->has_children) *flags |= IS_TYPE_DECLARATION; else { *linkage_name = nullptr; *name = nullptr; } } else if ((*name == nullptr || (*linkage_name == nullptr && tag_can_have_linkage_name (abbrev->tag)) || (*parent_entry == nullptr && m_language != language_c)) && origin_offset != sect_offset (0)) { cutu_reader *new_reader = ensure_cu_exists (reader, origin_offset, origin_is_dwz, false); if (new_reader == nullptr) error (_(DWARF_ERROR_PREFIX "cannot follow reference to DIE at %s" " [in module %s]"), sect_offset_str (origin_offset), bfd_get_filename (reader->abfd ())); const gdb_byte *new_info_ptr = (new_reader->buffer () + to_underlying (origin_offset)); if (*parent_entry == nullptr) { /* We only perform immediate lookups of parents for DIEs from earlier in this CU. This avoids any problem with a NULL result when when we see a reference to a DIE in another CU that we may or may not have imported locally. */ parent_map::addr_type addr = parent_map::form_addr (new_info_ptr); if (new_reader->cu () != reader->cu () || new_info_ptr > watermark_ptr) *maybe_defer = addr; else *parent_entry = m_die_range_map->find (addr); } unsigned int bytes_read; const abbrev_info *new_abbrev = new_reader->peek_die_abbrev (new_info_ptr, &bytes_read); if (new_abbrev == nullptr) error (_(DWARF_ERROR_PREFIX "Unexpected null DIE at offset %s [in module %s]"), sect_offset_str (origin_offset), bfd_get_filename (new_reader->abfd ())); new_info_ptr += bytes_read; if (new_reader->cu () == reader->cu () && new_info_ptr == watermark_ptr) { /* Self-reference, we're done. */ } else scan_attributes (scanning_per_cu, new_reader, new_info_ptr, new_info_ptr, new_abbrev, name, linkage_name, flags, nullptr, parent_entry, maybe_defer, is_enum_class, true); } if (!for_specification) { /* Older versions of GNAT emit full-qualified encoded names. In this case, also use this name as the linkage name. */ if (m_language == language_ada && *linkage_name == nullptr && *name != nullptr && strstr (*name, "__") != nullptr) *linkage_name = *name; if (!scanning_per_cu->addresses_seen && low_pc.has_value () && (reader->cu ()->per_objfile->per_bfd->has_section_at_zero || *low_pc != (unrelocated_addr) 0) && high_pc.has_value ()) { if (high_pc_relative) high_pc = (unrelocated_addr) ((ULONGEST) *high_pc + (ULONGEST) *low_pc); if (*high_pc > *low_pc) { /* Need CORE_ADDR casts for addrmap. */ m_index_storage->get_addrmap ()->set_empty ((CORE_ADDR) *low_pc, (CORE_ADDR) *high_pc - 1, scanning_per_cu); } } if (abbrev->tag == DW_TAG_namespace && *name == nullptr) *name = "(anonymous namespace)"; /* Keep in sync with new_symbol. */ if (abbrev->tag == DW_TAG_subprogram && (m_language == language_ada || m_language == language_fortran)) *flags &= ~IS_STATIC; } return info_ptr; } /* See cooked-indexer.h. */ const gdb_byte * cooked_indexer::index_imported_unit (cutu_reader *reader, const gdb_byte *info_ptr, const abbrev_info *abbrev) { sect_offset sect_off {}; bool is_dwz = false; for (int i = 0; i < abbrev->num_attrs; ++i) { /* Note that we never need to reprocess attributes here. */ attribute attr; info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); if (attr.name == DW_AT_import) { sect_off = attr.get_ref_die_offset (); is_dwz = (attr.form == DW_FORM_GNU_ref_alt || reader->cu ()->per_cu->is_dwz); } } /* Did not find DW_AT_import. */ if (sect_off == sect_offset (0)) return info_ptr; cutu_reader *new_reader = ensure_cu_exists (reader, sect_off, is_dwz, true); if (new_reader != nullptr) { index_dies (new_reader, new_reader->info_ptr (), nullptr, false); reader->cu ()->add_dependence (new_reader->cu ()->per_cu); } return info_ptr; } /* See cooked-indexer.h. */ const gdb_byte * cooked_indexer::recurse (cutu_reader *reader, const gdb_byte *info_ptr, std::variant parent, bool fully) { info_ptr = index_dies (reader, info_ptr, parent, fully); if (!std::holds_alternative (parent)) return info_ptr; const cooked_index_entry *parent_entry = std::get (parent); if (parent_entry != nullptr) { /* Both start and end are inclusive, so use both "+ 1" and "- 1" to limit the range to the children of parent_entry. */ parent_map::addr_type start = parent_map::form_addr (reader->buffer () + to_underlying (parent_entry->die_offset) + 1); parent_map::addr_type end = parent_map::form_addr (info_ptr - 1); m_die_range_map->add_entry (start, end, parent_entry); } return info_ptr; } /* See cooked-indexer.h. */ const gdb_byte * cooked_indexer::index_dies (cutu_reader *reader, const gdb_byte *info_ptr, std::variant parent, bool fully) { const gdb_byte *end_ptr = (reader->buffer () + to_underlying (reader->cu ()->header.sect_off) + reader->cu ()->header.get_length_with_initial ()); while (info_ptr < end_ptr) { sect_offset this_die = (sect_offset) (info_ptr - reader->buffer ()); unsigned int bytes_read; const abbrev_info *abbrev = reader->peek_die_abbrev (info_ptr, &bytes_read); info_ptr += bytes_read; if (abbrev == nullptr) break; if (abbrev->tag == DW_TAG_imported_unit) { info_ptr = index_imported_unit (reader, info_ptr, abbrev); continue; } parent_map::addr_type defer {}; if (std::holds_alternative (parent)) defer = std::get (parent); const cooked_index_entry *parent_entry = nullptr; if (std::holds_alternative (parent)) parent_entry = std::get (parent); /* If a DIE parent is a DW_TAG_subprogram, then the DIE is only interesting if it's a DW_TAG_subprogram or a DW_TAG_entry_point. */ bool die_interesting = (abbrev->interesting && (parent_entry == nullptr || parent_entry->tag != DW_TAG_subprogram || abbrev->tag == DW_TAG_subprogram || abbrev->tag == DW_TAG_entry_point)); if (!die_interesting) { info_ptr = reader->skip_one_die (info_ptr, abbrev, !fully); if (fully && abbrev->has_children) info_ptr = index_dies (reader, info_ptr, parent, fully); continue; } const char *name = nullptr; const char *linkage_name = nullptr; cooked_index_flag flags = IS_STATIC; sect_offset sibling {}; const cooked_index_entry *this_parent_entry = parent_entry; bool is_enum_class = false; /* The scope of a DW_TAG_entry_point cooked_index_entry is the one of its surrounding subroutine. */ if (abbrev->tag == DW_TAG_entry_point) this_parent_entry = parent_entry->get_parent (); info_ptr = scan_attributes (reader->cu ()->per_cu, reader, info_ptr, info_ptr, abbrev, &name, &linkage_name, &flags, &sibling, &this_parent_entry, &defer, &is_enum_class, false); /* A DW_TAG_entry_point inherits its static/extern property from the enclosing subroutine. */ if (abbrev->tag == DW_TAG_entry_point) { flags &= ~IS_STATIC; flags |= parent_entry->flags & IS_STATIC; } if (abbrev->tag == DW_TAG_namespace && m_language == language_cplus && strcmp (name, "::") == 0) { /* GCC 4.0 and 4.1 had a bug (PR c++/28460) where they generated bogus DW_TAG_namespace DIEs with a name of "::" for the global namespace. Work around this problem here. */ name = nullptr; } cooked_index_entry *this_entry = nullptr; if (name != nullptr) { if (defer != 0) this_entry = m_index_storage->add (this_die, abbrev->tag, flags | IS_PARENT_DEFERRED, name, defer, m_per_cu); else this_entry = m_index_storage->add (this_die, abbrev->tag, flags, name, this_parent_entry, m_per_cu); } if (linkage_name != nullptr) { /* We only want this to be "main" if it has a linkage name but not an ordinary name. */ if (name != nullptr) flags = flags & ~IS_MAIN; /* Set the IS_LINKAGE on for everything except when functions have linkage name present but name is absent. */ if (name != nullptr || (abbrev->tag != DW_TAG_subprogram && abbrev->tag != DW_TAG_inlined_subroutine && abbrev->tag != DW_TAG_entry_point)) flags = flags | IS_LINKAGE; m_index_storage->add (this_die, abbrev->tag, flags, linkage_name, nullptr, m_per_cu); } if (abbrev->has_children) { switch (abbrev->tag) { case DW_TAG_class_type: case DW_TAG_interface_type: case DW_TAG_structure_type: case DW_TAG_union_type: if (m_language != language_c && this_entry != nullptr) { info_ptr = recurse (reader, info_ptr, this_entry, fully); continue; } break; case DW_TAG_enumeration_type: /* Some versions of gdc could emit an "enum class" without a name, which is nonsensical. These are skipped. */ if (is_enum_class && this_entry == nullptr) continue; /* We need to recurse even for an anonymous enumeration. Which scope we record as the parent scope depends on whether we're reading an "enum class". If so, we use the enum itself as the parent, yielding names like "enum_class::enumerator"; otherwise we inject the names into our own parent scope. */ { std::variant recurse_parent; if (is_enum_class) { gdb_assert (this_entry != nullptr); recurse_parent = this_entry; } else if (defer != 0) recurse_parent = defer; else recurse_parent = this_parent_entry; info_ptr = recurse (reader, info_ptr, recurse_parent, fully); } continue; case DW_TAG_module: if (this_entry == nullptr) break; [[fallthrough]]; case DW_TAG_namespace: /* We don't check THIS_ENTRY for a namespace, to handle the ancient G++ workaround pointed out above. */ info_ptr = recurse (reader, info_ptr, this_entry, fully); continue; case DW_TAG_subprogram: if ((m_language == language_fortran || m_language == language_ada) && this_entry != nullptr) { info_ptr = recurse (reader, info_ptr, this_entry, true); continue; } break; } if (sibling != sect_offset (0)) { const gdb_byte *sibling_ptr = reader->buffer () + to_underlying (sibling); if (sibling_ptr < info_ptr) complaint (_("DW_AT_sibling points backwards")); else if (sibling_ptr > reader->buffer_end ()) reader->section ()->overflow_complaint (); else info_ptr = sibling_ptr; } else info_ptr = reader->skip_children (info_ptr); } } return info_ptr; } /* See cooked-indexer.h. */ void cooked_indexer::make_index (cutu_reader *reader) { check_bounds (reader); find_file_and_directory (reader->top_level_die (), reader->cu ()); if (!reader->top_level_die ()->has_children) return; index_dies (reader, reader->info_ptr (), nullptr, false); }