diff options
Diffstat (limited to 'gdb/dwarf2/cooked-indexer.c')
-rw-r--r-- | gdb/dwarf2/cooked-indexer.c | 694 |
1 files changed, 694 insertions, 0 deletions
diff --git a/gdb/dwarf2/cooked-indexer.c b/gdb/dwarf2/cooked-indexer.c new file mode 100644 index 0000000..c093984 --- /dev/null +++ b/gdb/dwarf2/cooked-indexer.c @@ -0,0 +1,694 @@ +/* DWARF indexer + + Copyright (C) 2022-2025 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "dwarf2/cooked-indexer.h" +#include "dwarf2/cooked-index-worker.h" +#include "dwarf2/error.h" + +/* See cooked-indexer.h. */ + +cooked_indexer::cooked_indexer (cooked_index_worker_result *storage, + dwarf2_per_cu *per_cu, enum language language) + : m_index_storage (storage), + m_per_cu (per_cu), + m_language (language), + m_die_range_map (storage->get_parent_map ()) +{ +} + +/* See cooked-indexer.h. */ + +void +cooked_indexer::check_bounds (cutu_reader *reader) +{ + dwarf2_cu *cu = reader->cu (); + + if (cu->per_cu->addresses_seen) + return; + + unrelocated_addr best_lowpc = {}, best_highpc = {}; + /* Possibly set the default values of LOWPC and HIGHPC from + `DW_AT_ranges'. */ + dwarf2_find_base_address (reader->top_level_die (), cu); + enum pc_bounds_kind cu_bounds_kind + = dwarf2_get_pc_bounds (reader->top_level_die (), &best_lowpc, &best_highpc, + cu, m_index_storage->get_addrmap (), cu->per_cu); + if (cu_bounds_kind == PC_BOUNDS_HIGH_LOW && best_lowpc < best_highpc) + { + /* Store the contiguous range if it is not empty; it can be + empty for CUs with no code. addrmap requires CORE_ADDR, so + we cast here. */ + m_index_storage->get_addrmap ()->set_empty ((CORE_ADDR) best_lowpc, + (CORE_ADDR) best_highpc - 1, + cu->per_cu); + + cu->per_cu->addresses_seen = true; + } +} + +/* Helper function that returns true if TAG can have a linkage + name. */ + +static bool +tag_can_have_linkage_name (enum dwarf_tag tag) +{ + switch (tag) + { + case DW_TAG_variable: + case DW_TAG_subprogram: + return true; + + default: + return false; + } +} + +/* See cooked-indexer.h. */ + +cutu_reader * +cooked_indexer::ensure_cu_exists (cutu_reader *reader, + sect_offset sect_off, bool is_dwz, + bool for_scanning) +{ + /* Lookups for type unit references are always in the CU, and + cross-CU references will crash. */ + if (reader->cu ()->per_cu->is_dwz == is_dwz + && reader->cu ()->header.offset_in_unit_p (sect_off)) + return reader; + + dwarf2_per_objfile *per_objfile = reader->cu ()->per_objfile; + dwarf2_per_cu *per_cu + = dwarf2_find_containing_comp_unit (sect_off, is_dwz, + per_objfile->per_bfd); + + /* When scanning, we only want to visit a given CU a single time. + Doing this check here avoids self-imports as well. */ + if (for_scanning) + { + bool nope = false; + if (!per_cu->scanned.compare_exchange_strong (nope, true)) + return nullptr; + } + + cutu_reader *result = m_index_storage->get_reader (per_cu); + if (result == nullptr) + { + const abbrev_table_cache &abbrev_table_cache + = m_index_storage->get_abbrev_table_cache (); + auto new_reader + = std::make_unique<cutu_reader> (*per_cu, *per_objfile, nullptr, + nullptr, false, language_minimal, + &abbrev_table_cache); + + if (new_reader->is_dummy ()) + return nullptr; + + result = m_index_storage->preserve (std::move (new_reader)); + } + + if (result->is_dummy ()) + return nullptr; + + if (for_scanning) + check_bounds (result); + + return result; +} + +/* See cooked-indexer.h. */ + +const gdb_byte * +cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, + cutu_reader *reader, + const gdb_byte *watermark_ptr, + const gdb_byte *info_ptr, + const abbrev_info *abbrev, + const char **name, + const char **linkage_name, + cooked_index_flag *flags, + sect_offset *sibling_offset, + const cooked_index_entry **parent_entry, + parent_map::addr_type *maybe_defer, + bool *is_enum_class, + bool for_specification) +{ + bool origin_is_dwz = false; + bool is_declaration = false; + sect_offset origin_offset {}; + + std::optional<unrelocated_addr> low_pc; + std::optional<unrelocated_addr> high_pc; + bool high_pc_relative = false; + + for (int i = 0; i < abbrev->num_attrs; ++i) + { + attribute attr; + info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); + + /* Store the data if it is of an attribute we want to keep in a + partial symbol table. */ + switch (attr.name) + { + case DW_AT_name: + switch (abbrev->tag) + { + case DW_TAG_compile_unit: + case DW_TAG_partial_unit: + case DW_TAG_type_unit: + /* Compilation units have a DW_AT_name that is a filename, not + a source language identifier. */ + break; + + default: + if (*name == nullptr) + *name = attr.as_string (); + break; + } + break; + + case DW_AT_linkage_name: + case DW_AT_MIPS_linkage_name: + /* Note that both forms of linkage name might appear. We + assume they will be the same, and we only store the last + one we see. */ + if (*linkage_name == nullptr) + *linkage_name = attr.as_string (); + break; + + /* DWARF 4 has defined a dedicated DW_AT_main_subprogram + attribute to indicate the starting function of the program... */ + case DW_AT_main_subprogram: + if (attr.as_boolean ()) + *flags |= IS_MAIN; + break; + + /* ... however with older versions the DW_CC_program value of + the DW_AT_calling_convention attribute was used instead as + the only means available. We handle both variants then. */ + case DW_AT_calling_convention: + { + std::optional<ULONGEST> value = attr.unsigned_constant (); + if (value.has_value () && *value == DW_CC_program) + *flags |= IS_MAIN; + } + break; + + case DW_AT_declaration: + is_declaration = attr.as_boolean (); + break; + + case DW_AT_sibling: + if (sibling_offset != nullptr) + *sibling_offset = attr.get_ref_die_offset (); + break; + + case DW_AT_specification: + case DW_AT_abstract_origin: + case DW_AT_extension: + origin_offset = attr.get_ref_die_offset (); + origin_is_dwz = attr.form_is_alt (); + break; + + case DW_AT_external: + if (attr.as_boolean ()) + *flags &= ~IS_STATIC; + break; + + case DW_AT_enum_class: + if (attr.as_boolean ()) + *is_enum_class = true; + break; + + case DW_AT_low_pc: + low_pc = attr.as_address (); + break; + + case DW_AT_high_pc: + high_pc = attr.as_address (); + if (reader->cu ()->header.version >= 4 && attr.form_is_constant ()) + high_pc_relative = true; + break; + + case DW_AT_location: + if (!scanning_per_cu->addresses_seen && attr.form_is_block ()) + { + struct dwarf_block *locdesc = attr.as_block (); + CORE_ADDR addr; + dwarf2_cu *cu = reader->cu (); + + if (decode_locdesc (locdesc, cu, &addr) + && (addr != 0 + || cu->per_objfile->per_bfd->has_section_at_zero)) + { + low_pc = (unrelocated_addr) addr; + /* For variables, we don't want to try decoding the + type just to find the size -- for gdb's purposes + we only need the address of a variable. */ + high_pc = (unrelocated_addr) (addr + 1); + high_pc_relative = false; + } + } + break; + + case DW_AT_ranges: + if (!scanning_per_cu->addresses_seen) + { + /* Offset in the .debug_ranges or .debug_rnglist section + (depending on DWARF version). */ + ULONGEST ranges_offset = attr.as_unsigned (); + + /* See dwarf2_cu::gnu_ranges_base's doc for why we might + want to add this value. */ + ranges_offset += reader->cu ()->gnu_ranges_base; + + unrelocated_addr lowpc, highpc; + dwarf2_ranges_read (ranges_offset, &lowpc, &highpc, reader->cu (), + m_index_storage->get_addrmap (), + scanning_per_cu, abbrev->tag); + } + break; + } + } + + /* We don't want to examine declarations, but if we found a + declaration when handling DW_AT_specification or the like, then + that is ok. Similarly, we allow an external variable without a + location; those are resolved via minimal symbols. */ + if (is_declaration && !for_specification + && !(abbrev->tag == DW_TAG_variable && (*flags & IS_STATIC) == 0)) + { + /* We always want to recurse into some types, but we may not + want to treat them as definitions. */ + if ((abbrev->tag == DW_TAG_class_type + || abbrev->tag == DW_TAG_structure_type + || abbrev->tag == DW_TAG_union_type + || abbrev->tag == DW_TAG_namespace) + && abbrev->has_children) + *flags |= IS_TYPE_DECLARATION; + else + { + *linkage_name = nullptr; + *name = nullptr; + } + } + else if ((*name == nullptr + || (*linkage_name == nullptr + && tag_can_have_linkage_name (abbrev->tag)) + || (*parent_entry == nullptr && m_language != language_c)) + && origin_offset != sect_offset (0)) + { + cutu_reader *new_reader + = ensure_cu_exists (reader, origin_offset, origin_is_dwz, false); + if (new_reader == nullptr) + error (_(DWARF_ERROR_PREFIX + "cannot follow reference to DIE at %s" + " [in module %s]"), + sect_offset_str (origin_offset), + bfd_get_filename (reader->abfd ())); + + const gdb_byte *new_info_ptr + = (new_reader->buffer () + to_underlying (origin_offset)); + + if (*parent_entry == nullptr) + { + /* We only perform immediate lookups of parents for DIEs + from earlier in this CU. This avoids any problem + with a NULL result when when we see a reference to a + DIE in another CU that we may or may not have + imported locally. */ + parent_map::addr_type addr = parent_map::form_addr (new_info_ptr); + if (new_reader->cu () != reader->cu () + || new_info_ptr > watermark_ptr) + *maybe_defer = addr; + else + *parent_entry = m_die_range_map->find (addr); + } + + unsigned int bytes_read; + const abbrev_info *new_abbrev + = new_reader->peek_die_abbrev (new_info_ptr, &bytes_read); + + if (new_abbrev == nullptr) + error (_(DWARF_ERROR_PREFIX + "Unexpected null DIE at offset %s [in module %s]"), + sect_offset_str (origin_offset), + bfd_get_filename (new_reader->abfd ())); + + new_info_ptr += bytes_read; + + if (new_reader->cu () == reader->cu () && new_info_ptr == watermark_ptr) + { + /* Self-reference, we're done. */ + } + else + scan_attributes (scanning_per_cu, new_reader, new_info_ptr, + new_info_ptr, new_abbrev, name, linkage_name, + flags, nullptr, parent_entry, maybe_defer, + is_enum_class, true); + } + + if (!for_specification) + { + /* Older versions of GNAT emit full-qualified encoded names. In + this case, also use this name as the linkage name. */ + if (m_language == language_ada + && *linkage_name == nullptr + && *name != nullptr + && strstr (*name, "__") != nullptr) + *linkage_name = *name; + + if (!scanning_per_cu->addresses_seen && low_pc.has_value () + && (reader->cu ()->per_objfile->per_bfd->has_section_at_zero + || *low_pc != (unrelocated_addr) 0) + && high_pc.has_value ()) + { + if (high_pc_relative) + high_pc = (unrelocated_addr) ((ULONGEST) *high_pc + + (ULONGEST) *low_pc); + + if (*high_pc > *low_pc) + { + /* Need CORE_ADDR casts for addrmap. */ + m_index_storage->get_addrmap ()->set_empty + ((CORE_ADDR) *low_pc, (CORE_ADDR) *high_pc - 1, + scanning_per_cu); + } + } + + if (abbrev->tag == DW_TAG_namespace && *name == nullptr) + *name = "(anonymous namespace)"; + + /* Keep in sync with new_symbol. */ + if (abbrev->tag == DW_TAG_subprogram + && (m_language == language_ada + || m_language == language_fortran)) + *flags &= ~IS_STATIC; + } + + return info_ptr; +} + +/* See cooked-indexer.h. */ + +const gdb_byte * +cooked_indexer::index_imported_unit (cutu_reader *reader, + const gdb_byte *info_ptr, + const abbrev_info *abbrev) +{ + sect_offset sect_off {}; + bool is_dwz = false; + + for (int i = 0; i < abbrev->num_attrs; ++i) + { + /* Note that we never need to reprocess attributes here. */ + attribute attr; + info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); + + if (attr.name == DW_AT_import) + { + sect_off = attr.get_ref_die_offset (); + is_dwz = (attr.form_is_alt () + || reader->cu ()->per_cu->is_dwz); + } + } + + /* Did not find DW_AT_import. */ + if (sect_off == sect_offset (0)) + return info_ptr; + + cutu_reader *new_reader + = ensure_cu_exists (reader, sect_off, is_dwz, true); + if (new_reader != nullptr) + { + index_dies (new_reader, new_reader->info_ptr (), nullptr, false); + + reader->cu ()->add_dependence (new_reader->cu ()->per_cu); + } + + return info_ptr; +} + +/* See cooked-indexer.h. */ + +const gdb_byte * +cooked_indexer::recurse (cutu_reader *reader, + const gdb_byte *info_ptr, + std::variant<const cooked_index_entry *, + parent_map::addr_type> parent, + bool fully) +{ + info_ptr = index_dies (reader, info_ptr, parent, fully); + + if (!std::holds_alternative<const cooked_index_entry *> (parent)) + return info_ptr; + const cooked_index_entry *parent_entry + = std::get<const cooked_index_entry *> (parent); + + if (parent_entry != nullptr) + { + /* Both start and end are inclusive, so use both "+ 1" and "- 1" to + limit the range to the children of parent_entry. */ + parent_map::addr_type start + = parent_map::form_addr (reader->buffer () + + to_underlying (parent_entry->die_offset) + + 1); + parent_map::addr_type end = parent_map::form_addr (info_ptr - 1); + m_die_range_map->add_entry (start, end, parent_entry); + } + + return info_ptr; +} + +/* See cooked-indexer.h. */ + +const gdb_byte * +cooked_indexer::index_dies (cutu_reader *reader, + const gdb_byte *info_ptr, + std::variant<const cooked_index_entry *, + parent_map::addr_type> parent, + bool fully) +{ + const gdb_byte *end_ptr + = (reader->buffer () + to_underlying (reader->cu ()->header.sect_off) + + reader->cu ()->header.get_length_with_initial ()); + + while (info_ptr < end_ptr) + { + sect_offset this_die = (sect_offset) (info_ptr - reader->buffer ()); + unsigned int bytes_read; + const abbrev_info *abbrev + = reader->peek_die_abbrev (info_ptr, &bytes_read); + info_ptr += bytes_read; + if (abbrev == nullptr) + break; + + if (abbrev->tag == DW_TAG_imported_unit) + { + info_ptr = index_imported_unit (reader, info_ptr, abbrev); + continue; + } + + parent_map::addr_type defer {}; + if (std::holds_alternative<parent_map::addr_type> (parent)) + defer = std::get<parent_map::addr_type> (parent); + const cooked_index_entry *parent_entry = nullptr; + if (std::holds_alternative<const cooked_index_entry *> (parent)) + parent_entry = std::get<const cooked_index_entry *> (parent); + + /* If a DIE parent is a DW_TAG_subprogram, then the DIE is only + interesting if it's a DW_TAG_subprogram or a DW_TAG_entry_point. */ + bool die_interesting + = (abbrev->interesting + && (parent_entry == nullptr + || parent_entry->tag != DW_TAG_subprogram + || abbrev->tag == DW_TAG_subprogram + || abbrev->tag == DW_TAG_entry_point)); + + if (!die_interesting) + { + info_ptr = reader->skip_one_die (info_ptr, abbrev, !fully); + if (fully && abbrev->has_children) + info_ptr = index_dies (reader, info_ptr, parent, fully); + continue; + } + + const char *name = nullptr; + const char *linkage_name = nullptr; + cooked_index_flag flags = IS_STATIC; + sect_offset sibling {}; + const cooked_index_entry *this_parent_entry = parent_entry; + bool is_enum_class = false; + + /* The scope of a DW_TAG_entry_point cooked_index_entry is the one of + its surrounding subroutine. */ + if (abbrev->tag == DW_TAG_entry_point) + this_parent_entry = parent_entry->get_parent (); + info_ptr + = scan_attributes (reader->cu ()->per_cu, reader, info_ptr, info_ptr, + abbrev, &name, &linkage_name, &flags, &sibling, + &this_parent_entry, &defer, &is_enum_class, false); + /* A DW_TAG_entry_point inherits its static/extern property from + the enclosing subroutine. */ + if (abbrev->tag == DW_TAG_entry_point) + { + flags &= ~IS_STATIC; + flags |= parent_entry->flags & IS_STATIC; + } + + if (abbrev->tag == DW_TAG_namespace + && m_language == language_cplus + && strcmp (name, "::") == 0) + { + /* GCC 4.0 and 4.1 had a bug (PR c++/28460) where they + generated bogus DW_TAG_namespace DIEs with a name of "::" + for the global namespace. Work around this problem + here. */ + name = nullptr; + } + + cooked_index_entry *this_entry = nullptr; + if (name != nullptr) + { + if (defer != 0) + this_entry + = m_index_storage->add (this_die, abbrev->tag, + flags | IS_PARENT_DEFERRED, name, + defer, m_per_cu); + else + this_entry + = m_index_storage->add (this_die, abbrev->tag, flags, name, + this_parent_entry, m_per_cu); + } + + if (linkage_name != nullptr) + { + /* We only want this to be "main" if it has a linkage name + but not an ordinary name. */ + if (name != nullptr) + flags = flags & ~IS_MAIN; + /* Set the IS_LINKAGE on for everything except when functions + have linkage name present but name is absent. */ + if (name != nullptr + || (abbrev->tag != DW_TAG_subprogram + && abbrev->tag != DW_TAG_inlined_subroutine + && abbrev->tag != DW_TAG_entry_point)) + flags = flags | IS_LINKAGE; + m_index_storage->add (this_die, abbrev->tag, flags, + linkage_name, nullptr, m_per_cu); + } + + if (abbrev->has_children) + { + switch (abbrev->tag) + { + case DW_TAG_class_type: + case DW_TAG_interface_type: + case DW_TAG_structure_type: + case DW_TAG_union_type: + if (m_language != language_c && this_entry != nullptr) + { + info_ptr = recurse (reader, info_ptr, this_entry, fully); + continue; + } + break; + + case DW_TAG_enumeration_type: + /* Some versions of gdc could emit an "enum class" + without a name, which is nonsensical. These are + skipped. */ + if (is_enum_class && this_entry == nullptr) + continue; + + /* We need to recurse even for an anonymous enumeration. + Which scope we record as the parent scope depends on + whether we're reading an "enum class". If so, we use + the enum itself as the parent, yielding names like + "enum_class::enumerator"; otherwise we inject the + names into our own parent scope. */ + { + std::variant<const cooked_index_entry *, + parent_map::addr_type> recurse_parent; + if (is_enum_class) + { + gdb_assert (this_entry != nullptr); + recurse_parent = this_entry; + } + else if (defer != 0) + recurse_parent = defer; + else + recurse_parent = this_parent_entry; + + info_ptr = recurse (reader, info_ptr, recurse_parent, fully); + } + continue; + + case DW_TAG_module: + if (this_entry == nullptr) + break; + [[fallthrough]]; + case DW_TAG_namespace: + /* We don't check THIS_ENTRY for a namespace, to handle + the ancient G++ workaround pointed out above. */ + info_ptr = recurse (reader, info_ptr, this_entry, fully); + continue; + + case DW_TAG_subprogram: + if ((m_language == language_fortran + || m_language == language_ada) + && this_entry != nullptr) + { + info_ptr = recurse (reader, info_ptr, this_entry, true); + continue; + } + break; + } + + if (sibling != sect_offset (0)) + { + const gdb_byte *sibling_ptr + = reader->buffer () + to_underlying (sibling); + + if (sibling_ptr < info_ptr) + complaint (_("DW_AT_sibling points backwards")); + else if (sibling_ptr > reader->buffer_end ()) + reader->section ()->overflow_complaint (); + else + info_ptr = sibling_ptr; + } + else + info_ptr = reader->skip_children (info_ptr); + } + } + + return info_ptr; +} + +/* See cooked-indexer.h. */ + +void +cooked_indexer::make_index (cutu_reader *reader) +{ + check_bounds (reader); + find_file_and_directory (reader->top_level_die (), reader->cu ()); + + if (!reader->top_level_die ()->has_children) + return; + + index_dies (reader, reader->info_ptr (), nullptr, false); +} |