aboutsummaryrefslogtreecommitdiff
path: root/gdb
diff options
context:
space:
mode:
authorTom Tromey <tromey@adacore.com>2025-03-06 10:59:41 -0700
committerTom Tromey <tromey@adacore.com>2025-03-24 09:47:28 -0600
commit89e59ca6dce9bc7ca88b5d6c1a073c39b8f9bcc9 (patch)
treec6ccbeba11b3ad8a2056c9a8c4b85737d503cbb4 /gdb
parent15bbe2b14ca5b15a6e7c5c30c554fb8fd2f83e61 (diff)
downloadbinutils-89e59ca6dce9bc7ca88b5d6c1a073c39b8f9bcc9.zip
binutils-89e59ca6dce9bc7ca88b5d6c1a073c39b8f9bcc9.tar.gz
binutils-89e59ca6dce9bc7ca88b5d6c1a073c39b8f9bcc9.tar.bz2
Introduce gdb_bfd_canonicalize_symtab
bfd_canonicalize_symtab stores the symbols in the BFD, and returns pointers to these. The ELF reader does not reuse these stored symbols, so each call to bfd_canonicalize_symtab causes an allocation. This interacts poorly with code like arm_pikeos_osabi_sniffer, which searches the BFD symbol when called. PR gdb/32758 points out a particularly pathological case: using "maint info sections" on a program with a large number of sections (10000) will cause 10000 calls to arm_pikeos_osabi_sniffer, allocating 20G. I'm not sure BFD always worked this way. And, fixing BFD was an option. However it seemed maybe better for GDB to adapt, since adapting would mean that the fix would apply to all BFD back ends, and not just ELF. To that end, this patch adds a new gdb_bfd_canonicalize_symtab and changes all callers of bfd_canonicalize_symtab to use it instead. This new function caches the result in the per-BFD object. I looked into having this return a view of "const asymbol *". However both the compile module and machoread modify the returned symbols. And while I think this is wrong, I haven't tried to fix this here. Regression tested on x86-64 Fedora 40. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32758
Diffstat (limited to 'gdb')
-rw-r--r--gdb/arm-pikeos-tdep.c17
-rw-r--r--gdb/compile/compile-object-load.c26
-rw-r--r--gdb/dicos-tdep.c41
-rw-r--r--gdb/elfread.c35
-rw-r--r--gdb/gdb_bfd.c55
-rw-r--r--gdb/gdb_bfd.h12
-rw-r--r--gdb/solib-darwin.c18
-rw-r--r--gdb/solib.c59
8 files changed, 122 insertions, 141 deletions
diff --git a/gdb/arm-pikeos-tdep.c b/gdb/arm-pikeos-tdep.c
index 4760755..b2c93bd 100644
--- a/gdb/arm-pikeos-tdep.c
+++ b/gdb/arm-pikeos-tdep.c
@@ -36,8 +36,6 @@ arm_pikeos_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch)
static enum gdb_osabi
arm_pikeos_osabi_sniffer (bfd *abfd)
{
- long number_of_symbols;
- long i;
int pikeos_stack_found = 0;
int pikeos_stack_size_found = 0;
@@ -50,20 +48,15 @@ arm_pikeos_osabi_sniffer (bfd *abfd)
OS ABI sniffers are called before the minimal symtabs are
created. So inspect the symbol table using BFD. */
- long storage_needed = bfd_get_symtab_upper_bound (abfd);
- if (storage_needed <= 0)
- return GDB_OSABI_UNKNOWN;
-
- gdb::unique_xmalloc_ptr<asymbol *> symbol_table
- ((asymbol **) xmalloc (storage_needed));
- number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table.get ());
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (abfd, false);
- if (number_of_symbols <= 0)
+ if (symbol_table.empty ())
return GDB_OSABI_UNKNOWN;
- for (i = 0; i < number_of_symbols; i++)
+ for (const asymbol *sym : symbol_table)
{
- const char *name = bfd_asymbol_name (symbol_table.get ()[i]);
+ const char *name = bfd_asymbol_name (sym);
if (strcmp (name, "_vm_stack") == 0
|| strcmp (name, "__p4_stack") == 0)
diff --git a/gdb/compile/compile-object-load.c b/gdb/compile/compile-object-load.c
index ef77ee3..05e5b43 100644
--- a/gdb/compile/compile-object-load.c
+++ b/gdb/compile/compile-object-load.c
@@ -605,9 +605,7 @@ compile_object_load (const compile_file_names &file_names,
CORE_ADDR regs_addr, out_value_addr = 0;
struct symbol *func_sym;
struct type *func_type;
- long storage_needed;
- asymbol **symbol_table, **symp;
- long number_of_symbols, missing_symbols;
+ long missing_symbols;
struct type *regs_type, *out_value_type = NULL;
char **matching;
struct objfile *objfile;
@@ -635,11 +633,6 @@ compile_object_load (const compile_file_names &file_names,
setup_sections_data.setup_one_section (sect);
setup_sections_data.setup_one_section (nullptr);
- storage_needed = bfd_get_symtab_upper_bound (abfd.get ());
- if (storage_needed < 0)
- error (_("Cannot read symbols of compiled module \"%s\": %s"),
- filename.get (), bfd_errmsg (bfd_get_error ()));
-
/* SYMFILE_VERBOSE is not passed even if FROM_TTY, user is not interested in
"Reading symbols from ..." message for automatically generated file. */
scoped_objfile_unlinker objfile_holder (symbol_file_add_from_bfd
@@ -692,21 +685,12 @@ compile_object_load (const compile_file_names &file_names,
"module \"%s\"."),
GCC_FE_WRAPPER_FUNCTION, objfile_name (objfile));
- /* The memory may be later needed
- by bfd_generic_get_relocated_section_contents
- called from default_symfile_relocate. */
- symbol_table = (asymbol **) obstack_alloc (&objfile->objfile_obstack,
- storage_needed);
- number_of_symbols = bfd_canonicalize_symtab (abfd.get (), symbol_table);
- if (number_of_symbols < 0)
- error (_("Cannot parse symbols of compiled module \"%s\": %s"),
- filename.get (), bfd_errmsg (bfd_get_error ()));
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (abfd.get ());
missing_symbols = 0;
- for (symp = symbol_table; symp < symbol_table + number_of_symbols; symp++)
+ for (asymbol *sym : symbol_table)
{
- asymbol *sym = *symp;
-
if (sym->flags != 0)
continue;
sym->flags = BSF_GLOBAL;
@@ -800,7 +784,7 @@ compile_object_load (const compile_file_names &file_names,
if (missing_symbols)
error (_("%ld symbols were missing, cannot continue."), missing_symbols);
- bfd_map_over_sections (abfd.get (), copy_sections, symbol_table);
+ bfd_map_over_sections (abfd.get (), copy_sections, symbol_table.data ());
regs_type = get_regs_type (func_sym, objfile);
if (regs_type == NULL)
diff --git a/gdb/dicos-tdep.c b/gdb/dicos-tdep.c
index 3627426..96b841a 100644
--- a/gdb/dicos-tdep.c
+++ b/gdb/dicos-tdep.c
@@ -53,9 +53,7 @@ dicos_init_abi (struct gdbarch *gdbarch)
int
dicos_load_module_p (bfd *abfd, int header_size)
{
- long storage_needed;
int ret = 0;
- asymbol **symbol_table = NULL;
const char *symname = "Dicos_loadModuleInfo";
asection *section;
@@ -75,42 +73,19 @@ dicos_load_module_p (bfd *abfd, int header_size)
/* Dicos LMs always have a "Dicos_loadModuleInfo" symbol
defined. Look for it. */
- storage_needed = bfd_get_symtab_upper_bound (abfd);
- if (storage_needed < 0)
- {
- warning (_("Can't read elf symbols from %s: %s"),
- bfd_get_filename (abfd),
- bfd_errmsg (bfd_get_error ()));
- return 0;
- }
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (abfd, false);
- if (storage_needed > 0)
+ for (asymbol *sym : symbol_table)
{
- long i, symcount;
-
- symbol_table = (asymbol **) xmalloc (storage_needed);
- symcount = bfd_canonicalize_symtab (abfd, symbol_table);
-
- if (symcount < 0)
- warning (_("Can't read elf symbols from %s: %s"),
- bfd_get_filename (abfd),
- bfd_errmsg (bfd_get_error ()));
- else
+ if (sym->name != NULL
+ && symname[0] == sym->name[0]
+ && strcmp (symname + 1, sym->name + 1) == 0)
{
- for (i = 0; i < symcount; i++)
- {
- asymbol *sym = symbol_table[i];
- if (sym->name != NULL
- && symname[0] == sym->name[0]
- && strcmp (symname + 1, sym->name + 1) == 0)
- {
- ret = 1;
- break;
- }
- }
+ ret = 1;
+ break;
}
}
- xfree (symbol_table);
return ret;
}
diff --git a/gdb/elfread.c b/gdb/elfread.c
index 5be3118..3756fa3 100644
--- a/gdb/elfread.c
+++ b/gdb/elfread.c
@@ -1062,8 +1062,8 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags,
const struct elfinfo *ei)
{
bfd *synth_abfd, *abfd = objfile->obfd.get ();
- long symcount = 0, dynsymcount = 0, synthcount, storage_needed;
- asymbol **symbol_table = NULL, **dyn_symbol_table = NULL;
+ long dynsymcount = 0, synthcount;
+ asymbol **dyn_symbol_table = NULL;
asymbol *synthsyms;
symtab_create_debug_printf ("reading minimal symbols of objfile %s",
@@ -1087,32 +1087,16 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags,
/* Process the normal ELF symbol table first. */
- storage_needed = bfd_get_symtab_upper_bound (objfile->obfd.get ());
- if (storage_needed < 0)
- error (_("Can't read symbols from %s: %s"),
- bfd_get_filename (objfile->obfd.get ()),
- bfd_errmsg (bfd_get_error ()));
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (objfile->obfd.get ());
- if (storage_needed > 0)
- {
- /* Memory gets permanently referenced from ABFD after
- bfd_canonicalize_symtab so it must not get freed before ABFD gets. */
-
- symbol_table = (asymbol **) bfd_alloc (abfd, storage_needed);
- symcount = bfd_canonicalize_symtab (objfile->obfd.get (), symbol_table);
-
- if (symcount < 0)
- error (_("Can't read symbols from %s: %s"),
- bfd_get_filename (objfile->obfd.get ()),
- bfd_errmsg (bfd_get_error ()));
-
- elf_symtab_read (reader, objfile, ST_REGULAR, symcount, symbol_table,
- false);
- }
+ elf_symtab_read (reader, objfile, ST_REGULAR, symbol_table.size (),
+ symbol_table.data (), false);
/* Add the dynamic symbols. */
- storage_needed = bfd_get_dynamic_symtab_upper_bound (objfile->obfd.get ());
+ long storage_needed
+ = bfd_get_dynamic_symtab_upper_bound (objfile->obfd.get ());
if (storage_needed > 0)
{
@@ -1157,7 +1141,8 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags,
/* Add synthetic symbols - for instance, names for any PLT entries. */
- synthcount = bfd_get_synthetic_symtab (synth_abfd, symcount, symbol_table,
+ synthcount = bfd_get_synthetic_symtab (synth_abfd, symbol_table.size (),
+ symbol_table.data (),
dynsymcount, dyn_symbol_table,
&synthsyms);
if (synthcount > 0)
diff --git a/gdb/gdb_bfd.c b/gdb/gdb_bfd.c
index 8380c53..1a57b3c 100644
--- a/gdb/gdb_bfd.c
+++ b/gdb/gdb_bfd.c
@@ -143,6 +143,13 @@ struct gdb_bfd_data
/* Table of all the bfds this bfd has included. */
std::vector<gdb_bfd_ref_ptr> included_bfds;
+ /* This is used by gdb_bfd_canonicalize_symtab to hold the symbols
+ returned by canonicalization. */
+ std::optional<gdb::def_vector<asymbol *>> symbol_table;
+ /* If an error occurred while canonicalizing the symtab, this holds
+ the error message. */
+ std::string symbol_error;
+
/* The registry. */
registry<bfd> registry_fields;
@@ -1177,6 +1184,54 @@ gdb_bfd_errmsg (bfd_error_type error_tag, char **matching)
return ret;
}
+/* See gdb_bfd.h. */
+
+gdb::array_view<asymbol *>
+gdb_bfd_canonicalize_symtab (bfd *abfd, bool should_throw)
+{
+ struct gdb_bfd_data *gdata = (struct gdb_bfd_data *) bfd_usrdata (abfd);
+
+ if (!gdata->symbol_table.has_value ())
+ {
+ /* Ensure it exists. */
+ gdb::def_vector<asymbol *> &symbol_table
+ = gdata->symbol_table.emplace ();
+
+ long storage_needed = bfd_get_symtab_upper_bound (abfd);
+ if (storage_needed < 0)
+ gdata->symbol_error = bfd_errmsg (bfd_get_error ());
+ else if (storage_needed > 0)
+ {
+ symbol_table.resize (storage_needed / sizeof (asymbol *));
+ long number_of_symbols
+ = bfd_canonicalize_symtab (abfd, symbol_table.data ());
+ if (number_of_symbols < 0)
+ {
+ symbol_table.clear ();
+ gdata->symbol_error = bfd_errmsg (bfd_get_error ());
+ }
+ }
+ }
+
+ if (!gdata->symbol_error.empty ())
+ {
+ if (should_throw)
+ error (_("Cannot parse symbols of \"%s\": %s"),
+ bfd_get_filename (abfd), gdata->symbol_error.c_str ());
+ return {};
+ }
+
+ gdb::def_vector<asymbol *> &symbol_table = *gdata->symbol_table;
+ if (symbol_table.empty ())
+ return {};
+
+ /* bfd_canonicalize_symtab adds a trailing NULL, but don't include
+ this in the array view. */
+ gdb_assert (symbol_table.back () == nullptr);
+ return gdb::make_array_view (symbol_table.data (),
+ symbol_table.size () - 1);
+}
+
/* Implement the 'maint info bfd' command. */
static void
diff --git a/gdb/gdb_bfd.h b/gdb/gdb_bfd.h
index d35f2d6..7830bf3 100644
--- a/gdb/gdb_bfd.h
+++ b/gdb/gdb_bfd.h
@@ -274,4 +274,16 @@ extern std::string gdb_bfd_errmsg (bfd_error_type error_tag, char **matching);
extern void gdb_bfd_init ();
+/* A wrapper for bfd_canonicalize_symtab that caches the result. This
+ is important to avoid excess memory use on repeated calls. See
+ PR gdb/32758. bfd_canonicalize_symtab should not be called directly
+ by other code in gdb.
+
+ When SHOULD_THROW is true (the default), this will throw an
+ exception if symbols could not be read. When SHOULD_THROW is
+ false, an empty view is returned instead. */
+
+extern gdb::array_view<asymbol *> gdb_bfd_canonicalize_symtab
+ (bfd *abfd, bool should_throw = true);
+
#endif /* GDB_GDB_BFD_H */
diff --git a/gdb/solib-darwin.c b/gdb/solib-darwin.c
index 6c7d906..cbd89b1 100644
--- a/gdb/solib-darwin.c
+++ b/gdb/solib-darwin.c
@@ -146,24 +146,13 @@ struct lm_info_darwin final : public lm_info
static CORE_ADDR
lookup_symbol_from_bfd (bfd *abfd, const char *symname)
{
- long storage_needed;
- asymbol **symbol_table;
- unsigned int number_of_symbols;
- unsigned int i;
CORE_ADDR symaddr = 0;
- storage_needed = bfd_get_symtab_upper_bound (abfd);
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (abfd, false);
- if (storage_needed <= 0)
- return 0;
-
- symbol_table = (asymbol **) xmalloc (storage_needed);
- number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table);
-
- for (i = 0; i < number_of_symbols; i++)
+ for (const asymbol *sym : symbol_table)
{
- asymbol *sym = symbol_table[i];
-
if (strcmp (sym->name, symname) == 0
&& (sym->section->flags & (SEC_CODE | SEC_DATA)) != 0)
{
@@ -172,7 +161,6 @@ lookup_symbol_from_bfd (bfd *abfd, const char *symname)
break;
}
}
- xfree (symbol_table);
return symaddr;
}
diff --git a/gdb/solib.c b/gdb/solib.c
index 7782c8d..b1fdea9 100644
--- a/gdb/solib.c
+++ b/gdb/solib.c
@@ -1431,49 +1431,38 @@ CORE_ADDR
gdb_bfd_lookup_symbol_from_symtab (
bfd *abfd, gdb::function_view<bool (const asymbol *)> match_sym)
{
- long storage_needed = bfd_get_symtab_upper_bound (abfd);
CORE_ADDR symaddr = 0;
+ gdb::array_view<asymbol *> symbol_table
+ = gdb_bfd_canonicalize_symtab (abfd, false);
- if (storage_needed > 0)
+ for (asymbol *sym : symbol_table)
{
- unsigned int i;
-
- gdb::def_vector<asymbol *> storage (storage_needed / sizeof (asymbol *));
- asymbol **symbol_table = storage.data ();
- unsigned int number_of_symbols
- = bfd_canonicalize_symtab (abfd, symbol_table);
-
- for (i = 0; i < number_of_symbols; i++)
+ if (match_sym (sym))
{
- asymbol *sym = *symbol_table++;
-
- if (match_sym (sym))
+ gdbarch *gdbarch = current_inferior ()->arch ();
+ symaddr = sym->value;
+
+ /* Some ELF targets fiddle with addresses of symbols they
+ consider special. They use minimal symbols to do that
+ and this is needed for correct breakpoint placement,
+ but we do not have full data here to build a complete
+ minimal symbol, so just set the address and let the
+ targets cope with that. */
+ if (bfd_get_flavour (abfd) == bfd_target_elf_flavour
+ && gdbarch_elf_make_msymbol_special_p (gdbarch))
{
- gdbarch *gdbarch = current_inferior ()->arch ();
- symaddr = sym->value;
-
- /* Some ELF targets fiddle with addresses of symbols they
- consider special. They use minimal symbols to do that
- and this is needed for correct breakpoint placement,
- but we do not have full data here to build a complete
- minimal symbol, so just set the address and let the
- targets cope with that. */
- if (bfd_get_flavour (abfd) == bfd_target_elf_flavour
- && gdbarch_elf_make_msymbol_special_p (gdbarch))
+ struct minimal_symbol msym
{
- struct minimal_symbol msym
- {
- };
-
- msym.set_value_address (symaddr);
- gdbarch_elf_make_msymbol_special (gdbarch, sym, &msym);
- symaddr = CORE_ADDR (msym.unrelocated_address ());
- }
+ };
- /* BFD symbols are section relative. */
- symaddr += sym->section->vma;
- break;
+ msym.set_value_address (symaddr);
+ gdbarch_elf_make_msymbol_special (gdbarch, sym, &msym);
+ symaddr = CORE_ADDR (msym.unrelocated_address ());
}
+
+ /* BFD symbols are section relative. */
+ symaddr += sym->section->vma;
+ break;
}
}