aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Tromey <tom@tromey.com>2021-04-17 13:56:36 -0600
committerTom Tromey <tom@tromey.com>2021-04-17 13:56:36 -0600
commit42c2c69462fd83db2e0532ee57c44091bc1032f9 (patch)
tree13355973b52c4ffc51df5db1838a4907138e6270
parentda314dd397ab967af558f2929a79349aa5f96ac8 (diff)
downloadgdb-42c2c69462fd83db2e0532ee57c44091bc1032f9.zip
gdb-42c2c69462fd83db2e0532ee57c44091bc1032f9.tar.gz
gdb-42c2c69462fd83db2e0532ee57c44091bc1032f9.tar.bz2
Handle unaligned mapping of .gdb_index
The .gdb_index was designed such that all data would be aligned. Unfortunately, we neglected to require this alignment in the objcopy instructions in the manual. As a result, in many cases, a .gdb_index in the wild will not be properly aligned by mmap. This yields undefined behavior, which is PR gdb/23743. This patch fixes the bug by always assuming that the mapping is unaligned, and using extract_unsigned_integer when needed. A new helper class is introduced to make this less painful. gdb/ChangeLog 2021-04-17 Tom Tromey <tom@tromey.com> PR gdb/23743: * dwarf2/read.c (class offset_view): New. (struct symbol_table_slot): Remove. (struct mapped_index) <symbol_table, constant_pool>: Change type. <symbol_name_index, symbol_vec_index>: New methods. <symbol_name_slot_invalid, symbol_name_at, symbol_name_count>: Rewrite. (read_gdb_index_from_buffer): Update. (struct dw2_symtab_iterator) <vec>: Change type. (dw2_symtab_iter_init_common, dw2_symtab_iter_init) (dw2_symtab_iter_next, dw2_expand_marked_cus): Update. * dwarf2/index-write.c (class data_buf) <append_data>: Remove. <append_array, append_offset>: New methods. (write_hash_table, add_address_entry, write_gdbindex_1) (write_debug_names): Update. * dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
-rw-r--r--gdb/ChangeLog19
-rw-r--r--gdb/dwarf2/index-common.h21
-rw-r--r--gdb/dwarf2/index-write.c41
-rw-r--r--gdb/dwarf2/read.c132
4 files changed, 132 insertions, 81 deletions
diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 8035975..678ad27 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,5 +1,24 @@
2021-04-17 Tom Tromey <tom@tromey.com>
+ PR gdb/23743:
+ * dwarf2/read.c (class offset_view): New.
+ (struct symbol_table_slot): Remove.
+ (struct mapped_index) <symbol_table, constant_pool>: Change type.
+ <symbol_name_index, symbol_vec_index>: New methods.
+ <symbol_name_slot_invalid, symbol_name_at, symbol_name_count>:
+ Rewrite.
+ (read_gdb_index_from_buffer): Update.
+ (struct dw2_symtab_iterator) <vec>: Change type.
+ (dw2_symtab_iter_init_common, dw2_symtab_iter_init)
+ (dw2_symtab_iter_next, dw2_expand_marked_cus): Update.
+ * dwarf2/index-write.c (class data_buf) <append_data>: Remove.
+ <append_array, append_offset>: New methods.
+ (write_hash_table, add_address_entry, write_gdbindex_1)
+ (write_debug_names): Update.
+ * dwarf2/index-common.h (byte_swap, MAYBE_SWAP): Remove.
+
+2021-04-17 Tom Tromey <tom@tromey.com>
+
* dwarf2/index-write.c (write_psymtabs_to_index): Check
partial_symtabs.
diff --git a/gdb/dwarf2/index-common.h b/gdb/dwarf2/index-common.h
index fa762c4..8618074 100644
--- a/gdb/dwarf2/index-common.h
+++ b/gdb/dwarf2/index-common.h
@@ -29,28 +29,15 @@
architecture-independent. */
typedef uint32_t offset_type;
-#if WORDS_BIGENDIAN
-
-/* Convert VALUE between big- and little-endian. */
+/* Unpack a 32-bit little-endian value. */
static inline offset_type
-byte_swap (offset_type value)
+gdb_index_unpack (const gdb_byte *value)
{
- offset_type result;
-
- result = (value & 0xff) << 24;
- result |= (value & 0xff00) << 8;
- result |= (value & 0xff0000) >> 8;
- result |= (value & 0xff000000) >> 24;
- return result;
+ return (offset_type) extract_unsigned_integer (value, sizeof (offset_type),
+ BFD_ENDIAN_LITTLE);
}
-#define MAYBE_SWAP(V) byte_swap (V)
-
-#else
-#define MAYBE_SWAP(V) static_cast<offset_type> (V)
-#endif /* WORDS_BIGENDIAN */
-
/* The hash function for strings in the mapped index. This is the same as
SYMBOL_HASH_NEXT, but we keep a separate copy to maintain control over the
implementation. This is necessary because the hash function is tied to the
diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c
index 6cfe415..e27e1e8 100644
--- a/gdb/dwarf2/index-write.c
+++ b/gdb/dwarf2/index-write.c
@@ -94,13 +94,10 @@ file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
class data_buf
{
public:
- /* Copy DATA to the end of the buffer. */
- template<typename T>
- void append_data (const T &data)
+ /* Copy ARRAY to the end of the buffer. */
+ void append_array (gdb::array_view<const gdb_byte> array)
{
- std::copy (reinterpret_cast<const gdb_byte *> (&data),
- reinterpret_cast<const gdb_byte *> (&data + 1),
- grow (sizeof (data)));
+ std::copy (array.begin (), array.end (), grow (array.size ()));
}
/* Copy CSTR (a zero-terminated string) to the end of buffer. The
@@ -120,7 +117,7 @@ public:
input >>= 7;
if (input)
output |= 0x80;
- append_data (output);
+ m_vec.push_back (output);
if (input == 0)
break;
}
@@ -133,6 +130,12 @@ public:
::store_unsigned_integer (grow (len), len, byte_order, val);
}
+ /* Copy VALUE to the end of the buffer, little-endian. */
+ void append_offset (offset_type value)
+ {
+ append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
+ }
+
/* Return the size of the buffer. */
size_t size () const
{
@@ -371,9 +374,9 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
entry.index_offset = cpool.size ();
- cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ()));
+ cpool.append_offset (entry.cu_indices.size ());
for (const auto index : entry.cu_indices)
- cpool.append_data (MAYBE_SWAP (index));
+ cpool.append_offset (index);
}
}
@@ -399,8 +402,8 @@ write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
vec_off = 0;
}
- output.append_data (MAYBE_SWAP (str_off));
- output.append_data (MAYBE_SWAP (vec_off));
+ output.append_offset (str_off);
+ output.append_offset (vec_off);
}
}
@@ -434,7 +437,7 @@ add_address_entry (data_buf &addr_vec,
{
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
- addr_vec.append_data (MAYBE_SWAP (cu_index));
+ addr_vec.append_offset (cu_index);
}
/* Worker function for traversing an addrmap to build the address table. */
@@ -1374,26 +1377,26 @@ write_gdbindex_1 (FILE *out_file,
offset_type total_len = size_of_header;
/* The version number. */
- contents.append_data (MAYBE_SWAP (8));
+ contents.append_offset (8);
/* The offset of the CU list from the start of the file. */
- contents.append_data (MAYBE_SWAP (total_len));
+ contents.append_offset (total_len);
total_len += cu_list.size ();
/* The offset of the types CU list from the start of the file. */
- contents.append_data (MAYBE_SWAP (total_len));
+ contents.append_offset (total_len);
total_len += types_cu_list.size ();
/* The offset of the address table from the start of the file. */
- contents.append_data (MAYBE_SWAP (total_len));
+ contents.append_offset (total_len);
total_len += addr_vec.size ();
/* The offset of the symbol table from the start of the file. */
- contents.append_data (MAYBE_SWAP (total_len));
+ contents.append_offset (total_len);
total_len += symtab_vec.size ();
/* The offset of the constant pool from the start of the file. */
- contents.append_data (MAYBE_SWAP (total_len));
+ contents.append_offset (total_len);
total_len += constant_pool.size ();
gdb_assert (contents.size () == size_of_header);
@@ -1611,7 +1614,7 @@ write_debug_names (dwarf2_per_objfile *per_objfile,
string. This value is rounded up to a multiple of 4. */
static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
- header.append_data (dwarf5_gdb_augmentation);
+ header.append_array (dwarf5_gdb_augmentation);
gdb_assert (header.size () == bytes_of_header);
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 22a5f33..40aeb29 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -223,17 +223,50 @@ protected:
~mapped_index_base() = default;
};
+/* This is a view into the index that converts from bytes to an
+ offset_type, and allows indexing. Unaligned bytes are specifically
+ allowed here, and handled via unpacking. */
+
+class offset_view
+{
+public:
+ offset_view () = default;
+
+ explicit offset_view (gdb::array_view<const gdb_byte> bytes)
+ : m_bytes (bytes)
+ {
+ }
+
+ /* Extract the INDEXth offset_type from the array. */
+ offset_type operator[] (size_t index) const
+ {
+ const gdb_byte *bytes = &m_bytes[index * sizeof (offset_type)];
+ return (offset_type) extract_unsigned_integer (bytes,
+ sizeof (offset_type),
+ BFD_ENDIAN_LITTLE);
+ }
+
+ /* Return the number of offset_types in this array. */
+ size_t size () const
+ {
+ return m_bytes.size () / sizeof (offset_type);
+ }
+
+ /* Return true if this view is empty. */
+ bool empty () const
+ {
+ return m_bytes.empty ();
+ }
+
+private:
+ /* The underlying bytes. */
+ gdb::array_view<const gdb_byte> m_bytes;
+};
+
/* A description of the mapped index. The file format is described in
a comment by the code that writes the index. */
struct mapped_index final : public mapped_index_base
{
- /* A slot/bucket in the symbol table hash. */
- struct symbol_table_slot
- {
- const offset_type name;
- const offset_type vec;
- };
-
/* Index data format version. */
int version = 0;
@@ -241,25 +274,42 @@ struct mapped_index final : public mapped_index_base
gdb::array_view<const gdb_byte> address_table;
/* The symbol table, implemented as a hash table. */
- gdb::array_view<symbol_table_slot> symbol_table;
+ offset_view symbol_table;
/* A pointer to the constant pool. */
- const char *constant_pool = nullptr;
+ gdb::array_view<const gdb_byte> constant_pool;
+
+ /* Return the index into the constant pool of the name of the IDXth
+ symbol in the symbol table. */
+ offset_type symbol_name_index (offset_type idx) const
+ {
+ return symbol_table[2 * idx];
+ }
+
+ /* Return the index into the constant pool of the CU vector of the
+ IDXth symbol in the symbol table. */
+ offset_type symbol_vec_index (offset_type idx) const
+ {
+ return symbol_table[2 * idx + 1];
+ }
bool symbol_name_slot_invalid (offset_type idx) const override
{
- const auto &bucket = this->symbol_table[idx];
- return bucket.name == 0 && bucket.vec == 0;
+ return (symbol_name_index (idx) == 0
+ && symbol_vec_index (idx) == 0);
}
/* Convenience method to get at the name of the symbol at IDX in the
symbol table. */
const char *symbol_name_at
(offset_type idx, dwarf2_per_objfile *per_objfile) const override
- { return this->constant_pool + MAYBE_SWAP (this->symbol_table[idx].name); }
+ {
+ return (const char *) (this->constant_pool.data ()
+ + symbol_name_index (idx));
+ }
size_t symbol_name_count () const override
- { return this->symbol_table.size (); }
+ { return this->symbol_table.size () / 2; }
};
/* A description of the mapped .debug_names.
@@ -2950,9 +3000,10 @@ read_gdb_index_from_buffer (const char *filename,
offset_type *types_list_elements)
{
const gdb_byte *addr = &buffer[0];
+ offset_view metadata (buffer);
/* Version check. */
- offset_type version = MAYBE_SWAP (*(offset_type *) addr);
+ offset_type version = metadata[0];
/* Versions earlier than 3 emitted every copy of a psymbol. This
causes the index to behave very poorly for certain requests. Version 3
contained incomplete addrmap. So, it seems better to just ignore such
@@ -3005,35 +3056,29 @@ to use the section anyway."),
map->version = version;
- offset_type *metadata = (offset_type *) (addr + sizeof (offset_type));
-
- int i = 0;
- *cu_list = addr + MAYBE_SWAP (metadata[i]);
- *cu_list_elements = ((MAYBE_SWAP (metadata[i + 1]) - MAYBE_SWAP (metadata[i]))
- / 8);
+ int i = 1;
+ *cu_list = addr + metadata[i];
+ *cu_list_elements = (metadata[i + 1] - metadata[i]) / 8;
++i;
- *types_list = addr + MAYBE_SWAP (metadata[i]);
- *types_list_elements = ((MAYBE_SWAP (metadata[i + 1])
- - MAYBE_SWAP (metadata[i]))
- / 8);
+ *types_list = addr + metadata[i];
+ *types_list_elements = (metadata[i + 1] - metadata[i]) / 8;
++i;
- const gdb_byte *address_table = addr + MAYBE_SWAP (metadata[i]);
- const gdb_byte *address_table_end = addr + MAYBE_SWAP (metadata[i + 1]);
+ const gdb_byte *address_table = addr + metadata[i];
+ const gdb_byte *address_table_end = addr + metadata[i + 1];
map->address_table
= gdb::array_view<const gdb_byte> (address_table, address_table_end);
++i;
- const gdb_byte *symbol_table = addr + MAYBE_SWAP (metadata[i]);
- const gdb_byte *symbol_table_end = addr + MAYBE_SWAP (metadata[i + 1]);
+ const gdb_byte *symbol_table = addr + metadata[i];
+ const gdb_byte *symbol_table_end = addr + metadata[i + 1];
map->symbol_table
- = gdb::array_view<mapped_index::symbol_table_slot>
- ((mapped_index::symbol_table_slot *) symbol_table,
- (mapped_index::symbol_table_slot *) symbol_table_end);
+ = offset_view (gdb::array_view<const gdb_byte> (symbol_table,
+ symbol_table_end));
++i;
- map->constant_pool = (char *) (addr + MAYBE_SWAP (metadata[i]));
+ map->constant_pool = buffer.slice (metadata[i]);
return 1;
}
@@ -3317,7 +3362,7 @@ struct dw2_symtab_iterator
domain_enum domain;
/* The list of CUs from the index entry of the symbol,
or NULL if not found. */
- offset_type *vec;
+ offset_view vec;
/* The next element in VEC to look at. */
int next;
/* The number of elements in VEC, or zero if there is no match. */
@@ -3342,7 +3387,7 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
iter->domain = domain;
iter->next = 0;
iter->global_seen = 0;
- iter->vec = NULL;
+ iter->vec = {};
iter->length = 0;
mapped_index *index = per_objfile->per_bfd->index_table.get ();
@@ -3351,11 +3396,10 @@ dw2_symtab_iter_init (struct dw2_symtab_iterator *iter,
return;
gdb_assert (!index->symbol_name_slot_invalid (namei));
- const auto &bucket = index->symbol_table[namei];
+ offset_type vec_idx = index->symbol_vec_index (namei);
- iter->vec = (offset_type *) (index->constant_pool
- + MAYBE_SWAP (bucket.vec));
- iter->length = MAYBE_SWAP (*iter->vec);
+ iter->vec = offset_view (index->constant_pool.slice (vec_idx));
+ iter->length = iter->vec[0];
}
/* Return the next matching CU or NULL if there are no more. */
@@ -3367,8 +3411,7 @@ dw2_symtab_iter_next (struct dw2_symtab_iterator *iter)
for ( ; iter->next < iter->length; ++iter->next)
{
- offset_type cu_index_and_attrs =
- MAYBE_SWAP (iter->vec[iter->next + 1]);
+ offset_type cu_index_and_attrs = iter->vec[iter->next + 1];
offset_type cu_index = GDB_INDEX_CU_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind =
GDB_INDEX_SYMBOL_KIND_VALUE (cu_index_and_attrs);
@@ -4387,16 +4430,15 @@ dw2_expand_marked_cus
block_search_flags search_flags,
search_domain kind)
{
- offset_type *vec, vec_len, vec_idx;
+ offset_type vec_len, vec_idx;
bool global_seen = false;
mapped_index &index = *per_objfile->per_bfd->index_table;
- vec = (offset_type *) (index.constant_pool
- + MAYBE_SWAP (index.symbol_table[idx].vec));
- vec_len = MAYBE_SWAP (vec[0]);
+ offset_view vec (index.constant_pool.slice (index.symbol_vec_index (idx)));
+ vec_len = vec[0];
for (vec_idx = 0; vec_idx < vec_len; ++vec_idx)
{
- offset_type cu_index_and_attrs = MAYBE_SWAP (vec[vec_idx + 1]);
+ offset_type cu_index_and_attrs = vec[vec_idx + 1];
/* This value is only valid for index versions >= 7. */
int is_static = GDB_INDEX_SYMBOL_STATIC_VALUE (cu_index_and_attrs);
gdb_index_symbol_kind symbol_kind =