From 0882710510059d9bf10d3e2324e0441029b50ce9 Mon Sep 17 00:00:00 2001
From: Mark Harmstone
Date: Mon, 31 Oct 2022 00:15:54 +0000
Subject: ld: Add publics stream to PDB files

---
 ld/pdb.c                             | 347 ++++++++++++++++++++++++++++++++++-
 ld/pdb.h                             |  47 +++++
 ld/testsuite/ld-pe/pdb.exp           | 107 ++++++++++-
 ld/testsuite/ld-pe/pdb1-publics.d    |  41 +++++
 ld/testsuite/ld-pe/pdb1-sym-record.d |   7 +
 ld/testsuite/ld-pe/pdb1.s            |  18 +-
 6 files changed, 559 insertions(+), 8 deletions(-)
 create mode 100644 ld/testsuite/ld-pe/pdb1-publics.d
 create mode 100644 ld/testsuite/ld-pe/pdb1-sym-record.d

(limited to 'ld')

diff --git a/ld/pdb.c b/ld/pdb.c
index 1190dcf..8e151da 100644
--- a/ld/pdb.c
+++ b/ld/pdb.c
@@ -383,10 +383,31 @@ get_arch_number (bfd *abfd)
   return IMAGE_FILE_MACHINE_I386;
 }
 
+/* Return the 1-based number of output section SECT in ABFD, or 0 if absent.  */
+static uint16_t
+find_section_number (bfd *abfd, asection *sect)
+{
+  uint16_t i = 1;  /* Numbering starts at 1; 0 means "not found".  */
+
+  for (asection *s = abfd->sections; s; s = s->next)
+    {
+      if (s == sect)
+        return i;
+
+      /* Empty sections aren't output, so they don't consume a number.  */
+      if (s->size != 0)
+        i++;
+    }
+
+  return 0;  /* SECT is not in ABFD's section list.  */
+}
+
 /* Stream 4 is the debug information (DBI) stream.
*/
 static bool
 populate_dbi_stream (bfd *stream, bfd *abfd,
-                     uint16_t section_header_stream_num)
+                     uint16_t section_header_stream_num,
+                     uint16_t sym_rec_stream_num,
+                     uint16_t publics_stream_num)
 {
   struct pdb_dbi_stream_header h;
   struct optional_dbg_header opt;
@@ -396,9 +417,9 @@ populate_dbi_stream (bfd *stream, bfd *abfd,
   bfd_putl32 (1, &h.age);
   bfd_putl16 (0xffff, &h.global_stream_index);
   bfd_putl16 (0x8e1d, &h.build_number); // MSVC 14.29
-  bfd_putl16 (0xffff, &h.public_stream_index);
+  bfd_putl16 (publics_stream_num, &h.public_stream_index);
   bfd_putl16 (0, &h.pdb_dll_version);
-  bfd_putl16 (0xffff, &h.sym_record_stream);
+  bfd_putl16 (sym_rec_stream_num, &h.sym_record_stream);
   bfd_putl16 (0, &h.pdb_dll_rbld);
   bfd_putl32 (0, &h.mod_info_size);
   bfd_putl32 (0, &h.section_contribution_size);
@@ -433,6 +454,293 @@ populate_dbi_stream (bfd *stream, bfd *abfd,
   return true;
 }
 
+/* qsort callback: order struct public pointers by ascending hash.  */
+static int
+public_compare_hash (const void *s1, const void *s2)
+{
+  const struct public *p1 = *(const struct public **) s1;
+  const struct public *p2 = *(const struct public **) s2;
+
+  if (p1->hash < p2->hash)
+    return -1;
+  if (p1->hash > p2->hash)
+    return 1;
+
+  return 0;
+}
+
+/* qsort callback: order struct public pointers by section, then address.  */
+static int
+public_compare_addr (const void *s1, const void *s2)
+{
+  const struct public *p1 = *(const struct public **) s1;
+  const struct public *p2 = *(const struct public **) s2;
+
+  if (p1->section < p2->section)
+    return -1;
+  if (p1->section > p2->section)
+    return 1;
+
+  if (p1->address < p2->address)
+    return -1;
+  if (p1->address > p2->address)
+    return 1;
+
+  return 0;
+}
+
+/* The publics stream is a hash map of S_PUB32 records, which are stored
+   in the symbol record stream.  Each S_PUB32 entry represents a symbol
+   from the point of view of the linker: a section index, an offset within
+   the section, and a mangled name.
Compare with S_GDATA32 and S_GPROC32, + which are the same thing but generated by the compiler. */ +static bool +populate_publics_stream (bfd *stream, bfd *abfd, bfd *sym_rec_stream) +{ + struct publics_header header; + struct globals_hash_header hash_header; + const unsigned int num_buckets = 4096; + unsigned int num_entries = 0, filled_buckets = 0; + unsigned int buckets_size, sym_hash_size; + char int_buf[sizeof (uint32_t)]; + struct public *publics_head = NULL, *publics_tail = NULL; + struct public **buckets; + struct public **sorted = NULL; + bool ret = false; + + buckets = xmalloc (sizeof (struct public *) * num_buckets); + memset (buckets, 0, sizeof (struct public *) * num_buckets); + + /* Loop through the global symbols in our input files, and write S_PUB32 + records in the symbol record stream for those that make it into the + final image. */ + for (bfd *in = coff_data (abfd)->link_info->input_bfds; in; + in = in->link.next) + { + for (unsigned int i = 0; i < in->symcount; i++) + { + struct bfd_symbol *sym = in->outsymbols[i]; + + if (sym->flags & BSF_GLOBAL) + { + struct pubsym ps; + uint16_t record_length; + const char *name = sym->name; + size_t name_len = strlen (name); + struct public *p = xmalloc (sizeof (struct public)); + unsigned int padding = 0; + uint16_t section; + uint32_t flags = 0; + + section = + find_section_number (abfd, sym->section->output_section); + + if (section == 0) + continue; + + p->next = NULL; + p->offset = bfd_tell (sym_rec_stream); + p->hash = calc_hash (name, name_len) % num_buckets; + p->section = section; + p->address = sym->section->output_offset + sym->value; + + record_length = sizeof (struct pubsym) + name_len + 1; + + if (record_length % 4) + padding = 4 - (record_length % 4); + + /* Assume that all global symbols in executable sections + are functions. 
*/ + if (sym->section->flags & SEC_CODE) + flags = PUBSYM_FUNCTION; + + bfd_putl16 (record_length + padding - sizeof (uint16_t), + &ps.record_length); + bfd_putl16 (S_PUB32, &ps.record_type); + bfd_putl32 (flags, &ps.flags); + bfd_putl32 (p->address, &ps.offset); + bfd_putl16 (p->section, &ps.section); + + if (bfd_bwrite (&ps, sizeof (struct pubsym), sym_rec_stream) != + sizeof (struct pubsym)) + goto end; + + if (bfd_bwrite (name, name_len + 1, sym_rec_stream) != + name_len + 1) + goto end; + + for (unsigned int j = 0; j < padding; j++) + { + uint8_t b = 0; + + if (bfd_bwrite (&b, sizeof (uint8_t), sym_rec_stream) != + sizeof (uint8_t)) + goto end; + } + + if (!publics_head) + publics_head = p; + else + publics_tail->next = p; + + publics_tail = p; + num_entries++; + } + } + } + + + if (num_entries > 0) + { + /* Create an array of pointers, sorted by hash value. */ + + sorted = xmalloc (sizeof (struct public *) * num_entries); + + struct public *p = publics_head; + for (unsigned int i = 0; i < num_entries; i++) + { + sorted[i] = p; + p = p->next; + } + + qsort (sorted, num_entries, sizeof (struct public *), + public_compare_hash); + + /* Populate the buckets. */ + + for (unsigned int i = 0; i < num_entries; i++) + { + if (!buckets[sorted[i]->hash]) + { + buckets[sorted[i]->hash] = sorted[i]; + filled_buckets++; + } + + sorted[i]->index = i; + } + } + + buckets_size = num_buckets / 8; + buckets_size += sizeof (uint32_t); + buckets_size += filled_buckets * sizeof (uint32_t); + + sym_hash_size = sizeof (hash_header); + sym_hash_size += num_entries * sizeof (struct hash_record); + sym_hash_size += buckets_size; + + /* Output the publics header. 
*/ + + bfd_putl32 (sym_hash_size, &header.sym_hash_size); + bfd_putl32 (num_entries * sizeof (uint32_t), &header.addr_map_size); + bfd_putl32 (0, &header.num_thunks); + bfd_putl32 (0, &header.thunks_size); + bfd_putl32 (0, &header.thunk_table); + bfd_putl32 (0, &header.thunk_table_offset); + bfd_putl32 (0, &header.num_sects); + + if (bfd_bwrite (&header, sizeof (header), stream) != sizeof (header)) + goto end; + + /* Output the global hash header. */ + + bfd_putl32 (GLOBALS_HASH_SIGNATURE, &hash_header.signature); + bfd_putl32 (GLOBALS_HASH_VERSION_70, &hash_header.version); + bfd_putl32 (num_entries * sizeof (struct hash_record), + &hash_header.entries_size); + bfd_putl32 (buckets_size, &hash_header.buckets_size); + + if (bfd_bwrite (&hash_header, sizeof (hash_header), stream) != + sizeof (hash_header)) + goto end; + + /* Write the entries in hash order. */ + + for (unsigned int i = 0; i < num_entries; i++) + { + struct hash_record hr; + + bfd_putl32 (sorted[i]->offset + 1, &hr.offset); + bfd_putl32 (1, &hr.reference); + + if (bfd_bwrite (&hr, sizeof (hr), stream) != sizeof (hr)) + goto end; + } + + /* Write the bitmap for filled and unfilled buckets. */ + + for (unsigned int i = 0; i < num_buckets; i += 8) + { + uint8_t v = 0; + + for (unsigned int j = 0; j < 8; j++) + { + if (buckets[i + j]) + v |= 1 << j; + } + + if (bfd_bwrite (&v, sizeof (v), stream) != sizeof (v)) + goto end; + } + + /* Add a 4-byte gap. */ + + bfd_putl32 (0, int_buf); + + if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != sizeof (uint32_t)) + goto end; + + /* Write the bucket offsets. */ + + for (unsigned int i = 0; i < num_buckets; i++) + { + if (buckets[i]) + { + /* 0xc is size of internal hash_record structure in + Microsoft's parser. 
*/ + bfd_putl32 (buckets[i]->index * 0xc, int_buf); + + if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != + sizeof (uint32_t)) + goto end; + } + } + + /* Write the address map: offsets into the symbol record stream of + S_PUB32 records, ordered by address. */ + + if (num_entries > 0) + { + qsort (sorted, num_entries, sizeof (struct public *), + public_compare_addr); + + for (unsigned int i = 0; i < num_entries; i++) + { + bfd_putl32 (sorted[i]->offset, int_buf); + + if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != + sizeof (uint32_t)) + goto end; + } + } + + ret = true; + +end: + free (buckets); + + while (publics_head) + { + struct public *p = publics_head->next; + + free (publics_head); + publics_head = p; + } + + free (sorted); + + return ret; +} + /* The section header stream contains a copy of the section headers from the PE file, in the same format. */ static bool @@ -494,8 +802,9 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid) { bfd *pdb; bool ret = false; - bfd *info_stream, *dbi_stream, *names_stream; - uint16_t section_header_stream_num; + bfd *info_stream, *dbi_stream, *names_stream, *sym_rec_stream, + *publics_stream; + uint16_t section_header_stream_num, sym_rec_stream_num, publics_stream_num; pdb = bfd_openw (pdb_name, "pdb"); if (!pdb) @@ -554,6 +863,24 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid) goto end; } + sym_rec_stream = add_stream (pdb, NULL, &sym_rec_stream_num); + + if (!sym_rec_stream) + { + einfo (_("%P: warning: cannot create symbol record stream " + "in PDB file: %E\n")); + goto end; + } + + publics_stream = add_stream (pdb, NULL, &publics_stream_num); + + if (!publics_stream) + { + einfo (_("%P: warning: cannot create publics stream " + "in PDB file: %E\n")); + goto end; + } + if (!create_section_header_stream (pdb, abfd, §ion_header_stream_num)) { einfo (_("%P: warning: cannot create section header stream " @@ -561,13 +888,21 @@ create_pdb_file (bfd *abfd, 
const char *pdb_name, const unsigned char *guid) goto end; } - if (!populate_dbi_stream (dbi_stream, abfd, section_header_stream_num)) + if (!populate_dbi_stream (dbi_stream, abfd, section_header_stream_num, + sym_rec_stream_num, publics_stream_num)) { einfo (_("%P: warning: cannot populate DBI stream " "in PDB file: %E\n")); goto end; } + if (!populate_publics_stream (publics_stream, abfd, sym_rec_stream)) + { + einfo (_("%P: warning: cannot populate publics stream " + "in PDB file: %E\n")); + goto end; + } + if (!populate_info_stream (pdb, info_stream, guid)) { einfo (_("%P: warning: cannot populate info stream " diff --git a/ld/pdb.h b/ld/pdb.h index e5f53b4..1a80101 100644 --- a/ld/pdb.h +++ b/ld/pdb.h @@ -28,6 +28,8 @@ #include "bfd.h" #include +#define S_PUB32 0x110e + /* PDBStream70 in pdb1.h */ struct pdb_stream_70 { @@ -91,6 +93,51 @@ struct pdb_dbi_stream_header #define DBI_STREAM_VERSION_70 19990903 +/* PSGSIHDR in gsi.h */ +struct publics_header +{ + uint32_t sym_hash_size; + uint32_t addr_map_size; + uint32_t num_thunks; + uint32_t thunks_size; + uint32_t thunk_table; + uint32_t thunk_table_offset; + uint32_t num_sects; +}; + +/* GSIHashHdr in gsi.h */ +struct globals_hash_header +{ + uint32_t signature; + uint32_t version; + uint32_t entries_size; + uint32_t buckets_size; +}; + +/* HRFile in gsi.h */ +struct hash_record +{ + uint32_t offset; + uint32_t reference; +}; + +#define GLOBALS_HASH_SIGNATURE 0xffffffff +#define GLOBALS_HASH_VERSION_70 0xf12f091a + +/* PUBSYM32 in cvinfo.h */ +struct pubsym +{ + uint16_t record_length; + uint16_t record_type; + uint32_t flags; + uint32_t offset; + uint16_t section; + /* followed by null-terminated string */ +} ATTRIBUTE_PACKED; + +/* see bitset CV_PUBSYMFLAGS in cvinfo.h */ +#define PUBSYM_FUNCTION 0x2 + struct optional_dbg_header { uint16_t fpo_stream; diff --git a/ld/testsuite/ld-pe/pdb.exp b/ld/testsuite/ld-pe/pdb.exp index cee0721..ee314c4 100644 --- a/ld/testsuite/ld-pe/pdb.exp +++ 
b/ld/testsuite/ld-pe/pdb.exp
@@ -395,12 +395,111 @@ proc check_section_stream { img pdb } {
     return 1
 }
 
+proc get_publics_stream_index { pdb } {
+    global ar
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir $pdb 0003"]
+
+    if ![string match "" $exec_output] {
+	return -1
+    }
+
+    set fi [open tmpdir/0003]
+    fconfigure $fi -translation binary
+
+    # skip ahead to the publics stream index field
+    seek $fi 16
+
+    # read the 16-bit little-endian stream index
+
+    set data [read $fi 2]
+    binary scan $data s index
+
+    close $fi
+
+    return $index
+}
+
+proc get_sym_record_stream_index { pdb } {
+    global ar
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir $pdb 0003"]
+
+    if ![string match "" $exec_output] {
+	return -1
+    }
+
+    set fi [open tmpdir/0003]
+    fconfigure $fi -translation binary
+
+    # skip ahead to the symbol record stream index field
+    seek $fi 20
+
+    # read the 16-bit little-endian stream index
+
+    set data [read $fi 2]
+    binary scan $data s index
+
+    close $fi
+
+    return $index
+}
+
+proc check_publics_stream { pdb } {
+    global ar
+    global objdump
+    global srcdir
+    global subdir
+
+    set publics_index [get_publics_stream_index $pdb]
+
+    if { $publics_index == -1 } {
+	return 0
+    }
+
+    set index_str [format "%04x" $publics_index]
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir $pdb $index_str"]
+
+    if ![string match "" $exec_output] {
+	return 0
+    }
+
+    set exp [file_contents "$srcdir/$subdir/pdb1-publics.d"]
+    set got [run_host_cmd "$objdump" "-s --target=binary tmpdir/$index_str"]
+    if ![string match $exp $got] {
+	return 0
+    }
+
+    set sym_record_index [get_sym_record_stream_index $pdb]
+
+    if { $sym_record_index == -1 } {
+	return 0
+    }
+
+    set index_str [format "%04x" $sym_record_index]
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir $pdb $index_str"]
+
+    if ![string match "" $exec_output] {
+	return 0
+    }
+
+    set exp [file_contents "$srcdir/$subdir/pdb1-sym-record.d"]
+    set got [run_host_cmd "$objdump" "-s --target=binary tmpdir/$index_str"]
+    if ![string match $exp $got] {
+	return 0
+    }
+
+    return 1
+}
+
 if ![ld_assemble $as
$srcdir/$subdir/pdb1.s tmpdir/pdb1.o] { unsupported "Build pdb1.o" return } -if ![ld_link $ld "tmpdir/pdb1.exe" "--pdb=tmpdir/pdb1.pdb tmpdir/pdb1.o"] { +if ![ld_link $ld "tmpdir/pdb1.exe" "--pdb=tmpdir/pdb1.pdb --gc-sections -e foo tmpdir/pdb1.o"] { fail "Could not create a PE image with a PDB file" return } @@ -441,3 +540,9 @@ if [check_section_stream tmpdir/pdb1.exe tmpdir/pdb1.pdb] { } else { fail "Invalid section stream" } + +if [check_publics_stream tmpdir/pdb1.pdb] { + pass "Valid publics stream" +} else { + fail "Invalid publics stream" +} diff --git a/ld/testsuite/ld-pe/pdb1-publics.d b/ld/testsuite/ld-pe/pdb1-publics.d new file mode 100644 index 0000000..f7df2d9 --- /dev/null +++ b/ld/testsuite/ld-pe/pdb1-publics.d @@ -0,0 +1,41 @@ + +*: file format binary + +Contents of section .data: + 0000 2c020000 08000000 00000000 00000000 ,............... + 0010 00000000 00000000 00000000 ffffffff ................ + 0020 1a092ff1 10000000 0c020000 15000000 ../............. + 0030 01000000 01000000 01000000 00000000 ................ + 0040 00000000 00000000 00000000 00000000 ................ + 0050 00000000 00000000 00000000 00000000 ................ + 0060 00000000 00000000 00000000 00000000 ................ + 0070 00000000 00000000 00000000 00000000 ................ + 0080 00000000 00000000 00000000 00000000 ................ + 0090 00000000 00000000 00000000 00000000 ................ + 00a0 00000000 00000000 00000000 00000000 ................ + 00b0 00000000 00000000 00000000 00000000 ................ + 00c0 00000000 00000000 00000000 00000000 ................ + 00d0 00000000 00000000 00000000 00000001 ................ + 00e0 00000000 00000000 00000000 00000000 ................ + 00f0 00000000 00000000 00000000 00000000 ................ + 0100 00000000 00000000 00000000 00000000 ................ + 0110 00000000 00000000 00000000 00000000 ................ + 0120 00000000 00000000 00000000 00000000 ................ 
+ 0130 00000000 00000000 00000000 00000000 ................ + 0140 00000000 00000000 00000000 00000000 ................ + 0150 00000000 00000000 00000000 00000000 ................ + 0160 00000000 00000000 00000000 00000000 ................ + 0170 00000000 00000000 00000000 00000000 ................ + 0180 00000000 00000000 00000000 00000000 ................ + 0190 00000000 00000000 00000000 01000000 ................ + 01a0 00000000 00000000 00000000 00000000 ................ + 01b0 00000000 00000000 00000000 00000000 ................ + 01c0 00000000 00000000 00000000 00000000 ................ + 01d0 00000000 00000000 00000000 00000000 ................ + 01e0 00000000 00000000 00000000 00000000 ................ + 01f0 00000000 00000000 00000000 00000000 ................ + 0200 00000000 00000000 00000000 00000000 ................ + 0210 00000000 00000000 00000000 00000000 ................ + 0220 00000000 00000000 00000000 00000000 ................ + 0230 00000000 00000000 00000000 00000000 ................ + 0240 00000000 0c000000 00000000 14000000 ................ \ No newline at end of file diff --git a/ld/testsuite/ld-pe/pdb1-sym-record.d b/ld/testsuite/ld-pe/pdb1-sym-record.d new file mode 100644 index 0000000..2078a5e --- /dev/null +++ b/ld/testsuite/ld-pe/pdb1-sym-record.d @@ -0,0 +1,7 @@ + +*: file format binary + +Contents of section .data: + 0000 12000e11 02000000 08000000 0100666f ..............fo + 0010 6f000000 12000e11 00000000 04000000 o............... + 0020 02006261 72000000 ..bar... 
\ No newline at end of file diff --git a/ld/testsuite/ld-pe/pdb1.s b/ld/testsuite/ld-pe/pdb1.s index 30a8cfc..846814b 100644 --- a/ld/testsuite/ld-pe/pdb1.s +++ b/ld/testsuite/ld-pe/pdb1.s @@ -1,5 +1,21 @@ .text + .long 0x12345678 + .long 0x9abcdef0 + .global foo -foo: +foo: # section 0001, offset 00000008 + .secrel32 bar + +.data + .long 0x12345678 + +.global bar +bar: # section 0002, offset 00000004 + .long 0x9abcdef0 + +.section "gcsect" + +.global baz +baz: # unreferenced, will be GC'd out .long 0x12345678 -- cgit v1.1