aboutsummaryrefslogtreecommitdiff
path: root/bfd
diff options
context:
space:
mode:
authorOleg Tolmatcev <oleg.tolmatcev@gmail.com>2023-06-18 19:35:38 +0200
committerAlan Modra <amodra@gmail.com>2023-08-24 15:54:39 +0930
commit6aadf8a04d162feb2afe3c41f5b36534d661d447 (patch)
tree1f4e29669179cb6ff3e0c1e5e50ad2006b99c833 /bfd
parentfb9b7fbf17f50fcfabf6e3d7d06a93e1f17c52b7 (diff)
downloadbinutils-6aadf8a04d162feb2afe3c41f5b36534d661d447.zip
binutils-6aadf8a04d162feb2afe3c41f5b36534d661d447.tar.gz
binutils-6aadf8a04d162feb2afe3c41f5b36534d661d447.tar.bz2
optimize handle_COMDAT
Signed-off-by: Oleg Tolmatcev <oleg.tolmatcev@gmail.com>
Diffstat (limited to 'bfd')
-rw-r--r--bfd/coffcode.h427
-rw-r--r--bfd/coffgen.c8
-rw-r--r--bfd/libcoff-in.h12
-rw-r--r--bfd/libcoff.h12
-rw-r--r--bfd/peicode.h17
5 files changed, 283 insertions, 193 deletions
diff --git a/bfd/coffcode.h b/bfd/coffcode.h
index 6789f7f..e3f4afd 100644
--- a/bfd/coffcode.h
+++ b/bfd/coffcode.h
@@ -352,6 +352,7 @@ CODE_FRAGMENT
*/
#include "libiberty.h"
+#include <string.h>
#ifdef COFF_WITH_PE
#include "peicode.h"
@@ -852,19 +853,19 @@ styp_to_sec_flags (bfd *abfd,
#else /* COFF_WITH_PE */
+static struct comdat_hash_entry *
+find_flags (htab_t comdat_hash, int target_index)
+{
+ struct comdat_hash_entry needle;
+ needle.target_index = target_index;
+
+ return htab_find (comdat_hash, &needle);
+}
+
static bool
-handle_COMDAT (bfd * abfd,
- flagword *sec_flags,
- void * hdr,
- const char *name,
- asection *section)
+fill_comdat_hash (bfd *abfd)
{
- struct internal_scnhdr *internal_s = (struct internal_scnhdr *) hdr;
bfd_byte *esymstart, *esym, *esymend;
- int seen_state = 0;
- char *target_name = NULL;
-
- *sec_flags |= SEC_LINK_ONCE;
/* Unfortunately, the PE format stores essential information in
the symbol table, of all places. We need to extract that
@@ -895,190 +896,160 @@ handle_COMDAT (bfd * abfd,
{
char buf[SYMNMLEN + 1];
const char *symname;
+ flagword sec_flags = SEC_LINK_ONCE;
bfd_coff_swap_sym_in (abfd, esym, &isym);
- BFD_ASSERT (sizeof (internal_s->s_name) <= SYMNMLEN);
-
- if (isym.n_scnum == section->target_index)
+ /* According to the MSVC documentation, the first
+ TWO entries with the section # are both of
+ interest to us. The first one is the "section
+ symbol" (section name). The second is the comdat
+ symbol name. Here, we've found the first
+ qualifying entry; we distinguish it from the
+ second with a state flag.
+
+ In the case of gas-generated (at least until that
+ is fixed) .o files, it isn't necessarily the
+ second one. It may be some other later symbol.
+
+ Since gas also doesn't follow MS conventions and
+ emits the section similar to .text$<name>, where
+ <something> is the name we're looking for, we
+ distinguish the two as follows:
+
+ If the section name is simply a section name (no
+ $) we presume it's MS-generated, and look at
+ precisely the second symbol for the comdat name.
+ If the section name has a $, we assume it's
+ gas-generated, and look for <something> (whatever
+ follows the $) as the comdat symbol. */
+
+ /* All 3 branches use this. */
+ symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
+
+ /* PR 17512 file: 078-11867-0.004 */
+ if (symname == NULL)
{
- /* According to the MSVC documentation, the first
- TWO entries with the section # are both of
- interest to us. The first one is the "section
- symbol" (section name). The second is the comdat
- symbol name. Here, we've found the first
- qualifying entry; we distinguish it from the
- second with a state flag.
-
- In the case of gas-generated (at least until that
- is fixed) .o files, it isn't necessarily the
- second one. It may be some other later symbol.
-
- Since gas also doesn't follow MS conventions and
- emits the section similar to .text$<name>, where
- <something> is the name we're looking for, we
- distinguish the two as follows:
-
- If the section name is simply a section name (no
- $) we presume it's MS-generated, and look at
- precisely the second symbol for the comdat name.
- If the section name has a $, we assume it's
- gas-generated, and look for <something> (whatever
- follows the $) as the comdat symbol. */
-
- /* All 3 branches use this. */
- symname = _bfd_coff_internal_syment_name (abfd, &isym, buf);
-
- /* PR 17512 file: 078-11867-0.004 */
- if (symname == NULL)
- {
- _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
- abfd);
- return false;
- }
+ _bfd_error_handler (_("%pB: unable to load COMDAT section name"),
+ abfd);
+ continue;
+ }
- switch (seen_state)
- {
- case 0:
- {
- /* The first time we've seen the symbol. */
- union internal_auxent aux;
-
- /* If it isn't the stuff we're expecting, die;
- The MS documentation is vague, but it
- appears that the second entry serves BOTH
- as the comdat symbol and the defining
- symbol record (either C_STAT or C_EXT,
- possibly with an aux entry with debug
- information if it's a function.) It
- appears the only way to find the second one
- is to count. (On Intel, they appear to be
- adjacent, but on Alpha, they have been
- found separated.)
-
- Here, we think we've found the first one,
- but there's some checking we can do to be
- sure. */
-
- if (! ((isym.n_sclass == C_STAT
- || isym.n_sclass == C_EXT)
- && BTYPE (isym.n_type) == T_NULL
- && isym.n_value == 0))
- {
- /* Malformed input files can trigger this test.
- cf PR 21781. */
- _bfd_error_handler (_("%pB: error: unexpected symbol '%s' in COMDAT section"),
- abfd, symname);
- return false;
- }
+ union internal_auxent aux;
- /* FIXME LATER: MSVC generates section names
- like .text for comdats. Gas generates
- names like .text$foo__Fv (in the case of a
- function). See comment above for more. */
+ struct comdat_hash_entry needle;
+ needle.target_index = isym.n_scnum;
- if (isym.n_sclass == C_STAT && strcmp (name, symname) != 0)
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: COMDAT symbol '%s'"
- " does not match section name '%s'"),
- abfd, symname, name);
-
- /* This is the section symbol. */
- seen_state = 1;
- target_name = strchr (name, '$');
- if (target_name != NULL)
- {
- /* Gas mode. */
- seen_state = 2;
- /* Skip the `$'. */
- target_name += 1;
- }
+ void **slot
+ = htab_find_slot (pe_data (abfd)->comdat_hash, &needle, INSERT);
+ if (slot == NULL)
+ return false;
- if (isym.n_numaux == 0)
- aux.x_scn.x_comdat = 0;
- else
- {
- /* PR 17512: file: e2cfe54f. */
- if (esym + bfd_coff_symesz (abfd) >= esymend)
- {
- /* xgettext:c-format */
- _bfd_error_handler (_("%pB: warning: no symbol for"
- " section '%s' found"),
- abfd, symname);
- break;
- }
- bfd_coff_swap_aux_in (abfd, esym + bfd_coff_symesz (abfd),
- isym.n_type, isym.n_sclass,
- 0, isym.n_numaux, &aux);
- }
+ if (*slot == NULL)
+ {
+ if (isym.n_numaux == 0)
+ aux.x_scn.x_comdat = 0;
+ else
+ {
+ /* PR 17512: file: e2cfe54f. */
+ if (esym + bfd_coff_symesz (abfd) >= esymend)
+ {
+ /* xgettext:c-format */
+ _bfd_error_handler (_ ("%pB: warning: no symbol for"
+ " section '%s' found"),
+ abfd, symname);
+ continue;
+ }
+ bfd_coff_swap_aux_in (abfd, (esym + bfd_coff_symesz (abfd)),
+ isym.n_type, isym.n_sclass, 0,
+ isym.n_numaux, &aux);
+ }
- /* FIXME: Microsoft uses NODUPLICATES and
- ASSOCIATIVE, but gnu uses ANY and
- SAME_SIZE. Unfortunately, gnu doesn't do
- the comdat symbols right. So, until we can
- fix it to do the right thing, we are
- temporarily disabling comdats for the MS
- types (they're used in DLLs and C++, but we
- don't support *their* C++ libraries anyway
- - DJ. */
-
- /* Cygwin does not follow the MS style, and
- uses ANY and SAME_SIZE where NODUPLICATES
- and ASSOCIATIVE should be used. For
- Interix, we just do the right thing up
- front. */
-
- switch (aux.x_scn.x_comdat)
- {
- case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ /* FIXME: Microsoft uses NODUPLICATES and
+ ASSOCIATIVE, but gnu uses ANY and
+ SAME_SIZE. Unfortunately, gnu doesn't do
+ the comdat symbols right. So, until we can
+ fix it to do the right thing, we are
+ temporarily disabling comdats for the MS
+ types (they're used in DLLs and C++, but we
+ don't support *their* C++ libraries anyway
+ - DJ. */
+
+ /* Cygwin does not follow the MS style, and
+ uses ANY and SAME_SIZE where NODUPLICATES
+ and ASSOCIATIVE should be used. For
+ Interix, we just do the right thing up
+ front. */
+
+ switch (aux.x_scn.x_comdat)
+ {
+ case IMAGE_COMDAT_SELECT_NODUPLICATES:
#ifdef STRICT_PE_FORMAT
- *sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
+ sec_flags |= SEC_LINK_DUPLICATES_ONE_ONLY;
#else
- *sec_flags &= ~SEC_LINK_ONCE;
+ sec_flags &= ~SEC_LINK_ONCE;
#endif
- break;
+ break;
- case IMAGE_COMDAT_SELECT_ANY:
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
- break;
+ case IMAGE_COMDAT_SELECT_ANY:
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ break;
- case IMAGE_COMDAT_SELECT_SAME_SIZE:
- *sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
- break;
+ case IMAGE_COMDAT_SELECT_SAME_SIZE:
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_SIZE;
+ break;
- case IMAGE_COMDAT_SELECT_EXACT_MATCH:
- /* Not yet fully implemented ??? */
- *sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
- break;
+ case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ /* Not yet fully implemented ??? */
+ sec_flags |= SEC_LINK_DUPLICATES_SAME_CONTENTS;
+ break;
- /* debug$S gets this case; other
- implications ??? */
+ /* debug$S gets this case; other
+ implications ??? */
- /* There may be no symbol... we'll search
- the whole table... Is this the right
- place to play this game? Or should we do
- it when reading it in. */
- case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
+ /* There may be no symbol... we'll search
+ the whole table... Is this the right
+ place to play this game? Or should we do
+ it when reading it in. */
+ case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
#ifdef STRICT_PE_FORMAT
- /* FIXME: This is not currently implemented. */
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
+ /* FIXME: This is not currently implemented. */
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
#else
- *sec_flags &= ~SEC_LINK_ONCE;
+ sec_flags &= ~SEC_LINK_ONCE;
#endif
- break;
+ break;
- default: /* 0 means "no symbol" */
- /* debug$F gets this case; other
- implications ??? */
- *sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
- break;
- }
- }
+ default: /* 0 means "no symbol" */
+ /* debug$F gets this case; other
+ implications ??? */
+ sec_flags |= SEC_LINK_DUPLICATES_DISCARD;
break;
+ }
+
+ *slot = bfd_zmalloc (sizeof (struct comdat_hash_entry));
+ if (*slot == NULL)
+ return false;
+ struct comdat_hash_entry *newentry = *slot;
+ newentry->sec_flags = sec_flags;
+ newentry->symname = bfd_strdup (symname);
+ newentry->target_index = isym.n_scnum;
+ newentry->isym = isym;
+ newentry->comdat_symbol = -1;
+ }
+ else
+ {
+ struct comdat_hash_entry *entry = *slot;
- case 2:
+ if (entry->comdat_symbol != -1)
+ continue;
+
+ char *target_name = strchr (entry->symname, '$');
+ if (target_name != NULL)
+ {
/* Gas mode: the first matching on partial name. */
+ target_name += 1;
#ifndef TARGET_UNDERSCORE
#define TARGET_UNDERSCORE 0
#endif
@@ -1089,34 +1060,104 @@ handle_COMDAT (bfd * abfd,
/* Not the name we're looking for */
continue;
}
- /* Fall through. */
- case 1:
- /* MSVC mode: the lexically second symbol (or
- drop through from the above). */
- {
- /* This must the second symbol with the
- section #. It is the actual symbol name.
- Intel puts the two adjacent, but Alpha (at
- least) spreads them out. */
+ }
+ /* MSVC mode: the lexically second symbol (or
+ drop through from the above). */
+ /* This must the second symbol with the
+ section #. It is the actual symbol name.
+ Intel puts the two adjacent, but Alpha (at
+ least) spreads them out. */
+
+ entry->comdat_symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
+ entry->comdat_name = bfd_strdup (symname);
+ }
+ }
- struct coff_comdat_info *comdat;
- size_t len = strlen (symname) + 1;
+ return true;
+}
- comdat = bfd_alloc (abfd, sizeof (*comdat) + len);
- if (comdat == NULL)
- return false;
+static bool
+insert_coff_comdat_info (bfd *abfd, asection *section, const char *symname,
+ long symbol)
+{
+ struct coff_comdat_info *comdat;
+ size_t len = strlen (symname) + 1;
- coff_section_data (abfd, section)->comdat = comdat;
- comdat->symbol = (esym - esymstart) / bfd_coff_symesz (abfd);
- char *newname = (char *) (comdat + 1);
- comdat->name = newname;
- memcpy (newname, symname, len);
- return true;
- }
- }
+ comdat = bfd_alloc (abfd, sizeof (*comdat) + len);
+ if (comdat == NULL)
+ return false;
+
+ coff_section_data (abfd, section)->comdat = comdat;
+ comdat->symbol = symbol;
+ char *newname = (char *) (comdat + 1);
+ comdat->name = newname;
+ memcpy (newname, symname, len);
+ return true;
+}
+
+static bool
+handle_COMDAT (bfd *abfd, flagword *sec_flags, const char *name,
+ asection *section)
+{
+ if (htab_elements (pe_data (abfd)->comdat_hash) == 0)
+ if (! fill_comdat_hash (abfd))
+ return false;
+
+ struct comdat_hash_entry *found
+ = find_flags (pe_data (abfd)->comdat_hash, section->target_index);
+ if (found != NULL)
+ {
+
+ struct internal_syment isym = found->isym;
+
+ /* If it isn't the stuff we're expecting, die;
+ The MS documentation is vague, but it
+ appears that the second entry serves BOTH
+ as the comdat symbol and the defining
+ symbol record (either C_STAT or C_EXT,
+ possibly with an aux entry with debug
+ information if it's a function.) It
+ appears the only way to find the second one
+ is to count. (On Intel, they appear to be
+ adjacent, but on Alpha, they have been
+ found separated.)
+
+ Here, we think we've found the first one,
+ but there's some checking we can do to be
+ sure. */
+
+ if (! ((isym.n_sclass == C_STAT || isym.n_sclass == C_EXT)
+ && BTYPE (isym.n_type) == T_NULL && isym.n_value == 0))
+ {
+ /* Malformed input files can trigger this test.
+ cf PR 21781. */
+ _bfd_error_handler (
+ _ ("%pB: error: unexpected symbol '%s' in COMDAT section"), abfd,
+ found->symname);
+ return false;
}
- }
+ /* FIXME LATER: MSVC generates section names
+ like .text for comdats. Gas generates
+ names like .text$foo__Fv (in the case of a
+ function). See comment above for more. */
+
+ if (isym.n_sclass == C_STAT && strcmp (name, found->symname) != 0)
+ /* xgettext:c-format */
+ _bfd_error_handler (_ ("%pB: warning: COMDAT symbol '%s'"
+ " does not match section name '%s'"),
+ abfd, found->symname, name);
+
+ if (found->comdat_symbol != -1)
+ {
+ if (! insert_coff_comdat_info (abfd, section, found->comdat_name,
+ found->comdat_symbol))
+ return false;
+ }
+ *sec_flags = *sec_flags | found->sec_flags;
+ return true;
+ }
+ *sec_flags = *sec_flags | SEC_LINK_ONCE;
return true;
}
@@ -1268,7 +1309,7 @@ styp_to_sec_flags (bfd *abfd,
break;
case IMAGE_SCN_LNK_COMDAT:
/* COMDAT gets very special treatment. */
- if (!handle_COMDAT (abfd, &sec_flags, hdr, name, section))
+ if (!handle_COMDAT (abfd, &sec_flags, name, section))
result = false;
break;
default:
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index 1ec9a51..bf9633a 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -293,6 +293,8 @@ coff_object_cleanup (bfd *abfd)
htab_delete (td->section_by_index);
if (td->section_by_target_index)
htab_delete (td->section_by_target_index);
+ if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+ htab_delete (pe_data (abfd)->comdat_hash);
}
}
}
@@ -3292,6 +3294,12 @@ _bfd_coff_free_cached_info (bfd *abfd)
tdata->section_by_target_index = NULL;
}
+ if (obj_pe (abfd) && pe_data (abfd)->comdat_hash)
+ {
+ htab_delete (pe_data (abfd)->comdat_hash);
+ pe_data (abfd)->comdat_hash = NULL;
+ }
+
_bfd_dwarf2_cleanup_debug_info (abfd, &tdata->dwarf2_find_line_info);
_bfd_stab_cleanup (abfd, &tdata->line_info);
diff --git a/bfd/libcoff-in.h b/bfd/libcoff-in.h
index 4e22036..eacfcb3 100644
--- a/bfd/libcoff-in.h
+++ b/bfd/libcoff-in.h
@@ -161,10 +161,22 @@ typedef struct pe_tdata
const char *style;
asection *sec;
} build_id;
+
+ htab_t comdat_hash;
} pe_data_type;
#define pe_data(bfd) ((bfd)->tdata.pe_obj_data)
+struct comdat_hash_entry
+{
+ int target_index;
+ struct internal_syment isym;
+ char *symname;
+ flagword sec_flags;
+ char *comdat_name;
+ long comdat_symbol;
+};
+
/* Tdata for XCOFF files. */
struct xcoff_tdata
diff --git a/bfd/libcoff.h b/bfd/libcoff.h
index b53c311..ad6138e 100644
--- a/bfd/libcoff.h
+++ b/bfd/libcoff.h
@@ -165,10 +165,22 @@ typedef struct pe_tdata
const char *style;
asection *sec;
} build_id;
+
+ htab_t comdat_hash;
} pe_data_type;
#define pe_data(bfd) ((bfd)->tdata.pe_obj_data)
+struct comdat_hash_entry
+{
+ int target_index;
+ struct internal_syment isym;
+ char *symname;
+ flagword sec_flags;
+ char *comdat_name;
+ long comdat_symbol;
+};
+
/* Tdata for XCOFF files. */
struct xcoff_tdata
diff --git a/bfd/peicode.h b/bfd/peicode.h
index 2cd020e..5ac6b0d 100644
--- a/bfd/peicode.h
+++ b/bfd/peicode.h
@@ -255,6 +255,21 @@ coff_swap_scnhdr_in (bfd * abfd, void * ext, void * in)
#endif
}
+static hashval_t
+htab_hash_flags (const void *entry)
+{
+ const struct comdat_hash_entry *fe = entry;
+ return fe->target_index;
+}
+
+static int
+htab_eq_flags (const void *e1, const void *e2)
+{
+ const struct comdat_hash_entry *fe1 = e1;
+ const struct comdat_hash_entry *fe2 = e2;
+ return fe1->target_index == fe2->target_index;
+}
+
static bool
pe_mkobject (bfd * abfd)
{
@@ -291,6 +306,8 @@ pe_mkobject (bfd * abfd)
pe->dos_message[14] = 0x24;
pe->dos_message[15] = 0x0;
+ pe->comdat_hash = htab_create (10, htab_hash_flags, htab_eq_flags, NULL);
+
memset (& pe->pe_opthdr, 0, sizeof pe->pe_opthdr);
bfd_coff_long_section_names (abfd)