aboutsummaryrefslogtreecommitdiff
path: root/libctf/ctf-link.c
diff options
context:
space:
mode:
author Nick Alcock <nick.alcock@oracle.com> 2024-07-15 23:29:02 +0100
committer Nick Alcock <nick.alcock@oracle.com> 2025-02-28 14:47:24 +0000
commit 4d2d5afa6078355074fa284c49a2cad2657fd367 (patch)
tree d38db3ac404980d4163baff5f5464b82954c356f /libctf/ctf-link.c
parent 9daceda7960df4b142d5c925eb16e813469978c3 (diff)
downloadbinutils-4d2d5afa6078355074fa284c49a2cad2657fd367.zip
binutils-4d2d5afa6078355074fa284c49a2cad2657fd367.tar.gz
binutils-4d2d5afa6078355074fa284c49a2cad2657fd367.tar.bz2
libctf: actually deduplicate the strtab
This commit finally implements strtab deduplication, putting together all the pieces assembled in the earlier commits. The magic is entirely localized to ctf_link_write, which preserializes all the dicts (parent first), and calls ctf_dedup_strings on the parent. (The error paths get tweaked a bit too.) Calling ctf_dedup_strings has implications elsewhere: the lifetime rules for the inputs versus outputs change a bit now that the child output dicts contain references to the parent dict's atoms table. We also pre-purge movable refs from all the deduplicated strings before freeing any of this because movable refs contain backreferences into the dict they came from, which means the parent contains references to all the children! Purging the refs first makes those references go away so we can free the children without creating any wild pointers, even temporarily. There's a new testcase that identifies a regression whereby offset 0 (the null string) and index 0 (in children now often the parent dict name, ".ctf") got mixed up, leading to anonymous structs and unions getting the not entirely C-valid name ".ctf" instead. Many other testcases get adjusted to no longer depend on the precise layout of the strtab. TODO: add new tests to verify that strings are actually being deduplicated. libctf/ * ctf-link.c (ctf_link_write): Deduplicate strings. * ctf-open.c (ctf_dict_close): Free refs, then the link outputs, then the out cu_mapping, then the inputs, in that order. * ctf-string.c (ctf_str_purge_refs): Not static any more. * ctf-impl.h: Declare it. ld/ * testsuite/ld-ctf/conflicting-cycle-2.A-1.d: Don't depend on strtab contents. * testsuite/ld-ctf/conflicting-cycle-2.A-2.d: Likewise. * testsuite/ld-ctf/conflicting-cycle-2.parent.d: Likewise. * testsuite/ld-ctf/conflicting-cycle-3.C-1.d: Likewise. * testsuite/ld-ctf/conflicting-cycle-3.C-2.d: Likewise. * testsuite/ld-ctf/anonymous-conflicts*: New test.
Diffstat (limited to 'libctf/ctf-link.c')
-rw-r--r--libctf/ctf-link.c50
1 files changed, 43 insertions, 7 deletions
diff --git a/libctf/ctf-link.c b/libctf/ctf-link.c
index b992fb1..6a24344 100644
--- a/libctf/ctf-link.c
+++ b/libctf/ctf-link.c
@@ -1826,7 +1826,7 @@ typedef struct ctf_name_list_accum_cb_arg
char **names;
ctf_dict_t *fp;
ctf_dict_t **files;
- size_t i;
+ ssize_t i;
char **dynames;
size_t ndynames;
} ctf_name_list_accum_cb_arg_t;
@@ -1961,11 +1961,12 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
char *transformed_name = NULL;
ctf_dict_t **files;
FILE *f = NULL;
- size_t i;
+ ssize_t i;
int err;
long fsize;
const char *errloc;
unsigned char *buf = NULL;
+ uint64_t old_parent_strlen, all_strlens = 0;
memset (&arg, 0, sizeof (ctf_name_list_accum_cb_arg_t));
arg.fp = fp;
@@ -1983,7 +1984,7 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
}
}
- /* No extra outputs? Just write a simple ctf_dict_t. */
+ /* No extra outputs? Just write a simple ctf_dict_t. */
if (arg.i == 0)
{
unsigned char *ret = ctf_write_mem (fp, size, threshold);
@@ -1992,7 +1993,9 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
}
/* Writing an archive. Stick ourselves (the shared repository, parent of all
- other archives) on the front of it with the default name. */
+ other archives) on the front of it with the default name. (Writing the parent
+ dict out first is essential for strings in child dicts shared with the parent
+ to get their proper offsets.) */
if ((names = realloc (arg.names, sizeof (char *) * (arg.i + 1))) == NULL)
{
errloc = "name reallocation";
@@ -2034,6 +2037,39 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
memmove (&(arg.files[1]), arg.files, sizeof (ctf_dict_t *) * (arg.i));
arg.files[0] = fp;
+ /* Preserialize everything, doing everything but strtab generation and things that
+ depend on that. */
+ for (i = 0; i < arg.i + 1; i++)
+ {
+ if (ctf_preserialize (arg.files[i]) < 0)
+ {
+ errno = ctf_errno (arg.files[i]);
+ for (i--; i >= 0; i--)
+ ctf_depreserialize (arg.files[i]);
+ errloc = "preserialization";
+ goto err_no;
+ }
+ }
+
+ ctf_dprintf ("Deduplicating strings.\n");
+
+ for (i = 0; i < arg.i; i++)
+ all_strlens += arg.files[i]->ctf_str_prov_offset;
+ old_parent_strlen = arg.files[0]->ctf_str_prov_offset;
+
+ if (ctf_dedup_strings (fp) < 0)
+ {
+ for (i = 0; i < arg.i + 1; i++)
+ ctf_depreserialize (arg.files[i]);
+ errloc = "string deduplication";
+ goto err_str_dedup;
+ }
+
+ ctf_dprintf ("Deduplicated strings: original parent strlen: %zu; "
+ "original lengths: %zu; final length: %zu.\n",
+ (size_t) old_parent_strlen, (size_t) all_strlens,
+ (size_t) arg.files[0]->ctf_str_prov_offset);
+
if ((f = tmpfile ()) == NULL)
{
errloc = "tempfile creation";
@@ -2045,8 +2081,8 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
threshold)) < 0)
{
errloc = "archive writing";
- ctf_set_errno (fp, err);
- goto err;
+ errno = err;
+ goto err_no;
}
if (fseek (f, 0, SEEK_END) < 0)
@@ -2105,7 +2141,7 @@ ctf_link_write (ctf_dict_t *fp, size_t *size, size_t threshold)
err_no:
ctf_set_errno (fp, errno);
-
+ err_str_dedup:
/* Turn off the is-linking flag on all the dicts in this link, as above. */
for (i = 0; i < arg.i; i++)
{