aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Alcock <nick.alcock@oracle.com>2024-07-15 23:21:20 +0100
committerNick Alcock <nick.alcock@oracle.com>2024-11-18 11:50:09 +0000
commita87e7ec4fe37e7a8791a358cb16ecdafb3c695d3 (patch)
tree900cf4c8ee41546b9555deab05102a92301ecc56
parentefefe2ec714a680f0c70a5777c1d474bae9061f1 (diff)
downloadbinutils-a87e7ec4fe37e7a8791a358cb16ecdafb3c695d3.zip
binutils-a87e7ec4fe37e7a8791a358cb16ecdafb3c695d3.tar.gz
binutils-a87e7ec4fe37e7a8791a358cb16ecdafb3c695d3.tar.bz2
libctf: dedup: add strtab deduplicator
This is a pretty simple two-phase process (count duplicates that are actually going to end up in the strtab and aren't e.g. strings without refs, strings with external refs etc, and move them into the parent) with one wrinkle: we sorta-abuse the csa_external_offset field in the deduplicated child atom (normally used to indicate that this string is located in the ELF strtab) to indicate that this atom is in the *parent*. If you think of "external" as meaning simply "is in some other strtab, we don't care which one", this still makes enough sense to not need to change the name, I hope. This is still not called from anywhere, so strings are (still!) not deduplicated, and none of the dedup machinery added in earlier commits does anything yet. libctf/ * ctf-dedup.c (ctf_dedup_emit_struct_members): Note that strtab dedup happens (well) after struct member emission. (ctf_dedup_strings): New. * ctf-impl.h (ctf_dedup_strings): Declare.
-rw-r--r--libctf/ctf-dedup.c173
-rw-r--r--libctf/ctf-impl.h1
2 files changed, 165 insertions, 9 deletions
diff --git a/libctf/ctf-dedup.c b/libctf/ctf-dedup.c
index be423c4..1638a8f 100644
--- a/libctf/ctf-dedup.c
+++ b/libctf/ctf-dedup.c
@@ -48,6 +48,12 @@
least a forward emitted into the shared dict so that other dicts can cite
it if needed.
+ [ctf_dedup_strings]
+ 4) deduplicate all the strings. (Called later, during late link time:
+ this must be done right before serialization, and after
+ 'preserialization' has written out all the strings that all the dicts
+ will need.)
+
[id_to_packed_id]
This all works over an array of inputs (usually in the same order as the
inputs on the link line). We don't use the ctf_link_inputs hash directly
@@ -247,15 +253,18 @@
structures.
[ctf_dedup_emit_struct_members]
- The final stage of emission is to walk over all structures with members
- that need emission and emit all of them. Every type has been emitted at
- this stage, so emission cannot fail.
-
- [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping]
- Finally, we update the input -> output type ID mappings used by the ctf-link
- machinery to update all the other sections. This is surprisingly expensive
- and may be replaced with a scheme which lets the ctf-link machinery extract
- the needed info directly from the deduplicator. */
+ Walk over all structures with members that need emission and emit all of
+ them. Every type has been emitted at this stage, so emission cannot
+ fail.
+
+ [ctf_dedup_strings]
+ Accumulate a (string -> dict count) hash for all strings with refs in
+ child dicts, then add all strings with counts > 1 to the parent dict,
+ transplanting all their refs (strings already in the parent are also
+ treated in the obvious way). Since refs are just pointers, as long as
+ the parent dict is written out first, the children will automatically get
+ all their refs to parent strings appropriately updated without having to
+ do anything at all at child write time. */
/* Possible future optimizations are flagged with 'optimization opportunity'
below. */
@@ -3083,6 +3092,8 @@ ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs,
dict at the corresponding index in the INPUTS (for non-child dicts, the value
is undefined and can just be left at zero).
+ The strtabs are not deduplicated yet.
+
Return an array of fps with content emitted into them (starting with OUTPUT,
which is the parent of all others, then all the newly-generated outputs).
@@ -3143,6 +3154,150 @@ ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
return outputs;
}
+/* Deduplicate strings. The child dict ctf_parent_strlen is not updated
+ yet: this must be done after parent serialization. */
+
+int
+ctf_dedup_strings (ctf_dict_t *fp)
+{
+ ctf_dynhash_t *str_counts;
+ int err;
+ ctf_next_t *i = NULL;
+ void *dict;
+
+ str_counts = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
+ NULL, NULL);
+ if (!str_counts)
+ return ctf_set_errno (fp, ENOMEM);
+
+ /* Count the strings. */
+
+ while ((err = ctf_dynhash_next (fp->ctf_link_outputs, &i, NULL, &dict)) == 0)
+ {
+ ctf_dict_t *out = (ctf_dict_t *) dict;
+ ctf_next_t *j = NULL;
+ void *v;
+
+ while ((err = ctf_dynhash_next (out->ctf_str_atoms, &j, NULL, &v)) == 0)
+ {
+ ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
+ uintptr_t count = 0;
+
+ if (ctf_list_empty_p (&atom->csa_refs)
+ && ctf_list_empty_p (&atom->csa_movable_refs))
+ continue;
+
+ if (atom->csa_external_offset
+ || atom->csa_str[0] == '\0'
+ || atom->csa_flags & CTF_STR_ATOM_NO_DEDUP)
+ continue;
+
+ if (ctf_dynhash_lookup_kv (str_counts, atom->csa_str, NULL, &v))
+ {
+ count = (uintptr_t) v;
+ }
+ else
+ {
+ /* First reference. Check the parent too -- if the parent has a
+ string already, bump the count once more. */
+
+ if (ctf_dynhash_lookup (fp->ctf_str_atoms, atom->csa_str) != NULL)
+ count++;
+ }
+ count++;
+
+ if ((err = ctf_dynhash_insert (str_counts, atom->csa_str, (void *) count)) < 0)
+ {
+ ctf_set_errno (fp, err);
+ ctf_next_destroy (j);
+ goto err;
+ }
+ }
+ if (err != ECTF_NEXT_END)
+ goto iterr;
+ }
+ if (err != ECTF_NEXT_END)
+ goto iterr;
+
+ /* Deduplicate the strings, adding them to the parent and transplanting
+ their ref lists there. After transplantation, some of the movable refs
+ in the parent will contain references to a ctf_str_movable_refs hashtab
+ in the child dicts, but that's fine: it only means we can still do things
+ that involve ref movement in children without trouble. */
+
+ while ((err = ctf_dynhash_next (fp->ctf_link_outputs, &i, NULL, &dict)) == 0)
+ {
+ ctf_dict_t *out = (ctf_dict_t *) dict;
+ ctf_next_t *j = NULL;
+ void *v;
+
+ while ((err = ctf_dynhash_next (out->ctf_str_atoms, &j, NULL, &v)) == 0)
+ {
+ ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
+ ctf_str_atom_t *dedupped_atom;
+
+ if (ctf_list_empty_p (&atom->csa_refs)
+ && ctf_list_empty_p (&atom->csa_movable_refs))
+ continue;
+
+ if (atom->csa_external_offset
+ || atom->csa_str[0] == '\0'
+ || atom->csa_flags & CTF_STR_ATOM_NO_DEDUP)
+ continue;
+
+ if ((uintptr_t) ctf_dynhash_lookup (str_counts, atom->csa_str) <= 1)
+ continue;
+
+ if (ctf_str_add_copy (fp, atom->csa_str) == 0)
+ {
+ ctf_set_errno (fp, ENOMEM);
+ ctf_next_destroy (j);
+ goto err;
+ }
+
+ dedupped_atom = ctf_dynhash_lookup (fp->ctf_str_atoms,
+ atom->csa_str);
+ if (!ctf_assert (fp, dedupped_atom))
+ {
+ ctf_next_destroy (j);
+ goto err;
+ }
+
+ /* Splice all refs into the new atom. */
+
+ ctf_list_splice (&dedupped_atom->csa_refs, &atom->csa_refs);
+ ctf_list_splice (&dedupped_atom->csa_movable_refs,
+ &atom->csa_movable_refs);
+
+ /* This atom is now "external" from the perspective of the child:
+ mostly, this means the child will not try to write it to its
+ strtab. */
+ atom->csa_external_offset = atom->csa_offset;
+ atom->csa_offset = 0;
+
+ /* Further ref additions of this atom will use the parent's offset
+ instead (but augment the child's ref list). */
+ atom->csa_flags |= CTF_STR_ATOM_IN_PARENT;
+ }
+ }
+ if (err != ECTF_NEXT_END)
+ goto iterr;
+
+ ctf_dynhash_destroy (str_counts);
+ return 0;
+
+ err:
+ ctf_dynhash_destroy (str_counts);
+ ctf_err_warn (fp, 0, 0, _("error deduplicating strings"));
+ return -1;
+
+ iterr:
+ ctf_dynhash_destroy (str_counts);
+ ctf_set_errno (fp, err);
+ ctf_err_warn (fp, 0, 0, _("Iteration failure deduplicating strings"));
+ return -1;
+}
+
/* Determine what type SRC_FP / SRC_TYPE was emitted as in the FP, which
must be the shared dict or have it as a parent: return 0 if none. The SRC_FP
must be a past input to ctf_dedup. */
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h
index 5e1c081..b15b37c 100644
--- a/libctf/ctf-impl.h
+++ b/libctf/ctf-impl.h
@@ -739,6 +739,7 @@ extern int ctf_dedup (ctf_dict_t *, ctf_dict_t **, uint32_t ninputs,
extern ctf_dict_t **ctf_dedup_emit (ctf_dict_t *, ctf_dict_t **,
uint32_t ninputs, uint32_t *parents,
uint32_t *noutputs, int cu_mapped);
+extern int ctf_dedup_strings (ctf_dict_t *fp);
extern void ctf_dedup_fini (ctf_dict_t *, ctf_dict_t **, uint32_t);
extern ctf_id_t ctf_dedup_type_mapping (ctf_dict_t *fp, ctf_dict_t *src_fp,
ctf_id_t src_type);