diff options
-rw-r--r-- | libctf/ctf-dedup.c | 173 | ||||
-rw-r--r-- | libctf/ctf-impl.h | 1 |
2 files changed, 165 insertions, 9 deletions
diff --git a/libctf/ctf-dedup.c b/libctf/ctf-dedup.c index be423c4..1638a8f 100644 --- a/libctf/ctf-dedup.c +++ b/libctf/ctf-dedup.c @@ -48,6 +48,12 @@ least a forward emitted into the shared dict so that other dicts can cite it if needed. + [ctf_dedup_strings] + 4) deduplicate all the strings. (Called later, during late link time: + this must be done right before serialization, and after + 'preserialization' has written out all the strings that all the dicts + will need.) + [id_to_packed_id] This all works over an array of inputs (usually in the same order as the inputs on the link line). We don't use the ctf_link_inputs hash directly @@ -247,15 +253,18 @@ structures. [ctf_dedup_emit_struct_members] - The final stage of emission is to walk over all structures with members - that need emission and emit all of them. Every type has been emitted at - this stage, so emission cannot fail. - - [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping] - Finally, we update the input -> output type ID mappings used by the ctf-link - machinery to update all the other sections. This is surprisingly expensive - and may be replaced with a scheme which lets the ctf-link machinery extract - the needed info directly from the deduplicator. */ + Walk over all structures with members that need emission and emit all of + them. Every type has been emitted at this stage, so emission cannot + fail. + + [ctf_dedup_strings] + Accumulate a (string -> dict count) hash for all strings with refs in + child dicts, then add all strings with counts > 1 to the parent dict, + transplanting all their refs (strings already in the parent are also + treated in the obvious way). Since refs are just pointers, as long as + the parent dict is written out first, the children will automatically get + all their refs to parent strings appropriately updated without having to + do anything at all at child write time. */ /* Possible future optimizations are flagged with 'optimization opportunity' below. */ @@ -3083,6 +3092,8 @@ ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs, dict at the corresponding index in the INPUTS (for non-child dicts, the value is undefined and can just be left at zero). + The strtabs are not deduplicated yet. + Return an array of fps with content emitted into them (starting with OUTPUT, which is the parent of all others, then all the newly-generated outputs). @@ -3143,6 +3154,150 @@ ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs, return outputs; } +/* Deduplicate strings. The child dict ctf_parent_strlen is not updated + yet: this must be done after parent serialization. */ + +int +ctf_dedup_strings (ctf_dict_t *fp) +{ + ctf_dynhash_t *str_counts; + int err; + ctf_next_t *i = NULL; + void *dict; + + str_counts = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, + NULL, NULL); + if (!str_counts) + return ctf_set_errno (fp, ENOMEM); + + /* Count the strings. */ + + while ((err = ctf_dynhash_next (fp->ctf_link_outputs, &i, NULL, &dict)) == 0) + { + ctf_dict_t *out = (ctf_dict_t *) dict; + ctf_next_t *j = NULL; + void *v; + + while ((err = ctf_dynhash_next (out->ctf_str_atoms, &j, NULL, &v)) == 0) + { + ctf_str_atom_t *atom = (ctf_str_atom_t *) v; + uintptr_t count = 0; + + if (ctf_list_empty_p (&atom->csa_refs) + && ctf_list_empty_p (&atom->csa_movable_refs)) + continue; + + if (atom->csa_external_offset + || atom->csa_str[0] == '\0' + || atom->csa_flags & CTF_STR_ATOM_NO_DEDUP) + continue; + + if (ctf_dynhash_lookup_kv (str_counts, atom->csa_str, NULL, &v)) + { + count = (uintptr_t) v; + } + else + { + /* First reference. Check the parent too -- if the parent has a + string already, bump the count once more. */ + + if (ctf_dynhash_lookup (fp->ctf_str_atoms, atom->csa_str) != NULL) + count++; + } + count++; + + if ((err = ctf_dynhash_insert (str_counts, atom->csa_str, (void *) count)) < 0) + { + ctf_set_errno (fp, err); + ctf_next_destroy (j); + goto err; + } + } + if (err != ECTF_NEXT_END) + goto iterr; + } + if (err != ECTF_NEXT_END) + goto iterr; + + /* Deduplicate the strings, adding them to the parent and transplanting + their ref lists there. After transplantation, some of the movable refs + in the parent will contain references to a ctf_str_movable_refs hashtab + in the child dicts, but that's fine: it only means we can still do things + that involve ref movement in children without trouble. */ + + while ((err = ctf_dynhash_next (fp->ctf_link_outputs, &i, NULL, &dict)) == 0) + { + ctf_dict_t *out = (ctf_dict_t *) dict; + ctf_next_t *j = NULL; + void *v; + + while ((err = ctf_dynhash_next (out->ctf_str_atoms, &j, NULL, &v)) == 0) + { + ctf_str_atom_t *atom = (ctf_str_atom_t *) v; + ctf_str_atom_t *dedupped_atom; + + if (ctf_list_empty_p (&atom->csa_refs) + && ctf_list_empty_p (&atom->csa_movable_refs)) + continue; + + if (atom->csa_external_offset + || atom->csa_str[0] == '\0' + || atom->csa_flags & CTF_STR_ATOM_NO_DEDUP) + continue; + + if ((uintptr_t) ctf_dynhash_lookup (str_counts, atom->csa_str) <= 1) + continue; + + if (ctf_str_add_copy (fp, atom->csa_str) == 0) + { + ctf_set_errno (fp, ENOMEM); + ctf_next_destroy (j); + goto err; + } + + dedupped_atom = ctf_dynhash_lookup (fp->ctf_str_atoms, + atom->csa_str); + if (!ctf_assert (fp, dedupped_atom)) + { + ctf_next_destroy (j); + goto err; + } + + /* Splice all refs into the new atom. */ + + ctf_list_splice (&dedupped_atom->csa_refs, &atom->csa_refs); + ctf_list_splice (&dedupped_atom->csa_movable_refs, + &atom->csa_movable_refs); + + /* This atom is now "external" from the perspective of the child: + mostly, this means the child will not try to write it to its + strtab. */ + atom->csa_external_offset = atom->csa_offset; + atom->csa_offset = 0; + + /* Further ref additions of this atom will use the parent's offset + instead (but augment the child's ref list). */ + atom->csa_flags |= CTF_STR_ATOM_IN_PARENT; + } + } + if (err != ECTF_NEXT_END) + goto iterr; + + ctf_dynhash_destroy (str_counts); + return 0; + + err: + ctf_dynhash_destroy (str_counts); + ctf_err_warn (fp, 0, 0, _("error deduplicating strings")); + return -1; + + iterr: + ctf_dynhash_destroy (str_counts); + ctf_set_errno (fp, err); + ctf_err_warn (fp, 0, 0, _("Iteration failure deduplicating strings")); + return -1; +} + /* Determine what type SRC_FP / SRC_TYPE was emitted as in the FP, which must be the shared dict or have it as a parent: return 0 if none. The SRC_FP must be a past input to ctf_dedup. */ diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index 5e1c081..b15b37c 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -739,6 +739,7 @@ extern int ctf_dedup (ctf_dict_t *, ctf_dict_t **, uint32_t ninputs, extern ctf_dict_t **ctf_dedup_emit (ctf_dict_t *, ctf_dict_t **, uint32_t ninputs, uint32_t *parents, uint32_t *noutputs, int cu_mapped); +extern int ctf_dedup_strings (ctf_dict_t *fp); extern void ctf_dedup_fini (ctf_dict_t *, ctf_dict_t **, uint32_t); extern ctf_id_t ctf_dedup_type_mapping (ctf_dict_t *fp, ctf_dict_t *src_fp, ctf_id_t src_type); |