aboutsummaryrefslogtreecommitdiff
path: root/libctf/ctf-string.c
diff options
context:
space:
mode:
Diffstat (limited to 'libctf/ctf-string.c')
-rw-r--r--libctf/ctf-string.c389
1 files changed, 261 insertions, 128 deletions
diff --git a/libctf/ctf-string.c b/libctf/ctf-string.c
index dcb8bf0..46b984b 100644
--- a/libctf/ctf-string.c
+++ b/libctf/ctf-string.c
@@ -20,10 +20,14 @@
#include <assert.h>
#include <ctf-impl.h>
#include <string.h>
-#include <assert.h>
-/* Convert an encoded CTF string name into a pointer to a C string, using an
- explicit internal strtab rather than the fp-based one. */
+static ctf_str_atom_t *
+ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
+ int flags, uint32_t *ref);
+
+/* Convert an encoded CTF string name into a pointer to a C string, possibly
+ using an explicit internal provisional strtab rather than the fp-based
+ one. */
const char *
ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
{
@@ -32,18 +36,20 @@ ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
ctsp = strtab;
- /* If this name is in the external strtab, and there is a synthetic strtab,
- use it in preference. */
+ /* If this name is in the external strtab, and there is a synthetic
+ strtab, use it in preference. (This is used to add the set of strings
+ -- symbol names, etc -- the linker knows about before the strtab is
+ written out.) */
if (CTF_NAME_STID (name) == CTF_STRTAB_1
&& fp->ctf_syn_ext_strtab != NULL)
return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
(void *) (uintptr_t) name);
- /* If the name is in the internal strtab, and the offset is beyond the end of
- the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
- string added by ctf_str_add*() but not yet built into a real strtab: get
- the value out of the ctf_prov_strtab. */
+ /* If the name is in the internal strtab, and the name offset is beyond
+ the end of the ctsp->cts_len but below the ctf_str_prov_offset, this is
+ a provisional string added by ctf_str_add*() but not yet built into a
+ real strtab: get the value out of the ctf_prov_strtab. */
if (CTF_NAME_STID (name) == CTF_STRTAB_0
&& name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
@@ -134,13 +140,25 @@ ctf_str_free_atom (void *a)
}
/* Create the atoms table. There is always at least one atom in it, the null
- string. */
+ string: but also pull in atoms from the internal strtab. (We rely on
+ calls to ctf_str_add_external to populate external strtab entries, since
+ these are often not quite the same as what appears in any external
+ strtab, and the external strtab is often huge and best not aggressively
+ pulled in.)
+
+ Alternatively, if passed, populate atoms from the passed-in table, but do
+ not propagate their flags or refs: they are all non-freeable and
+ non-movable. (This is used when serializing a dict: this entire atoms
+ table will be thrown away shortly, so it is important that we not create
+ any new strings.) */
int
-ctf_str_create_atoms (ctf_dict_t *fp)
+ctf_str_create_atoms (ctf_dict_t *fp, ctf_dynhash_t *atoms)
{
+ size_t i;
+
fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
- free, ctf_str_free_atom);
- if (fp->ctf_str_atoms == NULL)
+ NULL, ctf_str_free_atom);
+ if (!fp->ctf_str_atoms)
return -ENOMEM;
if (!fp->ctf_prov_strtab)
@@ -161,6 +179,63 @@ ctf_str_create_atoms (ctf_dict_t *fp)
if (errno == ENOMEM)
goto oom_str_add;
+ /* Serializing. We have existing strings in an existing atoms table with
+ possibly-live pointers to them which must be used unchanged. Import
+ them into this atoms table. */
+
+ if (atoms)
+ {
+ ctf_next_t *it = NULL;
+ void *k, *v;
+ int err;
+
+ while ((err = ctf_dynhash_next (atoms, &it, &k, &v)) == 0)
+ {
+ ctf_str_atom_t *existing = v;
+ ctf_str_atom_t *atom;
+
+ if (existing->csa_str[0] == 0)
+ continue;
+
+ if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
+ goto oom_str_add;
+ memcpy (atom, existing, sizeof (struct ctf_str_atom));
+ memset (&atom->csa_refs, 0, sizeof(ctf_list_t));
+ atom->csa_flags = 0;
+
+ if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0)
+ {
+ free (atom);
+ goto oom_str_add;
+ }
+ }
+ }
+ else
+ {
+ /* Not serializing. Pull in all the strings in the strtab as new
+ atoms. The provisional strtab must be empty at this point, so
+ there is no need to populate atoms from it as well. Types in this
+ subset are frozen and readonly, so the refs list and movable refs
+ list need not be populated. */
+
+ for (i = 0; i < fp->ctf_str[CTF_STRTAB_0].cts_len;
+ i += strlen (&fp->ctf_str[CTF_STRTAB_0].cts_strs[i]) + 1)
+ {
+ ctf_str_atom_t *atom;
+
+ if (fp->ctf_str[CTF_STRTAB_0].cts_strs[i] == 0)
+ continue;
+
+ atom = ctf_str_add_ref_internal (fp, &fp->ctf_str[CTF_STRTAB_0].cts_strs[i],
+ 0, 0);
+
+ if (!atom)
+ goto oom_str_add;
+
+ atom->csa_offset = i;
+ }
+ }
+
return 0;
oom_str_add:
@@ -182,6 +257,11 @@ ctf_str_free_atoms (ctf_dict_t *fp)
ctf_dynhash_destroy (fp->ctf_prov_strtab);
ctf_dynhash_destroy (fp->ctf_str_atoms);
ctf_dynhash_destroy (fp->ctf_str_movable_refs);
+ if (fp->ctf_dynstrtab)
+ {
+ free (fp->ctf_dynstrtab->cts_strs);
+ free (fp->ctf_dynstrtab);
+ }
}
#define CTF_STR_ADD_REF 0x1
@@ -538,69 +618,6 @@ ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
*(ref->caf_ref) = value;
}
-/* State shared across the strtab write process. */
-typedef struct ctf_strtab_write_state
-{
- /* Strtab we are writing, and the number of strings in it. */
- ctf_strs_writable_t *strtab;
- size_t strtab_count;
-
- /* Pointers to (existing) atoms in the atoms table, for qsorting. */
- ctf_str_atom_t **sorttab;
-
- /* Loop counter for sorttab population. */
- size_t i;
-
- /* The null-string atom (skipped during population). */
- ctf_str_atom_t *nullstr;
-} ctf_strtab_write_state_t;
-
-/* Count the number of entries in the strtab, and its length. */
-static void
-ctf_str_count_strtab (void *key _libctf_unused_, void *value,
- void *arg)
-{
- ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
- ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
-
- /* We only factor in the length of items that have no offset and have refs:
- other items are in the external strtab, or will simply not be written out
- at all. They still contribute to the total count, though, because we still
- have to sort them. We add in the null string's length explicitly, outside
- this function, since it is explicitly written out even if it has no refs at
- all. */
-
- if (s->nullstr == atom)
- {
- s->strtab_count++;
- return;
- }
-
- if (!ctf_list_empty_p (&atom->csa_refs))
- {
- if (!atom->csa_external_offset)
- s->strtab->cts_len += strlen (atom->csa_str) + 1;
- s->strtab_count++;
- }
-}
-
-/* Populate the sorttab with pointers to the strtab atoms. */
-static void
-ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
- void *arg)
-{
- ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
- ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
-
- /* Skip the null string. */
- if (s->nullstr == atom)
- return;
-
- /* Skip atoms with no refs. */
- if (!ctf_list_empty_p (&atom->csa_refs))
- s->sorttab[s->i++] = atom;
-}
-
/* Sort the strtab. */
static int
ctf_str_sort_strtab (const void *a, const void *b)
@@ -612,79 +629,182 @@ ctf_str_sort_strtab (const void *a, const void *b)
}
/* Write out and return a strtab containing all strings with recorded refs,
- adjusting the refs to refer to the corresponding string. The returned strtab
- may be NULL on error. Also populate the synthetic strtab with mappings from
- external strtab offsets to names, so we can look them up with ctf_strptr().
- Only external strtab offsets with references are added. */
-ctf_strs_writable_t
+ adjusting the refs to refer to the corresponding string. The returned
+ strtab is already assigned to strtab 0 in this dict, is owned by this
+ dict, and may be NULL on error. Also populate the synthetic strtab with
+ mappings from external strtab offsets to names, so we can look them up
+ with ctf_strptr(). Only external strtab offsets with references are
+ added.
+
+ As a side effect, replaces the strtab of the current dict with the newly-
+ generated strtab. This is an exception to the general rule that
+ serialization does not change the dict passed in, because the alternative
+ is to copy the entire atoms table on every reserialization just to avoid
+ modifying the original, which is excessively costly for minimal gain.
+
+ We use the lazy man's approach and double memory costs by always storing
+ atoms as individually allocated entities whenever they come from anywhere
+ but a freshly-opened, mmapped dict, even though after serialization there
+ is another copy in the strtab; this ensures that ctf_strptr()-returned
+ pointers to them remain valid for the lifetime of the dict.
+
+ This is all rendered more complex because if a dict is ctf_open()ed it
+ will have a bunch of strings in its strtab already, and their strtab
+ offsets can never change (without piles of complexity to rescan the
+ entire dict just to get all the offsets to all of them into the atoms
+ table). Entries below the existing strtab limit are just copied into the
+ new dict: entries above it are new, and are are sorted first, then
+ appended to it. The sorting is purely a compression-efficiency
+ improvement, and we get nearly as good an improvement from sorting big
+ chunks like this as we would from sorting the whole thing. */
+
+const ctf_strs_writable_t *
ctf_str_write_strtab (ctf_dict_t *fp)
{
- ctf_strs_writable_t strtab;
- ctf_str_atom_t *nullstr;
+ ctf_strs_writable_t *strtab;
+ size_t strtab_count = 0;
uint32_t cur_stroff = 0;
- ctf_strtab_write_state_t s;
ctf_str_atom_t **sorttab;
+ ctf_next_t *it = NULL;
size_t i;
+ void *v;
+ int err;
+ int new_strtab = 0;
int any_external = 0;
- memset (&strtab, 0, sizeof (struct ctf_strs_writable));
- memset (&s, 0, sizeof (struct ctf_strtab_write_state));
- s.strtab = &strtab;
+ strtab = calloc (1, sizeof (ctf_strs_writable_t));
+ if (!strtab)
+ return NULL;
+
+ /* The strtab contains the existing string table at its start: figure out
+ how many new strings we need to add. We only need to add new strings
+ that have no external offset, that have refs, and that are found in the
+ provisional strtab. If the existing strtab is empty we also need to
+ add the null string at its start. */
+
+ strtab->cts_len = fp->ctf_str[CTF_STRTAB_0].cts_len;
- nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
- if (!nullstr)
+ if (strtab->cts_len == 0)
{
- ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
- strtab.cts_strs = NULL;
- return strtab;
+ new_strtab = 1;
+ strtab->cts_len++; /* For the \0. */
}
- s.nullstr = nullstr;
- ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
- strtab.cts_len++; /* For the null string. */
+ /* Count new entries in the strtab: i.e. entries in the provisional
+ strtab. Ignore any entry for \0, entries which ended up in the
+ external strtab, and unreferenced entries. */
- ctf_dprintf ("%lu bytes of strings in strtab.\n",
- (unsigned long) strtab.cts_len);
+ while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
+ {
+ const char *str = (const char *) v;
+ ctf_str_atom_t *atom;
+
+ atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
+ if (!ctf_assert (fp, atom))
+ goto err_strtab;
+
+ if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
+ atom->csa_external_offset)
+ continue;
+
+ strtab->cts_len += strlen (atom->csa_str) + 1;
+ strtab_count++;
+ }
+ if (err != ECTF_NEXT_END)
+ {
+ ctf_dprintf ("ctf_str_write_strtab: error counting strtab entries: %s\n",
+ ctf_errmsg (err));
+ goto err_strtab;
+ }
- /* Sort the strtab. Force the null string to be first. */
- sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
+ ctf_dprintf ("%lu bytes of strings in strtab: %lu pre-existing.\n",
+ (unsigned long) strtab->cts_len,
+ (unsigned long) fp->ctf_str[CTF_STRTAB_0].cts_len);
+
+ /* Sort the new part of the strtab. */
+
+ sorttab = calloc (strtab_count, sizeof (ctf_str_atom_t *));
if (!sorttab)
- goto oom;
+ {
+ ctf_set_errno (fp, ENOMEM);
+ goto err_strtab;
+ }
- sorttab[0] = nullstr;
- s.i = 1;
- s.sorttab = sorttab;
- ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
+ i = 0;
+ while ((err = ctf_dynhash_next (fp->ctf_prov_strtab, &it, NULL, &v)) == 0)
+ {
+ ctf_str_atom_t *atom;
- qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
+ atom = ctf_dynhash_lookup (fp->ctf_str_atoms, v);
+ if (!ctf_assert (fp, atom))
+ goto err_sorttab;
+
+ if (atom->csa_str[0] == 0 || ctf_list_empty_p (&atom->csa_refs) ||
+ atom->csa_external_offset)
+ continue;
+
+ sorttab[i++] = atom;
+ }
+
+ qsort (sorttab, strtab_count, sizeof (ctf_str_atom_t *),
ctf_str_sort_strtab);
- if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
- goto oom_sorttab;
+ if ((strtab->cts_strs = malloc (strtab->cts_len)) == NULL)
+ goto err_sorttab;
+
+ cur_stroff = fp->ctf_str[CTF_STRTAB_0].cts_len;
- /* Update all refs: also update the strtab appropriately. */
- for (i = 0; i < s.strtab_count; i++)
+ if (new_strtab)
{
- if (sorttab[i]->csa_external_offset)
- {
- /* External strtab entry. */
+ strtab->cts_strs[0] = 0;
+ cur_stroff++;
+ }
+ else
+ memcpy (strtab->cts_strs, fp->ctf_str[CTF_STRTAB_0].cts_strs,
+ fp->ctf_str[CTF_STRTAB_0].cts_len);
+
+ /* Work over the sorttab, add its strings to the strtab, and remember
+ where they are in the csa_offset for the appropriate atom. No ref
+ updating is done at this point, because refs might well relate to
+ already-existing strings, or external strings, which do not need adding
+ to the strtab and may not be in the sorttab. */
+
+ for (i = 0; i < strtab_count; i++)
+ {
+ sorttab[i]->csa_offset = cur_stroff;
+ strcpy (&strtab->cts_strs[cur_stroff], sorttab[i]->csa_str);
+ cur_stroff += strlen (sorttab[i]->csa_str) + 1;
+ }
+ free (sorttab);
+ sorttab = NULL;
+ /* Update all refs, then purge them as no longer necessary: also update
+ the strtab appropriately. */
+
+ while ((err = ctf_dynhash_next (fp->ctf_str_atoms, &it, NULL, &v)) == 0)
+ {
+ ctf_str_atom_t *atom = (ctf_str_atom_t *) v;
+ uint32_t offset;
+
+ if (ctf_list_empty_p (&atom->csa_refs))
+ continue;
+
+ if (atom->csa_external_offset)
+ {
any_external = 1;
- ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
- sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
+ offset = atom->csa_external_offset;
}
else
- {
- /* Internal strtab entry with refs: actually add to the string
- table. */
-
- ctf_str_update_refs (sorttab[i], cur_stroff);
- sorttab[i]->csa_offset = cur_stroff;
- strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
- cur_stroff += strlen (sorttab[i]->csa_str) + 1;
- }
+ offset = atom->csa_offset;
+ ctf_str_update_refs (atom, offset);
}
- free (sorttab);
+ if (err != ECTF_NEXT_END)
+ {
+ ctf_dprintf ("ctf_str_write_strtab: error iterating over atoms while updating refs: %s\n",
+ ctf_errmsg (err));
+ goto err_strtab;
+ }
+ ctf_str_purge_refs (fp);
if (!any_external)
{
@@ -692,16 +812,29 @@ ctf_str_write_strtab (ctf_dict_t *fp)
fp->ctf_syn_ext_strtab = NULL;
}
+ /* Replace the old strtab with the new one in this dict. */
+
+ if (fp->ctf_dynstrtab)
+ {
+ free (fp->ctf_dynstrtab->cts_strs);
+ free (fp->ctf_dynstrtab);
+ }
+
+ fp->ctf_dynstrtab = strtab;
+ fp->ctf_str[CTF_STRTAB_0].cts_strs = strtab->cts_strs;
+ fp->ctf_str[CTF_STRTAB_0].cts_len = strtab->cts_len;
+
/* All the provisional strtab entries are now real strtab entries, and
ctf_strptr() will find them there. The provisional offset now starts right
beyond the new end of the strtab. */
ctf_dynhash_empty (fp->ctf_prov_strtab);
- fp->ctf_str_prov_offset = strtab.cts_len + 1;
+ fp->ctf_str_prov_offset = strtab->cts_len + 1;
return strtab;
- oom_sorttab:
+ err_sorttab:
free (sorttab);
- oom:
- return strtab;
+ err_strtab:
+ free (strtab);
+ return NULL;
}