diff options
author | Nick Alcock <nick.alcock@oracle.com> | 2020-06-05 18:35:46 +0100 |
---|---|---|
committer | Nick Alcock <nick.alcock@oracle.com> | 2020-07-22 18:02:19 +0100 |
commit | 0f0c11f7fc9f0ab6bd63fc5f8a4cee7367a81849 (patch) | |
tree | eff35c0824fa60651cd492f8fa63316da9a205da /libctf/ctf-impl.h | |
parent | a9b9870206658564272fe17d2079ed9eb6ffb15f (diff) | |
download | gdb-0f0c11f7fc9f0ab6bd63fc5f8a4cee7367a81849.zip gdb-0f0c11f7fc9f0ab6bd63fc5f8a4cee7367a81849.tar.gz gdb-0f0c11f7fc9f0ab6bd63fc5f8a4cee7367a81849.tar.bz2 |
libctf, dedup: add deduplicator
This adds the core deduplicator that the ctf_link machinery calls
(possibly repeatedly) to link the CTF sections: it takes an array
of input ctf_file_t's and another array that indicates which entries in
the input array are parents of which other entries, and returns an array
of outputs. The first output is always the ctf_file_t on which
ctf_link/ctf_dedup/etc was called: the other outputs are child dicts
that have the first output as their parent.
include/
* ctf-api.h (CTF_LINK_SHARE_DUPLICATED): No longer unimplemented.
libctf/
* ctf-impl.h (ctf_type_id_key): New, the key in the
cd_id_to_file_t.
(ctf_dedup): New, core deduplicator state.
(ctf_file_t) <ctf_dedup>: New.
<ctf_dedup_atoms>: New.
<ctf_dedup_atoms_alloc>: New.
(ctf_hash_type_id_key): New prototype.
(ctf_hash_eq_type_id_key): Likewise.
(ctf_dedup_atoms_init): Likewise.
* ctf-hash.c (ctf_hash_eq_type_id_key): New.
(ctf_dedup_atoms_init): Likewise.
* ctf-create.c (ctf_serialize): Adjusted.
(ctf_add_encoded): No longer static.
(ctf_add_reftype): Likewise.
* ctf-open.c (ctf_file_close): Destroy the
ctf_dedup_atoms_alloc.
* ctf-dedup.c: New file.
* ctf-decls.h [!HAVE_DECL_STPCPY]: Add prototype.
* configure.ac: Check for stpcpy.
* Makefile.am: Add it.
* Makefile.in: Regenerate.
* config.h.in: Regenerate.
* configure: Regenerate.
Diffstat (limited to 'libctf/ctf-impl.h')
-rw-r--r-- | libctf/ctf-impl.h | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index c2fcc92..a8a16f7 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -247,6 +247,106 @@ typedef struct ctf_link_type_key ctf_id_t cltk_idx; } ctf_link_type_key_t; +/* The structure used as the key in a cd_id_to_file_t on 32-bit platforms. */ +typedef struct ctf_type_id_key +{ + int ctii_input_num; + ctf_id_t ctii_type; +} ctf_type_id_key_t; + +/* Deduplicator state. + + The dedup state below uses three terms consistently. A "hash" is a + ctf_dynhash_t; a "hash value" is the hash value of a type as returned by + ctf_dedup_hash_type; a "global type ID" or "global ID" is a packed-together + reference to a single ctf_file_t (by array index in an array of inputs) and + ctf_id_t, i.e. a single instance of some hash value in some input. + + The deduplication algorithm takes a bunch of inputs and yields a single + shared "output" and possibly many outputs corresponding to individual inputs + that still contain types after sharing of unconflicted types. Almost all + deduplicator state is stored in the struct ctf_dedup in the output, though a + (very) few things are stored in inputs for simplicity's sake, usually if they + are linking together things within the scope of a single TU. + + Flushed at the end of every ctf_dedup run. */ + +typedef struct ctf_dedup +{ + /* The CTF linker flags in force for this dedup run. */ + int cd_link_flags; + + /* On 32-bit platforms only, a hash of global type IDs, in the form of + a ctf_type_id_key_t. */ + ctf_dynhash_t *cd_id_to_file_t; + + /* Atoms tables of decorated names: maps undecorated name to decorated name. + (The actual allocations are in the CTF file for the former and the real + atoms table for the latter). Uses the same namespaces as ctf_lookups, + below, but has no need for null-termination. */ + ctf_dynhash_t *cd_decorated_names[4]; + + /* Map type names to a hash from type hash value -> number of times each value + has appeared. 
*/ + ctf_dynhash_t *cd_name_counts; + + /* Map global type IDs to type hash values. Used to determine if types are + already hashed without having to recompute their hash values again, and to + link types together at later stages. Forwards that are peeked through to + structs and unions are not represented in here, so lookups that might be + such a type (in practice, all lookups) must go via cd_replaced_types first + to take this into account. Discarded before each rehashing. */ + ctf_dynhash_t *cd_type_hashes; + + /* Maps from the names of structs/unions/enums to a single GID which is the + only appearance of that type in any input: if it appears in more than one + input, a value which is a GID with an input_num of -1 appears. Used in + share-duplicated link mode to determine whether structs/unions + can be cited from multiple TUs. Only populated in that link mode. */ + ctf_dynhash_t *cd_struct_origin; + + /* Maps type hash values to a set of hash values of the types that cite them: + i.e., pointing backwards up the type graph. Used for recursive conflict + marking. Citations from tagged structures, unions, and forwards do not + appear in this graph. */ + ctf_dynhash_t *cd_citers; + + /* Maps type hash values to input global type IDs. The value is a set (a + hash) of global type IDs. Discarded before each rehashing. The result of + the ctf_dedup function. */ + ctf_dynhash_t *cd_output_mapping; + + /* A map giving the GID of the first appearance of each type for each type + hash value. */ + ctf_dynhash_t *cd_output_first_gid; + + /* Used to ensure that we never try to map a single type ID to more than one + hash. */ + ctf_dynhash_t *cd_output_mapping_guard; + + /* Maps the global type IDs of structures in input TUs whose members still + need emission to the global type ID of the already-emitted target type + (which has no members yet) in the appropriate target. Uniquely, the latter + ID represents a *target* ID (i.e. 
the cd_output_mapping of some specified + input): we encode the shared (parent) dict with an ID of -1. */ + ctf_dynhash_t *cd_emission_struct_members; + + /* A set (a hash) of hash values of conflicting types. */ + ctf_dynset_t *cd_conflicting_types; + + /* Maps type hashes to ctf_id_t's in this dictionary. Populated only at + emission time, in the dictionary where emission is taking place. */ + ctf_dynhash_t *cd_output_emission_hashes; + + /* Maps the decorated names of conflicted cross-TU forwards that were forcibly + emitted in this TU to their emitted ctf_id_ts. Populated only at emission + time, in the dictionary where emission is taking place. */ + ctf_dynhash_t *cd_output_emission_conflicted_forwards; + + /* Points to the output counterpart of this input dictionary, at emission + time. */ + ctf_file_t *cd_output; +} ctf_dedup_t; /* The ctf_file is the structure used to represent a CTF container to library clients, who see it only as an opaque pointer. Modifications can therefore @@ -346,6 +446,18 @@ struct ctf_file void *ctf_link_variable_filter_arg; /* Argument for it. */ ctf_dynhash_t *ctf_add_processing; /* Types ctf_add_type is working on now. */ + + /* Atoms table for dedup string storage. All strings in the ctf_dedup_t are + stored here. Only the _alloc copy is allocated or freed: the + ctf_dedup_atoms may be pointed to some other CTF dict, to share its atoms. + We keep the atoms table outside the ctf_dedup so that atoms can be + preserved across multiple similar links, such as when doing cu-mapped + links. */ + ctf_dynset_t *ctf_dedup_atoms; + ctf_dynset_t *ctf_dedup_atoms_alloc; + + ctf_dedup_t ctf_dedup; /* Deduplicator state. */ + char *ctf_tmp_typeslice; /* Storage for slicing up type names. */ size_t ctf_tmp_typeslicelen; /* Size of the typeslice. */ void *ctf_specific; /* Data for ctf_get/setspecific(). 
*/ @@ -451,11 +563,13 @@ typedef unsigned int (*ctf_hash_fun) (const void *ptr); extern unsigned int ctf_hash_integer (const void *ptr); extern unsigned int ctf_hash_string (const void *ptr); extern unsigned int ctf_hash_type_key (const void *ptr); +extern unsigned int ctf_hash_type_id_key (const void *ptr); typedef int (*ctf_hash_eq_fun) (const void *, const void *); extern int ctf_hash_eq_integer (const void *, const void *); extern int ctf_hash_eq_string (const void *, const void *); extern int ctf_hash_eq_type_key (const void *, const void *); +extern int ctf_hash_eq_type_id_key (const void *, const void *); extern int ctf_dynset_eq_string (const void *, const void *); @@ -526,11 +640,24 @@ extern int ctf_dvd_insert (ctf_file_t *, ctf_dvdef_t *); extern void ctf_dvd_delete (ctf_file_t *, ctf_dvdef_t *); extern ctf_dvdef_t *ctf_dvd_lookup (const ctf_file_t *, const char *); +extern ctf_id_t ctf_add_encoded (ctf_file_t *, uint32_t, const char *, + const ctf_encoding_t *, uint32_t kind); +extern ctf_id_t ctf_add_reftype (ctf_file_t *, uint32_t, ctf_id_t, + uint32_t kind); + extern void ctf_add_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type, ctf_file_t *dst_fp, ctf_id_t dst_type); extern ctf_id_t ctf_type_mapping (ctf_file_t *src_fp, ctf_id_t src_type, ctf_file_t **dst_fp); +extern int ctf_dedup_atoms_init (ctf_file_t *); +extern int ctf_dedup (ctf_file_t *, ctf_file_t **, uint32_t ninputs, + uint32_t *parents, int cu_mapped); +extern void ctf_dedup_fini (ctf_file_t *, ctf_file_t **, uint32_t); +extern ctf_file_t **ctf_dedup_emit (ctf_file_t *, ctf_file_t **, + uint32_t ninputs, uint32_t *parents, + uint32_t *noutputs, int cu_mapped); + extern void ctf_decl_init (ctf_decl_t *); extern void ctf_decl_fini (ctf_decl_t *); extern void ctf_decl_push (ctf_decl_t *, ctf_file_t *, ctf_id_t); |