From 53651de80f811495262a5b17b774a486dd37b326 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Mon, 23 Nov 2020 21:17:44 +0000 Subject: libctf, include: support foreign-endianness symtabs with CTF The CTF symbol lookup machinery added recently has one deficit: it assumes the symtab is in the machine's native endianness. This is always true when the linker is writing out symtabs (because cross linkers byteswap symbols only after libctf has been called on them), but may be untrue in the cross case when the linker or another tool (objdump, etc) is reading them. Unfortunately the easy way to model this to the caller, as an endianness field in the ctf_sect_t, is precluded because doing so would change the size of the ctf_sect_t, which would be an ABI break. So, instead, allow the endianness of the symtab to be set after open time, by calling one of the two new API functions ctf_symsect_endianness (for ctf_dict_t's) or ctf_arc_symsect_endianness (for entire ctf_archive_t's). libctf calls these functions automatically for objects opened via any of the BFD-aware mechanisms (ctf_bfdopen, ctf_bfdopen_ctfsect, ctf_fdopen, ctf_open, or ctf_arc_open), but the various mechanisms that just take raw ctf_sect_t's will assume the symtab is in native endianness and need a later call to ctf_*symsect_endianness to adjust it if needed. (This call is basically free if the endianness is actually native: it only costs anything if the symtab endianness was previously guessed wrong, and there is a symtab, and we are using it directly rather than using symtab indexing.) Obviously, calling ctf_lookup_by_symbol or ctf_symbol_next before the symtab endianness is correctly set will probably give wrong answers -- but you can set it at any time as long as it is before then. include/ChangeLog 2020-11-23 Nick Alcock * ctf-api.h: Style nit: remove () on function names in comments. (ctf_sect_t): Mention endianness concerns. (ctf_symsect_endianness): New declaration. 
(ctf_arc_symsect_endianness): Likewise. libctf/ChangeLog 2020-11-23 Nick Alcock * ctf-impl.h (ctf_dict_t) <ctf_symsect_little_endian>: New. (struct ctf_archive_internal) <ctfi_symsect_little_endian>: Likewise. * ctf-create.c (ctf_serialize): Adjust for new field. * ctf-open.c (init_symtab): Note the semantics of repeated calls. (ctf_symsect_endianness): New. (ctf_bufopen_internal): Set ctf_symsect_little_endian suitably for the native endianness. (_Static_assert): Moved... (swap_thing): ... with this... * swap.h: ... to here. * ctf-util.c (ctf_elf32_to_link_sym): Use it, byteswapping the Elf32_Sym if the ctf_symsect_little_endian demands it. (ctf_elf64_to_link_sym): Likewise swap the Elf64_Sym if needed. * ctf-archive.c (ctf_arc_symsect_endianness): New, set the endianness of the symtab used by the dicts in an archive. (ctf_archive_iter_internal): Initialize to unknown (assumed native, do not call ctf_symsect_endianness). (ctf_dict_open_by_offset): Call ctf_symsect_endianness if need be. (ctf_dict_open_internal): Propagate the endianness down. (ctf_dict_open_sections): Likewise. * ctf-open-bfd.c (ctf_bfdopen_ctfsect): Get the endianness from the struct bfd and pass it down to the archive. * libctf.ver: Add ctf_symsect_endianness and ctf_arc_symsect_endianness. --- libctf/ChangeLog | 27 ++++++++++++++++++++++ libctf/ctf-archive.c | 35 +++++++++++++++++++++++------ libctf/ctf-create.c | 1 + libctf/ctf-impl.h | 2 ++ libctf/ctf-open-bfd.c | 7 ++++++ libctf/ctf-open.c | 56 +++++++++++++++++++++++++--------------------- libctf/ctf-util.c | 62 ++++++++++++++++++++++++++++++++++++++++++--------- libctf/libctf.ver | 2 ++ libctf/swap.h | 24 ++++++++++++++++++++ 9 files changed, 173 insertions(+), 43 deletions(-) (limited to 'libctf') diff --git a/libctf/ChangeLog b/libctf/ChangeLog index 7fdb355..adeedeb 100644 --- a/libctf/ChangeLog +++ b/libctf/ChangeLog @@ -1,3 +1,30 @@ +2020-11-23 Nick Alcock + + * ctf-impl.h (ctf_dict_t) <ctf_symsect_little_endian>: New. + (struct ctf_archive_internal) <ctfi_symsect_little_endian>: Likewise. + * ctf-create.c (ctf_serialize): Adjust for new field. 
+ * ctf-open.c (init_symtab): Note the semantics of repeated calls. + (ctf_symsect_endianness): New. + (ctf_bufopen_internal): Set ctf_symsect_little_endian suitably for + the native endianness. + (_Static_assert): Moved... + (swap_thing): ... with this... + * swap.h: ... to here. + * ctf-util.c (ctf_elf32_to_link_sym): Use it, byteswapping the + Elf32_Sym if the ctf_symsect_little_endian demands it. + (ctf_elf64_to_link_sym): Likewise swap the Elf64_Sym if needed. + * ctf-archive.c (ctf_arc_symsect_endianness): New, set the + endianness of the symtab used by the dicts in an archive. + (ctf_archive_iter_internal): Initialize to unknown (assumed native, + do not call ctf_symsect_endianness). + (ctf_dict_open_by_offset): Call ctf_symsect_endianness if need be. + (ctf_dict_open_internal): Propagate the endianness down. + (ctf_dict_open_sections): Likewise. + * ctf-open-bfd.c (ctf_bfdopen_ctfsect): Get the endianness from the + struct bfd and pass it down to the archive. + * libctf.ver: Add ctf_symsect_endianness and + ctf_arc_symsect_endianness. 
+ 2020-11-20 Nick Alcock * ctf-link.c (ctf_link_deduplicating): Clean up the ctf_link_outputs diff --git a/libctf/ctf-archive.c b/libctf/ctf-archive.c index dc312d3..a74ab47 100644 --- a/libctf/ctf-archive.c +++ b/libctf/ctf-archive.c @@ -36,7 +36,8 @@ static off_t arc_write_one_ctf (ctf_dict_t * f, int fd, size_t threshold); static ctf_dict_t *ctf_dict_open_by_offset (const struct ctf_archive *arc, const ctf_sect_t *symsect, const ctf_sect_t *strsect, - size_t offset, int *errp); + size_t offset, int little_endian, + int *errp); static int sort_modent_by_name (const void *one, const void *two, void *n); static void *arc_mmap_header (int fd, size_t headersz); static void *arc_mmap_file (int fd, size_t size); @@ -378,10 +379,21 @@ ctf_new_archive_internal (int is_archive, int unmap_on_close, arci->ctfi_free_symsect = 0; arci->ctfi_free_strsect = 0; arci->ctfi_unmap_on_close = unmap_on_close; + arci->ctfi_symsect_little_endian = -1; return arci; } +/* Set the symbol-table endianness of an archive (defaulting the symtab + endianness of all ctf_file_t's opened from that archive). */ +void +ctf_arc_symsect_endianness (ctf_archive_t *arc, int little_endian) +{ + arc->ctfi_symsect_little_endian = !!little_endian; + if (!arc->ctfi_is_archive) + ctf_symsect_endianness (arc->ctfi_dict, arc->ctfi_symsect_little_endian); +} + /* Get the CTF preamble from data in a buffer, which may be either an archive or a CTF dict. If multiple dicts are present in an archive, the preamble comes from an arbitrary dict. 
The preamble is a pointer into the ctfsect passed @@ -536,7 +548,8 @@ static ctf_dict_t * ctf_dict_open_internal (const struct ctf_archive *arc, const ctf_sect_t *symsect, const ctf_sect_t *strsect, - const char *name, int *errp) + const char *name, int little_endian, + int *errp) { struct ctf_archive_modent *modent; const char *search_nametbl; @@ -564,7 +577,8 @@ ctf_dict_open_internal (const struct ctf_archive *arc, } return ctf_dict_open_by_offset (arc, symsect, strsect, - le64toh (modent->ctf_offset), errp); + le64toh (modent->ctf_offset), + little_endian, errp); } /* Return the ctf_dict_t with the given name, or NULL if none, setting 'err' if @@ -584,7 +598,8 @@ ctf_dict_open_sections (const ctf_archive_t *arc, { ctf_dict_t *ret; ret = ctf_dict_open_internal (arc->ctfi_archive, symsect, strsect, - name, errp); + name, arc->ctfi_symsect_little_endian, + errp); if (ret) { ret->ctf_archive = (ctf_archive_t *) arc; @@ -691,7 +706,7 @@ static ctf_dict_t * ctf_dict_open_by_offset (const struct ctf_archive *arc, const ctf_sect_t *symsect, const ctf_sect_t *strsect, size_t offset, - int *errp) + int little_endian, int *errp) { ctf_sect_t ctfsect; ctf_dict_t *fp; @@ -708,7 +723,11 @@ ctf_dict_open_by_offset (const struct ctf_archive *arc, ctfsect.cts_data = (void *) ((char *) arc + offset + sizeof (uint64_t)); fp = ctf_bufopen (&ctfsect, symsect, strsect, errp); if (fp) - ctf_setmodel (fp, le64toh (arc->ctfa_model)); + { + ctf_setmodel (fp, le64toh (arc->ctfa_model)); + if (little_endian >= 0) + ctf_symsect_endianness (fp, little_endian); + } return fp; } @@ -961,7 +980,9 @@ ctf_archive_iter_internal (const ctf_archive_t *wrapper, name = &nametbl[le64toh (modent[i].name_offset)]; if ((f = ctf_dict_open_internal (arc, symsect, strsect, - name, &rc)) == NULL) + name, + wrapper->ctfi_symsect_little_endian, + &rc)) == NULL) return rc; f->ctf_archive = (ctf_archive_t *) wrapper; diff --git a/libctf/ctf-create.c b/libctf/ctf-create.c index c3223a7..e03a046 100644 --- 
a/libctf/ctf-create.c +++ b/libctf/ctf-create.c @@ -1162,6 +1162,7 @@ ctf_serialize (ctf_dict_t *fp) nfp->ctf_link_memb_name_changer_arg = fp->ctf_link_memb_name_changer_arg; nfp->ctf_link_variable_filter = fp->ctf_link_variable_filter; nfp->ctf_link_variable_filter_arg = fp->ctf_link_variable_filter_arg; + nfp->ctf_symsect_little_endian = fp->ctf_symsect_little_endian; nfp->ctf_link_flags = fp->ctf_link_flags; nfp->ctf_dedup_atoms = fp->ctf_dedup_atoms; nfp->ctf_dedup_atoms_alloc = fp->ctf_dedup_atoms_alloc; diff --git a/libctf/ctf-impl.h b/libctf/ctf-impl.h index a9f7245..84460ea 100644 --- a/libctf/ctf-impl.h +++ b/libctf/ctf-impl.h @@ -381,6 +381,7 @@ struct ctf_dict ctf_sect_t ctf_data; /* CTF data from object file. */ ctf_sect_t ctf_symtab; /* Symbol table from object file. */ ctf_sect_t ctf_strtab; /* String table from object file. */ + int ctf_symsect_little_endian; /* Endianness of the ctf_symtab. */ ctf_dynhash_t *ctf_prov_strtab; /* Maps provisional-strtab offsets to names. */ ctf_dynhash_t *ctf_syn_ext_strtab; /* Maps ext-strtab offsets to names. */ @@ -506,6 +507,7 @@ struct ctf_archive_internal ctf_dict_t **ctfi_symdicts; /* Array of index -> ctf_dict_t *. */ ctf_id_t *ctfi_syms; /* Array of index -> ctf_id_t. */ ctf_sect_t ctfi_symsect; + int ctfi_symsect_little_endian; /* -1 for unknown / do not set. 
*/ ctf_sect_t ctfi_strsect; int ctfi_free_symsect; int ctfi_free_strsect; diff --git a/libctf/ctf-open-bfd.c b/libctf/ctf-open-bfd.c index a6f0d3f..1a00cb6 100644 --- a/libctf/ctf-open-bfd.c +++ b/libctf/ctf-open-bfd.c @@ -97,6 +97,7 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_, ctf_sect_t *strsectp = NULL; const char *bfderrstr = NULL; char *strtab_alloc = NULL; + int symsect_endianness = -1; #ifdef HAVE_BFD_ELF ctf_sect_t symsect, strsect; @@ -206,6 +207,8 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_, symsect.cts_data = symtab; symsectp = &symsect; } + + symsect_endianness = bfd_little_endian (abfd); #endif arci = ctf_arc_bufopen (ctfsect, symsectp, strsectp, errp); @@ -215,6 +218,10 @@ ctf_bfdopen_ctfsect (struct bfd *abfd _libctf_unused_, arci->ctfi_free_symsect = 1; if (strtab_alloc) arci->ctfi_free_strsect = 1; + + /* Get the endianness right. */ + if (symsect_endianness > -1) + ctf_arc_symsect_endianness (arci, symsect_endianness); return arci; } #ifdef HAVE_BFD_ELF diff --git a/libctf/ctf-open.c b/libctf/ctf-open.c index ecf27ee..7816fd0 100644 --- a/libctf/ctf-open.c +++ b/libctf/ctf-open.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "swap.h" #include #include @@ -227,7 +226,10 @@ static const ctf_dictops_t ctf_dictops[] = { symtypetabs come from the compiler, and all the linker does is iteration over all entries, which doesn't need this initialization.) - The SP symbol table section may be NULL if there is no symtab. */ + The SP symbol table section may be NULL if there is no symtab. + + If init_symtab works on one call, it cannot fail on future calls to the same + fp: ctf_symsect_endianness relies on this. 
*/ static int init_symtab (ctf_dict_t *fp, const ctf_header_t *hp, const ctf_sect_t *sp) @@ -290,6 +292,10 @@ init_symtab (ctf_dict_t *fp, const ctf_header_t *hp, const ctf_sect_t *sp) return ECTF_SYMTAB; } + /* This call may be led astray if our idea of the symtab's endianness is + wrong, but when this is fixed by a call to ctf_symsect_endianness, + init_symtab will be called again with the right endianness in + force. */ if (ctf_symtab_skippable (&sym)) { *xp = -1u; @@ -974,28 +980,6 @@ init_types (ctf_dict_t *fp, ctf_header_t *cth) We flip everything, mindlessly, even 1-byte entities, so that future expansions do not require changes to this code. */ -/* < C11? define away static assertions. */ - -#if !defined (__STDC_VERSION__) || __STDC_VERSION__ < 201112L -#define _Static_assert(cond, err) -#endif - -/* Swap the endianness of something. */ - -#define swap_thing(x) \ - do { \ - _Static_assert (sizeof (x) == 1 || (sizeof (x) % 2 == 0 \ - && sizeof (x) <= 8), \ - "Invalid size, update endianness code"); \ - switch (sizeof (x)) { \ - case 2: x = bswap_16 (x); break; \ - case 4: x = bswap_32 (x); break; \ - case 8: x = bswap_64 (x); break; \ - case 1: /* Nothing needs doing */ \ - break; \ - } \ - } while (0); - /* Flip the endianness of the CTF header. */ static void @@ -1652,7 +1636,13 @@ ctf_bufopen_internal (const ctf_sect_t *ctfsect, const ctf_sect_t *symsect, large for the actual size of the object and function info sections: if so, ctf_nsyms will be adjusted and the excess will never be used. It's possible to do indexed symbol lookups even without a symbol table, so check - even in that case. */ + even in that case. Initially, we assume the symtab is native-endian: if it + isn't, the caller will inform us later by calling ctf_symsect_endianness. 
*/ +#ifdef WORDS_BIGENDIAN + fp->ctf_symsect_little_endian = 0; +#else + fp->ctf_symsect_little_endian = 1; +#endif if (symsect != NULL) { @@ -1866,6 +1856,22 @@ ctf_getstrsect (const ctf_dict_t *fp) return fp->ctf_strtab; } +/* Set the endianness of the symbol table attached to FP. */ +void +ctf_symsect_endianness (ctf_dict_t *fp, int little_endian) +{ + int old_endianness = fp->ctf_symsect_little_endian; + + fp->ctf_symsect_little_endian = !!little_endian; + + /* If we already have a symtab translation table, we need to repopulate it if + our idea of the endianness has changed. */ + + if (old_endianness != fp->ctf_symsect_little_endian + && fp->ctf_sxlate != NULL && fp->ctf_symtab.cts_data != NULL) + assert (init_symtab (fp, fp->ctf_header, &fp->ctf_symtab) == 0); +} + /* Return the CTF handle for the parent CTF dict, if one exists. Otherwise return NULL to indicate this dict has no imported parent. */ ctf_dict_t * diff --git a/libctf/ctf-util.c b/libctf/ctf-util.c index 0a15b86..ab34bc0 100644 --- a/libctf/ctf-util.c +++ b/libctf/ctf-util.c @@ -19,6 +19,7 @@ #include #include +#include "ctf-endian.h" /* Simple doubly-linked list append routine. This implementation assumes that each list element contains an embedded ctf_list_t as the first member. @@ -114,16 +115,35 @@ ctf_link_sym_t * ctf_elf32_to_link_sym (ctf_dict_t *fp, ctf_link_sym_t *dst, const Elf32_Sym *src, uint32_t symidx) { + Elf32_Sym tmp; + int needs_flipping = 0; + +#ifdef WORDS_BIGENDIAN + if (fp->ctf_symsect_little_endian) + needs_flipping = 1; +#else + if (!fp->ctf_symsect_little_endian) + needs_flipping = 1; +#endif + + memcpy (&tmp, src, sizeof (Elf32_Sym)); + if (needs_flipping) + { + swap_thing (tmp.st_name); + swap_thing (tmp.st_size); + swap_thing (tmp.st_shndx); + swap_thing (tmp.st_value); + } /* The name must be in the external string table. 
*/ - if (src->st_name < fp->ctf_str[CTF_STRTAB_1].cts_len) - dst->st_name = (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + src->st_name; + if (tmp.st_name < fp->ctf_str[CTF_STRTAB_1].cts_len) + dst->st_name = (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + tmp.st_name; else dst->st_name = _CTF_NULLSTR; dst->st_nameidx_set = 0; dst->st_symidx = symidx; - dst->st_shndx = src->st_shndx; - dst->st_type = ELF32_ST_TYPE (src->st_info); - dst->st_value = src->st_value; + dst->st_shndx = tmp.st_shndx; + dst->st_type = ELF32_ST_TYPE (tmp.st_info); + dst->st_value = tmp.st_value; return dst; } @@ -134,22 +154,42 @@ ctf_link_sym_t * ctf_elf64_to_link_sym (ctf_dict_t *fp, ctf_link_sym_t *dst, const Elf64_Sym *src, uint32_t symidx) { + Elf64_Sym tmp; + int needs_flipping = 0; + +#ifdef WORDS_BIGENDIAN + if (fp->ctf_symsect_little_endian) + needs_flipping = 1; +#else + if (!fp->ctf_symsect_little_endian) + needs_flipping = 1; +#endif + + memcpy (&tmp, src, sizeof (Elf64_Sym)); + if (needs_flipping) + { + swap_thing (tmp.st_name); + swap_thing (tmp.st_size); + swap_thing (tmp.st_shndx); + swap_thing (tmp.st_value); + } + /* The name must be in the external string table. */ - if (src->st_name < fp->ctf_str[CTF_STRTAB_1].cts_len) - dst->st_name = (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + src->st_name; + if (tmp.st_name < fp->ctf_str[CTF_STRTAB_1].cts_len) + dst->st_name = (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + tmp.st_name; else dst->st_name = _CTF_NULLSTR; dst->st_nameidx_set = 0; dst->st_symidx = symidx; - dst->st_shndx = src->st_shndx; - dst->st_type = ELF32_ST_TYPE (src->st_info); + dst->st_shndx = tmp.st_shndx; + dst->st_type = ELF32_ST_TYPE (tmp.st_info); /* We only care if the value is zero, so avoid nonzeroes turning into zeroes. 
*/ - if (_libctf_unlikely_ (src->st_value != 0 && ((uint32_t) src->st_value == 0))) + if (_libctf_unlikely_ (tmp.st_value != 0 && ((uint32_t) tmp.st_value == 0))) dst->st_value = 1; else - dst->st_value = (uint32_t) src->st_value; + dst->st_value = (uint32_t) tmp.st_value; return dst; } diff --git a/libctf/libctf.ver b/libctf/libctf.ver index cfecc89..3286e60 100644 --- a/libctf/libctf.ver +++ b/libctf/libctf.ver @@ -193,4 +193,6 @@ LIBCTF_1.1 { ctf_getsymsect; ctf_getstrsect; + ctf_symsect_endianness; + ctf_arc_symsect_endianness; } LIBCTF_1.0; diff --git a/libctf/swap.h b/libctf/swap.h index c8962a2..413f0ce 100644 --- a/libctf/swap.h +++ b/libctf/swap.h @@ -22,6 +22,7 @@ #include "config.h" #include +#include #ifdef HAVE_BYTESWAP_H #include @@ -63,4 +64,27 @@ bswap_64 (uint64_t v) } #endif /* !HAVE_DECL_BSWAP64 */ +/* < C11? define away static assertions. */ + +#if !defined (__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define _Static_assert(cond, err) +#endif + +/* Swap the endianness of something. */ + +#define swap_thing(x) \ + do { \ + _Static_assert (sizeof (x) == 1 || (sizeof (x) % 2 == 0 \ + && sizeof (x) <= 8), \ + "Invalid size, update endianness code"); \ + switch (sizeof (x)) { \ + case 2: x = bswap_16 (x); break; \ + case 4: x = bswap_32 (x); break; \ + case 8: x = bswap_64 (x); break; \ + case 1: /* Nothing needs doing */ \ + break; \ + } \ + } while (0); + + #endif /* !defined(_CTF_SWAP_H) */ -- cgit v1.1