diff options
author | Nick Clifton <nickc@redhat.com> | 2020-01-17 14:13:22 +0000 |
---|---|---|
committer | Nick Clifton <nickc@redhat.com> | 2020-01-17 14:13:22 +0000 |
commit | 533da48302a26885a972e4379eccc26b364e5b53 (patch) | |
tree | 9b2bedcac203ce68d4b81aa152d334305a3199aa /libiberty/rust-demangle.c | |
parent | bf8e4b6c8144a687d5edc24eda1bf0a3687ce71e (diff) | |
download | fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.zip fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.tar.gz fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.tar.bz2 |
Update libiberty sources with changes in the gcc mainline.
+2020-01-01 Jakub Jelinek <jakub@redhat.com>
+
+ Update copyright years.
+
+2019-12-06 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * make-relative-prefix.c (split_directories):
+ Return early on empty 'name'.
+
+2019-11-16 Tim Ruehsen <tim.ruehsen@gmx.de>
+
+ * cp-demangle.c (d_print_init): Remove const from 4th param.
+ (cplus_demangle_fill_name): Initialize d->d_counting.
+ (cplus_demangle_fill_extended_operator): Likewise.
+ (cplus_demangle_fill_ctor): Likewise.
+ (cplus_demangle_fill_dtor): Likewise.
+ (d_make_empty): Likewise.
+ (d_count_templates_scopes): Remove const from 3rd param,
+ Return on dc->d_counting > 1,
+ Increment dc->d_counting.
+ * cp-demint.c (cplus_demangle_fill_component): Initialize d->d_counting.
+ (cplus_demangle_fill_builtin_type): Likewise.
+ (cplus_demangle_fill_operator): Likewise.
+
+2019-11-16 Eduard-Mihai Burtescu <eddyb@lyken.rs>
+
+ * cplus-dem.c (cplus_demangle): Use rust_demangle directly.
+ (rust_demangle): Remove.
+ * rust-demangle.c (is_prefixed_hash): Rename to is_legacy_prefixed_hash.
+ (parse_lower_hex_nibble): Rename to decode_lower_hex_nibble.
+ (parse_legacy_escape): Rename to decode_legacy_escape.
+ (rust_is_mangled): Remove.
+ (struct rust_demangler): Add.
+ (peek): Add.
+ (next): Add.
+ (struct rust_mangled_ident): Add.
+ (parse_ident): Add.
+ (rust_demangle_sym): Remove.
+ (print_str): Add.
+ (PRINT): Add.
+ (print_ident): Add.
+ (rust_demangle_callback): Add.
+ (struct str_buf): Add.
+ (str_buf_reserve): Add.
+ (str_buf_append): Add.
+ (str_buf_demangle_callback): Add.
+ (rust_demangle): Add.
+ * rust-demangle.h: Remove.
+
+2019-11-15 Miguel Saldivar <saldivarcher@gmail.com>
+
+ * testsuite/demangle-expected: Fix test.
+
+2019-11-04 Kamlesh Kumar <kamleshbhalui@gmail.com>
+
+ * cp-demangle.c (d_expr_primary): Handle
+ nullptr demangling.
+ * testsuite/demangle-expected: Added test.
+
+2019-10-29 Paul Pluzhnikov <ppluzhnikov@google.com>
+
+ * cp-demangle.c (d_number): Avoid signed int overflow.
+
+2019-10-28 Miguel Saldivar <saldivarcher@gmail.com>
+
+ * cp-demangle.c (d_print_mod): Add a space before printing `complex`
+ and `imaginary`, as opposed to after.
+ * testsuite/demangle-expected: Adjust test.
+
+2019-10-03 Eduard-Mihai Burtescu <eddyb@lyken.rs>
+
+ * rust-demangle.c (looks_like_rust): Remove.
+ (rust_is_mangled): Don't check escapes.
+ (is_prefixed_hash): Allow 0-9a-f permutations.
+ (rust_demangle_sym): Don't bail on unknown escapes.
+ * testsuite/rust-demangle-expected: Update 'main::$99$' test.
+
+2019-09-03 Eduard-Mihai Burtescu <eddyb@lyken.rs>
+
+ * rust-demangle.c (unescape): Remove.
+ (parse_lower_hex_nibble): New function.
+ (parse_legacy_escape): New function.
+ (is_prefixed_hash): Use parse_lower_hex_nibble.
+ (looks_like_rust): Use parse_legacy_escape.
+ (rust_demangle_sym): Use parse_legacy_escape.
+ * testsuite/rust-demangle-expected: Add 'llv$u6d$' test.
+
+2019-08-27 Martin Liska <mliska@suse.cz>
+
+ PR lto/91478
+ * simple-object-elf.c (simple_object_elf_copy_lto_debug_sections):
+ First find a WEAK HIDDEN symbol in symbol table that will be
+ preserved. Later, use the symbol name for all removed symbols.
+
+2019-08-12 Martin Liska <mliska@suse.cz>
+
+ * Makefile.in: Add filedescriptor.c.
+ * filedescriptor.c: New file.
+ * lrealpath.c (is_valid_fd): Remove.
diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index 0be45b4ae8..fe738d0db4 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -1,7 +1,7 @@
# Makefile for the libiberty library.
# Originally written by K. Richard Pixley <rich@cygnus.com>.
#
-# Copyright (C) 1990-2019 Free Software Foundation, Inc.
+# Copyright (C) 1990-2020 Free Software Foundation, Inc.
#
# This file is part of the libiberty library.
# Libiberty is free software; you can redistribute it and/or
@@ -127,7 +127,7 @@ CFILES = alloca.c argv.c asprintf.c atexit.c \
calloc.c choose-temp.c clock.c concat.c cp-demangle.c \
cp-demint.c cplus-dem.c crc32.c \
d-demangle.c dwarfnames.c dyn-string.c \
- fdmatch.c ffs.c fibheap.c filename_cmp.c floatformat.c \
+ fdmatch.c ffs.c fibheap.c filedescriptor.c filename_cmp.c floatformat.c \
fnmatch.c fopen_unlocked.c \
getcwd.c getopt.c getopt1.c getpagesize.c getpwd.c getruntime.c \
gettimeofday.c \
@@ -171,6 +171,7 @@ REQUIRED_OFILES = \
./cp-demint.$(objext) ./crc32.$(objext) ./d-demangle.$(objext) \
./dwarfnames.$(objext) ./dyn-string.$(objext) \
./fdmatch.$(objext) ./fibheap.$(objext) \
+ ./filedescriptor.$(objext) \
./filename_cmp.$(objext) ./floatformat.$(objext) \
./fnmatch.$(objext) ./fopen_unlocked.$(objext) \
./getopt.$(objext) ./getopt1.$(objext) ./getpwd.$(objext) \
@@ -756,6 +757,17 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
else true; fi
$(COMPILE.c) $(srcdir)/fibheap.c $(OUTPUT_OPTION)
+./filedescriptor.$(objext): $(srcdir)/filedescriptor.c config.h $(INCDIR)/ansidecl.h \
+ $(INCDIR)/libiberty.h
+ if [ x"$(PICFLAG)" != x ]; then \
+ $(COMPILE.c) $(PICFLAG) $(srcdir)/filedescriptor.c -o pic/$@; \
+ else true; fi
+ if [ x"$(NOASANFLAG)" != x ]; then \
+ $(COMPILE.c) $(PICFLAG) $(NOASANFLAG) $(srcdir)/filedescriptor.c -o noasan/$@; \
+ else true; fi
+ $(COMPILE.c) $(srcdir)/filedescriptor.c $(OUTPUT_OPTION)
+
+
./filename_cmp.$(objext): $(srcdir)/filename_cmp.c config.h $(INCDIR)/ansidecl.h \
$(INCDIR)/filenames.h $(INCDIR)/hashtab.h \
$(INCDIR)/safe-ctype.h
diff --git a/libiberty/_doprnt.c b/libiberty/_doprnt.c
index d44dc415ed..a739f4304f 100644
--- a/libiberty/_doprnt.c
+++ b/libiberty/_doprnt.c
@@ -1,5 +1,5 @@
/* Provide a version of _doprnt in terms of fprintf.
- Copyright (C) 1998-2019 Free Software Foundation, Inc.
+ Copyright (C) 1998-2020 Free Software Foundation, Inc.
Contributed by Kaveh Ghazi (ghazi@caip.rutgers.edu) 3/29/98
This program is free software; you can redistribute it and/or modify it
diff --git a/libiberty/argv.c b/libiberty/argv.c
index 6444896f99..8c9794db6a 100644
--- a/libiberty/argv.c
+++ b/libiberty/argv.c
@@ -1,5 +1,5 @@
/* Create and destroy argument vectors (argv's)
- Copyright (C) 1992-2019 Free Software Foundation, Inc.
+ Copyright (C) 1992-2020 Free Software Foundation, Inc.
Written by Fred Fish @ Cygnus Support
This file is part of the libiberty library.
diff --git a/libiberty/asprintf.c b/libiberty/asprintf.c
index 5718682f69..6e38e2234d 100644
--- a/libiberty/asprintf.c
+++ b/libiberty/asprintf.c
@@ -1,6 +1,6 @@
/* Like sprintf but provides a pointer to malloc'd storage, which must
be freed by the caller.
- Copyright (C) 1997-2019 Free Software Foundation, Inc.
+ Copyright (C) 1997-2020 Free Software Foundation, Inc.
Contributed by Cygnus Solutions.
This file is part of the libiberty library.
diff --git a/libiberty/choose-temp.c b/libiberty/choose-temp.c
index 72c1b710bd..49a2faaa51 100644
--- a/libiberty/choose-temp.c
+++ b/libiberty/choose-temp.c
@@ -1,5 +1,5 @@
/* Utility to pick a temporary filename prefix.
- Copyright (C) 1996-2019 Free Software Foundation, Inc.
+ Copyright (C) 1996-2020 Free Software Foundation, Inc.
This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
diff --git a/libiberty/clock.c b/libiberty/clock.c
index a3730714bd..0de74657d0 100644
--- a/libiberty/clock.c
+++ b/libiberty/clock.c
@@ -1,5 +1,5 @@
/* ANSI-compatible clock function.
- Copyright (C) 1994-2019 Free Software Foundation, Inc.
+ Copyright (C) 1994-2020 Free Software Foundation, Inc.
This file is part of the libiberty library. This library is free
software; you can redistribute it and/or modify it under the
diff --git
Diffstat (limited to 'libiberty/rust-demangle.c')
-rw-r--r-- | libiberty/rust-demangle.c | 687 |
1 files changed, 435 insertions, 252 deletions
diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c index 2302db4..b87365c 100644 --- a/libiberty/rust-demangle.c +++ b/libiberty/rust-demangle.c @@ -1,5 +1,5 @@ /* Demangler for the Rust programming language - Copyright (C) 2016-2019 Free Software Foundation, Inc. + Copyright (C) 2016-2020 Free Software Foundation, Inc. Written by David Tolnay (dtolnay@gmail.com). This file is part of the libiberty library. @@ -33,9 +33,11 @@ If not, see <http://www.gnu.org/licenses/>. */ #include "safe-ctype.h" +#include <inttypes.h> #include <sys/types.h> #include <string.h> #include <stdio.h> +#include <stdlib.h> #ifdef HAVE_STRING_H #include <string.h> @@ -47,303 +49,484 @@ extern void *memset(void *s, int c, size_t n); #include <demangle.h> #include "libiberty.h" -#include "rust-demangle.h" +struct rust_demangler +{ + const char *sym; + size_t sym_len; -/* Mangled Rust symbols look like this: - _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a - - The original symbol is: - <std::sys::fd::FileDesc as core::ops::Drop>::drop - - The last component of the path is a 64-bit hash in lowercase hex, - prefixed with "h". Rust does not have a global namespace between - crates, an illusion which Rust maintains by using the hash to - distinguish things that would otherwise have the same symbol. - - Any path component not starting with a XID_Start character is - prefixed with "_". - - The following escape sequences are used: + void *callback_opaque; + demangle_callbackref callback; - "," => $C$ - "@" => $SP$ - "*" => $BP$ - "&" => $RF$ - "<" => $LT$ - ">" => $GT$ - "(" => $LP$ - ")" => $RP$ - " " => $u20$ - "\"" => $u22$ - "'" => $u27$ - "+" => $u2b$ - ";" => $u3b$ - "[" => $u5b$ - "]" => $u5d$ - "{" => $u7b$ - "}" => $u7d$ - "~" => $u7e$ + /* Position of the next character to read from the symbol. */ + size_t next; - A double ".." means "::" and a single "." means "-". + /* Non-zero if any error occurred. 
*/ + int errored; - The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ */ + /* Non-zero if printing should be verbose (e.g. include hashes). */ + int verbose; -static const char *hash_prefix = "::h"; -static const size_t hash_prefix_len = 3; -static const size_t hash_len = 16; + /* Rust mangling version, with legacy mangling being -1. */ + int version; +}; -static int is_prefixed_hash (const char *start); -static int looks_like_rust (const char *sym, size_t len); -static int unescape (const char **in, char **out, const char *seq, char value); +/* Parsing functions. */ -/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling +static char +peek (const struct rust_demangler *rdm) +{ + if (rdm->next < rdm->sym_len) + return rdm->sym[rdm->next]; + return 0; +} - This function looks for the following indicators: +static char +next (struct rust_demangler *rdm) +{ + char c = peek (rdm); + if (!c) + rdm->errored = 1; + else + rdm->next++; + return c; +} - 1. The hash must consist of "h" followed by 16 lowercase hex digits. +struct rust_mangled_ident +{ + /* ASCII part of the identifier. */ + const char *ascii; + size_t ascii_len; +}; - 2. As a sanity check, the hash must use between 5 and 15 of the 16 - possible hex digits. This is true of 99.9998% of hashes so once - in your life you may see a false negative. The point is to - notice path components that could be Rust hashes but are - probably not, like "haaaaaaaaaaaaaaaa". In this case a false - positive (non-Rust symbol has an important path component - removed because it looks like a Rust hash) is worse than a false - negative (the rare Rust symbol is not demangled) so this sets - the balance in favor of false negatives. 
+static struct rust_mangled_ident +parse_ident (struct rust_demangler *rdm) +{ + char c; + size_t start, len; + struct rust_mangled_ident ident; + + ident.ascii = NULL; + ident.ascii_len = 0; + + c = next (rdm); + if (!ISDIGIT (c)) + { + rdm->errored = 1; + return ident; + } + len = c - '0'; + + if (c != '0') + while (ISDIGIT (peek (rdm))) + len = len * 10 + (next (rdm) - '0'); + + start = rdm->next; + rdm->next += len; + /* Check for overflows. */ + if ((start > rdm->next) || (rdm->next > rdm->sym_len)) + { + rdm->errored = 1; + return ident; + } + + ident.ascii = rdm->sym + start; + ident.ascii_len = len; + + if (ident.ascii_len == 0) + ident.ascii = NULL; + + return ident; +} - 3. There must be no characters other than a-zA-Z0-9 and _.:$ +/* Printing functions. */ - 4. There must be no unrecognized $-sign sequences. +static void +print_str (struct rust_demangler *rdm, const char *data, size_t len) +{ + if (!rdm->errored) + rdm->callback (data, len, rdm->callback_opaque); +} - 5. There must be no sequence of three or more dots in a row ("..."). */ +#define PRINT(s) print_str (rdm, s, strlen (s)) -int -rust_is_mangled (const char *sym) +/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */ +static int +decode_lower_hex_nibble (char nibble) { - size_t len, len_without_hash; + if ('0' <= nibble && nibble <= '9') + return nibble - '0'; + if ('a' <= nibble && nibble <= 'f') + return 0xa + (nibble - 'a'); + return -1; +} - if (!sym) - return 0; +/* Return the unescaped character for a "$...$" escape, or 0 if invalid. 
*/ +static char +decode_legacy_escape (const char *e, size_t len, size_t *out_len) +{ + char c = 0; + size_t escape_len = 0; + int lo_nibble = -1, hi_nibble = -1; - len = strlen (sym); - if (len <= hash_prefix_len + hash_len) - /* Not long enough to contain "::h" + hash + something else */ + if (len < 3 || e[0] != '$') return 0; - len_without_hash = len - (hash_prefix_len + hash_len); - if (!is_prefixed_hash (sym + len_without_hash)) + e++; + len--; + + if (e[0] == 'C') + { + escape_len = 1; + + c = ','; + } + else if (len > 2) + { + escape_len = 2; + + if (e[0] == 'S' && e[1] == 'P') + c = '@'; + else if (e[0] == 'B' && e[1] == 'P') + c = '*'; + else if (e[0] == 'R' && e[1] == 'F') + c = '&'; + else if (e[0] == 'L' && e[1] == 'T') + c = '<'; + else if (e[0] == 'G' && e[1] == 'T') + c = '>'; + else if (e[0] == 'L' && e[1] == 'P') + c = '('; + else if (e[0] == 'R' && e[1] == 'P') + c = ')'; + else if (e[0] == 'u' && len > 3) + { + escape_len = 3; + + hi_nibble = decode_lower_hex_nibble (e[1]); + if (hi_nibble < 0) + return 0; + lo_nibble = decode_lower_hex_nibble (e[2]); + if (lo_nibble < 0) + return 0; + + /* Only allow non-control ASCII characters. */ + if (hi_nibble > 7) + return 0; + c = (hi_nibble << 4) | lo_nibble; + if (c < 0x20) + return 0; + } + } + + if (!c || len <= escape_len || e[escape_len] != '$') return 0; - return looks_like_rust (sym, len_without_hash); + *out_len = 2 + escape_len; + return c; } -/* A hash is the prefix "::h" followed by 16 lowercase hex digits. The - hex digits must comprise between 5 and 15 (inclusive) distinct - digits. */ +static void +print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident) +{ + char unescaped; + size_t len; + if (rdm->errored) + return; + + if (rdm->version == -1) + { + /* Ignore leading underscores preceding escape sequences. + The mangler inserts an underscore to make sure the + identifier begins with a XID_Start character. 
*/ + if (ident.ascii_len >= 2 && ident.ascii[0] == '_' + && ident.ascii[1] == '$') + { + ident.ascii++; + ident.ascii_len--; + } + + while (ident.ascii_len > 0) + { + /* Handle legacy escape sequences ("$...$", ".." or "."). */ + if (ident.ascii[0] == '$') + { + unescaped + = decode_legacy_escape (ident.ascii, ident.ascii_len, &len); + if (unescaped) + print_str (rdm, &unescaped, 1); + else + { + /* Unexpected escape sequence, print the rest verbatim. */ + print_str (rdm, ident.ascii, ident.ascii_len); + return; + } + } + else if (ident.ascii[0] == '.') + { + if (ident.ascii_len >= 2 && ident.ascii[1] == '.') + { + /* ".." becomes "::" */ + PRINT ("::"); + len = 2; + } + else + { + /* "." becomes "-" */ + PRINT ("-"); + len = 1; + } + } + else + { + /* Print everything before the next escape sequence, at once. */ + for (len = 0; len < ident.ascii_len; len++) + if (ident.ascii[len] == '$' || ident.ascii[len] == '.') + break; + + print_str (rdm, ident.ascii, len); + } + + ident.ascii += len; + ident.ascii_len -= len; + } + + return; + } +} + +/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits. + The hex digits must contain at least 5 distinct digits. 
*/ static int -is_prefixed_hash (const char *str) +is_legacy_prefixed_hash (struct rust_mangled_ident ident) { - const char *end; - char seen[16]; - size_t i; - int count; + uint16_t seen; + int nibble; + size_t i, count; - if (strncmp (str, hash_prefix, hash_prefix_len)) + if (ident.ascii_len != 17 || ident.ascii[0] != 'h') return 0; - str += hash_prefix_len; - - memset (seen, 0, sizeof(seen)); - for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = 1; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = 1; - else - return 0; - /* Count how many distinct digits seen */ - count = 0; + seen = 0; for (i = 0; i < 16; i++) - if (seen[i]) - count++; - - return count >= 5 && count <= 15; + { + nibble = decode_lower_hex_nibble (ident.ascii[1 + i]); + if (nibble < 0) + return 0; + seen |= (uint16_t)1 << nibble; + } + + /* Count how many distinct digits were seen. */ + count = 0; + while (seen) + { + if (seen & 1) + count++; + seen >>= 1; + } + + return count >= 5; } -static int -looks_like_rust (const char *str, size_t len) +int +rust_demangle_callback (const char *mangled, int options, + demangle_callbackref callback, void *opaque) { - const char *end = str + len; - - while (str < end) - switch (*str) - { - case '$': - if (!strncmp (str, "$C$", 3)) - str += 3; - else if (!strncmp (str, "$SP$", 4) - || !strncmp (str, "$BP$", 4) - || !strncmp (str, "$RF$", 4) - || !strncmp (str, "$LT$", 4) - || !strncmp (str, "$GT$", 4) - || !strncmp (str, "$LP$", 4) - || !strncmp (str, "$RP$", 4)) - str += 4; - else if (!strncmp (str, "$u20$", 5) - || !strncmp (str, "$u22$", 5) - || !strncmp (str, "$u27$", 5) - || !strncmp (str, "$u2b$", 5) - || !strncmp (str, "$u3b$", 5) - || !strncmp (str, "$u5b$", 5) - || !strncmp (str, "$u5d$", 5) - || !strncmp (str, "$u7b$", 5) - || !strncmp (str, "$u7d$", 5) - || !strncmp (str, "$u7e$", 5)) - str += 5; - else - return 0; - break; - case '.': - /* Do not allow three or more consecutive dots */ 
- if (!strncmp (str, "...", 3)) - return 0; - /* Fall through */ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case '_': - case ':': - str++; - break; - default: - return 0; - } - - return 1; -} + const char *p; + struct rust_demangler rdm; + struct rust_mangled_ident ident; + + rdm.sym = mangled; + rdm.sym_len = 0; + + rdm.callback_opaque = opaque; + rdm.callback = callback; + + rdm.next = 0; + rdm.errored = 0; + rdm.verbose = (options & DMGL_VERBOSE) != 0; + rdm.version = 0; + + /* Rust symbols always start with _ZN (legacy). */ + if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N') + { + rdm.sym += 3; + rdm.version = -1; + } + else + return 0; + + /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */ + for (p = rdm.sym; *p; p++) + { + rdm.sym_len++; -/* - INPUT: sym: symbol for which rust_is_mangled(sym) returned 1. + if (*p == '_' || ISALNUM (*p)) + continue; - The input is demangled in-place because the mangled name is always - longer than the demangled one. */ + if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':')) + continue; -void -rust_demangle_sym (char *sym) + return 0; + } + + /* Legacy Rust symbols need to be handled separately. */ + if (rdm.version == -1) + { + /* Legacy Rust symbols always end with E. 
*/ + if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E')) + return 0; + rdm.sym_len--; + + /* Legacy Rust symbols also always end with a path segment + that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'. + This early check, before any parse_ident calls, should + quickly filter out most C++ symbols unrelated to Rust. */ + if (!(rdm.sym_len > 19 + && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3))) + return 0; + + do + { + ident = parse_ident (&rdm); + if (rdm.errored || !ident.ascii) + return 0; + } + while (rdm.next < rdm.sym_len); + + /* The last path segment should be the hash. */ + if (!is_legacy_prefixed_hash (ident)) + return 0; + + /* Reset the state for a second pass, to print the symbol. */ + rdm.next = 0; + if (!rdm.verbose && rdm.sym_len > 19) + { + /* Hide the last segment, containing the hash, if not verbose. */ + rdm.sym_len -= 19; + } + + do + { + if (rdm.next > 0) + print_str (&rdm, "::", 2); + + ident = parse_ident (&rdm); + print_ident (&rdm, ident); + } + while (rdm.next < rdm.sym_len); + } + else + return 0; + + return !rdm.errored; +} + +/* Growable string buffers. */ +struct str_buf +{ + char *ptr; + size_t len; + size_t cap; + int errored; +}; + +static void +str_buf_reserve (struct str_buf *buf, size_t extra) { - const char *in; - char *out; - const char *end; + size_t available, min_new_cap, new_cap; + char *new_ptr; - if (!sym) + /* Allocation failed before. 
*/ + if (buf->errored) return; - in = sym; - out = sym; - end = sym + strlen (sym) - (hash_prefix_len + hash_len); - - while (in < end) - switch (*in) - { - case '$': - if (!(unescape (&in, &out, "$C$", ',') - || unescape (&in, &out, "$SP$", '@') - || unescape (&in, &out, "$BP$", '*') - || unescape (&in, &out, "$RF$", '&') - || unescape (&in, &out, "$LT$", '<') - || unescape (&in, &out, "$GT$", '>') - || unescape (&in, &out, "$LP$", '(') - || unescape (&in, &out, "$RP$", ')') - || unescape (&in, &out, "$u20$", ' ') - || unescape (&in, &out, "$u22$", '\"') - || unescape (&in, &out, "$u27$", '\'') - || unescape (&in, &out, "$u2b$", '+') - || unescape (&in, &out, "$u3b$", ';') - || unescape (&in, &out, "$u5b$", '[') - || unescape (&in, &out, "$u5d$", ']') - || unescape (&in, &out, "$u7b$", '{') - || unescape (&in, &out, "$u7d$", '}') - || unescape (&in, &out, "$u7e$", '~'))) { - /* unexpected escape sequence, not looks_like_rust. */ - goto fail; - } - break; - case '_': - /* If this is the start of a path component and the next - character is an escape sequence, ignore the underscore. The - mangler inserts an underscore to make sure the path - component begins with a XID_Start character. */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') - { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } - else - { - /* "." 
becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case ':': - *out++ = *in++; - break; - default: - /* unexpected character in symbol, not looks_like_rust. */ - goto fail; - } - goto done; - -fail: - *out++ = '?'; /* This is pretty lame, but it's hard to do better. */ -done: - *out = '\0'; + available = buf->cap - buf->len; + + if (extra <= available) + return; + + min_new_cap = buf->cap + (extra - available); + + /* Check for overflows. */ + if (min_new_cap < buf->cap) + { + buf->errored = 1; + return; + } + + new_cap = buf->cap; + + if (new_cap == 0) + new_cap = 4; + + /* Double capacity until sufficiently large. */ + while (new_cap < min_new_cap) + { + new_cap *= 2; + + /* Check for overflows. 
*/ + if (new_cap < buf->cap) + { + buf->errored = 1; + return; + } + } + + new_ptr = (char *)realloc (buf->ptr, new_cap); + if (new_ptr == NULL) + { + free (buf->ptr); + buf->ptr = NULL; + buf->len = 0; + buf->cap = 0; + buf->errored = 1; + } + else + { + buf->ptr = new_ptr; + buf->cap = new_cap; + } } -static int -unescape (const char **in, char **out, const char *seq, char value) +static void +str_buf_append (struct str_buf *buf, const char *data, size_t len) { - size_t len = strlen (seq); + str_buf_reserve (buf, len); + if (buf->errored) + return; - if (strncmp (*in, seq, len)) - return 0; + memcpy (buf->ptr + buf->len, data, len); + buf->len += len; +} + +static void +str_buf_demangle_callback (const char *data, size_t len, void *opaque) +{ + str_buf_append ((struct str_buf *)opaque, data, len); +} + +char * +rust_demangle (const char *mangled, int options) +{ + struct str_buf out; + int success; + + out.ptr = NULL; + out.len = 0; + out.cap = 0; + out.errored = 0; - **out = value; + success = rust_demangle_callback (mangled, options, + str_buf_demangle_callback, &out); - *in += len; - *out += 1; + if (!success) + { + free (out.ptr); + return NULL; + } - return 1; + str_buf_append (&out, "\0", 1); + return out.ptr; } |