about summary refs log tree commit diff
path: root/libiberty/rust-demangle.c
diff options
context:
space:
mode:
author Nick Clifton <nickc@redhat.com> 2020-01-17 14:13:22 +0000
committer Nick Clifton <nickc@redhat.com> 2020-01-17 14:13:22 +0000
commit 533da48302a26885a972e4379eccc26b364e5b53 (patch)
tree 9b2bedcac203ce68d4b81aa152d334305a3199aa /libiberty/rust-demangle.c
parent bf8e4b6c8144a687d5edc24eda1bf0a3687ce71e (diff)
download fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.zip
fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.tar.gz
fsf-binutils-gdb-533da48302a26885a972e4379eccc26b364e5b53.tar.bz2
Update libiberty sources with changes in the gcc mainline.
+2020-01-01 Jakub Jelinek <jakub@redhat.com> + + Update copyright years. + +2019-12-06 Tim Ruehsen <tim.ruehsen@gmx.de> + + * make-relative-prefix.c (split_directories): + Return early on empty 'name' + +2019-11-16 Tim Ruehsen <tim.ruehsen@gmx.de> + + * cp-demangle.c (d_print_init): Remove const from 4th param. + (cplus_demangle_fill_name): Initialize d->d_counting. + (cplus_demangle_fill_extended_operator): Likewise. + (cplus_demangle_fill_ctor): Likewise. + (cplus_demangle_fill_dtor): Likewise. + (d_make_empty): Likewise. + (d_count_templates_scopes): Remobe const from 3rd param, + Return on dc->d_counting > 1, + Increment dc->d_counting. + * cp-demint.c (cplus_demangle_fill_component): Initialize d->d_counting. + (cplus_demangle_fill_builtin_type): Likewise. + (cplus_demangle_fill_operator): Likewise. + +2019-11-16 Eduard-Mihai Burtescu <eddyb@lyken.rs> + + * cplus-dem.c (cplus_demangle): Use rust_demangle directly. + (rust_demangle): Remove. + * rust-demangle.c (is_prefixed_hash): Rename to is_legacy_prefixed_hash. + (parse_lower_hex_nibble): Rename to decode_lower_hex_nibble. + (parse_legacy_escape): Rename to decode_legacy_escape. + (rust_is_mangled): Remove. + (struct rust_demangler): Add. + (peek): Add. + (next): Add. + (struct rust_mangled_ident): Add. + (parse_ident): Add. + (rust_demangle_sym): Remove. + (print_str): Add. + (PRINT): Add. + (print_ident): Add. + (rust_demangle_callback): Add. + (struct str_buf): Add. + (str_buf_reserve): Add. + (str_buf_append): Add. + (str_buf_demangle_callback): Add. + (rust_demangle): Add. + * rust-demangle.h: Remove. + +2019-11-15 Miguel Saldivar <saldivarcher@gmail.com> + + * testsuite/demangle-expected: Fix test. + +2019-11-04 Kamlesh Kumar <kamleshbhalui@gmail.com> + + * cp-demangle.c (d_expr_primary): Handle + nullptr demangling. + * testsuite/demangle-expected: Added test. + +2019-10-29 Paul Pluzhnikov <ppluzhnikov@google.com> + + * cp-demangle.c (d_number): Avoid signed int overflow. 
+ +2019-10-28 Miguel Saldivar <saldivarcher@gmail.com> + + * cp-demangle.c (d_print_mod): Add a space before printing `complex` + and `imaginary`, as opposed to after. + * testsuite/demangle-expected: Adjust test. + +2019-10-03 Eduard-Mihai Burtescu <eddyb@lyken.rs> + + * rust-demangle.c (looks_like_rust): Remove. + (rust_is_mangled): Don't check escapes. + (is_prefixed_hash): Allow 0-9a-f permutations. + (rust_demangle_sym): Don't bail on unknown escapes. + * testsuite/rust-demangle-expected: Update 'main::$99$' test. + +2019-09-03 Eduard-Mihai Burtescu <eddyb@lyken.rs> + + * rust-demangle.c (unescape): Remove. + (parse_lower_hex_nibble): New function. + (parse_legacy_escape): New function. + (is_prefixed_hash): Use parse_lower_hex_nibble. + (looks_like_rust): Use parse_legacy_escape. + (rust_demangle_sym): Use parse_legacy_escape. + * testsuite/rust-demangle-expected: Add 'llv$u6d$' test. + +2019-08-27 Martin Liska <mliska@suse.cz> + + PR lto/91478 + * simple-object-elf.c (simple_object_elf_copy_lto_debug_sections): + First find a WEAK HIDDEN symbol in symbol table that will be + preserved. Later, use the symbol name for all removed symbols. + +2019-08-12 Martin Liska <mliska@suse.cz> + + * Makefile.in: Add filedescriptor.c. + * filedescriptor.c: New file. + * lrealpath.c (is_valid_fd): Remove. diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in index 0be45b4ae8..fe738d0db4 100644 --- a/libiberty/Makefile.in +++ b/libiberty/Makefile.in @@ -1,7 +1,7 @@ # Makefile for the libiberty library. # Originally written by K. Richard Pixley <rich@cygnus.com>. # -# Copyright (C) 1990-2019 Free Software Foundation, Inc. +# Copyright (C) 1990-2020 Free Software Foundation, Inc. # # This file is part of the libiberty library. 
# Libiberty is free software; you can redistribute it and/or @@ -127,7 +127,7 @@ CFILES = alloca.c argv.c asprintf.c atexit.c \ calloc.c choose-temp.c clock.c concat.c cp-demangle.c \ cp-demint.c cplus-dem.c crc32.c \ d-demangle.c dwarfnames.c dyn-string.c \ - fdmatch.c ffs.c fibheap.c filename_cmp.c floatformat.c \ + fdmatch.c ffs.c fibheap.c filedescriptor.c filename_cmp.c floatformat.c \ fnmatch.c fopen_unlocked.c \ getcwd.c getopt.c getopt1.c getpagesize.c getpwd.c getruntime.c \ gettimeofday.c \ @@ -171,6 +171,7 @@ REQUIRED_OFILES = \ ./cp-demint.$(objext) ./crc32.$(objext) ./d-demangle.$(objext) \ ./dwarfnames.$(objext) ./dyn-string.$(objext) \ ./fdmatch.$(objext) ./fibheap.$(objext) \ + ./filedescriptor.$(objext) \ ./filename_cmp.$(objext) ./floatformat.$(objext) \ ./fnmatch.$(objext) ./fopen_unlocked.$(objext) \ ./getopt.$(objext) ./getopt1.$(objext) ./getpwd.$(objext) \ @@ -756,6 +757,17 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir else true; fi $(COMPILE.c) $(srcdir)/fibheap.c $(OUTPUT_OPTION) +./filedescriptor.$(objext): $(srcdir)/filedescriptor.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/filedescriptor.c -o pic/$@; \ + else true; fi + if [ x"$(NOASANFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(NOASANFLAG) $(srcdir)/filedescriptor.c -o noasan/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/filedescriptor.c $(OUTPUT_OPTION) + + ./filename_cmp.$(objext): $(srcdir)/filename_cmp.c config.h $(INCDIR)/ansidecl.h \ $(INCDIR)/filenames.h $(INCDIR)/hashtab.h \ $(INCDIR)/safe-ctype.h diff --git a/libiberty/_doprnt.c b/libiberty/_doprnt.c index d44dc415ed..a739f4304f 100644 --- a/libiberty/_doprnt.c +++ b/libiberty/_doprnt.c @@ -1,5 +1,5 @@ /* Provide a version of _doprnt in terms of fprintf. - Copyright (C) 1998-2019 Free Software Foundation, Inc. + Copyright (C) 1998-2020 Free Software Foundation, Inc. 
Contributed by Kaveh Ghazi (ghazi@caip.rutgers.edu) 3/29/98 This program is free software; you can redistribute it and/or modify it diff --git a/libiberty/argv.c b/libiberty/argv.c index 6444896f99..8c9794db6a 100644 --- a/libiberty/argv.c +++ b/libiberty/argv.c @@ -1,5 +1,5 @@ /* Create and destroy argument vectors (argv's) - Copyright (C) 1992-2019 Free Software Foundation, Inc. + Copyright (C) 1992-2020 Free Software Foundation, Inc. Written by Fred Fish @ Cygnus Support This file is part of the libiberty library. diff --git a/libiberty/asprintf.c b/libiberty/asprintf.c index 5718682f69..6e38e2234d 100644 --- a/libiberty/asprintf.c +++ b/libiberty/asprintf.c @@ -1,6 +1,6 @@ /* Like sprintf but provides a pointer to malloc'd storage, which must be freed by the caller. - Copyright (C) 1997-2019 Free Software Foundation, Inc. + Copyright (C) 1997-2020 Free Software Foundation, Inc. Contributed by Cygnus Solutions. This file is part of the libiberty library. diff --git a/libiberty/choose-temp.c b/libiberty/choose-temp.c index 72c1b710bd..49a2faaa51 100644 --- a/libiberty/choose-temp.c +++ b/libiberty/choose-temp.c @@ -1,5 +1,5 @@ /* Utility to pick a temporary filename prefix. - Copyright (C) 1996-2019 Free Software Foundation, Inc. + Copyright (C) 1996-2020 Free Software Foundation, Inc. This file is part of the libiberty library. Libiberty is free software; you can redistribute it and/or diff --git a/libiberty/clock.c b/libiberty/clock.c index a3730714bd..0de74657d0 100644 --- a/libiberty/clock.c +++ b/libiberty/clock.c @@ -1,5 +1,5 @@ /* ANSI-compatible clock function. - Copyright (C) 1994-2019 Free Software Foundation, Inc. + Copyright (C) 1994-2020 Free Software Foundation, Inc. This file is part of the libiberty library. This library is free software; you can redistribute it and/or modify it under the diff --git
Diffstat (limited to 'libiberty/rust-demangle.c')
-rw-r--r-- libiberty/rust-demangle.c 687
1 file changed, 435 insertions, 252 deletions
diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index 2302db4..b87365c 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -1,5 +1,5 @@
/* Demangler for the Rust programming language
- Copyright (C) 2016-2019 Free Software Foundation, Inc.
+ Copyright (C) 2016-2020 Free Software Foundation, Inc.
Written by David Tolnay (dtolnay@gmail.com).
This file is part of the libiberty library.
@@ -33,9 +33,11 @@ If not, see <http://www.gnu.org/licenses/>. */
#include "safe-ctype.h"
+#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <stdio.h>
+#include <stdlib.h>
#ifdef HAVE_STRING_H
#include <string.h>
@@ -47,303 +49,484 @@ extern void *memset(void *s, int c, size_t n);
#include <demangle.h>
#include "libiberty.h"
-#include "rust-demangle.h"
+struct rust_demangler
+{
+ const char *sym;
+ size_t sym_len;
-/* Mangled Rust symbols look like this:
- _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
-
- The original symbol is:
- <std::sys::fd::FileDesc as core::ops::Drop>::drop
-
- The last component of the path is a 64-bit hash in lowercase hex,
- prefixed with "h". Rust does not have a global namespace between
- crates, an illusion which Rust maintains by using the hash to
- distinguish things that would otherwise have the same symbol.
-
- Any path component not starting with a XID_Start character is
- prefixed with "_".
-
- The following escape sequences are used:
+ void *callback_opaque;
+ demangle_callbackref callback;
- "," => $C$
- "@" => $SP$
- "*" => $BP$
- "&" => $RF$
- "<" => $LT$
- ">" => $GT$
- "(" => $LP$
- ")" => $RP$
- " " => $u20$
- "\"" => $u22$
- "'" => $u27$
- "+" => $u2b$
- ";" => $u3b$
- "[" => $u5b$
- "]" => $u5d$
- "{" => $u7b$
- "}" => $u7d$
- "~" => $u7e$
+ /* Position of the next character to read from the symbol. */
+ size_t next;
- A double ".." means "::" and a single "." means "-".
+ /* Non-zero if any error occurred. */
+ int errored;
- The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ */
+ /* Non-zero if printing should be verbose (e.g. include hashes). */
+ int verbose;
-static const char *hash_prefix = "::h";
-static const size_t hash_prefix_len = 3;
-static const size_t hash_len = 16;
+ /* Rust mangling version, with legacy mangling being -1. */
+ int version;
+};
-static int is_prefixed_hash (const char *start);
-static int looks_like_rust (const char *sym, size_t len);
-static int unescape (const char **in, char **out, const char *seq, char value);
+/* Parsing functions. */
-/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
+static char
+peek (const struct rust_demangler *rdm)
+{
+ if (rdm->next < rdm->sym_len)
+ return rdm->sym[rdm->next];
+ return 0;
+}
- This function looks for the following indicators:
+static char
+next (struct rust_demangler *rdm)
+{
+ char c = peek (rdm);
+ if (!c)
+ rdm->errored = 1;
+ else
+ rdm->next++;
+ return c;
+}
- 1. The hash must consist of "h" followed by 16 lowercase hex digits.
+struct rust_mangled_ident
+{
+ /* ASCII part of the identifier. */
+ const char *ascii;
+ size_t ascii_len;
+};
- 2. As a sanity check, the hash must use between 5 and 15 of the 16
- possible hex digits. This is true of 99.9998% of hashes so once
- in your life you may see a false negative. The point is to
- notice path components that could be Rust hashes but are
- probably not, like "haaaaaaaaaaaaaaaa". In this case a false
- positive (non-Rust symbol has an important path component
- removed because it looks like a Rust hash) is worse than a false
- negative (the rare Rust symbol is not demangled) so this sets
- the balance in favor of false negatives.
+static struct rust_mangled_ident
+parse_ident (struct rust_demangler *rdm)
+{
+ char c;
+ size_t start, len;
+ struct rust_mangled_ident ident;
+
+ ident.ascii = NULL;
+ ident.ascii_len = 0;
+
+ c = next (rdm);
+ if (!ISDIGIT (c))
+ {
+ rdm->errored = 1;
+ return ident;
+ }
+ len = c - '0';
+
+ if (c != '0')
+ while (ISDIGIT (peek (rdm)))
+ len = len * 10 + (next (rdm) - '0');
+
+ start = rdm->next;
+ rdm->next += len;
+ /* Check for overflows. */
+ if ((start > rdm->next) || (rdm->next > rdm->sym_len))
+ {
+ rdm->errored = 1;
+ return ident;
+ }
+
+ ident.ascii = rdm->sym + start;
+ ident.ascii_len = len;
+
+ if (ident.ascii_len == 0)
+ ident.ascii = NULL;
+
+ return ident;
+}
- 3. There must be no characters other than a-zA-Z0-9 and _.:$
+/* Printing functions. */
- 4. There must be no unrecognized $-sign sequences.
+static void
+print_str (struct rust_demangler *rdm, const char *data, size_t len)
+{
+ if (!rdm->errored)
+ rdm->callback (data, len, rdm->callback_opaque);
+}
- 5. There must be no sequence of three or more dots in a row ("..."). */
+#define PRINT(s) print_str (rdm, s, strlen (s))
-int
-rust_is_mangled (const char *sym)
+/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
+static int
+decode_lower_hex_nibble (char nibble)
{
- size_t len, len_without_hash;
+ if ('0' <= nibble && nibble <= '9')
+ return nibble - '0';
+ if ('a' <= nibble && nibble <= 'f')
+ return 0xa + (nibble - 'a');
+ return -1;
+}
- if (!sym)
- return 0;
+/* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
+static char
+decode_legacy_escape (const char *e, size_t len, size_t *out_len)
+{
+ char c = 0;
+ size_t escape_len = 0;
+ int lo_nibble = -1, hi_nibble = -1;
- len = strlen (sym);
- if (len <= hash_prefix_len + hash_len)
- /* Not long enough to contain "::h" + hash + something else */
+ if (len < 3 || e[0] != '$')
return 0;
- len_without_hash = len - (hash_prefix_len + hash_len);
- if (!is_prefixed_hash (sym + len_without_hash))
+ e++;
+ len--;
+
+ if (e[0] == 'C')
+ {
+ escape_len = 1;
+
+ c = ',';
+ }
+ else if (len > 2)
+ {
+ escape_len = 2;
+
+ if (e[0] == 'S' && e[1] == 'P')
+ c = '@';
+ else if (e[0] == 'B' && e[1] == 'P')
+ c = '*';
+ else if (e[0] == 'R' && e[1] == 'F')
+ c = '&';
+ else if (e[0] == 'L' && e[1] == 'T')
+ c = '<';
+ else if (e[0] == 'G' && e[1] == 'T')
+ c = '>';
+ else if (e[0] == 'L' && e[1] == 'P')
+ c = '(';
+ else if (e[0] == 'R' && e[1] == 'P')
+ c = ')';
+ else if (e[0] == 'u' && len > 3)
+ {
+ escape_len = 3;
+
+ hi_nibble = decode_lower_hex_nibble (e[1]);
+ if (hi_nibble < 0)
+ return 0;
+ lo_nibble = decode_lower_hex_nibble (e[2]);
+ if (lo_nibble < 0)
+ return 0;
+
+ /* Only allow non-control ASCII characters. */
+ if (hi_nibble > 7)
+ return 0;
+ c = (hi_nibble << 4) | lo_nibble;
+ if (c < 0x20)
+ return 0;
+ }
+ }
+
+ if (!c || len <= escape_len || e[escape_len] != '$')
return 0;
- return looks_like_rust (sym, len_without_hash);
+ *out_len = 2 + escape_len;
+ return c;
}
-/* A hash is the prefix "::h" followed by 16 lowercase hex digits. The
- hex digits must comprise between 5 and 15 (inclusive) distinct
- digits. */
+static void
+print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
+{
+ char unescaped;
+ size_t len;
+ if (rdm->errored)
+ return;
+
+ if (rdm->version == -1)
+ {
+ /* Ignore leading underscores preceding escape sequences.
+ The mangler inserts an underscore to make sure the
+ identifier begins with a XID_Start character. */
+ if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
+ && ident.ascii[1] == '$')
+ {
+ ident.ascii++;
+ ident.ascii_len--;
+ }
+
+ while (ident.ascii_len > 0)
+ {
+ /* Handle legacy escape sequences ("$...$", ".." or "."). */
+ if (ident.ascii[0] == '$')
+ {
+ unescaped
+ = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
+ if (unescaped)
+ print_str (rdm, &unescaped, 1);
+ else
+ {
+ /* Unexpected escape sequence, print the rest verbatim. */
+ print_str (rdm, ident.ascii, ident.ascii_len);
+ return;
+ }
+ }
+ else if (ident.ascii[0] == '.')
+ {
+ if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
+ {
+ /* ".." becomes "::" */
+ PRINT ("::");
+ len = 2;
+ }
+ else
+ {
+ /* "." becomes "-" */
+ PRINT ("-");
+ len = 1;
+ }
+ }
+ else
+ {
+ /* Print everything before the next escape sequence, at once. */
+ for (len = 0; len < ident.ascii_len; len++)
+ if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
+ break;
+
+ print_str (rdm, ident.ascii, len);
+ }
+
+ ident.ascii += len;
+ ident.ascii_len -= len;
+ }
+
+ return;
+ }
+}
+
+/* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
+ The hex digits must contain at least 5 distinct digits. */
static int
-is_prefixed_hash (const char *str)
+is_legacy_prefixed_hash (struct rust_mangled_ident ident)
{
- const char *end;
- char seen[16];
- size_t i;
- int count;
+ uint16_t seen;
+ int nibble;
+ size_t i, count;
- if (strncmp (str, hash_prefix, hash_prefix_len))
+ if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
return 0;
- str += hash_prefix_len;
-
- memset (seen, 0, sizeof(seen));
- for (end = str + hash_len; str < end; str++)
- if (*str >= '0' && *str <= '9')
- seen[*str - '0'] = 1;
- else if (*str >= 'a' && *str <= 'f')
- seen[*str - 'a' + 10] = 1;
- else
- return 0;
- /* Count how many distinct digits seen */
- count = 0;
+ seen = 0;
for (i = 0; i < 16; i++)
- if (seen[i])
- count++;
-
- return count >= 5 && count <= 15;
+ {
+ nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
+ if (nibble < 0)
+ return 0;
+ seen |= (uint16_t)1 << nibble;
+ }
+
+ /* Count how many distinct digits were seen. */
+ count = 0;
+ while (seen)
+ {
+ if (seen & 1)
+ count++;
+ seen >>= 1;
+ }
+
+ return count >= 5;
}
-static int
-looks_like_rust (const char *str, size_t len)
+int
+rust_demangle_callback (const char *mangled, int options,
+ demangle_callbackref callback, void *opaque)
{
- const char *end = str + len;
-
- while (str < end)
- switch (*str)
- {
- case '$':
- if (!strncmp (str, "$C$", 3))
- str += 3;
- else if (!strncmp (str, "$SP$", 4)
- || !strncmp (str, "$BP$", 4)
- || !strncmp (str, "$RF$", 4)
- || !strncmp (str, "$LT$", 4)
- || !strncmp (str, "$GT$", 4)
- || !strncmp (str, "$LP$", 4)
- || !strncmp (str, "$RP$", 4))
- str += 4;
- else if (!strncmp (str, "$u20$", 5)
- || !strncmp (str, "$u22$", 5)
- || !strncmp (str, "$u27$", 5)
- || !strncmp (str, "$u2b$", 5)
- || !strncmp (str, "$u3b$", 5)
- || !strncmp (str, "$u5b$", 5)
- || !strncmp (str, "$u5d$", 5)
- || !strncmp (str, "$u7b$", 5)
- || !strncmp (str, "$u7d$", 5)
- || !strncmp (str, "$u7e$", 5))
- str += 5;
- else
- return 0;
- break;
- case '.':
- /* Do not allow three or more consecutive dots */
- if (!strncmp (str, "...", 3))
- return 0;
- /* Fall through */
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case '_':
- case ':':
- str++;
- break;
- default:
- return 0;
- }
-
- return 1;
-}
+ const char *p;
+ struct rust_demangler rdm;
+ struct rust_mangled_ident ident;
+
+ rdm.sym = mangled;
+ rdm.sym_len = 0;
+
+ rdm.callback_opaque = opaque;
+ rdm.callback = callback;
+
+ rdm.next = 0;
+ rdm.errored = 0;
+ rdm.verbose = (options & DMGL_VERBOSE) != 0;
+ rdm.version = 0;
+
+ /* Rust symbols always start with _ZN (legacy). */
+ if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
+ {
+ rdm.sym += 3;
+ rdm.version = -1;
+ }
+ else
+ return 0;
+
+ /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
+ for (p = rdm.sym; *p; p++)
+ {
+ rdm.sym_len++;
-/*
- INPUT: sym: symbol for which rust_is_mangled(sym) returned 1.
+ if (*p == '_' || ISALNUM (*p))
+ continue;
- The input is demangled in-place because the mangled name is always
- longer than the demangled one. */
+ if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
+ continue;
-void
-rust_demangle_sym (char *sym)
+ return 0;
+ }
+
+ /* Legacy Rust symbols need to be handled separately. */
+ if (rdm.version == -1)
+ {
+ /* Legacy Rust symbols always end with E. */
+ if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
+ return 0;
+ rdm.sym_len--;
+
+ /* Legacy Rust symbols also always end with a path segment
+ that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
+ This early check, before any parse_ident calls, should
+ quickly filter out most C++ symbols unrelated to Rust. */
+ if (!(rdm.sym_len > 19
+ && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
+ return 0;
+
+ do
+ {
+ ident = parse_ident (&rdm);
+ if (rdm.errored || !ident.ascii)
+ return 0;
+ }
+ while (rdm.next < rdm.sym_len);
+
+ /* The last path segment should be the hash. */
+ if (!is_legacy_prefixed_hash (ident))
+ return 0;
+
+ /* Reset the state for a second pass, to print the symbol. */
+ rdm.next = 0;
+ if (!rdm.verbose && rdm.sym_len > 19)
+ {
+ /* Hide the last segment, containing the hash, if not verbose. */
+ rdm.sym_len -= 19;
+ }
+
+ do
+ {
+ if (rdm.next > 0)
+ print_str (&rdm, "::", 2);
+
+ ident = parse_ident (&rdm);
+ print_ident (&rdm, ident);
+ }
+ while (rdm.next < rdm.sym_len);
+ }
+ else
+ return 0;
+
+ return !rdm.errored;
+}
+
+/* Growable string buffers. */
+struct str_buf
+{
+ char *ptr;
+ size_t len;
+ size_t cap;
+ int errored;
+};
+
+static void
+str_buf_reserve (struct str_buf *buf, size_t extra)
{
- const char *in;
- char *out;
- const char *end;
+ size_t available, min_new_cap, new_cap;
+ char *new_ptr;
- if (!sym)
+ /* Allocation failed before. */
+ if (buf->errored)
return;
- in = sym;
- out = sym;
- end = sym + strlen (sym) - (hash_prefix_len + hash_len);
-
- while (in < end)
- switch (*in)
- {
- case '$':
- if (!(unescape (&in, &out, "$C$", ',')
- || unescape (&in, &out, "$SP$", '@')
- || unescape (&in, &out, "$BP$", '*')
- || unescape (&in, &out, "$RF$", '&')
- || unescape (&in, &out, "$LT$", '<')
- || unescape (&in, &out, "$GT$", '>')
- || unescape (&in, &out, "$LP$", '(')
- || unescape (&in, &out, "$RP$", ')')
- || unescape (&in, &out, "$u20$", ' ')
- || unescape (&in, &out, "$u22$", '\"')
- || unescape (&in, &out, "$u27$", '\'')
- || unescape (&in, &out, "$u2b$", '+')
- || unescape (&in, &out, "$u3b$", ';')
- || unescape (&in, &out, "$u5b$", '[')
- || unescape (&in, &out, "$u5d$", ']')
- || unescape (&in, &out, "$u7b$", '{')
- || unescape (&in, &out, "$u7d$", '}')
- || unescape (&in, &out, "$u7e$", '~'))) {
- /* unexpected escape sequence, not looks_like_rust. */
- goto fail;
- }
- break;
- case '_':
- /* If this is the start of a path component and the next
- character is an escape sequence, ignore the underscore. The
- mangler inserts an underscore to make sure the path
- component begins with a XID_Start character. */
- if ((in == sym || in[-1] == ':') && in[1] == '$')
- in++;
- else
- *out++ = *in++;
- break;
- case '.':
- if (in[1] == '.')
- {
- /* ".." becomes "::" */
- *out++ = ':';
- *out++ = ':';
- in += 2;
- }
- else
- {
- /* "." becomes "-" */
- *out++ = '-';
- in++;
- }
- break;
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- case ':':
- *out++ = *in++;
- break;
- default:
- /* unexpected character in symbol, not looks_like_rust. */
- goto fail;
- }
- goto done;
-
-fail:
- *out++ = '?'; /* This is pretty lame, but it's hard to do better. */
-done:
- *out = '\0';
+ available = buf->cap - buf->len;
+
+ if (extra <= available)
+ return;
+
+ min_new_cap = buf->cap + (extra - available);
+
+ /* Check for overflows. */
+ if (min_new_cap < buf->cap)
+ {
+ buf->errored = 1;
+ return;
+ }
+
+ new_cap = buf->cap;
+
+ if (new_cap == 0)
+ new_cap = 4;
+
+ /* Double capacity until sufficiently large. */
+ while (new_cap < min_new_cap)
+ {
+ new_cap *= 2;
+
+ /* Check for overflows. */
+ if (new_cap < buf->cap)
+ {
+ buf->errored = 1;
+ return;
+ }
+ }
+
+ new_ptr = (char *)realloc (buf->ptr, new_cap);
+ if (new_ptr == NULL)
+ {
+ free (buf->ptr);
+ buf->ptr = NULL;
+ buf->len = 0;
+ buf->cap = 0;
+ buf->errored = 1;
+ }
+ else
+ {
+ buf->ptr = new_ptr;
+ buf->cap = new_cap;
+ }
}
-static int
-unescape (const char **in, char **out, const char *seq, char value)
+static void
+str_buf_append (struct str_buf *buf, const char *data, size_t len)
{
- size_t len = strlen (seq);
+ str_buf_reserve (buf, len);
+ if (buf->errored)
+ return;
- if (strncmp (*in, seq, len))
- return 0;
+ memcpy (buf->ptr + buf->len, data, len);
+ buf->len += len;
+}
+
+static void
+str_buf_demangle_callback (const char *data, size_t len, void *opaque)
+{
+ str_buf_append ((struct str_buf *)opaque, data, len);
+}
+
+char *
+rust_demangle (const char *mangled, int options)
+{
+ struct str_buf out;
+ int success;
+
+ out.ptr = NULL;
+ out.len = 0;
+ out.cap = 0;
+ out.errored = 0;
- **out = value;
+ success = rust_demangle_callback (mangled, options,
+ str_buf_demangle_callback, &out);
- *in += len;
- *out += 1;
+ if (!success)
+ {
+ free (out.ptr);
+ return NULL;
+ }
- return 1;
+ str_buf_append (&out, "\0", 1);
+ return out.ptr;
}