aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2021-10-18 09:24:56 +0000
committerGitHub <noreply@github.com>2021-10-18 09:24:56 +0000
commit649e3e074bf8306bf0eb042f10483dbd61cd040b (patch)
treeae3b4402a9e8fdf23dfe27df0823219bd2064cae /gcc
parenta1a450641004c45b78b76034161f7b2efb0eeb1f (diff)
parentfd9d37c68ca363503ef5a515c7e409a3b15b43e1 (diff)
downloadgcc-649e3e074bf8306bf0eb042f10483dbd61cd040b.zip
gcc-649e3e074bf8306bf0eb042f10483dbd61cd040b.tar.gz
gcc-649e3e074bf8306bf0eb042f10483dbd61cd040b.tar.bz2
Merge #747
747: Base v0 mangling grammar r=philberty a=CohenArthur This PR adds base functions to deal with the v0 mangling grammar, [found here](https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#syntax-of-mangled-names). I have a few questions regarding this implementation: 1/ Is there any existing implementation for the base62 algorithm used here? This is directly adapted from [rustc's base_n module](https://github.com/rust-lang/rust/blob/6f53ddfa74ac3c10ceb63ad4a7a9c95e55853c87/compiler/rustc_data_structures/src/base_n.rs#L16) which I'm assuming is relatively standard and might already exist in the compiler. I haven't been able to find it however. 2/ gccrs cannot yet deal with unicode identifiers, as pointed out by `@bjorn3` in #418. This means that a big chunk of the `v0_add_identifier` implementation is missing. Should it be added in this PR too? 3/ As mentioned in zulip, it would be great to be able to create unit tests for this piece of code. It would be quite easy to generate a bunch of base62 strings and ensure that the algorithm here matches with them. Co-authored-by: CohenArthur <arthur.cohen@epita.fr>
Diffstat (limited to 'gcc')
-rw-r--r--gcc/rust/Make-lang.in1
-rw-r--r--gcc/rust/backend/rust-mangle.cc65
-rw-r--r--gcc/rust/util/rust-base62.cc48
-rw-r--r--gcc/rust/util/rust-base62.h34
4 files changed, 148 insertions, 0 deletions
diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 0e181a6..57e8299 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -88,6 +88,7 @@ GRS_OBJS = \
rust/rust-lint-marklive.o \
rust/rust-hir-type-check-path.o \
rust/rust-compile-intrinsic.o \
+ rust/rust-base62.o \
$(END)
# removed object files from here
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
index 0e6643c..15ac3b1 100644
--- a/gcc/rust/backend/rust-mangle.cc
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -1,5 +1,7 @@
#include "rust-mangle.h"
#include "fnv-hash.h"
+#include "rust-base62.h"
+#include <algorithm>
// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -154,6 +156,63 @@ v0_simple_type_prefix (const TyTy::BaseType *ty)
gcc_unreachable ();
}
+// Append the `<base-62-number>` production of the v0 mangling RFC to
+// MANGLED. The encoding is always terminated by an underscore:
+//  - x == 0 yields only the terminator, "_"
+//  - x > 0 yields base62_integer (x - 1) followed by "_"
+static void
+v0_add_integer_62 (std::string &mangled, uint64_t x)
+{
+  // Stays empty for zero, which has no digits in this scheme.
+  std::string digits;
+  if (x != 0)
+    digits = base62_integer (x - 1);
+
+  mangled += digits;
+  mangled += "_";
+}
+
+// Add a tag-prefixed base62 integer to the mangling string when the
+// integer is greater than 0. This follows rustc's `push_opt_integer_62`:
+//  - 0 is encoded as "" (nothing is appended, not even the tag)
+//  - any other value is encoded as <tag> + v0_add_integer_62 (x - 1),
+//    e.g. 1 becomes <tag> + "_" and 2 becomes <tag> + "0_"
+static void
+v0_add_opt_integer_62 (std::string &mangled, std::string tag, uint64_t x)
+{
+  // Zero is the implicit default and is represented by omitting the field.
+  if (x == 0)
+    return;
+
+  mangled.append (tag);
+
+  // The mere presence of the tag already means "at least one", so the
+  // payload is biased by one before being encoded. Passing X unchanged
+  // would make a demangler (which adds one back after reading the tagged
+  // number) recover X + 1 instead of X.
+  v0_add_integer_62 (mangled, x - 1);
+}
+
+// Append the optional disambiguator DIS to MANGLED. This simply forwards to
+// v0_add_opt_integer_62 with the tag "s", so a disambiguator of zero appends
+// nothing at all.
+static void
+v0_add_disambiguator (std::string &mangled, uint64_t dis)
+{
+  // Tag letter that introduces a disambiguator in the mangled name.
+  const std::string disambiguator_tag ("s");
+
+  v0_add_opt_integer_62 (mangled, disambiguator_tag, dis);
+}
+
+// Add an identifier to the mangled string. This corresponds to the
+// `<identifier>` grammar in the v0 mangling RFC. The output is the byte
+// length of IDENTIFIER in decimal, an optional "_" separator, and then the
+// identifier bytes themselves, e.g. "foo" -> "3foo" and "_x" -> "2__x".
+static void
+v0_add_identifier (std::string &mangled, const std::string &identifier)
+{
+  // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
+  // create mangling for unicode values for now. However, this is handled
+  // by the v0 mangling scheme. The grammar for unicode identifier is contained
+  // in <undisambiguated-identifier>, right under the <identifier> one. If the
+  // identifier contains unicode values, then an extra "u" needs to be added
+  // to the mangling string and `punycode` must be used to encode the
+  // characters.
+
+  mangled += std::to_string (identifier.size ());
+
+  // If the first character of the identifier is a digit or an underscore, we
+  // add an extra underscore. Without it a leading digit would be read as part
+  // of the decimal length, and a leading underscore would be mistaken for the
+  // separator itself. The digit test is spelled out as a range comparison so
+  // that no character-classification header is required.
+  const bool needs_separator
+    = !identifier.empty ()
+      && (identifier[0] == '_'
+	  || (identifier[0] >= '0' && identifier[0] <= '9'));
+  if (needs_separator)
+    mangled.append ("_");
+
+  mangled.append (identifier);
+}
+
static std::string
v0_type_prefix (const TyTy::BaseType *ty)
{
@@ -194,7 +253,13 @@ static std::string
v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path,
const std::string &crate_name)
{
+ std::string mangled;
+
+ // FIXME: Add real algorithm once all pieces are implemented. For now the
+ // helpers are only called so that they are referenced: `mangled` is never
+ // returned, `path` and `ty_prefix` are not used yet, and control always
+ // ends in gcc_unreachable (). The disambiguator value passed below is a
+ // hard-coded literal (presumably a placeholder -- confirm before relying
+ // on it).
auto ty_prefix = v0_type_prefix (ty);
+ v0_add_identifier (mangled, crate_name);
+ v0_add_disambiguator (mangled, 62);
+
gcc_unreachable ();
}
diff --git a/gcc/rust/util/rust-base62.cc b/gcc/rust/util/rust-base62.cc
new file mode 100644
index 0000000..f1e3202
--- /dev/null
+++ b/gcc/rust/util/rust-base62.cc
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include "rust-base62.h"
+
+#include <algorithm>
+
+namespace Rust {
+
+// Convert VALUE to its base 62 textual form, most significant digit first.
+// Digits are drawn from 0-9, then a-z, then A-Z; zero is rendered as "0".
+std::string
+base62_integer (uint64_t value)
+{
+  // Digit table; with a radix of 62 the trailing "@$" entries are never
+  // indexed.
+  static const char digit_table[]
+    = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
+  const uint64_t radix = 62;
+
+  // 62^11 exceeds 2^64, so a 64-bit value never needs more than 11 digits.
+  char digits[11];
+  size_t first = sizeof (digits);
+
+  // Produce the least significant digit first, but store from the back of
+  // the buffer so the digits end up in reading order.
+  do
+    {
+      first--;
+      digits[first] = digit_table[value % radix];
+      value /= radix;
+    }
+  while (value != 0);
+
+  return std::string (digits + first, sizeof (digits) - first);
+}
+
+} // namespace Rust
+
+// FIXME: Add unit testing using the selftest framework
diff --git a/gcc/rust/util/rust-base62.h b/gcc/rust/util/rust-base62.h
new file mode 100644
index 0000000..7a6e3cf
--- /dev/null
+++ b/gcc/rust/util/rust-base62.h
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef RUST_BASE62_H
+#define RUST_BASE62_H
+
+// <cstdint> is needed for uint64_t; <string> is not guaranteed to provide it
+// transitively.
+#include <cstdint>
+#include <string>
+
+namespace Rust {
+
+/**
+ * Get the Base62 representation of an integer.
+ *
+ * The result is written most significant digit first using the digits
+ * 0-9, a-z and A-Z (in that order of increasing value). Zero is rendered
+ * as "0"; the result is never empty.
+ *
+ * @param value the integer to convert
+ * @return the base 62 digits of VALUE
+ */
+std::string
+base62_integer (uint64_t value);
+
+} // namespace Rust
+
+#endif /* !RUST_BASE62_H */