aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/backend
diff options
context:
space:
mode:
authorRaiki Tamura <tamaron1203@gmail.com>2023-08-08 02:08:38 +0900
committerCohenArthur <arthur.cohen@embecosm.com>2023-08-18 10:52:24 +0000
commitb1dd53faa1aa9ebd935742e57166647c055bae2a (patch)
tree2e71e9257d0d69b8afe05b3363a6ca3a70be077b /gcc/rust/backend
parent879a62f50dd693dba84e0e983e38d1480efaa69a (diff)
downloadgcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.zip
gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.gz
gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.bz2
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog: * backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding (v0_mangle_item): Likewise. * lex/rust-lex.cc (assert_source_content): Change type (test_buffer_input_source): Change type (test_file_input_source): Change type * resolve/rust-ast-resolve-toplevel.h: fix typo * rust-session-manager.cc (Session::load_extern_crate): fix typo * util/rust-canonical-path.h: fix typo * util/rust-hir-map.cc (NodeMapping::get_error): fix typo (Mappings::Mappings): fix typo * util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo (UNKNOWN_CRATENUM): Change 0 to UINT32_MAX Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc/rust/backend')
-rw-r--r--gcc/rust/backend/rust-mangle.cc53
1 files changed, 38 insertions, 15 deletions
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
index 62530d6..248d69b 100644
--- a/gcc/rust/backend/rust-mangle.cc
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -1,8 +1,11 @@
#include "rust-mangle.h"
#include "fnv-hash.h"
+#include "optional.h"
#include "rust-base62.h"
#include "rust-unicode.h"
-#include "optional.h"
+#include "rust-diagnostics.h"
+#include "rust-unicode.h"
+#include "rust-punycode.h"
// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
- // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
- // create mangling for unicode values for now. However, this is handled
- // by the v0 mangling scheme. The grammar for unicode identifier is
- // contained in <undisambiguated-identifier>, right under the <identifier>
- // one. If the identifier contains unicode values, then an extra "u" needs
- // to be added to the mangling string and `punycode` must be used to encode
- // the characters.
-
- mangled += std::to_string (identifier.size ());
-
+ // The grammar for unicode identifier is contained in
+ // <undisambiguated-identifier>, right under the <identifier> one. If the
+ // identifier contains unicode values, then an extra "u" needs to be added to
+ // the mangling string and `punycode` must be used to encode the characters.
+ tl::optional<Utf8String> uident_opt
+ = Utf8String::make_utf8_string (identifier);
+ rust_assert (uident_opt.has_value ());
+ tl::optional<std::string> punycode_opt
+ = encode_punycode (uident_opt.value ());
+ rust_assert (punycode_opt.has_value ());
+
+ bool is_ascii_ident = true;
+ for (auto c : uident_opt.value ().get_chars ())
+ if (c.value > 127)
+ {
+ is_ascii_ident = false;
+ break;
+ }
+
+ std::string punycode = punycode_opt.value ();
+ // remove trailing hyphen
+ if (punycode.back () == '-')
+ punycode.pop_back ();
+ // replace hyphens in punycode with underscores
+ std::replace (punycode.begin (), punycode.end (), '-', '_');
+
+ if (!is_ascii_ident)
+ mangled.append ("u");
+
+ mangled += std::to_string (punycode.size ());
// If the first character of the identifier is a digit or an underscore, we
// add an extra underscore
- if (identifier[0] == '_')
- mangled.append ("_");
+ if (punycode[0] == '_')
+ mangled += "_";
- mangled.append (identifier);
+ mangled += punycode;
}
static std::string
@@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
std::string mangled;
// FIXME: Add real algorithm once all pieces are implemented
- auto ty_prefix = v0_type_prefix (ty);
v0_add_identifier (mangled, crate_name);
v0_add_disambiguator (mangled, 62);
+ auto ty_prefix = v0_type_prefix (ty);
rust_unreachable ();
}