aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Raiki Tamura <tamaron1203@gmail.com> 2023-08-08 02:08:38 +0900
committer CohenArthur <arthur.cohen@embecosm.com> 2023-08-18 10:52:24 +0000
commit b1dd53faa1aa9ebd935742e57166647c055bae2a (patch)
tree 2e71e9257d0d69b8afe05b3363a6ca3a70be077b
parent 879a62f50dd693dba84e0e983e38d1480efaa69a (diff)
download gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.zip
gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.gz
gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.bz2
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog:

	* backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
	(v0_mangle_item): Likewise.
	* lex/rust-lex.cc (assert_source_content): Change type
	(test_buffer_input_source): Change type
	(test_file_input_source): Change type
	* resolve/rust-ast-resolve-toplevel.h: fix typo
	* rust-session-manager.cc (Session::load_extern_crate): fix typo
	* util/rust-canonical-path.h: fix typo
	* util/rust-hir-map.cc (NodeMapping::get_error): fix typo
	(Mappings::Mappings): fix typo
	* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
	(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
-rw-r--r-- gcc/rust/backend/rust-mangle.cc 53
-rw-r--r-- gcc/rust/lex/rust-lex.cc 14
-rw-r--r-- gcc/rust/resolve/rust-ast-resolve-toplevel.h 2
-rw-r--r-- gcc/rust/rust-session-manager.cc 2
-rw-r--r-- gcc/rust/util/rust-canonical-path.h 6
-rw-r--r-- gcc/rust/util/rust-hir-map.cc 4
-rw-r--r-- gcc/rust/util/rust-mapping-common.h 2
7 files changed, 54 insertions, 29 deletions
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
index 62530d6..248d69b 100644
--- a/gcc/rust/backend/rust-mangle.cc
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -1,8 +1,11 @@
#include "rust-mangle.h"
#include "fnv-hash.h"
+#include "optional.h"
#include "rust-base62.h"
#include "rust-unicode.h"
-#include "optional.h"
+#include "rust-diagnostics.h"
+#include "rust-unicode.h"
+#include "rust-punycode.h"
// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
- // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
- // create mangling for unicode values for now. However, this is handled
- // by the v0 mangling scheme. The grammar for unicode identifier is
- // contained in <undisambiguated-identifier>, right under the <identifier>
- // one. If the identifier contains unicode values, then an extra "u" needs
- // to be added to the mangling string and `punycode` must be used to encode
- // the characters.
-
- mangled += std::to_string (identifier.size ());
-
+ // The grammar for unicode identifier is contained in
+ // <undisambiguated-identifier>, right under the <identifier> one. If the
+ // identifier contains unicode values, then an extra "u" needs to be added to
+ // the mangling string and `punycode` must be used to encode the characters.
+ tl::optional<Utf8String> uident_opt
+ = Utf8String::make_utf8_string (identifier);
+ rust_assert (uident_opt.has_value ());
+ tl::optional<std::string> punycode_opt
+ = encode_punycode (uident_opt.value ());
+ rust_assert (punycode_opt.has_value ());
+
+ bool is_ascii_ident = true;
+ for (auto c : uident_opt.value ().get_chars ())
+ if (c.value > 127)
+ {
+ is_ascii_ident = false;
+ break;
+ }
+
+ std::string punycode = punycode_opt.value ();
+ // remove trailing hyphen
+ if (punycode.back () == '-')
+ punycode.pop_back ();
+ // replace hyphens in punycode with underscores
+ std::replace (punycode.begin (), punycode.end (), '-', '_');
+
+ if (!is_ascii_ident)
+ mangled.append ("u");
+
+ mangled += std::to_string (punycode.size ());
// If the first character of the identifier is a digit or an underscore, we
// add an extra underscore
- if (identifier[0] == '_')
- mangled.append ("_");
+ if (punycode[0] == '_')
+ mangled += "_";
- mangled.append (identifier);
+ mangled += punycode;
}
static std::string
@@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
std::string mangled;
// FIXME: Add real algorithm once all pieces are implemented
- auto ty_prefix = v0_type_prefix (ty);
v0_add_identifier (mangled, crate_name);
v0_add_disambiguator (mangled, 62);
+ auto ty_prefix = v0_type_prefix (ty);
rust_unreachable ();
}
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 0798be3..1afcd01 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column)
namespace selftest {
// Checks if `src` has the same contents as the given characters
-void
-assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
+static void
+assert_source_content (Rust::InputSource &src,
+ const std::vector<uint32_t> &expected)
{
Rust::Codepoint src_char = src.next ();
for (auto expected_char : expected)
@@ -2559,15 +2560,16 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
ASSERT_TRUE (src_char.is_eof ());
}
-void
-test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_buffer_input_source (std::string str,
+ const std::vector<uint32_t> &expected)
{
Rust::BufferInputSource source (str, 0);
assert_source_content (source, expected);
}
-void
-test_file_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
{
FILE *tmpf = tmpfile ();
// Moves to the first character
diff --git a/gcc/rust/resolve/rust-ast-resolve-toplevel.h b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
index 12b7103..9ba8bdb 100644
--- a/gcc/rust/resolve/rust-ast-resolve-toplevel.h
+++ b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
@@ -430,7 +430,7 @@ public:
}
else
{
- CrateNum found_crate_num = UNKNOWN_CREATENUM;
+ CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found
= mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
found_crate_num);
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 18e7ea2..abf1272 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -979,7 +979,7 @@ NodeId
Session::load_extern_crate (const std::string &crate_name, location_t locus)
{
// has it already been loaded?
- CrateNum found_crate_num = UNKNOWN_CREATENUM;
+ CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
if (found)
{
diff --git a/gcc/rust/util/rust-canonical-path.h b/gcc/rust/util/rust-canonical-path.h
index b168c93..2f28302 100644
--- a/gcc/rust/util/rust-canonical-path.h
+++ b/gcc/rust/util/rust-canonical-path.h
@@ -58,7 +58,7 @@ public:
{
rust_assert (!path.empty ());
return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
- UNKNOWN_CREATENUM);
+ UNKNOWN_CRATENUM);
}
static CanonicalPath
@@ -88,7 +88,7 @@ public:
static CanonicalPath create_empty ()
{
- return CanonicalPath ({}, UNKNOWN_CREATENUM);
+ return CanonicalPath ({}, UNKNOWN_CRATENUM);
}
bool is_empty () const { return segs.size () == 0; }
@@ -171,7 +171,7 @@ public:
CrateNum get_crate_num () const
{
- rust_assert (crate_num != UNKNOWN_CREATENUM);
+ rust_assert (crate_num != UNKNOWN_CRATENUM);
return crate_num;
}
diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc
index cf907e9..62e8c7c 100644
--- a/gcc/rust/util/rust-hir-map.cc
+++ b/gcc/rust/util/rust-hir-map.cc
@@ -29,7 +29,7 @@ namespace Analysis {
NodeMapping
NodeMapping::get_error ()
{
- return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
+ return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
UNKNOWN_LOCAL_DEFID);
}
@@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
static const HirId kDefaultCrateNumBegin = 0;
Mappings::Mappings ()
- : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
+ : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
{
Analysis::NodeMapping node (0, 0, 0, 0);
diff --git a/gcc/rust/util/rust-mapping-common.h b/gcc/rust/util/rust-mapping-common.h
index d18dab5..93df863 100644
--- a/gcc/rust/util/rust-mapping-common.h
+++ b/gcc/rust/util/rust-mapping-common.h
@@ -61,7 +61,7 @@ struct DefId
}
};
-#define UNKNOWN_CREATENUM ((uint32_t) (0))
+#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
#define UNKNOWN_NODEID ((uint32_t) (0))
#define UNKNOWN_HIRID ((uint32_t) (0))
#define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))