diff options
author | Raiki Tamura <tamaron1203@gmail.com> | 2023-08-08 02:08:38 +0900 |
---|---|---|
committer | CohenArthur <arthur.cohen@embecosm.com> | 2023-08-18 10:52:24 +0000 |
commit | b1dd53faa1aa9ebd935742e57166647c055bae2a (patch) | |
tree | 2e71e9257d0d69b8afe05b3363a6ca3a70be077b /gcc | |
parent | 879a62f50dd693dba84e0e983e38d1480efaa69a (diff) | |
download | gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.zip gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.gz gcc-b1dd53faa1aa9ebd935742e57166647c055bae2a.tar.bz2 |
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog:
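* backend/rust-mangle.cc (v0_add_identifier): Encode identifiers with punycode and emit the "u" prefix for non-ASCII identifiers.
(v0_mangle_item): Compute the type prefix after adding the crate identifier and disambiguator.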
* lex/rust-lex.cc (assert_source_content): Make static and take the expected vector by const reference.
(test_buffer_input_source): Likewise.
(test_file_input_source): Likewise.
* resolve/rust-ast-resolve-toplevel.h: Fix typo (UNKNOWN_CREATENUM -> UNKNOWN_CRATENUM).
* rust-session-manager.cc (Session::load_extern_crate): Likewise.
* util/rust-canonical-path.h: Likewise.
* util/rust-hir-map.cc (NodeMapping::get_error): Likewise.
(Mappings::Mappings): Likewise.
* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX
Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/rust/backend/rust-mangle.cc | 53 | ||||
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 14 | ||||
-rw-r--r-- | gcc/rust/resolve/rust-ast-resolve-toplevel.h | 2 | ||||
-rw-r--r-- | gcc/rust/rust-session-manager.cc | 2 | ||||
-rw-r--r-- | gcc/rust/util/rust-canonical-path.h | 6 | ||||
-rw-r--r-- | gcc/rust/util/rust-hir-map.cc | 4 | ||||
-rw-r--r-- | gcc/rust/util/rust-mapping-common.h | 2 |
7 files changed, 54 insertions, 29 deletions
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc index 62530d6..248d69b 100644 --- a/gcc/rust/backend/rust-mangle.cc +++ b/gcc/rust/backend/rust-mangle.cc @@ -1,8 +1,11 @@ #include "rust-mangle.h" #include "fnv-hash.h" +#include "optional.h" #include "rust-base62.h" #include "rust-unicode.h" -#include "optional.h" +#include "rust-diagnostics.h" +#include "rust-unicode.h" +#include "rust-punycode.h" // FIXME: Rename those to legacy_* static const std::string kMangledSymbolPrefix = "_ZN"; @@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis) static void v0_add_identifier (std::string &mangled, const std::string &identifier) { - // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to - // create mangling for unicode values for now. However, this is handled - // by the v0 mangling scheme. The grammar for unicode identifier is - // contained in <undisambiguated-identifier>, right under the <identifier> - // one. If the identifier contains unicode values, then an extra "u" needs - // to be added to the mangling string and `punycode` must be used to encode - // the characters. - - mangled += std::to_string (identifier.size ()); - + // The grammar for unicode identifier is contained in + // <undisambiguated-identifier>, right under the <identifier> one. If the + // identifier contains unicode values, then an extra "u" needs to be added to + // the mangling string and `punycode` must be used to encode the characters. 
+ tl::optional<Utf8String> uident_opt + = Utf8String::make_utf8_string (identifier); + rust_assert (uident_opt.has_value ()); + tl::optional<std::string> punycode_opt + = encode_punycode (uident_opt.value ()); + rust_assert (punycode_opt.has_value ()); + + bool is_ascii_ident = true; + for (auto c : uident_opt.value ().get_chars ()) + if (c.value > 127) + { + is_ascii_ident = false; + break; + } + + std::string punycode = punycode_opt.value (); + // remove tailing hyphen + if (punycode.back () == '-') + punycode.pop_back (); + // replace hyphens in punycode with underscores + std::replace (punycode.begin (), punycode.end (), '-', '_'); + + if (!is_ascii_ident) + mangled.append ("u"); + + mangled += std::to_string (punycode.size ()); // If the first character of the identifier is a digit or an underscore, we // add an extra underscore - if (identifier[0] == '_') - mangled.append ("_"); + if (punycode[0] == '_') + mangled += "_"; - mangled.append (identifier); + mangled += punycode; } static std::string @@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path) std::string mangled; // FIXME: Add real algorithm once all pieces are implemented - auto ty_prefix = v0_type_prefix (ty); v0_add_identifier (mangled, crate_name); v0_add_disambiguator (mangled, 62); + auto ty_prefix = v0_type_prefix (ty); rust_unreachable (); } diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 0798be3..1afcd01 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column) namespace selftest { // Checks if `src` has the same contents as the given characters -void -assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected) +static void +assert_source_content (Rust::InputSource &src, + const std::vector<uint32_t> &expected) { Rust::Codepoint src_char = src.next (); for (auto expected_char : expected) @@ -2559,15 +2560,16 @@ 
assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected) ASSERT_TRUE (src_char.is_eof ()); } -void -test_buffer_input_source (std::string str, std::vector<uint32_t> expected) +static void +test_buffer_input_source (std::string str, + const std::vector<uint32_t> &expected) { Rust::BufferInputSource source (str, 0); assert_source_content (source, expected); } -void -test_file_input_source (std::string str, std::vector<uint32_t> expected) +static void +test_file_input_source (std::string str, const std::vector<uint32_t> &expected) { FILE *tmpf = tmpfile (); // Moves to the first character diff --git a/gcc/rust/resolve/rust-ast-resolve-toplevel.h b/gcc/rust/resolve/rust-ast-resolve-toplevel.h index 12b7103..9ba8bdb 100644 --- a/gcc/rust/resolve/rust-ast-resolve-toplevel.h +++ b/gcc/rust/resolve/rust-ast-resolve-toplevel.h @@ -430,7 +430,7 @@ public: } else { - CrateNum found_crate_num = UNKNOWN_CREATENUM; + CrateNum found_crate_num = UNKNOWN_CRATENUM; bool found = mappings->lookup_crate_name (extern_crate.get_referenced_crate (), found_crate_num); diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 18e7ea2..abf1272 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -979,7 +979,7 @@ NodeId Session::load_extern_crate (const std::string &crate_name, location_t locus) { // has it already been loaded? 
- CrateNum found_crate_num = UNKNOWN_CREATENUM; + CrateNum found_crate_num = UNKNOWN_CRATENUM; bool found = mappings->lookup_crate_name (crate_name, found_crate_num); if (found) { diff --git a/gcc/rust/util/rust-canonical-path.h b/gcc/rust/util/rust-canonical-path.h index b168c93..2f28302 100644 --- a/gcc/rust/util/rust-canonical-path.h +++ b/gcc/rust/util/rust-canonical-path.h @@ -58,7 +58,7 @@ public: { rust_assert (!path.empty ()); return CanonicalPath ({std::pair<NodeId, std::string> (id, path)}, - UNKNOWN_CREATENUM); + UNKNOWN_CRATENUM); } static CanonicalPath @@ -88,7 +88,7 @@ public: static CanonicalPath create_empty () { - return CanonicalPath ({}, UNKNOWN_CREATENUM); + return CanonicalPath ({}, UNKNOWN_CRATENUM); } bool is_empty () const { return segs.size () == 0; } @@ -171,7 +171,7 @@ public: CrateNum get_crate_num () const { - rust_assert (crate_num != UNKNOWN_CREATENUM); + rust_assert (crate_num != UNKNOWN_CRATENUM); return crate_num; } diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc index cf907e9..62e8c7c 100644 --- a/gcc/rust/util/rust-hir-map.cc +++ b/gcc/rust/util/rust-hir-map.cc @@ -29,7 +29,7 @@ namespace Analysis { NodeMapping NodeMapping::get_error () { - return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID, + return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID, UNKNOWN_LOCAL_DEFID); } @@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1; static const HirId kDefaultCrateNumBegin = 0; Mappings::Mappings () - : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM), + : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM), hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin) { Analysis::NodeMapping node (0, 0, 0, 0); diff --git a/gcc/rust/util/rust-mapping-common.h b/gcc/rust/util/rust-mapping-common.h index d18dab5..93df863 100644 --- a/gcc/rust/util/rust-mapping-common.h +++ b/gcc/rust/util/rust-mapping-common.h @@ -61,7 +61,7 @@ 
struct DefId } }; -#define UNKNOWN_CREATENUM ((uint32_t) (0)) +#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX)) #define UNKNOWN_NODEID ((uint32_t) (0)) #define UNKNOWN_HIRID ((uint32_t) (0)) #define UNKNOWN_LOCAL_DEFID ((uint32_t) (0)) |