#include "rust-mangle.h"
#include "fnv-hash.h"
#include "rust-base62.h"

// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
static const std::string kMangledSymbolDelim = "E";
static const std::string kMangledGenericDelim = "$C$";
static const std::string kMangledSubstBegin = "$LT$";
static const std::string kMangledSubstEnd = "$GT$";
static const std::string kMangledSpace = "$u20$";
static const std::string kMangledRef = "$RF$";
static const std::string kMangledPtr = "$BP$";
static const std::string kMangledLeftSqParen = "$u5b$";	 // [
static const std::string kMangledRightSqParen = "$u5d$"; // ]
static const std::string kMangledLeftBrace = "$u7b$";	 // {
static const std::string kMangledRightBrace = "$u7d$";	 // }
static const std::string kQualPathBegin = "_" + kMangledSubstBegin;
static const std::string kMangledComma = "$C$";

namespace Rust {
namespace Compile {

Mangler::MangleVersion Mangler::version = MangleVersion::LEGACY;

// Mangle a single path segment following the legacy scheme: every special
// character is replaced by its `$...$` escape, `::` becomes `..`, and the
// result is prefixed with its own length in decimal.
//
// A ':' in NAME must always be immediately followed by another ':'; this is
// enforced with rust_assert.
static std::string
legacy_mangle_name (const std::string &name)
{
  // example
  //  <&T as core::fmt::Debug>::fmt:
  //  _ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17h6dac924c0051eef7E
  // replace all white space with $ and & with RF
  //
  // <example::Bar as example::A>::fooA:
  // _ZN43_$LT$example..Bar$u20$as$u20$example..A$GT$4fooA17hfc615fa76c7db7a0E:
  //
  // core::ptr::const_ptr::<impl *const T>::cast:
  // _ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$4cast17hb79f4617226f1d55E:
  //
  // core::ptr::const_ptr::<impl *const [T]>::as_ptr:
  // _ZN4core3ptr9const_ptr43_$LT$impl$u20$$BP$const$u20$$u5b$T$u5d$$GT$6as_ptr17he16e0dcd9473b04fE:
  //
  // example::Foo<T>::new:
  // _ZN7example12Foo$LT$T$GT$3new17h9a2aacb7fd783515E:
  //
  // <example::Identity as example::FnLike<&T,&T>>::call
  // _ZN74_$LT$example..Identity$u20$as$u20$example..FnLike$LT$$RF$T$C$$RF$T$GT$$GT$4call17ha9ee58935895acb3E

  std::string escaped;
  for (size_t pos = 0; pos < name.size (); pos++)
    {
      const char ch = name.at (pos);
      switch (ch)
	{
	case ' ':
	  escaped += kMangledSpace;
	  break;
	case '&':
	  escaped += kMangledRef;
	  break;
	case '<':
	  // A leading '<' opens a qualified path and gets an extra '_' prefix.
	  escaped += (pos == 0) ? kQualPathBegin : kMangledSubstBegin;
	  break;
	case '>':
	  escaped += kMangledSubstEnd;
	  break;
	case '*':
	  escaped += kMangledPtr;
	  break;
	case '[':
	  escaped += kMangledLeftSqParen;
	  break;
	case ']':
	  escaped += kMangledRightSqParen;
	  break;
	case '{':
	  escaped += kMangledLeftBrace;
	  break;
	case '}':
	  escaped += kMangledRightBrace;
	  break;
	case ',':
	  escaped += kMangledComma;
	  break;
	case ':':
	  // Path separators always come as "::"; consume both characters.
	  rust_assert (pos + 1 < name.size ());
	  rust_assert (name.at (pos + 1) == ':');
	  pos++;
	  escaped += "..";
	  break;
	default:
	  escaped.push_back (ch);
	  break;
	}
    }

  return std::to_string (escaped.size ()) + escaped;
}

// Concatenate the legacy mangling of every segment of PATH, in order.
static std::string
legacy_mangle_canonical_path (const Resolver::CanonicalPath &path)
{
  std::string mangled;
  for (size_t idx = 0; idx < path.size (); idx++)
    {
      auto &segment = path.get_seg_at (idx);
      mangled += legacy_mangle_name (segment.second);
    }
  return mangled;
}

// rustc uses a sip128 hash for legacy mangling, but an fnv 128 was quicker to
// implement for now
//
// Returns "h" followed by exactly 16 hexadecimal characters.
//
// NOTE: the format asks for at least 8 digits for each 64-bit half, so at
// least 16 characters are produced, and snprintf only keeps the first
// sizeof (hex) - 1 == 16 of them.  When `lo` needs all 16 digits, `hi` does
// not contribute to the result at all.  This is kept as-is on purpose:
// changing it would change every emitted legacy symbol name.
static std::string
legacy_hash (const std::string &fingerprint)
{
  Hash::FNV128 hasher;
  hasher.write (reinterpret_cast<const unsigned char *> (fingerprint.c_str ()),
		fingerprint.size ());

  uint64_t hi, lo;
  hasher.sum (&hi, &lo);

  char hex[16 + 1];
  memset (hex, 0, sizeof hex);
  snprintf (hex, sizeof hex, "%08" PRIx64 "%08" PRIx64, lo, hi);

  return "h" + std::string (hex, sizeof (hex) - 1);
}

// Return the v0 prefix for a tuple type: "u" for the unit type, and an empty
// string for every other tuple since they are not handled yet.
static std::string
v0_tuple_prefix (const TyTy::BaseType *ty)
{
  if (ty->is_unit ())
    return "u";

  // FIXME: ARTHUR: Add rest of algorithm
  return "";
}

// Return the v0 prefix for a numeric type, or an empty string when TY is not
// a known numeric type.  ISIZE and USIZE are recognised by their type kind,
// all the other types through their string representation.
static std::string
v0_numeric_prefix (const TyTy::BaseType *ty)
{
  static const std::map<std::string, std::string> num_prefixes = {
    {"[i8]", "a"},    {"[u8]", "h"},	{"[i16]", "s"}, {"[u16]", "t"},
    {"[i32]", "l"},   {"[u32]", "m"},	{"[i64]", "x"}, {"[u64]", "y"},
    {"[isize]", "i"}, {"[usize]", "j"}, {"[f32]", "f"}, {"[f64]", "d"},
  };

  auto ty_kind = ty->get_kind ();
  auto ty_str = ty->as_string ();

  // Special numeric types
  if (ty_kind == TyTy::TypeKind::ISIZE)
    return "i";
  else if (ty_kind == TyTy::TypeKind::USIZE)
    return "j";

  auto numeric_iter = num_prefixes.find (ty_str);
  if (numeric_iter != num_prefixes.end ())
    return numeric_iter->second;

  return "";
}

// Return the v0 prefix of the types that are encoded without any further
// payload (basic, placeholder, unit and numeric types), or an empty string
// when TY is not one of them.
static std::string
v0_simple_type_prefix (const TyTy::BaseType *ty)
{
  switch (ty->get_kind ())
    {
    case TyTy::TypeKind::BOOL:
      return "b";
    case TyTy::TypeKind::CHAR:
      return "c";
    case TyTy::TypeKind::STR:
      return "e";
    case TyTy::TypeKind::NEVER:
      return "z";

      // Placeholder types
    case TyTy::TypeKind::ERROR:	      // Fallthrough
    case TyTy::TypeKind::INFER:	      // Fallthrough
    case TyTy::TypeKind::PLACEHOLDER: // Fallthrough
    case TyTy::TypeKind::PARAM:
      // FIXME: TyTy::TypeKind::BOUND is also a valid variant in rustc
      return "p";

    case TyTy::TypeKind::TUPLE:
      return v0_tuple_prefix (ty);

    case TyTy::TypeKind::UINT:	// Fallthrough
    case TyTy::TypeKind::INT:	// Fallthrough
    case TyTy::TypeKind::FLOAT: // Fallthrough
    case TyTy::TypeKind::ISIZE: // Fallthrough
    case TyTy::TypeKind::USIZE: // Fallthrough
      return v0_numeric_prefix (ty);

    default:
      return "";
    }

  gcc_unreachable ();
}

// Add an underscore-terminated base62 integer to the mangling string.
// This corresponds to the `<base-62-number>` grammar in the v0 mangling RFC:
//  - 0 is encoded as "_"
//  - any other value is encoded as itself minus one in base 62, followed by
//  "_"
static void
v0_add_integer_62 (std::string &mangled, uint64_t x)
{
  if (x > 0)
    mangled.append (base62_integer (x - 1));

  mangled.append ("_");
}

// Add a tag-prefixed base62 integer to the mangling string when the
// integer is greater than 0:
//  - 0 is encoded as "" (nothing)
//  - any other value is encoded as <tag> + v0_add_integer_62(itself), that is
//  <tag> + base62(itself - 1) + '_'
static void
v0_add_opt_integer_62 (std::string &mangled, const std::string &tag,
		       uint64_t x)
{
  if (x == 0)
    return;

  mangled.append (tag);
  v0_add_integer_62 (mangled, x);
}

// Add a disambiguator to the mangling string: an optional base62 integer
// tagged with "s" (nothing is emitted when DIS is 0).
static void
v0_add_disambiguator (std::string &mangled, uint64_t dis)
{
  v0_add_opt_integer_62 (mangled, "s", dis);
}

// Add an identifier to the mangled string. This corresponds to the
// `<undisambiguated-identifier>` grammar in the v0 mangling RFC.
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
  // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
  // create mangling for unicode values for now. However, this is handled
  // by the v0 mangling scheme. The grammar for unicode identifier is
  // contained in <undisambiguated-identifier>, right under the <identifier>
  // one. If the identifier contains unicode values, then an extra "u" needs
  // to be added to the mangling string and `punycode` must be used to encode
  // the characters.

  mangled += std::to_string (identifier.size ());

  // If the first character of the identifier is a digit or an underscore, we
  // add an extra underscore. Without it, a leading digit would be read as
  // part of the length that was just emitted.
  const char first = identifier.empty () ? '\0' : identifier.front ();
  const bool starts_with_digit = first >= '0' && first <= '9';
  if (first == '_' || starts_with_digit)
    mangled.append ("_");

  mangled.append (identifier);
}

// Return the v0 prefix for TY. Only the simple types are supported for now:
// any other type hits gcc_unreachable.
static std::string
v0_type_prefix (const TyTy::BaseType *ty)
{
  auto ty_prefix = v0_simple_type_prefix (ty);
  if (!ty_prefix.empty ())
    return ty_prefix;

  // FIXME: We need to fetch more type prefixes
  gcc_unreachable ();
}

// Build the legacy symbol name of an item:
//   "_ZN" + mangled path segments + mangled hash of the type string + "E"
static std::string
legacy_mangle_item (const TyTy::BaseType *ty,
		    const Resolver::CanonicalPath &path)
{
  const std::string hash = legacy_hash (ty->as_string ());
  const std::string hash_sig = legacy_mangle_name (hash);

  return kMangledSymbolPrefix + legacy_mangle_canonical_path (path) + hash_sig
	 + kMangledSymbolDelim;
}

// Build the v0 symbol name of an item.
//
// The algorithm is not complete yet: the pieces implemented so far are
// exercised and the function then ends in gcc_unreachable, so it never
// returns a value.
static std::string
v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
{
  // we can get this from the canonical_path
  auto mappings = Analysis::Mappings::get ();
  std::string crate_name;
  bool ok = mappings->get_crate_name (path.get_crate_num (), crate_name);
  rust_assert (ok);

  std::string mangled;
  // FIXME: Add real algorithm once all pieces are implemented
  auto ty_prefix = v0_type_prefix (ty);
  v0_add_identifier (mangled, crate_name);
  v0_add_disambiguator (mangled, 62);

  gcc_unreachable ();
}

// Mangle the item of type TY located at PATH using the scheme selected by
// Mangler::version.
std::string
Mangler::mangle_item (const TyTy::BaseType *ty,
		      const Resolver::CanonicalPath &path) const
{
  switch (version)
    {
    case Mangler::MangleVersion::LEGACY:
      return legacy_mangle_item (ty, path);
    case Mangler::MangleVersion::V0:
      return v0_mangle_item (ty, path);
    default:
      gcc_unreachable ();
    }
}

} // namespace Compile
} // namespace Rust