aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/backend/rust-mangle.cc
diff options
context:
space:
mode:
authorPhilip Herron <philip.herron@embecosm.com>2022-10-21 14:01:04 +0200
committerArthur Cohen <arthur.cohen@embecosm.com>2022-12-13 14:00:07 +0100
commit15f04af347e3b65f436808077cbac4fa566019f9 (patch)
tree32ec5a4c2ca65044848cb37137c1074ed63f5401 /gcc/rust/backend/rust-mangle.cc
parent509e4c32c6a80ede6c6dda0f4cfc96f94d24c4d6 (diff)
downloadgcc-15f04af347e3b65f436808077cbac4fa566019f9.zip
gcc-15f04af347e3b65f436808077cbac4fa566019f9.tar.gz
gcc-15f04af347e3b65f436808077cbac4fa566019f9.tar.bz2
gccrs: Add base for HIR to GCC GENERIC lowering
This pass walks the HIR crate and turns them into GCC `tree`s. We do not have any Rust specific tree's. We are slowly removing the backend abstraction which was ported over from gccgo in favour of using `tree`s directly. gcc/rust/ * backend/rust-builtins.h: New. * backend/rust-compile-base.cc: New. * backend/rust-compile-base.h: New. * backend/rust-mangle.cc: New. * backend/rust-mangle.h: New. * backend/rust-tree.cc: New. * backend/rust-tree.h: New. * rust-backend.h: New. * rust-gcc.cc: New. Co-authored-by: David Faust <david.faust@oracle.com>
Diffstat (limited to 'gcc/rust/backend/rust-mangle.cc')
-rw-r--r--gcc/rust/backend/rust-mangle.cc307
1 files changed, 307 insertions, 0 deletions
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
new file mode 100644
index 0000000..4d20207
--- /dev/null
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -0,0 +1,307 @@
+#include "rust-mangle.h"
+#include "fnv-hash.h"
+#include "rust-base62.h"
+
+// FIXME: Rename those to legacy_*
+static const std::string kMangledSymbolPrefix = "_ZN";
+static const std::string kMangledSymbolDelim = "E";
+static const std::string kMangledGenericDelim = "$C$";
+static const std::string kMangledSubstBegin = "$LT$";
+static const std::string kMangledSubstEnd = "$GT$";
+static const std::string kMangledSpace = "$u20$";
+static const std::string kMangledRef = "$RF$";
+static const std::string kMangledPtr = "$BP$";
+static const std::string kMangledLeftSqParen = "$u5b$"; // [
+static const std::string kMangledRightSqParen = "$u5d$"; // ]
+static const std::string kQualPathBegin = "_" + kMangledSubstBegin;
+static const std::string kMangledComma = "$C$";
+
+namespace Rust {
+namespace Compile {
+
+Mangler::MangleVersion Mangler::version = MangleVersion::LEGACY;
+
+static std::string
+legacy_mangle_name (const std::string &name)
+{
+ // example
+ // <&T as core::fmt::Debug>::fmt:
+ // _ZN42_$LT$$RF$T$u20$as$u20$core..fmt..Debug$GT$3fmt17h6dac924c0051eef7E
+ // replace all white space with $ and & with RF
+ //
+ // <example::Bar as example::A>::fooA:
+ // _ZN43_$LT$example..Bar$u20$as$u20$example..A$GT$4fooA17hfc615fa76c7db7a0E:
+ //
+ // core::ptr::const_ptr::<impl *const T>::cast:
+ // _ZN4core3ptr9const_ptr33_$LT$impl$u20$$BP$const$u20$T$GT$4cast17hb79f4617226f1d55E:
+ //
+ // core::ptr::const_ptr::<impl *const [T]>::as_ptr:
+ // _ZN4core3ptr9const_ptr43_$LT$impl$u20$$BP$const$u20$$u5b$T$u5d$$GT$6as_ptr17he16e0dcd9473b04fE:
+ //
+ // example::Foo<T>::new:
+ // _ZN7example12Foo$LT$T$GT$3new17h9a2aacb7fd783515E:
+ //
+ // <example::Identity as example::FnLike<&T,&T>>::call
+ // _ZN74_$LT$example..Identity$u20$as$u20$example..FnLike$LT$$RF$T$C$$RF$T$GT$$GT$4call17ha9ee58935895acb3E
+
+ std::string buffer;
+ for (size_t i = 0; i < name.size (); i++)
+ {
+ std::string m;
+ char c = name.at (i);
+
+ if (c == ' ')
+ m = kMangledSpace;
+ else if (c == '&')
+ m = kMangledRef;
+ else if (i == 0 && c == '<')
+ m = kQualPathBegin;
+ else if (c == '<')
+ m = kMangledSubstBegin;
+ else if (c == '>')
+ m = kMangledSubstEnd;
+ else if (c == '*')
+ m = kMangledPtr;
+ else if (c == '[')
+ m = kMangledLeftSqParen;
+ else if (c == ']')
+ m = kMangledRightSqParen;
+ else if (c == ',')
+ m = kMangledComma;
+ else if (c == ':')
+ {
+ rust_assert (i + 1 < name.size ());
+ rust_assert (name.at (i + 1) == ':');
+ i++;
+ m = "..";
+ }
+ else
+ m.push_back (c);
+
+ buffer += m;
+ }
+
+ return std::to_string (buffer.size ()) + buffer;
+}
+
+static std::string
+legacy_mangle_canonical_path (const Resolver::CanonicalPath &path)
+{
+ std::string buffer;
+ for (size_t i = 0; i < path.size (); i++)
+ {
+ auto &seg = path.get_seg_at (i);
+ buffer += legacy_mangle_name (seg.second);
+ }
+ return buffer;
+}
+
+// rustc uses a sip128 hash for legacy mangling, but an fnv 128 was quicker to
+// implement for now
+static std::string
+legacy_hash (const std::string &fingerprint)
+{
+ Hash::FNV128 hasher;
+ hasher.write ((const unsigned char *) fingerprint.c_str (),
+ fingerprint.size ());
+
+ uint64_t hi, lo;
+ hasher.sum (&hi, &lo);
+
+ char hex[16 + 1];
+ memset (hex, 0, sizeof hex);
+ snprintf (hex, sizeof hex, "%08" PRIx64 "%08" PRIx64, lo, hi);
+
+ return "h" + std::string (hex, sizeof (hex) - 1);
+}
+
+static std::string
+v0_tuple_prefix (const TyTy::BaseType *ty)
+{
+ if (ty->is_unit ())
+ return "u";
+
+ // FIXME: ARTHUR: Add rest of algorithm
+ return "";
+}
+
+static std::string
+v0_numeric_prefix (const TyTy::BaseType *ty)
+{
+ static const std::map<std::string, std::string> num_prefixes = {
+ {"[i8]", "a"}, {"[u8]", "h"}, {"[i16]", "s"}, {"[u16]", "t"},
+ {"[i32]", "l"}, {"[u32]", "m"}, {"[i64]", "x"}, {"[u64]", "y"},
+ {"[isize]", "i"}, {"[usize]", "j"}, {"[f32]", "f"}, {"[f64]", "d"},
+ };
+
+ auto ty_kind = ty->get_kind ();
+ auto ty_str = ty->as_string ();
+ auto numeric_iter = num_prefixes.end ();
+
+ // Special numeric types
+ if (ty_kind == TyTy::TypeKind::ISIZE)
+ return "i";
+ else if (ty_kind == TyTy::TypeKind::USIZE)
+ return "j";
+
+ numeric_iter = num_prefixes.find (ty_str);
+ if (numeric_iter != num_prefixes.end ())
+ return numeric_iter->second;
+
+ return "";
+}
+
+static std::string
+v0_simple_type_prefix (const TyTy::BaseType *ty)
+{
+ switch (ty->get_kind ())
+ {
+ case TyTy::TypeKind::BOOL:
+ return "b";
+ case TyTy::TypeKind::CHAR:
+ return "c";
+ case TyTy::TypeKind::STR:
+ return "e";
+ case TyTy::TypeKind::NEVER:
+ return "z";
+
+ // Placeholder types
+ case TyTy::TypeKind::ERROR: // Fallthrough
+ case TyTy::TypeKind::INFER: // Fallthrough
+ case TyTy::TypeKind::PLACEHOLDER: // Fallthrough
+ case TyTy::TypeKind::PARAM:
+ // FIXME: TyTy::TypeKind::BOUND is also a valid variant in rustc
+ return "p";
+
+ case TyTy::TypeKind::TUPLE:
+ return v0_tuple_prefix (ty);
+
+ case TyTy::TypeKind::UINT: // Fallthrough
+ case TyTy::TypeKind::INT: // Fallthrough
+ case TyTy::TypeKind::FLOAT: // Fallthrough
+ case TyTy::TypeKind::ISIZE: // Fallthrough
+ case TyTy::TypeKind::USIZE: // Fallthrough
+ return v0_numeric_prefix (ty);
+
+ default:
+ return "";
+ }
+
+ gcc_unreachable ();
+}
+
+// Add an underscore-terminated base62 integer to the mangling string.
+// This corresponds to the `<base-62-number>` grammar in the v0 mangling RFC:
+// - 0 is encoded as "_"
+// - any other value is encoded as itself minus one in base 62, followed by
+// "_"
+static void
+v0_add_integer_62 (std::string &mangled, uint64_t x)
+{
+ if (x > 0)
+ mangled.append (base62_integer (x - 1));
+
+ mangled.append ("_");
+}
+
+// Add a tag-prefixed base62 integer to the mangling string when the
+// integer is greater than 0:
+// - 0 is encoded as "" (nothing)
+// - any other value is encoded as <tag> + v0_add_integer_62(itself), that is
+// <tag> + base62(itself - 1) + '_'
+static void
+v0_add_opt_integer_62 (std::string &mangled, std::string tag, uint64_t x)
+{
+ if (x > 0)
+ {
+ mangled.append (tag);
+ v0_add_integer_62 (mangled, x);
+ }
+}
+
+static void
+v0_add_disambiguator (std::string &mangled, uint64_t dis)
+{
+ v0_add_opt_integer_62 (mangled, "s", dis);
+}
+
+// Add an identifier to the mangled string. This corresponds to the
+// `<identifier>` grammar in the v0 mangling RFC.
+static void
+v0_add_identifier (std::string &mangled, const std::string &identifier)
+{
+ // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
+ // create mangling for unicode values for now. However, this is handled
+ // by the v0 mangling scheme. The grammar for unicode identifier is
+ // contained in <undisambiguated-identifier>, right under the <identifier>
+ // one. If the identifier contains unicode values, then an extra "u" needs
+ // to be added to the mangling string and `punycode` must be used to encode
+ // the characters.
+
+ mangled += std::to_string (identifier.size ());
+
+ // If the first character of the identifier is a digit or an underscore, we
+ // add an extra underscore
+ if (identifier[0] == '_')
+ mangled.append ("_");
+
+ mangled.append (identifier);
+}
+
+static std::string
+v0_type_prefix (const TyTy::BaseType *ty)
+{
+ auto ty_prefix = v0_simple_type_prefix (ty);
+ if (!ty_prefix.empty ())
+ return ty_prefix;
+
+ // FIXME: We need to fetch more type prefixes
+ gcc_unreachable ();
+}
+
+static std::string
+legacy_mangle_item (const TyTy::BaseType *ty,
+ const Resolver::CanonicalPath &path)
+{
+ const std::string hash = legacy_hash (ty->as_string ());
+ const std::string hash_sig = legacy_mangle_name (hash);
+
+ return kMangledSymbolPrefix + legacy_mangle_canonical_path (path) + hash_sig
+ + kMangledSymbolDelim;
+}
+
+static std::string
+v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
+{
+ // we can get this from the canonical_path
+ auto mappings = Analysis::Mappings::get ();
+ std::string crate_name;
+ bool ok = mappings->get_crate_name (path.get_crate_num (), crate_name);
+ rust_assert (ok);
+
+ std::string mangled;
+ // FIXME: Add real algorithm once all pieces are implemented
+ auto ty_prefix = v0_type_prefix (ty);
+ v0_add_identifier (mangled, crate_name);
+ v0_add_disambiguator (mangled, 62);
+
+ gcc_unreachable ();
+}
+
+std::string
+Mangler::mangle_item (const TyTy::BaseType *ty,
+ const Resolver::CanonicalPath &path) const
+{
+ switch (version)
+ {
+ case Mangler::MangleVersion::LEGACY:
+ return legacy_mangle_item (ty, path);
+ case Mangler::MangleVersion::V0:
+ return v0_mangle_item (ty, path);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+} // namespace Compile
+} // namespace Rust