diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-10-25 22:18:08 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2018-10-25 22:18:08 +0000 |
commit | 34489eb2af3bbb7be101bc838615cf4a4dc6828d (patch) | |
tree | 0dbda78980d4553fdaeee92ca666d72a2ab95213 /gcc/go/gofrontend/go-encode-id.cc | |
parent | fc756f9f460d5f0ec73a72128645fdb39fec77a0 (diff) | |
download | gcc-34489eb2af3bbb7be101bc838615cf4a4dc6828d.zip gcc-34489eb2af3bbb7be101bc838615cf4a4dc6828d.tar.gz gcc-34489eb2af3bbb7be101bc838615cf4a4dc6828d.tar.bz2 |
compiler: improve name mangling for package paths
The current implementation of Gogo::pkgpath_for_symbol was written in
a way that allowed two distinct package paths to map to the same
symbol, which could cause collisions at link-time or compile-time.
Switch to a better mangling scheme to ensure that we get a unique
package path symbol for each package. In the new scheme, instead of having
separate mangling schemes for identifiers and package paths, the
main identifier mangler ("go_encode_id") now handles mangling of
both package path characters and identifier characters.
The new mangling scheme is more intrusive: "foo/bar.Baz" is mangled as
"foo..z2fbar.Baz" instead of "foo_bar.Baz". To mitigate this, this
patch also adds a demangling capability so that function names
returned from runtime.CallersFrames are converted back to their
original unmangled form.
Changing the pkgpath_for_symbol scheme requires updating a number of
//go:linkname directives and C "__asm__" directives to match the new
scheme, as well as updating the 'gotest' driver (which makes
assumptions about the correct mapping from pkgpath symbol to package
name).
Fixes golang/go#27534.
Reviewed-on: https://go-review.googlesource.com/c/135455
From-SVN: r265510
Diffstat (limited to 'gcc/go/gofrontend/go-encode-id.cc')
-rw-r--r-- | gcc/go/gofrontend/go-encode-id.cc | 93 |
1 files changed, 82 insertions, 11 deletions
diff --git a/gcc/go/gofrontend/go-encode-id.cc b/gcc/go/gofrontend/go-encode-id.cc index e130ba1..7c7aa13 100644 --- a/gcc/go/gofrontend/go-encode-id.cc +++ b/gcc/go/gofrontend/go-encode-id.cc @@ -1,4 +1,4 @@ -// go-encode-id.cc -- Go identifier encoding hooks +// go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style @@ -82,10 +82,10 @@ fetch_utf8_char(const char* p, unsigned int* pc) return len; } -// Encode an identifier using ASCII characters. The encoding is -// described in detail near the end of the long comment at the start -// of names.cc. Short version: translate all non-ASCII-alphanumeric -// characters into ..uXXXX or ..UXXXXXXXX. +// Encode an identifier using assembler-friendly characters. The encoding is +// described in detail near the end of the long comment at the start of +// names.cc. Short version: translate all non-ASCII-alphanumeric characters into +// ..uXXXX or ..UXXXXXXXX, translate ASCII non-alphanumerics into ".zXX". std::string go_encode_id(const std::string &id) @@ -97,7 +97,8 @@ go_encode_id(const std::string &id) } // The encoding is only unambiguous if the input string does not - // contain ..u or ..U. + // contain ..z, ..u or ..U. + go_assert(id.find("..z") == std::string::npos); go_assert(id.find("..u") == std::string::npos); go_assert(id.find("..U") == std::string::npos); @@ -116,17 +117,16 @@ go_encode_id(const std::string &id) { unsigned int c; size_t len = fetch_utf8_char(p, &c); - if (len == 1) + if (len == 1 && !char_needs_encoding(c)) { - // At this point we should only be seeing alphanumerics or - // underscore or dot. 
- go_assert(!char_needs_encoding(c)); ret += c; } else { char buf[16]; - if (c < 0x10000) + if (len == 1) + snprintf(buf, sizeof buf, "..z%02x", c); + else if (c < 0x10000) snprintf(buf, sizeof buf, "..u%04x", c); else snprintf(buf, sizeof buf, "..U%08x", c); @@ -143,6 +143,77 @@ go_encode_id(const std::string &id) return ret; } +// Convert a hex digit string to a unicode codepoint. No checking +// to insure that the hex digit is meaningful. + +static unsigned +hex_digits_to_unicode_codepoint(const char *digits, unsigned ndig) +{ + unsigned result = 0; + for (unsigned i = 0; i < ndig; ++i) { + result <<= 4; + result |= Lex::hex_val(digits[i]); + } + return result; +} + +// Decode/demangle a mangled string produced by go_encode_id(). Returns +// empty string if demangling process fails in some way. At the moment +// this routine is unused; there is an equivalent routine in the runtime +// used for demangling symbols appearing in stack traces. + +std::string +go_decode_id(const std::string &encoded) +{ + std::string ret; + const char* p = encoded.c_str(); + const char* pend = p + encoded.length(); + const Location loc = Linemap::predeclared_location(); + + // Special case for initial "_", in case it was introduced + // as a way to prevent encoded symbol starting with ".". 
+ if (*p == '_' && (strncmp(p+1, "..u", 3) == 0 || strncmp(p+1, "..U", 3) == 0)) + p++; + + while (p < pend) + { + if (strncmp(p, "..z", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 2) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 2); + Lex::append_char(rune, true, &ret, loc); + p += 5; + } + else if (strncmp(p, "..u", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 4) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 4); + Lex::append_char(rune, true, &ret, loc); + p += 7; + } + else if (strncmp(p, "..U", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 8) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 8); + Lex::append_char(rune, true, &ret, loc); + p += 11; + } + else + { + ret += *p; + p += 1; + } + } + + return ret; +} + std::string go_selectively_encode_id(const std::string &id) { |