From 34489eb2af3bbb7be101bc838615cf4a4dc6828d Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 25 Oct 2018 22:18:08 +0000 Subject: compiler: improve name mangling for packpaths The current implementation of Gogo::pkgpath_for_symbol was written in a way that allowed two distinct package paths to map to the same symbol, which could cause collisions at link-time or compile-time. Switch to a better mangling scheme to ensure that we get a unique packagepath symbol for each package. In the new scheme, instead of having separate mangling schemes for identifiers and package paths, the main identifier mangler ("go_encode_id") now handles mangling of both packagepath characters and identifier characters. The new mangling scheme is more intrusive: "foo/bar.Baz" is mangled as "foo..z2fbar.Baz" instead of "foo_bar.Baz". To mitigate this, this patch also adds a demangling capability so that function names returned from runtime.CallersFrames are converted back to their original unmangled form. Changing the pkgpath_for_symbol scheme requires updating a number of //go:linkname directives and C "__asm__" directives to match the new scheme, as well as updating the 'gotest' driver (which makes assumptions about the correct mapping from pkgpath symbol to package name). Fixes golang/go#27534. 
Reviewed-on: https://go-review.googlesource.com/c/135455 From-SVN: r265510 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/go-encode-id.cc | 93 ++++++++++++++++++++++++++++++++++----- gcc/go/gofrontend/go-encode-id.h | 5 +++ gcc/go/gofrontend/gogo.cc | 45 ++++++++++++------- gcc/go/gofrontend/gogo.h | 24 ++-------- gcc/go/gofrontend/lex.h | 7 +-- gcc/go/gofrontend/names.cc | 53 ++++++++++++---------- 7 files changed, 153 insertions(+), 76 deletions(-) (limited to 'gcc/go') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index a42d0b9..188ada0 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -771668f7137e560b2ef32c8799e5f8b4c4ee14a9 +407a59831ea4fbfe03f0887c40497b73939e7c44 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/go-encode-id.cc b/gcc/go/gofrontend/go-encode-id.cc index e130ba1..7c7aa13 100644 --- a/gcc/go/gofrontend/go-encode-id.cc +++ b/gcc/go/gofrontend/go-encode-id.cc @@ -1,4 +1,4 @@ -// go-encode-id.cc -- Go identifier encoding hooks +// go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks // Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style @@ -82,10 +82,10 @@ fetch_utf8_char(const char* p, unsigned int* pc) return len; } -// Encode an identifier using ASCII characters. The encoding is -// described in detail near the end of the long comment at the start -// of names.cc. Short version: translate all non-ASCII-alphanumeric -// characters into ..uXXXX or ..UXXXXXXXX. +// Encode an identifier using assembler-friendly characters. The encoding is +// described in detail near the end of the long comment at the start of +// names.cc. Short version: translate all non-ASCII-alphanumeric characters into +// ..uXXXX or ..UXXXXXXXX, translate ASCII non-alphanumerics into ".zXX". 
std::string go_encode_id(const std::string &id) @@ -97,7 +97,8 @@ go_encode_id(const std::string &id) } // The encoding is only unambiguous if the input string does not - // contain ..u or ..U. + // contain ..z, ..u or ..U. + go_assert(id.find("..z") == std::string::npos); go_assert(id.find("..u") == std::string::npos); go_assert(id.find("..U") == std::string::npos); @@ -116,17 +117,16 @@ go_encode_id(const std::string &id) { unsigned int c; size_t len = fetch_utf8_char(p, &c); - if (len == 1) + if (len == 1 && !char_needs_encoding(c)) { - // At this point we should only be seeing alphanumerics or - // underscore or dot. - go_assert(!char_needs_encoding(c)); ret += c; } else { char buf[16]; - if (c < 0x10000) + if (len == 1) + snprintf(buf, sizeof buf, "..z%02x", c); + else if (c < 0x10000) snprintf(buf, sizeof buf, "..u%04x", c); else snprintf(buf, sizeof buf, "..U%08x", c); @@ -143,6 +143,77 @@ go_encode_id(const std::string &id) return ret; } +// Convert a hex digit string to a unicode codepoint. No checking +// to insure that the hex digit is meaningful. + +static unsigned +hex_digits_to_unicode_codepoint(const char *digits, unsigned ndig) +{ + unsigned result = 0; + for (unsigned i = 0; i < ndig; ++i) { + result <<= 4; + result |= Lex::hex_val(digits[i]); + } + return result; +} + +// Decode/demangle a mangled string produced by go_encode_id(). Returns +// empty string if demangling process fails in some way. At the moment +// this routine is unused; there is an equivalent routine in the runtime +// used for demangling symbols appearing in stack traces. + +std::string +go_decode_id(const std::string &encoded) +{ + std::string ret; + const char* p = encoded.c_str(); + const char* pend = p + encoded.length(); + const Location loc = Linemap::predeclared_location(); + + // Special case for initial "_", in case it was introduced + // as a way to prevent encoded symbol starting with ".". 
+ if (*p == '_' && (strncmp(p+1, "..u", 3) == 0 || strncmp(p+1, "..U", 3) == 0)) + p++; + + while (p < pend) + { + if (strncmp(p, "..z", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 2) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 2); + Lex::append_char(rune, true, &ret, loc); + p += 5; + } + else if (strncmp(p, "..u", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 4) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 4); + Lex::append_char(rune, true, &ret, loc); + p += 7; + } + else if (strncmp(p, "..U", 3) == 0) + { + const char* digits = p+3; + if (strlen(digits) < 8) + return ""; + unsigned rune = hex_digits_to_unicode_codepoint(digits, 8); + Lex::append_char(rune, true, &ret, loc); + p += 11; + } + else + { + ret += *p; + p += 1; + } + } + + return ret; +} + std::string go_selectively_encode_id(const std::string &id) { diff --git a/gcc/go/gofrontend/go-encode-id.h b/gcc/go/gofrontend/go-encode-id.h index ec81b63..70126ba 100644 --- a/gcc/go/gofrontend/go-encode-id.h +++ b/gcc/go/gofrontend/go-encode-id.h @@ -20,6 +20,11 @@ go_id_needs_encoding(const std::string& str); extern std::string go_encode_id(const std::string &id); +// Decodes an encoded ID, returning the original string handed off to +// go_encode_id(). +extern std::string +go_decode_id(const std::string &id); + // Returns the empty string if the specified name needs encoding, // otherwise invokes go_encode_id on the name and returns the result. extern std::string diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc index 2472245e..70af627 100644 --- a/gcc/go/gofrontend/gogo.cc +++ b/gcc/go/gofrontend/gogo.cc @@ -256,26 +256,11 @@ Gogo::Gogo(Backend* backend, Linemap* linemap, int, int pointer_size) this->globals_->add_function_declaration("delete", NULL, delete_type, loc); } -// Convert a pkgpath into a string suitable for a symbol. Note that -// this transformation is convenient but imperfect. 
A -fgo-pkgpath -// option of a/b_c will conflict with a -fgo-pkgpath option of a_b/c, -// possibly leading to link time errors. - std::string Gogo::pkgpath_for_symbol(const std::string& pkgpath) { - std::string s = pkgpath; - for (size_t i = 0; i < s.length(); ++i) - { - char c = s[i]; - if ((c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9')) - ; - else - s[i] = '_'; - } - return s; + go_assert(!pkgpath.empty()); + return go_encode_id(pkgpath); } // Get the package path to use for type reflection data. This should @@ -319,6 +304,32 @@ Gogo::set_prefix(const std::string& arg) this->prefix_from_option_ = true; } +// Given a name which may or may not have been hidden, append the +// appropriate version of the name to the result string. Take care +// to avoid creating a sequence that will be rejected by go_encode_id +// (avoid ..u, ..U, ..z). +void +Gogo::append_possibly_hidden_name(std::string *result, const std::string& name) +{ + // FIXME: This adds in pkgpath twice for hidden symbols, which is + // less than ideal. + if (!Gogo::is_hidden_name(name)) + (*result) += name; + else + { + std::string n = "."; + std::string pkgpath = Gogo::hidden_name_pkgpath(name); + char lastR = result->at(result->length() - 1); + char firstP = pkgpath.at(0); + if (lastR == '.' && (firstP == 'u' || firstP == 'U' || firstP == 'z')) + n = "_."; + n.append(pkgpath); + n.append(1, '.'); + n.append(Gogo::unpack_hidden_name(name)); + (*result) += n; + } +} + // Munge name for use in an error message. std::string diff --git a/gcc/go/gofrontend/gogo.h b/gcc/go/gofrontend/gogo.h index 9c469ca..48359eb 100644 --- a/gcc/go/gofrontend/gogo.h +++ b/gcc/go/gofrontend/gogo.h @@ -199,26 +199,10 @@ class Gogo return name.substr(1, name.rfind('.') - 1); } - // Given a name which may or may not have been hidden, return the - // name to use within a mangled symbol name. 
- static std::string - mangle_possibly_hidden_name(const std::string& name) - { - // FIXME: This adds in pkgpath twice for hidden symbols, which is - // less than ideal. - std::string n; - if (!Gogo::is_hidden_name(name)) - n = name; - else - { - n = "."; - std::string pkgpath = Gogo::hidden_name_pkgpath(name); - n.append(Gogo::pkgpath_for_symbol(pkgpath)); - n.append(1, '.'); - n.append(Gogo::unpack_hidden_name(name)); - } - return n; - } + // Given a name which may or may not have been hidden, append the + // appropriate version of the name to the result string. + static void + append_possibly_hidden_name(std::string *result, const std::string& name); // Given a name which may or may not have been hidden, return the // name to use in an error message. diff --git a/gcc/go/gofrontend/lex.h b/gcc/go/gofrontend/lex.h index cf3de8d..75e37f8 100644 --- a/gcc/go/gofrontend/lex.h +++ b/gcc/go/gofrontend/lex.h @@ -440,6 +440,10 @@ class Lex static bool is_unicode_space(unsigned int c); + // Convert the specified hex char into an unsigned integer value. + static unsigned + hex_val(char c); + private: ssize_t get_line(); @@ -462,9 +466,6 @@ class Lex octal_value(char c) { return c - '0'; } - static unsigned - hex_val(char c); - Token make_invalid_token() { return Token::make_invalid_token(this->location()); } diff --git a/gcc/go/gofrontend/names.cc b/gcc/go/gofrontend/names.cc index 2e36a1d..d9ae5910 100644 --- a/gcc/go/gofrontend/names.cc +++ b/gcc/go/gofrontend/names.cc @@ -33,7 +33,7 @@ // variable, is simply "PKGPATH.NAME". Note that NAME is not the // packed form used for the "hidden" name internally in the compiler; // it is the name that appears in the source code. PKGPATH is the -// -fgo-pkgpath option as adjusted by Gogo::pkgpath_for_symbol. Note +// -fgo-pkgpath option as adjusted by Gogo::pkgpath_for_symbol. Note // that PKGPATH can not contain a dot and neither can NAME. Also, // NAME may not begin with a digit. 
NAME may require further encoding // for non-ASCII characters as described below, but until that @@ -188,12 +188,17 @@ // encoding unambiguous, we introduce it with two consecutive dots. // This is followed by the letter u and four hex digits or the letter // U and eight digits, just as in the language only using ..u and ..U -// instead of \u and \U. Since before this encoding names can never -// contain consecutive dots followed by 'u' or 'U', and after this -// encoding "..u" and "..U" are followed by a known number of +// instead of \u and \U. The compiler also produces identifiers that +// are qualified by package path, which means that there may also be ASCII +// characters that are not assembler-friendly (ex: '=', '/'). The encoding +// scheme translates such characters into the "..zNN" where NN is the +// hex value for the character. Since before this encoding names can never +// contain consecutive dots followed by 'z', 'u' or 'U', and after this +// encoding "..z", "..u" and "..U" are followed by a known number of // characters, this is unambiguous. // // Demangling these names is straightforward: +// - replace ..zXX with an ASCII character // - replace ..uXXXX with a unicode character // - replace ..UXXXXXXXX with a unicode character // - replace .D, where D is a digit, with the character from the above @@ -215,9 +220,9 @@ Gogo::function_asm_name(const std::string& go_name, const Package* package, if (rtype != NULL) ret = rtype->deref()->mangled_name(this); else if (package == NULL) - ret = this->pkgpath_symbol(); + ret = this->pkgpath(); else - ret = package->pkgpath_symbol(); + ret = package->pkgpath(); ret.push_back('.'); // Check for special names that will break if we use // Gogo::unpack_hidden_name. @@ -268,7 +273,7 @@ Gogo::stub_method_name(const Package* package, const std::string& mname) // We are creating a stub method for an unexported method of an // imported embedded type. We need to disambiguate the method name. 
- std::string ret = this->pkgpath_symbol_for_package(mpkgpath); + std::string ret = mpkgpath; ret.push_back('.'); ret.append(Gogo::unpack_hidden_name(mname)); ret.append("..stub"); @@ -302,9 +307,9 @@ Gogo::global_var_asm_name(const std::string& go_name, const Package* package) { std::string ret; if (package == NULL) - ret = this->pkgpath_symbol(); + ret = this->pkgpath(); else - ret = package->pkgpath_symbol(); + ret = package->pkgpath(); ret.append(1, '.'); ret.append(Gogo::unpack_hidden_name(go_name)); return go_encode_id(ret); @@ -341,7 +346,7 @@ Gogo::thunk_name() char thunk_name[50]; snprintf(thunk_name, sizeof thunk_name, "..thunk%d", thunk_count); ++thunk_count; - std::string ret = this->pkgpath_symbol(); + std::string ret = this->pkgpath(); return ret + thunk_name; } @@ -370,7 +375,7 @@ Gogo::init_function_name() char buf[30]; snprintf(buf, sizeof buf, "..init%d", init_count); ++init_count; - std::string ret = this->pkgpath_symbol(); + std::string ret = this->pkgpath(); return ret + buf; } @@ -726,7 +731,7 @@ Struct_type::do_mangled_name(Gogo* gogo, std::string* ret) const if (!p->is_anonymous()) { - ret->append(Gogo::mangle_possibly_hidden_name(p->field_name())); + Gogo::append_possibly_hidden_name(ret, p->field_name()); ret->push_back(' '); } @@ -827,7 +832,7 @@ Interface_type::do_mangled_name(Gogo* gogo, std::string* ret) const if (!p->name().empty()) { - ret->append(Gogo::mangle_possibly_hidden_name(p->name())); + Gogo::append_possibly_hidden_name(ret, p->name()); ret->push_back(' '); } @@ -854,9 +859,9 @@ Forward_declaration_type::do_mangled_name(Gogo* gogo, std::string* ret) const { const Named_object* no = this->named_object(); if (no->package() == NULL) - ret->append(gogo->pkgpath_symbol()); + ret->append(gogo->pkgpath()); else - ret->append(no->package()->pkgpath_symbol()); + ret->append(no->package()->pkgpath()); ret->push_back('.'); ret->append(Gogo::unpack_hidden_name(no->name())); } @@ -894,18 +899,18 @@ 
Named_type::append_mangled_type_name(Gogo* gogo, bool use_alias, if (rcvr != NULL) ret->append(rcvr->type()->deref()->mangled_name(gogo)); else if (this->in_function_->package() == NULL) - ret->append(gogo->pkgpath_symbol()); + ret->append(gogo->pkgpath()); else - ret->append(this->in_function_->package()->pkgpath_symbol()); + ret->append(this->in_function_->package()->pkgpath()); ret->push_back('.'); ret->append(Gogo::unpack_hidden_name(this->in_function_->name())); } else { if (no->package() == NULL) - ret->append(gogo->pkgpath_symbol()); + ret->append(gogo->pkgpath()); else - ret->append(no->package()->pkgpath_symbol()); + ret->append(no->package()->pkgpath()); } ret->push_back('.'); } @@ -951,22 +956,22 @@ Gogo::type_descriptor_name(Type* type, Named_type* nt) if (rcvr != NULL) ret.append(rcvr->type()->deref()->mangled_name(this)); else if (in_function->package() == NULL) - ret.append(this->pkgpath_symbol()); + ret.append(this->pkgpath()); else - ret.append(in_function->package()->pkgpath_symbol()); + ret.append(in_function->package()->pkgpath()); ret.push_back('.'); ret.append(Gogo::unpack_hidden_name(in_function->name())); ret.push_back('.'); } if (no->package() == NULL) - ret.append(this->pkgpath_symbol()); + ret.append(this->pkgpath()); else - ret.append(no->package()->pkgpath_symbol()); + ret.append(no->package()->pkgpath()); ret.push_back('.'); } - ret.append(Gogo::mangle_possibly_hidden_name(no->name())); + Gogo::append_possibly_hidden_name(&ret, no->name()); if (in_function != NULL && index > 0) { -- cgit v1.1