aboutsummaryrefslogtreecommitdiff
path: root/lld/MachO/SymbolTable.cpp
diff options
context:
space:
mode:
authorNico Weber <thakis@chromium.org>2022-10-02 11:04:31 -0400
committerNico Weber <thakis@chromium.org>2022-10-03 18:53:55 -0400
commit8c45e80298f4e3eb6d9cfbafcb099bc087e4668e (patch)
tree69203bc941e5e1c022bdddef65b8da69a4134f74 /lld/MachO/SymbolTable.cpp
parentd55dd57124d5b22a85c09574ccf2fa0a9e8c44db (diff)
downloadllvm-8c45e80298f4e3eb6d9cfbafcb099bc087e4668e.tar.gz
llvm-8c45e80298f4e3eb6d9cfbafcb099bc087e4668e.tar.bz2
llvm-8c45e80298f4e3eb6d9cfbafcb099bc087e4668e.zip
[lld/mac] Port typo correction for undefined symbols from ELF port
Ports: - core feature: https://reviews.llvm.org/D67039 - case mismatch: https://reviews.llvm.org/D70506 - extern "C" suggestions: https://reviews.llvm.org/D69592, https://reviews.llvm.org/D69650 Does not port https://reviews.llvm.org/D71735 since I believe that that doesn't apply to lld/Mach-O. Differential Revision: https://reviews.llvm.org/D135038
Diffstat (limited to 'lld/MachO/SymbolTable.cpp')
-rw-r--r--lld/MachO/SymbolTable.cpp148
1 files changed, 145 insertions, 3 deletions
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 5173fa069e22..ccb1d51bdbe4 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() {
}
}
+// Check whether the definition name def is a mangled function name that matches
+// the reference name ref.
+static bool canSuggestExternCForCXX(StringRef ref, StringRef def) {
+ llvm::ItaniumPartialDemangler d;
+ std::string name = def.str();
+ if (d.partialDemangle(name.c_str()))
+ return false;
+ char *buf = d.getFunctionName(nullptr, nullptr);
+ if (!buf)
+ return false;
+ bool ret = ref == buf;
+ free(buf);
+ return ret;
+}
+
+// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns
+// the suggested symbol, which is either in the symbol table, or in the same
+// file of sym.
+static const Symbol *getAlternativeSpelling(const Undefined &sym,
+ std::string &pre_hint,
+ std::string &post_hint) {
+ DenseMap<StringRef, const Symbol *> map;
+ if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) {
+ // Build a map of local defined symbols.
+ for (const Symbol *s : sym.getFile()->symbols)
+ if (auto *defined = dyn_cast<Defined>(s))
+ if (!defined->isExternal())
+ map.try_emplace(s->getName(), s);
+ }
+
+ auto suggest = [&](StringRef newName) -> const Symbol * {
+ // If defined locally.
+ if (const Symbol *s = map.lookup(newName))
+ return s;
+
+ // If in the symbol table and not undefined.
+ if (const Symbol *s = symtab->find(newName))
+ if (dyn_cast<Undefined>(s) == nullptr)
+ return s;
+
+ return nullptr;
+ };
+
+ // This loop enumerates all strings of Levenshtein distance 1 as typo
+ // correction candidates and suggests the one that exists as a non-undefined
+ // symbol.
+ StringRef name = sym.getName();
+ for (size_t i = 0, e = name.size(); i != e + 1; ++i) {
+ // Insert a character before name[i].
+ std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str();
+ for (char c = '0'; c <= 'z'; ++c) {
+ newName[i] = c;
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+ if (i == e)
+ break;
+
+ // Substitute name[i].
+ newName = std::string(name);
+ for (char c = '0'; c <= 'z'; ++c) {
+ newName[i] = c;
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is
+ // common.
+ if (i + 1 < e) {
+ newName[i] = name[i + 1];
+ newName[i + 1] = name[i];
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Delete name[i].
+ newName = (name.substr(0, i) + name.substr(i + 1)).str();
+ if (const Symbol *s = suggest(newName))
+ return s;
+ }
+
+ // Case mismatch, e.g. Foo vs FOO.
+ for (auto &it : map)
+ if (name.equals_insensitive(it.first))
+ return it.second;
+ for (Symbol *sym : symtab->getSymbols())
+ if (dyn_cast<Undefined>(sym) == nullptr &&
+ name.equals_insensitive(sym->getName()))
+ return sym;
+
+ // The reference may be a mangled name while the definition is not. Suggest a
+ // missing extern "C".
+ if (name.startswith("__Z")) {
+ std::string buf = name.str();
+ llvm::ItaniumPartialDemangler d;
+ if (!d.partialDemangle(buf.c_str()))
+ if (char *buf = d.getFunctionName(nullptr, nullptr)) {
+ const Symbol *s = suggest((Twine("_") + buf).str());
+ free(buf);
+ if (s) {
+ pre_hint = ": extern \"C\" ";
+ return s;
+ }
+ }
+ } else {
+ StringRef name_without_underscore = name;
+ name_without_underscore.consume_front("_");
+ const Symbol *s = nullptr;
+ for (auto &it : map)
+ if (canSuggestExternCForCXX(name_without_underscore, it.first)) {
+ s = it.second;
+ break;
+ }
+ if (!s)
+ for (Symbol *sym : symtab->getSymbols())
+ if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) {
+ s = sym;
+ break;
+ }
+ if (s) {
+ pre_hint = " to declare ";
+ post_hint = " as extern \"C\"?";
+ return s;
+ }
+ }
+
+ return nullptr;
+}
+
static void reportUndefinedSymbol(const Undefined &sym,
- const UndefinedDiag &locations) {
+ const UndefinedDiag &locations,
+ bool correctSpelling) {
std::string message = "undefined symbol";
if (config->archMultiple)
message += (" for arch " + getArchitectureName(config->arch())).str();
@@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym,
("\n>>> referenced " + Twine(totalReferences - i) + " more times")
.str();
+ if (correctSpelling) {
+ std::string pre_hint = ": ", post_hint;
+ if (const Symbol *corrected =
+ getAlternativeSpelling(sym, pre_hint, post_hint)) {
+ message +=
+ "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint;
+ if (corrected->getFile())
+ message += "\n>>> defined in: " + toString(corrected->getFile());
+ }
+ }
+
if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error)
error(message);
else if (config->undefinedSymbolTreatment ==
@@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym,
}
void macho::reportPendingUndefinedSymbols() {
- for (const auto &undef : undefs)
- reportUndefinedSymbol(*undef.first, undef.second);
+ // Enable spell corrector for the first 2 diagnostics.
+ for (const auto &[i, undef] : llvm::enumerate(undefs))
+ reportUndefinedSymbol(*undef.first, undef.second, i < 2);
// This function is called multiple times during execution. Clear the printed
// diagnostics to avoid printing the same things again the next time.