aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2021-02-10 09:47:10 -0800
committerFangrui Song <i@maskray.me>2021-02-10 09:47:10 -0800
commit04a2e12612f49e65a7dc14fa45f11f0632d427f2 (patch)
tree9d7506a5b251a504b4c6c4f62e14ffd6b95b09c5 /llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
parent4f30a3d3d26d55c0701dc56dde711667320fdd30 (diff)
downloadllvm-04a2e12612f49e65a7dc14fa45f11f0632d427f2.zip
llvm-04a2e12612f49e65a7dc14fa45f11f0632d427f2.tar.gz
llvm-04a2e12612f49e65a7dc14fa45f11f0632d427f2.tar.bz2
DebugInfo/Symbolize: Retrieve filename from the preceding STT_FILE for .symtab symbolization
The ELF spec says: > STT_FILE: Conventionally, the symbol's name gives the name of the source file associated with the object file. A file symbol has STB_LOCAL binding, its section index is SHN_ABS, and it precedes the other STB_LOCAL symbols for the file, if it is present. For a local symbol, the preceding STT_FILE symbol is almost always in the same file[1]. GNU addr2line uses this heuristic to retrieve the filename associated with a local symbol (e.g. internal linkage functions in C/C++). GNU addr2line can assign STT_FILE filename to a non-local symbol, too, but the trick only works if no regular symbol precede STT_FILE. This patch does not implement this corner case (not useful for most executables which have more than one files). In case of filename mismatch between .debug_line & .symtab, arbitrarily make .debug_line win. [1]: LLD does not synthesize STT_FILE symbols (https://bugs.llvm.org/show_bug.cgi?id=48023 see also https://sourceware.org/bugzilla/show_bug.cgi?id=26822). An assembly file without `.file` directives can cause mis-attribution. This is an edge case. Differential Revision: https://reviews.llvm.org/D95927
Diffstat (limited to 'llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp')
-rw-r--r--llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp92
1 files changed, 59 insertions, 33 deletions
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index b260cd9d..2c6a96a 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/Casting.h"
@@ -69,9 +70,8 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
return std::move(E);
}
- std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
- &Os = res->Objects;
- auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
+ std::vector<SymbolDesc> &Fs = res->Functions, &Os = res->Objects;
+ auto Uniquify = [](std::vector<SymbolDesc> &S) {
// Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
// pick the one with the largest Size. This helps us avoid symbols with no
// size information (Size=0).
@@ -79,7 +79,7 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
auto I = S.begin(), E = S.end(), J = S.begin();
while (I != E) {
auto OI = I;
- while (++I != E && OI->first.Addr == I->first.Addr) {
+ while (++I != E && OI->Addr == I->Addr) {
}
*J++ = I[-1];
}
@@ -138,8 +138,7 @@ Error SymbolizableObjectFile::addCoffExportSymbols(
uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
uint64_t SymbolStart = ImageBase + Export.Offset;
uint64_t SymbolSize = NextOffset - Export.Offset;
- SymbolDesc SD = {SymbolStart, SymbolSize};
- Functions.emplace_back(SD, Export.Name);
+ Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0});
}
return Error::success();
}
@@ -150,9 +149,23 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
uint64_t OpdAddress) {
// Avoid adding symbols from an unknown/undefined section.
const ObjectFile &Obj = *Symbol.getObject();
+ Expected<StringRef> SymbolNameOrErr = Symbol.getName();
+ if (!SymbolNameOrErr)
+ return SymbolNameOrErr.takeError();
+ StringRef SymbolName = *SymbolNameOrErr;
+
+ uint32_t ELFSymIdx =
+ Obj.isELF() ? ELFSymbolRef(Symbol).getRawDataRefImpl().d.b : 0;
Expected<section_iterator> Sec = Symbol.getSection();
- if (!Sec || Obj.section_end() == *Sec)
+ if (!Sec || Obj.section_end() == *Sec) {
+ if (Obj.isELF()) {
+ // Store the (index, filename) pair for a file symbol.
+ ELFSymbolRef ESym(Symbol);
+ if (ESym.getELFType() == ELF::STT_FILE)
+ FileSymbols.emplace_back(ELFSymIdx, SymbolName);
+ }
return Error::success();
+ }
Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
if (!SymbolTypeOrErr)
@@ -190,24 +203,21 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
}
- Expected<StringRef> SymbolNameOrErr = Symbol.getName();
- if (!SymbolNameOrErr)
- return SymbolNameOrErr.takeError();
- StringRef SymbolName = *SymbolNameOrErr;
// Mach-O symbol table names have leading underscore, skip it.
if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
SymbolName = SymbolName.drop_front();
- SymbolDesc SD = {SymbolAddress, SymbolSize};
-
+ if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL)
+ ELFSymIdx = 0;
+ SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx};
// DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an
// optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and
// STT_GNU_IFUNC) as function symbols which can be used to symbolize
// addresses.
if (SymbolType == SymbolRef::ST_Data)
- Objects.emplace_back(SD, SymbolName);
+ Objects.push_back(SD);
else
- Functions.emplace_back(SD, SymbolName);
+ Functions.push_back(SD);
return Error::success();
}
@@ -223,23 +233,33 @@ uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
return 0;
}
-bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
- uint64_t Address,
- std::string &Name,
- uint64_t &Addr,
- uint64_t &Size) const {
+bool SymbolizableObjectFile::getNameFromSymbolTable(
+ SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr,
+ uint64_t &Size, std::string &FileName) const {
const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
- std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
+ SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0};
auto SymbolIterator = llvm::upper_bound(Symbols, SD);
if (SymbolIterator == Symbols.begin())
return false;
--SymbolIterator;
- if (SymbolIterator->first.Size != 0 &&
- SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
+ if (SymbolIterator->Size != 0 &&
+ SymbolIterator->Addr + SymbolIterator->Size <= Address)
return false;
- Name = SymbolIterator->second.str();
- Addr = SymbolIterator->first.Addr;
- Size = SymbolIterator->first.Size;
+ Name = SymbolIterator->Name.str();
+ Addr = SymbolIterator->Addr;
+ Size = SymbolIterator->Size;
+
+ if (SymbolIterator->ELFLocalSymIdx != 0) {
+ // If this is an ELF local symbol, find the STT_FILE symbol preceding
+ // SymbolIterator to get the filename. The ELF spec requires the STT_FILE
+ // symbol (if present) precedes the other STB_LOCAL symbols for the file.
+ assert(Module->isELF());
+ auto It = llvm::upper_bound(
+ FileSymbols,
+ std::make_pair(SymbolIterator->ELFLocalSymIdx, StringRef()));
+ if (It != FileSymbols.begin())
+ FileName = It[-1].second.str();
+ }
return true;
}
@@ -265,11 +285,13 @@ SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
// Override function name from symbol table if necessary.
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
- std::string FunctionName;
+ std::string FunctionName, FileName;
uint64_t Start, Size;
if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size)) {
+ FunctionName, Start, Size, FileName)) {
LineInfo.FunctionName = FunctionName;
+ if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty())
+ LineInfo.FileName = FileName;
}
}
return LineInfo;
@@ -290,12 +312,15 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
// Override the function name in lower frame with name from symbol table.
if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) {
- std::string FunctionName;
+ std::string FunctionName, FileName;
uint64_t Start, Size;
if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
- FunctionName, Start, Size)) {
- InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
- ->FunctionName = FunctionName;
+ FunctionName, Start, Size, FileName)) {
+ DILineInfo *LI = InlinedContext.getMutableFrame(
+ InlinedContext.getNumberOfFrames() - 1);
+ LI->FunctionName = FunctionName;
+ if (LI->FileName == DILineInfo::BadString && !FileName.empty())
+ LI->FileName = FileName;
}
}
@@ -305,8 +330,9 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
DIGlobal SymbolizableObjectFile::symbolizeData(
object::SectionedAddress ModuleOffset) const {
DIGlobal Res;
+ std::string FileName;
getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
- Res.Start, Res.Size);
+ Res.Start, Res.Size, FileName);
return Res;
}