aboutsummaryrefslogtreecommitdiff
path: root/llvm/tools/llvm-objdump/llvm-objdump.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-objdump/llvm-objdump.cpp')
-rw-r--r-- llvm/tools/llvm-objdump/llvm-objdump.cpp 225
1 files changed, 168 insertions, 57 deletions
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index a5a8638..c15b89d 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -207,6 +207,7 @@ bool objdump::PrintImmHex;
bool objdump::PrivateHeaders;
std::vector<std::string> objdump::FilterSections;
bool objdump::SectionHeaders;
+static bool ShowAllSymbols;
static bool ShowLMA;
bool objdump::PrintSource;
@@ -1481,28 +1482,118 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
std::vector<RelocationRef> Rels = RelocMap[Section];
std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
- // Disassemble symbol by symbol.
- for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
- std::string SymbolName = Symbols[SI].Name.str();
- if (Demangle)
- SymbolName = demangle(SymbolName);
-
- // Skip if --disassemble-symbols is not empty and the symbol is not in
- // the list.
- if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName))
- continue;
+ // Loop over each chunk of code between two points where at least
+ // one symbol is defined.
+ for (size_t SI = 0, SE = Symbols.size(); SI != SE;) {
+ // Advance SI past all the symbols starting at the same address,
+ // and make an ArrayRef of them.
+ unsigned FirstSI = SI;
uint64_t Start = Symbols[SI].Addr;
+ ArrayRef<SymbolInfoTy> SymbolsHere;
+ while (SI != SE && Symbols[SI].Addr == Start)
+ ++SI;
+ SymbolsHere = ArrayRef<SymbolInfoTy>(&Symbols[FirstSI], SI - FirstSI);
+
+ // Get the demangled names of all those symbols. We end up with a vector
+ // of StringRef that holds the names we're going to use, and a vector of
+ // std::string that stores the new strings returned by demangle(), if
+ // any. If we don't call demangle() then that vector can stay empty.
+ std::vector<StringRef> SymNamesHere;
+ std::vector<std::string> DemangledSymNamesHere;
+ if (Demangle) {
+ // Fetch the demangled names and store them locally.
+ for (const SymbolInfoTy &Symbol : SymbolsHere)
+ DemangledSymNamesHere.push_back(demangle(Symbol.Name.str()));
+ // Now we've finished modifying that vector, it's safe to make
+ // a vector of StringRefs pointing into it.
+ SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(),
+ DemangledSymNamesHere.end());
+ } else {
+ for (const SymbolInfoTy &Symbol : SymbolsHere)
+ SymNamesHere.push_back(Symbol.Name);
+ }
+
+ // Distinguish ELF data from code symbols, which will be used later on to
+ // decide whether to 'disassemble' this chunk as a data declaration via
+ // dumpELFData(), or whether to treat it as code.
+ //
+ // If data _and_ code symbols are defined at the same address, the code
+ // takes priority, on the grounds that disassembling code is our main
+ // purpose here, and it would be a worse failure to _not_ interpret
+ // something that _was_ meaningful as code than vice versa.
+ //
+ // Any ELF symbol type that is not clearly data will be regarded as code.
+ // In particular, one of the uses of STT_NOTYPE is for branch targets
+ // inside functions, for which STT_FUNC would be inaccurate.
+ //
+ // So here, we spot whether there's any non-data symbol present at all,
+ // and only set the DisassembleAsData flag if there isn't. Also, we use
+ // this distinction to inform the decision of which symbol to print at
+ // the head of the section, so that if we're printing code, we print a
+ // code-related symbol name to go with it.
+ bool DisassembleAsData = false;
+ size_t DisplaySymIndex = SymbolsHere.size() - 1;
+ if (Obj.isELF() && !DisassembleAll && Section.isText()) {
+ DisassembleAsData = true; // unless we find a code symbol below
+
+ for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+ uint8_t SymTy = SymbolsHere[i].Type;
+ if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) {
+ DisassembleAsData = false;
+ DisplaySymIndex = i;
+ }
+ }
+ }
+
+ // Decide which symbol(s) from this collection we're going to print.
+ std::vector<bool> SymsToPrint(SymbolsHere.size(), false);
+ // If the user has given the --disassemble-symbols option, then we must
+ // display every symbol in that set, and no others.
+ if (!DisasmSymbolSet.empty()) {
+ bool FoundAny = false;
+ for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+ if (DisasmSymbolSet.count(SymNamesHere[i])) {
+ SymsToPrint[i] = true;
+ FoundAny = true;
+ }
+ }
+
+ // And if none of the symbols here is one that the user asked for, skip
+ // disassembling this entire chunk of code.
+ if (!FoundAny)
+ continue;
+ } else {
+ // Otherwise, print the symbol chosen above via DisplaySymIndex: normally
+ // the last one here in the Symbols array, which is pre-sorted to reflect
+ // display priority, or the last non-data symbol when one was preferred.
+ SymsToPrint[DisplaySymIndex] = true;
+ }
+
+ // Now that we know we're disassembling this section, override the choice
+ // of which symbols to display by printing _all_ of them at this address
+ // if the user asked for all symbols.
+ //
+ // That way, '--show-all-symbols --disassemble-symbol=foo' will print
+ // only the chunk of code headed by 'foo', but also show any other
+ // symbols defined at that address, such as aliases for 'foo', or the ARM
+ // mapping symbol preceding its code.
+ if (ShowAllSymbols) {
+ for (size_t i = 0; i < SymbolsHere.size(); ++i)
+ SymsToPrint[i] = true;
+ }
+
if (Start < SectionAddr || StopAddress <= Start)
continue;
- else
- FoundDisasmSymbolSet.insert(SymbolName);
+
+ for (size_t i = 0; i < SymbolsHere.size(); ++i)
+ FoundDisasmSymbolSet.insert(SymNamesHere[i]);
// The end is the section end, the beginning of the next symbol, or
// --stop-address.
uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
- if (SI + 1 < SE)
- End = std::min(End, Symbols[SI + 1].Addr);
+ if (SI < SE)
+ End = std::min(End, Symbols[SI].Addr);
if (Start >= End || End <= StartAddress)
continue;
Start -= SectionAddr;
@@ -1517,13 +1608,22 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
}
outs() << '\n';
- if (LeadingAddr)
- outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
- SectionAddr + Start + VMAAdjustment);
- if (Obj.isXCOFF() && SymbolDescription) {
- outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n";
- } else
- outs() << '<' << SymbolName << ">:\n";
+
+ for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+ if (!SymsToPrint[i])
+ continue;
+
+ const SymbolInfoTy &Symbol = SymbolsHere[i];
+ const StringRef SymbolName = SymNamesHere[i];
+
+ if (LeadingAddr)
+ outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
+ SectionAddr + Start + VMAAdjustment);
+ if (Obj.isXCOFF() && SymbolDescription) {
+ outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n";
+ } else
+ outs() << '<' << SymbolName << ">:\n";
+ }
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
@@ -1532,57 +1632,67 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
continue;
}
- auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
- Bytes.slice(Start, End - Start),
- SectionAddr + Start, CommentStream);
- // To have round trippable disassembly, we fall back to decoding the
- // remaining bytes as instructions.
- //
- // If there is a failure, we disassemble the failed region as bytes before
- // falling back. The target is expected to print nothing in this case.
+ // See if any of the symbols defined at this location triggers target-
+ // specific disassembly behavior, e.g. of special descriptors or function
+ // prelude information.
//
- // If there is Success or SoftFail i.e no 'real' failure, we go ahead by
- // Size bytes before falling back.
- // So if the entire symbol is 'eaten' by the target:
- // Start += Size // Now Start = End and we will never decode as
- // // instructions
- //
- // Right now, most targets return None i.e ignore to treat a symbol
- // separately. But WebAssembly decodes preludes for some symbols.
- //
- if (Status) {
+ // We stop this loop at the first symbol that triggers some kind of
+ // interesting behavior (if any), on the assumption that if two symbols
+ // defined at the same address trigger two conflicting symbol handlers,
+ // the object file is probably confused anyway, and it would make even
+ // less sense to present the output of _both_ handlers, because that
+ // would describe the same data twice.
+ for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) {
+ SymbolInfoTy Symbol = SymbolsHere[SHI];
+
+ auto Status =
+ DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start),
+ SectionAddr + Start, CommentStream);
+
+ if (!Status) {
+ // If onSymbolStart returns None, that means it didn't trigger any
+ // interesting handling for this symbol. Try the other symbols
+ // defined at this address.
+ continue;
+ }
+
if (Status.value() == MCDisassembler::Fail) {
- outs() << "// Error in decoding " << SymbolName
+ // If onSymbolStart returns Fail, that means it identified some kind
+ // of special data at this address, but wasn't able to disassemble it
+ // meaningfully. So we fall back to disassembling the failed region
+ // as bytes, assuming that the target detected the failure before
+ // printing anything.
+ //
+ // Return values Success or SoftFail (i.e. no 'real' failure) are
+ // expected to mean that the target has emitted its own output.
+ //
+ // Either way, 'Size' will have been set to the amount of data
+ // covered by whatever prologue the target identified. So we advance
+ // our own position to beyond that. Sometimes that will be the entire
+ // distance to the next symbol, and sometimes it will be just a
+ // prologue and we should start disassembling instructions from where
+ // it left off.
+ outs() << "// Error in decoding " << SymNamesHere[SHI]
<< " : Decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
}
}
- } else {
- Size = 0;
+ Start += Size;
+ break;
}
- Start += Size;
-
Index = Start;
if (SectionAddr < StartAddress)
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
- // If there is a data/common symbol inside an ELF text section and we are
- // only disassembling text (applicable all architectures), we are in a
- // situation where we must print the data and not disassemble it.
- if (Obj.isELF() && !DisassembleAll && Section.isText()) {
- uint8_t SymTy = Symbols[SI].Type;
- if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
- dumpELFData(SectionAddr, Index, End, Bytes);
- Index = End;
- }
+ if (DisassembleAsData) {
+ dumpELFData(SectionAddr, Index, End, Bytes);
+ Index = End;
+ continue;
}
- bool CheckARMELFData = hasMappingSymbols(Obj) &&
- Symbols[SI].Type != ELF::STT_OBJECT &&
- !DisassembleAll;
bool DumpARMELFData = false;
formatted_raw_ostream FOS(outs());
@@ -1600,7 +1710,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
// same section. We rely on the markers introduced to understand what
// we need to dump. If the data marker is within a function, it is
// denoted as a word/short etc.
- if (CheckARMELFData) {
+ if (!MappingSymbols.empty()) {
char Kind = getMappingSymbolKind(MappingSymbols, Index);
DumpARMELFData = Kind == 'd';
if (SecondarySTI) {
@@ -2841,6 +2951,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers);
FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ);
SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers);
+ ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols);
ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma);
PrintSource = InputArgs.hasArg(OBJDUMP_source);
parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);