diff options
Diffstat (limited to 'llvm/tools/llvm-objdump/llvm-objdump.cpp')
-rw-r--r-- | llvm/tools/llvm-objdump/llvm-objdump.cpp | 225 |
1 file changed, 168 insertions, 57 deletions
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index a5a8638..c15b89d 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -207,6 +207,7 @@ bool objdump::PrintImmHex; bool objdump::PrivateHeaders; std::vector<std::string> objdump::FilterSections; bool objdump::SectionHeaders; +static bool ShowAllSymbols; static bool ShowLMA; bool objdump::PrintSource; @@ -1481,28 +1482,118 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, std::vector<RelocationRef> Rels = RelocMap[Section]; std::vector<RelocationRef>::const_iterator RelCur = Rels.begin(); std::vector<RelocationRef>::const_iterator RelEnd = Rels.end(); - // Disassemble symbol by symbol. - for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - std::string SymbolName = Symbols[SI].Name.str(); - if (Demangle) - SymbolName = demangle(SymbolName); - - // Skip if --disassemble-symbols is not empty and the symbol is not in - // the list. - if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName)) - continue; + // Loop over each chunk of code between two points where at least + // one symbol is defined. + for (size_t SI = 0, SE = Symbols.size(); SI != SE;) { + // Advance SI past all the symbols starting at the same address, + // and make an ArrayRef of them. + unsigned FirstSI = SI; uint64_t Start = Symbols[SI].Addr; + ArrayRef<SymbolInfoTy> SymbolsHere; + while (SI != SE && Symbols[SI].Addr == Start) + ++SI; + SymbolsHere = ArrayRef<SymbolInfoTy>(&Symbols[FirstSI], SI - FirstSI); + + // Get the demangled names of all those symbols. We end up with a vector + // of StringRef that holds the names we're going to use, and a vector of + // std::string that stores the new strings returned by demangle(), if + // any. If we don't call demangle() then that vector can stay empty. 
+ std::vector<StringRef> SymNamesHere; + std::vector<std::string> DemangledSymNamesHere; + if (Demangle) { + // Fetch the demangled names and store them locally. + for (const SymbolInfoTy &Symbol : SymbolsHere) + DemangledSymNamesHere.push_back(demangle(Symbol.Name.str())); + // Now we've finished modifying that vector, it's safe to make + // a vector of StringRefs pointing into it. + SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(), + DemangledSymNamesHere.end()); + } else { + for (const SymbolInfoTy &Symbol : SymbolsHere) + SymNamesHere.push_back(Symbol.Name); + } + + // Distinguish ELF data from code symbols, which will be used later on to + // decide whether to 'disassemble' this chunk as a data declaration via + // dumpELFData(), or whether to treat it as code. + // + // If data _and_ code symbols are defined at the same address, the code + // takes priority, on the grounds that disassembling code is our main + // purpose here, and it would be a worse failure to _not_ interpret + // something that _was_ meaningful as code than vice versa. + // + // Any ELF symbol type that is not clearly data will be regarded as code. + // In particular, one of the uses of STT_NOTYPE is for branch targets + // inside functions, for which STT_FUNC would be inaccurate. + // + // So here, we spot whether there's any non-data symbol present at all, + // and only set the DisassembleAsData flag if there isn't. Also, we use + // this distinction to inform the decision of which symbol to print at + // the head of the section, so that if we're printing code, we print a + // code-related symbol name to go with it. 
+ bool DisassembleAsData = false; + size_t DisplaySymIndex = SymbolsHere.size() - 1; + if (Obj.isELF() && !DisassembleAll && Section.isText()) { + DisassembleAsData = true; // unless we find a code symbol below + + for (size_t i = 0; i < SymbolsHere.size(); ++i) { + uint8_t SymTy = SymbolsHere[i].Type; + if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) { + DisassembleAsData = false; + DisplaySymIndex = i; + } + } + } + + // Decide which symbol(s) from this collection we're going to print. + std::vector<bool> SymsToPrint(SymbolsHere.size(), false); + // If the user has given the --disassemble-symbols option, then we must + // display every symbol in that set, and no others. + if (!DisasmSymbolSet.empty()) { + bool FoundAny = false; + for (size_t i = 0; i < SymbolsHere.size(); ++i) { + if (DisasmSymbolSet.count(SymNamesHere[i])) { + SymsToPrint[i] = true; + FoundAny = true; + } + } + + // And if none of the symbols here is one that the user asked for, skip + // disassembling this entire chunk of code. + if (!FoundAny) + continue; + } else { + // Otherwise, print whichever symbol at this location is last in the + // Symbols array, because that array is pre-sorted in a way intended to + // correlate with priority of which symbol to display. + SymsToPrint[DisplaySymIndex] = true; + } + + // Now that we know we're disassembling this section, override the choice + // of which symbols to display by printing _all_ of them at this address + // if the user asked for all symbols. + // + // That way, '--show-all-symbols --disassemble-symbol=foo' will print + // only the chunk of code headed by 'foo', but also show any other + // symbols defined at that address, such as aliases for 'foo', or the ARM + // mapping symbol preceding its code. 
+ if (ShowAllSymbols) { + for (size_t i = 0; i < SymbolsHere.size(); ++i) + SymsToPrint[i] = true; + } + if (Start < SectionAddr || StopAddress <= Start) continue; - else - FoundDisasmSymbolSet.insert(SymbolName); + + for (size_t i = 0; i < SymbolsHere.size(); ++i) + FoundDisasmSymbolSet.insert(SymNamesHere[i]); // The end is the section end, the beginning of the next symbol, or // --stop-address. uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress); - if (SI + 1 < SE) - End = std::min(End, Symbols[SI + 1].Addr); + if (SI < SE) + End = std::min(End, Symbols[SI].Addr); if (Start >= End || End <= StartAddress) continue; Start -= SectionAddr; @@ -1517,13 +1608,22 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, } outs() << '\n'; - if (LeadingAddr) - outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", - SectionAddr + Start + VMAAdjustment); - if (Obj.isXCOFF() && SymbolDescription) { - outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n"; - } else - outs() << '<' << SymbolName << ">:\n"; + + for (size_t i = 0; i < SymbolsHere.size(); ++i) { + if (!SymsToPrint[i]) + continue; + + const SymbolInfoTy &Symbol = SymbolsHere[i]; + const StringRef SymbolName = SymNamesHere[i]; + + if (LeadingAddr) + outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", + SectionAddr + Start + VMAAdjustment); + if (Obj.isXCOFF() && SymbolDescription) { + outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; + } else + outs() << '<' << SymbolName << ">:\n"; + } // Don't print raw contents of a virtual section. A virtual section // doesn't have any contents in the file. 
@@ -1532,57 +1632,67 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, continue; } - auto Status = DisAsm->onSymbolStart(Symbols[SI], Size, - Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); - // To have round trippable disassembly, we fall back to decoding the - // remaining bytes as instructions. - // - // If there is a failure, we disassemble the failed region as bytes before - // falling back. The target is expected to print nothing in this case. + // See if any of the symbols defined at this location triggers target- + // specific disassembly behavior, e.g. of special descriptors or function + // prelude information. // - // If there is Success or SoftFail i.e no 'real' failure, we go ahead by - // Size bytes before falling back. - // So if the entire symbol is 'eaten' by the target: - // Start += Size // Now Start = End and we will never decode as - // // instructions - // - // Right now, most targets return None i.e ignore to treat a symbol - // separately. But WebAssembly decodes preludes for some symbols. - // - if (Status) { + // We stop this loop at the first symbol that triggers some kind of + // interesting behavior (if any), on the assumption that if two symbols + // defined at the same address trigger two conflicting symbol handlers, + // the object file is probably confused anyway, and it would make even + // less sense to present the output of _both_ handlers, because that + // would describe the same data twice. + for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) { + SymbolInfoTy Symbol = SymbolsHere[SHI]; + + auto Status = + DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + + if (!Status) { + // If onSymbolStart returns None, that means it didn't trigger any + // interesting handling for this symbol. Try the other symbols + // defined at this address. 
+ continue; + } + if (Status.value() == MCDisassembler::Fail) { - outs() << "// Error in decoding " << SymbolName + // If onSymbolStart returns Fail, that means it identified some kind + // of special data at this address, but wasn't able to disassemble it + // meaningfully. So we fall back to disassembling the failed region + // as bytes, assuming that the target detected the failure before + // printing anything. + // + // Return values Success or SoftFail (i.e no 'real' failure) are + // expected to mean that the target has emitted its own output. + // + // Either way, 'Size' will have been set to the amount of data + // covered by whatever prologue the target identified. So we advance + // our own position to beyond that. Sometimes that will be the entire + // distance to the next symbol, and sometimes it will be just a + // prologue and we should start disassembling instructions from where + // it left off. + outs() << "// Error in decoding " << SymNamesHere[SHI] << " : Decoding failed region as bytes.\n"; for (uint64_t I = 0; I < Size; ++I) { outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) << "\n"; } } - } else { - Size = 0; + Start += Size; + break; } - Start += Size; - Index = Start; if (SectionAddr < StartAddress) Index = std::max<uint64_t>(Index, StartAddress - SectionAddr); - // If there is a data/common symbol inside an ELF text section and we are - // only disassembling text (applicable all architectures), we are in a - // situation where we must print the data and not disassemble it. 
- if (Obj.isELF() && !DisassembleAll && Section.isText()) { - uint8_t SymTy = Symbols[SI].Type; - if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { - dumpELFData(SectionAddr, Index, End, Bytes); - Index = End; - } + if (DisassembleAsData) { + dumpELFData(SectionAddr, Index, End, Bytes); + Index = End; + continue; } - bool CheckARMELFData = hasMappingSymbols(Obj) && - Symbols[SI].Type != ELF::STT_OBJECT && - !DisassembleAll; bool DumpARMELFData = false; formatted_raw_ostream FOS(outs()); @@ -1600,7 +1710,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, // same section. We rely on the markers introduced to understand what // we need to dump. If the data marker is within a function, it is // denoted as a word/short etc. - if (CheckARMELFData) { + if (!MappingSymbols.empty()) { char Kind = getMappingSymbolKind(MappingSymbols, Index); DumpARMELFData = Kind == 'd'; if (SecondarySTI) { @@ -2841,6 +2951,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers); FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ); SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers); + ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols); ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma); PrintSource = InputArgs.hasArg(OBJDUMP_source); parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress); |