1 files changed, 168 insertions, 57 deletions
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index a5a8638..c15b89d 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -207,6 +207,7 @@ bool objdump::PrintImmHex;
 bool objdump::PrivateHeaders;
 std::vector<std::string> objdump::FilterSections;
 bool objdump::SectionHeaders;
+static bool ShowAllSymbols;
 static bool ShowLMA;
 bool objdump::PrintSource;
 
@@ -1481,28 +1482,118 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
     std::vector<RelocationRef> Rels = RelocMap[Section];
     std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
     std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
-    // Disassemble symbol by symbol.
-    for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
-      std::string SymbolName = Symbols[SI].Name.str();
-      if (Demangle)
-        SymbolName = demangle(SymbolName);
-
-      // Skip if --disassemble-symbols is not empty and the symbol is not in
-      // the list.
-      if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName))
-        continue;
 
+    // Loop over each chunk of code between two points where at least
+    // one symbol is defined.
+    for (size_t SI = 0, SE = Symbols.size(); SI != SE;) {
+      // Advance SI past all the symbols starting at the same address,
+      // and make an ArrayRef of them.
+      unsigned FirstSI = SI;
       uint64_t Start = Symbols[SI].Addr;
+      ArrayRef<SymbolInfoTy> SymbolsHere;
+      while (SI != SE && Symbols[SI].Addr == Start)
+        ++SI;
+      SymbolsHere = ArrayRef<SymbolInfoTy>(&Symbols[FirstSI], SI - FirstSI);
+
+      // Get the demangled names of all those symbols. We end up with a vector
+      // of StringRef that holds the names we're going to use, and a vector of
+      // std::string that stores the new strings returned by demangle(), if
+      // any. If we don't call demangle() then that vector can stay empty.
+      std::vector<StringRef> SymNamesHere;
+      std::vector<std::string> DemangledSymNamesHere;
+      if (Demangle) {
+        // Fetch the demangled names and store them locally.
+        for (const SymbolInfoTy &Symbol : SymbolsHere)
+          DemangledSymNamesHere.push_back(demangle(Symbol.Name.str()));
+        // Now we've finished modifying that vector, it's safe to make
+        // a vector of StringRefs pointing into it.
+        SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(),
+                            DemangledSymNamesHere.end());
+      } else {
+        for (const SymbolInfoTy &Symbol : SymbolsHere)
+          SymNamesHere.push_back(Symbol.Name);
+      }
+
+      // Distinguish ELF data from code symbols, which will be used later on to
+      // decide whether to 'disassemble' this chunk as a data declaration via
+      // dumpELFData(), or whether to treat it as code.
+      //
+      // If data _and_ code symbols are defined at the same address, the code
+      // takes priority, on the grounds that disassembling code is our main
+      // purpose here, and it would be a worse failure to _not_ interpret
+      // something that _was_ meaningful as code than vice versa.
+      //
+      // Any ELF symbol type that is not clearly data will be regarded as code.
+      // In particular, one of the uses of STT_NOTYPE is for branch targets
+      // inside functions, for which STT_FUNC would be inaccurate.
+      //
+      // So here, we spot whether there's any non-data symbol present at all,
+      // and only set the DisassembleAsData flag if there isn't. Also, we use
+      // this distinction to inform the decision of which symbol to print at
+      // the head of the section, so that if we're printing code, we print a
+      // code-related symbol name to go with it.
+      bool DisassembleAsData = false;
+      size_t DisplaySymIndex = SymbolsHere.size() - 1;
+      if (Obj.isELF() && !DisassembleAll && Section.isText()) {
+        DisassembleAsData = true; // unless we find a code symbol below
+
+        for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+          uint8_t SymTy = SymbolsHere[i].Type;
+          if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) {
+            DisassembleAsData = false;
+            DisplaySymIndex = i;
+          }
+        }
+      }
+
+      // Decide which symbol(s) from this collection we're going to print.
+      std::vector<bool> SymsToPrint(SymbolsHere.size(), false);
+      // If the user has given the --disassemble-symbols option, then we must
+      // display every symbol in that set, and no others.
+      if (!DisasmSymbolSet.empty()) {
+        bool FoundAny = false;
+        for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+          if (DisasmSymbolSet.count(SymNamesHere[i])) {
+            SymsToPrint[i] = true;
+            FoundAny = true;
+          }
+        }
+
+        // And if none of the symbols here is one that the user asked for, skip
+        // disassembling this entire chunk of code.
+        if (!FoundAny)
+          continue;
+      } else {
+        // Otherwise, print whichever symbol at this location is last in the
+        // Symbols array, because that array is pre-sorted in a way intended to
+        // correlate with priority of which symbol to display.
+        SymsToPrint[DisplaySymIndex] = true;
+      }
+
+      // Now that we know we're disassembling this section, override the choice
+      // of which symbols to display by printing _all_ of them at this address
+      // if the user asked for all symbols.
+      //
+      // That way, '--show-all-symbols --disassemble-symbol=foo' will print
+      // only the chunk of code headed by 'foo', but also show any other
+      // symbols defined at that address, such as aliases for 'foo', or the ARM
+      // mapping symbol preceding its code.
+      if (ShowAllSymbols) {
+        for (size_t i = 0; i < SymbolsHere.size(); ++i)
+          SymsToPrint[i] = true;
+      }
+
       if (Start < SectionAddr || StopAddress <= Start)
         continue;
-      else
-        FoundDisasmSymbolSet.insert(SymbolName);
+
+      for (size_t i = 0; i < SymbolsHere.size(); ++i)
+        FoundDisasmSymbolSet.insert(SymNamesHere[i]);
 
       // The end is the section end, the beginning of the next symbol, or
       // --stop-address.
       uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
-      if (SI + 1 < SE)
-        End = std::min(End, Symbols[SI + 1].Addr);
+      if (SI < SE)
+        End = std::min(End, Symbols[SI].Addr);
       if (Start >= End || End <= StartAddress)
         continue;
       Start -= SectionAddr;
@@ -1517,13 +1608,22 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
       }
 
       outs() << '\n';
-      if (LeadingAddr)
-        outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
-                         SectionAddr + Start + VMAAdjustment);
-      if (Obj.isXCOFF() && SymbolDescription) {
-        outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n";
-      } else
-        outs() << '<' << SymbolName << ">:\n";
+
+      for (size_t i = 0; i < SymbolsHere.size(); ++i) {
+        if (!SymsToPrint[i])
+          continue;
+
+        const SymbolInfoTy &Symbol = SymbolsHere[i];
+        const StringRef SymbolName = SymNamesHere[i];
+
+        if (LeadingAddr)
+          outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
+                           SectionAddr + Start + VMAAdjustment);
+        if (Obj.isXCOFF() && SymbolDescription) {
+          outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n";
+        } else
+          outs() << '<' << SymbolName << ">:\n";
+      }
 
       // Don't print raw contents of a virtual section. A virtual section
       // doesn't have any contents in the file.
@@ -1532,57 +1632,67 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
         continue;
       }
 
-      auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
-                                          Bytes.slice(Start, End - Start),
-                                          SectionAddr + Start, CommentStream);
-      // To have round trippable disassembly, we fall back to decoding the
-      // remaining bytes as instructions.
-      //
-      // If there is a failure, we disassemble the failed region as bytes before
-      // falling back. The target is expected to print nothing in this case.
+      // See if any of the symbols defined at this location triggers target-
+      // specific disassembly behavior, e.g. of special descriptors or function
+      // prelude information.
       //
-      // If there is Success or SoftFail i.e no 'real' failure, we go ahead by
-      // Size bytes before falling back.
-      // So if the entire symbol is 'eaten' by the target:
-      //   Start += Size  // Now Start = End and we will never decode as
-      //                  // instructions
-      //
-      // Right now, most targets return None i.e ignore to treat a symbol
-      // separately. But WebAssembly decodes preludes for some symbols.
-      //
-      if (Status) {
+      // We stop this loop at the first symbol that triggers some kind of
+      // interesting behavior (if any), on the assumption that if two symbols
+      // defined at the same address trigger two conflicting symbol handlers,
+      // the object file is probably confused anyway, and it would make even
+      // less sense to present the output of _both_ handlers, because that
+      // would describe the same data twice.
+      for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) {
+        SymbolInfoTy Symbol = SymbolsHere[SHI];
+
+        auto Status =
+            DisAsm->onSymbolStart(Symbol, Size, Bytes.slice(Start, End - Start),
+                                  SectionAddr + Start, CommentStream);
+
+        if (!Status) {
+          // If onSymbolStart returns None, that means it didn't trigger any
+          // interesting handling for this symbol. Try the other symbols
+          // defined at this address.
+          continue;
+        }
+
         if (Status.value() == MCDisassembler::Fail) {
-          outs() << "// Error in decoding " << SymbolName
+          // If onSymbolStart returns Fail, that means it identified some kind
+          // of special data at this address, but wasn't able to disassemble it
+          // meaningfully. So we fall back to disassembling the failed region
+          // as bytes, assuming that the target detected the failure before
+          // printing anything.
+          //
+          // Return values Success or SoftFail (i.e no 'real' failure) are
+          // expected to mean that the target has emitted its own output.
+          //
+          // Either way, 'Size' will have been set to the amount of data
+          // covered by whatever prologue the target identified. So we advance
+          // our own position to beyond that. Sometimes that will be the entire
+          // distance to the next symbol, and sometimes it will be just a
+          // prologue and we should start disassembling instructions from where
+          // it left off.
+          outs() << "// Error in decoding " << SymNamesHere[SHI]
                  << " : Decoding failed region as bytes.\n";
           for (uint64_t I = 0; I < Size; ++I) {
             outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
                    << "\n";
           }
         }
-      } else {
-        Size = 0;
+        Start += Size;
+        break;
       }
 
-      Start += Size;
-
       Index = Start;
       if (SectionAddr < StartAddress)
         Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
 
-      // If there is a data/common symbol inside an ELF text section and we are
-      // only disassembling text (applicable all architectures), we are in a
-      // situation where we must print the data and not disassemble it.
-      if (Obj.isELF() && !DisassembleAll && Section.isText()) {
-        uint8_t SymTy = Symbols[SI].Type;
-        if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
-          dumpELFData(SectionAddr, Index, End, Bytes);
-          Index = End;
-        }
+      if (DisassembleAsData) {
+        dumpELFData(SectionAddr, Index, End, Bytes);
+        Index = End;
+        continue;
       }
 
-      bool CheckARMELFData = hasMappingSymbols(Obj) &&
-                             Symbols[SI].Type != ELF::STT_OBJECT &&
-                             !DisassembleAll;
       bool DumpARMELFData = false;
       formatted_raw_ostream FOS(outs());
 
@@ -1600,7 +1710,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
         // same section. We rely on the markers introduced to understand what
         // we need to dump. If the data marker is within a function, it is
         // denoted as a word/short etc.
-        if (CheckARMELFData) {
+        if (!MappingSymbols.empty()) {
           char Kind = getMappingSymbolKind(MappingSymbols, Index);
           DumpARMELFData = Kind == 'd';
           if (SecondarySTI) {
@@ -2841,6 +2951,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
   PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers);
   FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ);
   SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers);
+  ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols);
   ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma);
   PrintSource = InputArgs.hasArg(OBJDUMP_source);
   parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);