aboutsummaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
authorMaksim Panchenko <maks@fb.com>2023-07-09 21:36:49 -0700
committerMaksim Panchenko <maks@fb.com>2023-07-13 11:07:29 -0700
commite6724cbd8ae34909d524f6d303d88928975bb85d (patch)
treee9e5b35ec98f9cbcfc6495b787c0ac6222bfa149 /bolt
parentd74421a29040d728e43f38ffa003d6cc22fbd0c6 (diff)
downloadllvm-e6724cbd8ae34909d524f6d303d88928975bb85d.zip
llvm-e6724cbd8ae34909d524f6d303d88928975bb85d.tar.gz
llvm-e6724cbd8ae34909d524f6d303d88928975bb85d.tar.bz2
[BOLT] Add reading support for Linux ORC sections
Read ORC (oops rewind capability) info used for unwinding the stack by Linux Kernel. The info is stored in .orc_unwind and .orc_unwind_ip sections. There is also a related .orc_lookup section that is being populated by the kernel during runtime. Contents of the sections are sorted for quicker lookup by a post-link objtool. Unless we modify stack access instructions, we don't have to change ORC info attributed to instructions in the binary. However, we need to update instruction addresses and sort both sections based on the new layout. For pretty printing, we add "--print-orc" option that prints ORC info next to instructions in code dumps. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D154815
Diffstat (limited to 'bolt')
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h9
-rw-r--r--bolt/lib/Rewrite/LinuxKernelRewriter.cpp172
2 files changed, 175 insertions, 6 deletions
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 2e87bd5..c393b5b 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -339,6 +339,9 @@ private:
bool HasPseudoProbe{BC.getUniqueSectionByName(".pseudo_probe_desc") &&
BC.getUniqueSectionByName(".pseudo_probe")};
+ /// True if the function uses ORC format for stack unwinding.
+ bool HasORC{false};
+
/// True if the original entry point was patched.
bool IsPatched{false};
@@ -1340,6 +1343,9 @@ public:
/// Return true if the function has Pseudo Probe
bool hasPseudoProbe() const { return HasPseudoProbe; }
+ /// Return true if the function uses ORC format for stack unwinding. The flag
+ /// starts out false and only changes through setHasORC().
+ bool hasORC() const { return HasORC; }
+
/// Return true if the original entry point was patched.
bool isPatched() const { return IsPatched; }
@@ -1704,6 +1710,9 @@ public:
void setHasSDTMarker(bool V) { HasSDTMarker = V; }
+ /// Mark the function as using (\p V is true) or not using (\p V is false) ORC
+ /// format for stack unwinding. LinuxKernelRewriter::readORCTables() passes
+ /// true once it attaches an ORC entry to one of the function's instructions.
+ void setHasORC(bool V) { HasORC = V; }
+
BinaryFunction &setPersonalityFunction(uint64_t Addr) {
assert(!PersonalityFunction && "can't set personality function twice");
PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, "FUNCat");
diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
index 455c497..b672252 100644
--- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
+++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -20,9 +20,43 @@
using namespace llvm;
using namespace bolt;
+namespace opts {
+// Gates the ORCState stream operator: when this flag is off, streaming an
+// ORCState emits nothing. The option is initialized to true.
+static cl::opt<bool>
+ PrintORC("print-orc",
+ cl::desc("print ORC unwind information for instructions"),
+ cl::init(true), cl::cat(BoltCategory));
+} // namespace opts
+
+/// Unwind state attached to a single instruction address by the Linux Kernel
+/// ORC (oops rewind capability) stack unwinder. The three members hold, in
+/// order, the three consecutive 16-bit words of one .orc_unwind entry.
+struct ORCState {
+  int16_t SPOffset; // First word of the entry.
+  int16_t BPOffset; // Second word of the entry.
+  int16_t Info;     // Third word of the entry; kept raw, not decoded here.
+
+  /// Two states are equal only when all three words match.
+  bool operator==(const ORCState &RHS) const {
+    if (SPOffset != RHS.SPOffset)
+      return false;
+    if (BPOffset != RHS.BPOffset)
+      return false;
+    return Info == RHS.Info;
+  }
+
+  /// Exact negation of operator==.
+  bool operator!=(const ORCState &RHS) const {
+    return !operator==(RHS);
+  }
+};
+
+/// Basic printer for ORC entry. It does not provide the same level of
+/// information as objtool (for now).
+///
+/// Writes "{sp: <SPOffset>, bp: <BPOffset>, info: 0x<Info>}" to \p OS when the
+/// print-orc option is enabled and writes nothing otherwise.
+///
+/// \returns \p OS, so the call can be chained.
+inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
+  if (!opts::PrintORC)
+    return OS;
+
+  // Info is a raw 16-bit word stored in a signed field. Print its unsigned
+  // value so that a set top bit is not sign-extended into a misleading 32-bit
+  // hex number (0xffffXXXX).
+  OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
+               static_cast<uint16_t>(E.Info));
+  return OS;
+}
+
namespace {
-class LinuxKernelRewriter final : public MetadataRewriter {
+/// Section terminator ORC entry: all three fields are zero. readORCTables()
+/// compares decoded entries against it to tell terminators from regular
+/// entries.
+static ORCState NullORC = {0, 0, 0};
+
+class LinuxKernelRewriter final : public MetadataRewriter {
/// Linux Kernel special sections point to a specific instruction in many
/// cases. Unlike SDTMarkerInfo, these markers can come from different
/// sections.
@@ -37,6 +71,23 @@ class LinuxKernelRewriter final : public MetadataRewriter {
/// special linux kernel sections
std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
+ /// Linux ORC sections.
+ ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
+ ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
+
+ /// Size of entries in ORC sections.
+ static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
+ static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
+
+ /// One decoded ORC record: the address computed from a .orc_unwind_ip entry
+ /// paired with the state read from the matching .orc_unwind entry.
+ struct ORCListEntry {
+ uint64_t IP; ///< Instruction address.
+ BinaryFunction *BF; ///< Binary function corresponding to the entry; null when readORCTables() found no match.
+ ORCState ORC; ///< Stack unwind info in ORC format.
+ };
+
+ using ORCListType = std::vector<ORCListEntry>;
+ ORCListType ORCEntries;
+
/// Insert an LKMarker for a given code pointer \p PC from a non-code section
/// \p SectionName.
void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -64,6 +115,12 @@ class LinuxKernelRewriter final : public MetadataRewriter {
/// Update LKMarkers' locations for the output binary.
void updateLKMarkers();
+ /// Read ORC unwind information and annotate instructions.
+ Error readORCTables();
+
+ /// Update ORC data in the binary.
+ Error rewriteORCTables();
+
/// Mark instructions referenced by kernel metadata.
Error markInstructions();
@@ -72,17 +129,22 @@ public:
: MetadataRewriter("linux-kernel-rewriter", BC) {}
Error preCFGInitializer() override {
- if (opts::LinuxKernelMode) {
- processLKSections();
- if (Error E = markInstructions())
- return E;
- }
+ processLKSections();
+ if (Error E = markInstructions())
+ return E;
+
+ if (Error E = readORCTables())
+ return E;
return Error::success();
}
Error postEmitFinalizer() override {
updateLKMarkers();
+
+ if (Error E = rewriteORCTables())
+ return E;
+
return Error::success();
}
};
@@ -361,6 +423,104 @@ void LinuxKernelRewriter::updateLKMarkers() {
outs() << " Section: " << KV.first << ", patch-counts: " << KV.second
<< '\n';
}
+
+/// Read the Linux ORC unwind tables and attach their entries to instructions.
+///
+/// Looks up .orc_unwind and .orc_unwind_ip, decodes every entry pair into
+/// ORCEntries, and adds an "ORC" annotation to the instruction at each entry's
+/// address when the owning function is going to be emitted. A function that
+/// receives at least one annotation is flagged with setHasORC(true).
+///
+/// \returns Error::success() when neither section exists or when every entry
+/// was processed. Returns an executable_format_error when only one of the two
+/// sections is present, the section sizes disagree, a read runs out of bounds,
+/// an entry points at no instruction, or an instruction would receive a second
+/// non-terminator entry.
+Error LinuxKernelRewriter::readORCTables() {
+  // NOTE: we should ignore relocations for orc tables as the tables are sorted
+  // post-link time and relocations are not updated.
+  ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind");
+  ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip");
+
+  // No ORC data at all: nothing to do.
+  if (!ORCUnwindSection && !ORCUnwindIPSection)
+    return Error::success();
+
+  // The two sections are only usable as a pair.
+  if (!ORCUnwindSection || !ORCUnwindIPSection)
+    return createStringError(errc::executable_format_error,
+                             "missing ORC section");
+
+  // Both sections must hold the same whole number of entries.
+  const uint64_t NumEntries =
+      ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
+  if (ORCUnwindSection->getSize() != NumEntries * ORC_UNWIND_ENTRY_SIZE ||
+      ORCUnwindIPSection->getSize() != NumEntries * ORC_UNWIND_IP_ENTRY_SIZE)
+    return createStringError(errc::executable_format_error,
+                             "ORC entries number mismatch detected");
+
+  const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
+  DataExtractor OrcDE(ORCUnwindSection->getContents(),
+                      BC.AsmInfo->isLittleEndian(),
+                      BC.AsmInfo->getCodePointerSize());
+  DataExtractor IPDE(ORCUnwindIPSection->getContents(),
+                     BC.AsmInfo->isLittleEndian(),
+                     BC.AsmInfo->getCodePointerSize());
+  DataExtractor::Cursor ORCCursor(0);
+  DataExtractor::Cursor IPCursor(0);
+
+  // The entry count is known up front; avoid repeated reallocation.
+  ORCEntries.reserve(ORCEntries.size() + NumEntries);
+
+  for (uint64_t Index = 0; Index < NumEntries; ++Index) {
+    // The stored 32-bit value is a signed offset relative to the address of
+    // the .orc_unwind_ip entry itself. Capture the entry offset before the
+    // read advances the cursor: the operands of '+' are not sequenced, so
+    // querying and advancing the cursor inside one expression would leave the
+    // result up to the compiler.
+    const uint64_t EntryOffset = IPCursor.tell();
+    const int32_t IPDelta = static_cast<int32_t>(IPDE.getU32(IPCursor));
+    const uint64_t IP = IPSectionAddress + EntryOffset + IPDelta;
+
+    // Store all entries, including those we are not going to update, as the
+    // tables need to be sorted globally before being written out.
+    ORCEntries.push_back(ORCListEntry());
+    ORCListEntry &Entry = ORCEntries.back();
+
+    Entry.IP = IP;
+    Entry.ORC.SPOffset = static_cast<int16_t>(OrcDE.getU16(ORCCursor));
+    Entry.ORC.BPOffset = static_cast<int16_t>(OrcDE.getU16(ORCCursor));
+    Entry.ORC.Info = static_cast<int16_t>(OrcDE.getU16(ORCCursor));
+
+    // Consume the status of cursors.
+    if (!IPCursor || !ORCCursor)
+      return createStringError(errc::executable_format_error,
+                               "out of bounds while reading ORC");
+
+    // BF aliases the stored entry so that every adjustment below is recorded.
+    BinaryFunction *&BF = Entry.BF;
+    BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
+
+    // If the entry immediately pointing past the end of the function is not
+    // the terminator entry, then it does not belong to this function.
+    if (BF && BF->getAddress() + BF->getSize() == IP && Entry.ORC != NullORC)
+      BF = nullptr;
+
+    // If terminator entry points to the start of the function, then it belongs
+    // to a different function that contains the previous IP.
+    if (BF && BF->getAddress() == IP && Entry.ORC == NullORC)
+      BF = BC.getBinaryFunctionContainingAddress(IP - 1);
+
+    if (!BF) {
+      if (opts::Verbosity)
+        errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
+               << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
+      continue;
+    }
+
+    // Entries of functions that are not emitted, and terminator entries, stay
+    // in ORCEntries but are not attached to any instruction.
+    if (!BC.shouldEmit(*BF) || Entry.ORC == NullORC)
+      continue;
+
+    MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
+    if (!Inst)
+      return createStringError(
+          errc::executable_format_error,
+          "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
+
+    // Some addresses will have two entries associated with them. The first
+    // one being a "weak" section terminator. Since we ignore the terminator,
+    // we should only assign one entry per instruction.
+    if (BC.MIB->hasAnnotation(*Inst, "ORC"))
+      return createStringError(
+          errc::executable_format_error,
+          "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
+
+    BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
+
+    BF->setHasORC(true);
+  }
+
+  return Error::success();
+}
+
+/// Update ORC data in the output binary. Not implemented yet: the function is
+/// currently a no-op that always reports success.
+Error LinuxKernelRewriter::rewriteORCTables() {
+ // TODO: update instruction addresses and re-sort both ORC sections based on
+ // the new layout.
+ return Error::success();
+}
} // namespace
std::unique_ptr<MetadataRewriter>