aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/ExecutionEngine/Orc
diff options
context:
space:
mode:
authorPrem Chintalapudi <prem.chintalapudi@gmail.com>2023-04-18 17:15:32 -0400
committerValentin Churavy <v.churavy@gmail.com>2023-04-18 17:15:59 -0400
commit76e1521b0acff739c0425d0fcbb9360fc17f1af8 (patch)
tree04f440c51e2fbb50d19fcadfaaa630d897e979e7 /llvm/lib/ExecutionEngine/Orc
parentd343a395431f70f63d66ef31cb69c8c4babdb21f (diff)
downloadllvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.zip
llvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.tar.gz
llvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.tar.bz2
Non-debuginfo JITLink perf jitdump support
This patch ports PerfJITEventListener to a JITLink plugin, but adds unwind record support and drops debuginfo support temporarily. Debuginfo can be enabled in the future by providing a way to obtain a DWARFContext from a LinkGraph. See D146060 for an experimental implementation that adds debuginfo parsing. Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D146169
Diffstat (limited to 'llvm/lib/ExecutionEngine/Orc')
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CMakeLists.txt1
-rw-r--r--llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp317
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt1
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp465
4 files changed, 784 insertions, 0 deletions
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index 1055e31..55e31b3 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -41,6 +41,7 @@ add_llvm_component_library(LLVMOrcJIT
ObjectTransformLayer.cpp
OrcABISupport.cpp
OrcV2CBindings.cpp
+ PerfSupportPlugin.cpp
RTDyldObjectLinkingLayer.cpp
SimpleRemoteEPC.cpp
Speculation.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp
new file mode 100644
index 0000000..bcb445e
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp
@@ -0,0 +1,317 @@
+//===----- PerfSupportPlugin.cpp --- Utils for perf support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handles support for registering code with perf
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::jitlink;
+
+namespace {
+
+// Builds the raw bytes of a minimal x86-64 .eh_frame_hdr that only carries a
+// pointer to the start of the .eh_frame section (no FDE binary search table).
+// With `absolute` set, the pointer is written as the 64-bit address at which
+// EHFrame currently starts; otherwise a zeroed 32-bit data-relative slot is
+// left in its place, ready to be patched by a relocation. In both cases the
+// pointer field sits 4 bytes into the header, after the version and the three
+// encoding bytes.
+Expected<std::string> createX64EHFrameHeader(Section &EHFrame,
+                                             support::endianness endianness,
+                                             bool absolute) {
+  const uint8_t Version = 1;
+  const uint8_t EhFramePtrEnc =
+      absolute ? (dwarf::DW_EH_PE_sdata8 | dwarf::DW_EH_PE_absptr)
+               : (dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_datarel);
+  // The FDE count and the binary search table are both omitted: producing
+  // them would mean reprocessing the CIEs, which seems like more trouble than
+  // it's worth.
+  // TODO consider implementing this.
+  const uint8_t FDECountEnc = dwarf::DW_EH_PE_omit;
+  const uint8_t TableEnc = dwarf::DW_EH_PE_omit;
+  const uint8_t FixedBytes[] = {Version, EhFramePtrEnc, FDECountEnc, TableEnc};
+
+  const size_t PointerSize = absolute ? sizeof(uint64_t) : sizeof(uint32_t);
+  const size_t HeaderSize = sizeof(FixedBytes) + PointerSize;
+  std::string HeaderContent(HeaderSize, '\0');
+  BinaryStreamWriter Writer(
+      MutableArrayRef<uint8_t>(
+          reinterpret_cast<uint8_t *>(HeaderContent.data()), HeaderSize),
+      endianness);
+
+  for (uint8_t Byte : FixedBytes)
+    if (auto Err = Writer.writeInteger(Byte))
+      return std::move(Err);
+
+  if (!absolute) {
+    // Placeholder for the 32-bit relocation to the start of .eh_frame.
+    const uint32_t EHFrameRelocation = 0;
+    if (auto Err = Writer.writeInteger(EHFrameRelocation))
+      return std::move(Err);
+    return HeaderContent;
+  }
+
+  const uint64_t EHFrameAddr = SectionRange(EHFrame).getStart().getValue();
+  if (auto Err = Writer.writeInteger(EHFrameAddr))
+    return std::move(Err);
+  return HeaderContent;
+}
+
+// Names of the executor-side entry points that PerfSupportPlugin::Create
+// looks up in the JITDylib it is given.
+constexpr StringRef RegisterPerfStartSymbolName{
+    "llvm_orc_registerJITLoaderPerfStart"};
+constexpr StringRef RegisterPerfEndSymbolName{
+    "llvm_orc_registerJITLoaderPerfEnd"};
+constexpr StringRef RegisterPerfImplSymbolName{
+    "llvm_orc_registerJITLoaderPerfImpl"};
+
+// Flattens a timespec (seconds + nanoseconds) into one nanosecond count.
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+  const uint64_t Seconds = static_cast<uint64_t>(ts->tv_sec);
+  const uint64_t WholeSecondsInNs = Seconds * 1000000000;
+  return WholeSecondsInNs + ts->tv_nsec;
+}
+
+// Reads CLOCK_MONOTONIC and returns it in nanoseconds. Yields 0 when the
+// clock cannot be read, and always when not built for Linux.
+static inline uint64_t perf_get_timestamp() {
+#ifdef __linux__
+  struct timespec Now;
+  if (clock_gettime(CLOCK_MONOTONIC, &Now) != 0)
+    return 0;
+  return timespec_to_ns(&Now);
+#else
+  return 0;
+#endif
+}
+
+// Describes one symbol as a JIT_CODE_LOAD record and takes the next value of
+// CodeIndex for it. Pid and Tid are zeroed here; the runtime fills them in.
+static PerfJITCodeLoadRecord
+getCodeLoadRecord(const Symbol &Sym, std::atomic<uint64_t> &CodeIndex) {
+  const uint64_t SymAddr = Sym.getAddress().getValue();
+
+  PerfJITCodeLoadRecord Record;
+  Record.Prefix.Id = PerfJITRecordType::JIT_CODE_LOAD;
+  Record.Pid = 0; // Runtime sets PID
+  Record.Tid = 0; // Runtime sets TID
+  Record.Vma = SymAddr;
+  Record.CodeAddr = SymAddr;
+  Record.CodeSize = Sym.getSize();
+  Record.CodeIndex = CodeIndex++;
+  Record.Name = Sym.getName().str();
+
+  // Computed last: the total depends on the name and code size set above.
+  const size_t FixedBytes =
+      2 * sizeof(uint32_t)    // id, total_size
+      + sizeof(uint64_t)      // timestamp
+      + 2 * sizeof(uint32_t)  // pid, tid
+      + 4 * sizeof(uint64_t); // vma, code_addr, code_size, code_index
+  Record.Prefix.TotalSize = FixedBytes + Record.Name.size() + 1 // symbol name
+                            + Record.CodeSize;                  // code
+  return Record;
+}
+
+// Builds a JIT_CODE_DEBUG_INFO record for Sym from the line table that DC
+// reports for the symbol's address range. Returns std::nullopt when there is
+// no DWARFContext or when no line info is found for that range.
+static std::optional<PerfJITDebugInfoRecord>
+getDebugInfoRecord(const Symbol &Sym, DWARFContext *DC) {
+  if (!DC) {
+    LLVM_DEBUG(dbgs() << "No debug info available\n");
+    return std::nullopt;
+  }
+
+  const auto SymAddr = Sym.getAddress();
+  const auto SymSize = Sym.getSize();
+  auto &Sec = Sym.getBlock().getSection();
+  object::SectionedAddress Where{SymAddr.getValue(), Sec.getOrdinal()};
+  LLVM_DEBUG(dbgs() << "Getting debug info for symbol " << Sym.getName()
+                    << " at address " << SymAddr.getValue() << " with size "
+                    << SymSize << "\n"
+                    << "Section ordinal: " << Sec.getOrdinal() << "\n");
+
+  auto LineTable = DC->getLineInfoForAddressRange(
+      Where, SymSize, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
+  if (LineTable.empty()) {
+    LLVM_DEBUG(dbgs() << "No line info available\n");
+    return std::nullopt;
+  }
+
+  PerfJITDebugInfoRecord Record;
+  Record.Prefix.Id = PerfJITRecordType::JIT_CODE_DEBUG_INFO;
+  Record.CodeAddr = SymAddr.getValue();
+
+  // Start from the fixed part: two 64-bit words of record header plus the two
+  // 64-bit record fields. Each entry then adds its own bytes.
+  size_t TotalBytes = (2 + 2) * sizeof(uint64_t);
+  for (const auto &[LineAddr, Info] : LineTable) {
+    // The function re-created by perf is preceded by an ELF header, so every
+    // address is shifted by 0x40; without that the results are wrong.
+    Record.Entries.push_back(
+        {LineAddr + 0x40, Info.Line, Info.Discriminator, Info.FileName});
+    TotalBytes += sizeof(uint64_t) + 2 * sizeof(uint32_t); // Addr, Line/Discrim
+    TotalBytes += Info.FileName.size() + 1;                // Name
+  }
+  Record.Prefix.TotalSize = TotalBytes;
+
+  LLVM_DEBUG(dbgs() << "Created debug info record\n"
+                    << "Total size: " << Record.Prefix.TotalSize << "\n"
+                    << "Nr entries: " << Record.Entries.size() << "\n");
+  return Record;
+}
+
+// Builds the JIT_CODE_UNWINDING_INFO record for G. The record is returned
+// with TotalSize == 0 when the graph has no .eh_frame section, is not ELF, or
+// has no .eh_frame_hdr and is not x86-64. For x86-64 graphs without an
+// .eh_frame_hdr a header is synthesized and carried inside the record.
+static Expected<PerfJITCodeUnwindingInfoRecord>
+getUnwindingRecord(LinkGraph &G) {
+  PerfJITCodeUnwindingInfoRecord Record;
+  Record.Prefix.Id = PerfJITRecordType::JIT_CODE_UNWINDING_INFO;
+  Record.Prefix.TotalSize = 0;
+
+  auto *EHFrameSec = G.findSectionByName(".eh_frame");
+  if (!EHFrameSec) {
+    LLVM_DEBUG(dbgs() << "No .eh_frame section found\n");
+    return Record;
+  }
+  const auto &TT = G.getTargetTriple();
+  if (!TT.isOSBinFormatELF()) {
+    LLVM_DEBUG(dbgs() << "Not an ELF file, will not emit unwinding info\n");
+    return Record;
+  }
+
+  SectionRange EHFrameRange(*EHFrameSec);
+  const auto EHFrameSize = EHFrameRange.getSize();
+
+  if (auto *EHFrameHdrSec = G.findSectionByName(".eh_frame_hdr")) {
+    // The graph already carries a header: reference it by address.
+    SectionRange HdrRange(*EHFrameHdrSec);
+    Record.EHFrameHdrAddr = HdrRange.getStart().getValue();
+    Record.EHFrameHdrSize = HdrRange.getSize();
+    Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize;
+    Record.MappedSize = Record.UnwindDataSize;
+  } else if (TT.getArch() == Triple::x86_64) {
+    // No header in the graph: synthesize one and ship its bytes along.
+    auto Hdr = createX64EHFrameHeader(*EHFrameSec, G.getEndianness(), true);
+    if (!Hdr)
+      return Hdr.takeError();
+    Record.EHFrameHdr = std::move(*Hdr);
+    Record.EHFrameHdrAddr = 0;
+    Record.EHFrameHdrSize = Record.EHFrameHdr.size();
+    Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize;
+    Record.MappedSize = 0; // Because the EHFrame header was not mapped
+  } else {
+    LLVM_DEBUG(dbgs() << "No .eh_frame_hdr section found\n");
+    return Record;
+  }
+
+  Record.EHFrameAddr = EHFrameRange.getStart().getValue();
+  const size_t FixedBytes =
+      2 * sizeof(uint32_t)    // id, total_size
+      + sizeof(uint64_t)      // timestamp
+      + 3 * sizeof(uint64_t); // unwind_data_size, eh_frame_hdr_size,
+                              // mapped_size
+  Record.Prefix.TotalSize =
+      FixedBytes + Record.UnwindDataSize; // eh_frame_hdr, eh_frame
+  LLVM_DEBUG(dbgs() << "Created unwind record\n"
+                    << "Total size: " << Record.Prefix.TotalSize << "\n"
+                    << "Unwind size: " << Record.UnwindDataSize << "\n"
+                    << "EHFrame size: " << EHFrameSize << "\n"
+                    << "EHFrameHdr size: " << Record.EHFrameHdrSize << "\n");
+  return Record;
+}
+
+// Collects the perf records for one LinkGraph: a code load record (and, when
+// DC yields line info, a debug info record) for every named, callable defined
+// symbol, plus an unwinding record if EmitUnwindInfo is set.
+//
+// An unwinding record whose Prefix.TotalSize is 0 stands for "no unwinding
+// info". That is the state the batch is left in when unwinding info is
+// disabled and also when building it fails; in the latter case the error is
+// reported through ES.
+static PerfJITRecordBatch getRecords(ExecutionSession &ES, LinkGraph &G,
+                                     DWARFContext *DC,
+                                     std::atomic<uint64_t> &CodeIndex,
+                                     bool EmitUnwindInfo) {
+  PerfJITRecordBatch Batch;
+  for (auto *Sym : G.defined_symbols()) {
+    if (!Sym->hasName() || !Sym->isCallable())
+      continue;
+    if (auto DebugInfo = getDebugInfoRecord(*Sym, DC))
+      Batch.DebugInfoRecords.push_back(std::move(*DebugInfo));
+    Batch.CodeLoadRecords.push_back(getCodeLoadRecord(*Sym, CodeIndex));
+  }
+
+  // Zero the size up front so that the failure path below cannot leave it
+  // unset; a successfully built record overwrites it.
+  Batch.UnwindingRecord.Prefix.TotalSize = 0;
+  if (EmitUnwindInfo) {
+    if (auto UWR = getUnwindingRecord(G))
+      Batch.UnwindingRecord = std::move(*UWR);
+    else
+      ES.reportError(UWR.takeError());
+  }
+  return Batch;
+}
+} // namespace
+
+// Stores the three executor entry points and calls the "start" one right
+// away. That call is wrapped in cantFail, i.e. it is assumed to succeed.
+PerfSupportPlugin::PerfSupportPlugin(ExecutorProcessControl &EPC,
+                                     ExecutorAddr RegisterPerfStartAddr,
+                                     ExecutorAddr RegisterPerfEndAddr,
+                                     ExecutorAddr RegisterPerfImplAddr,
+                                     bool EmitUnwindInfo)
+    : EPC(EPC), RegisterPerfStartAddr(RegisterPerfStartAddr),
+      RegisterPerfEndAddr(RegisterPerfEndAddr),
+      RegisterPerfImplAddr(RegisterPerfImplAddr), CodeIndex(0),
+      EmitUnwindInfo(EmitUnwindInfo) {
+  Error StartErr = EPC.callSPSWrapper<void()>(RegisterPerfStartAddr);
+  cantFail(std::move(StartErr));
+}
+// Calls the executor's "end" entry point; like the start call in the
+// constructor, it is assumed to succeed (cantFail).
+PerfSupportPlugin::~PerfSupportPlugin() {
+  Error EndErr = EPC.callSPSWrapper<void()>(RegisterPerfEndAddr);
+  cantFail(std::move(EndErr));
+}
+
+// Adds a post-fixup pass that gathers the perf records for the graph and
+// queues an alloc action which hands them to the executor's registration
+// function (RegisterPerfImplAddr). If the call cannot be serialized, the pass
+// fails with that error instead of asserting.
+void PerfSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR,
+                                         LinkGraph &G,
+                                         PassConfiguration &Config) {
+  // MR is not needed by the pass, so it is deliberately not captured.
+  Config.PostFixupPasses.push_back([this](LinkGraph &G) -> Error {
+    // TODO get an actual DWARFContext for line info
+    DWARFContext *DWC = nullptr;
+    auto Batch = getRecords(EPC.getExecutionSession(), G, DWC, CodeIndex,
+                            EmitUnwindInfo);
+    auto RegisterCall = shared::WrapperFunctionCall::Create<
+        shared::SPSArgList<shared::SPSPerfJITRecordBatch>>(RegisterPerfImplAddr,
+                                                           Batch);
+    if (!RegisterCall)
+      return RegisterCall.takeError();
+    G.allocActions().push_back({std::move(*RegisterCall), {}});
+    return Error::success();
+  });
+}
+
+// Factory: refuses non-ELF targets, resolves the three executor entry points
+// by name in JD and constructs the plugin with their addresses. A failed
+// lookup is returned to the caller.
+Expected<std::unique_ptr<PerfSupportPlugin>>
+PerfSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD,
+                          bool EmitUnwindInfo) {
+  if (!EPC.getTargetTriple().isOSBinFormatELF())
+    return make_error<StringError>(
+        "Perf support only available for ELF LinkGraphs!",
+        inconvertibleErrorCode());
+
+  auto &ES = EPC.getExecutionSession();
+  ExecutorAddr StartAddr, EndAddr, ImplAddr;
+  Error LookupErr = lookupAndRecordAddrs(
+      ES, LookupKind::Static, makeJITDylibSearchOrder({&JD}),
+      {{ES.intern(RegisterPerfStartSymbolName), &StartAddr},
+       {ES.intern(RegisterPerfEndSymbolName), &EndAddr},
+       {ES.intern(RegisterPerfImplSymbolName), &ImplAddr}});
+  if (LookupErr)
+    return std::move(LookupErr);
+
+  return std::make_unique<PerfSupportPlugin>(EPC, StartAddr, EndAddr, ImplAddr,
+                                             EmitUnwindInfo);
+}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
index d9cd7b6..f2005dc 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
@@ -5,6 +5,7 @@ endif()
add_llvm_component_library(LLVMOrcTargetProcess
ExecutorSharedMemoryMapperService.cpp
JITLoaderGDB.cpp
+ JITLoaderPerf.cpp
OrcRTBootstrap.cpp
RegisterEHFrames.cpp
SimpleExecutorDylibManager.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
new file mode 100644
index 0000000..cf0d29c
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
@@ -0,0 +1,465 @@
+//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+
+#include <mutex>
+#include <optional>
+
+#ifdef __linux__
+
+#include <sys/mman.h> // mmap()
+#include <time.h> // clock_gettime(), time(), localtime_r()
+#include <unistd.h> // for read(), close()
+
+#define DEBUG_TYPE "orc"
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+using namespace llvm;
+using namespace llvm::orc;
+
+// Everything the jitdump writer keeps between the start and end calls.
+struct PerfState {
+  // cached process id, so it is not looked up for every record
+  uint32_t Pid = 0;
+
+  // base directory for output data
+  std::string JitPath;
+
+  // file descriptor of the dump file; it is handed to Dumpstream, which
+  // closes it
+  int DumpFd = -1;
+
+  // output data stream
+  std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+  // perf mmap marker; null while no marker page is mapped
+  void *MarkerAddr = nullptr;
+};
+
+// prevent concurrent dumps from messing up the output file
+static std::mutex Mutex;
+static std::optional<PerfState> state;
+
+struct RecHeader { // prefix that starts the DIR, CLR and UWR records; also written alone as the close record
+ uint32_t Id; // record type (a PerfJITRecordType value cast to uint32_t)
+ uint32_t TotalSize; // total size of the record in bytes
+ uint64_t Timestamp; // perf_get_timestamp() taken when the record is written
+};
+
+struct DIR { // fixed-size head of a debug info record; the DIE entries are written after it
+ RecHeader Prefix;
+ uint64_t CodeAddr; // copied from the incoming record's CodeAddr
+ uint64_t NrEntry; // number of entries in the incoming record
+};
+
+struct DIE { // fixed-size part of one debug info entry; the entry's name is written right after it
+ uint64_t CodeAddr; // the entry's Addr
+ uint32_t Line; // the entry's Lineno
+ uint32_t Discrim; // the entry's Discrim
+};
+
+struct CLR { // fixed-size head of a code load record; the name and the code bytes are written after it
+ RecHeader Prefix;
+ uint32_t Pid; // PerfState::Pid of the writing process
+ uint32_t Tid; // get_threadid() of the thread writing the record
+ uint64_t Vma; // copied from the incoming record
+ uint64_t CodeAddr; // address the code bytes are read from when dumping
+ uint64_t CodeSize; // number of code bytes dumped after the name
+ uint64_t CodeIndex; // copied from the incoming record
+};
+
+struct UWR { // fixed-size head of an unwinding info record; EH frame header and EH frame bytes follow
+ RecHeader Prefix;
+ uint64_t UnwindDataSize; // EH frame header size plus EH frame size
+ uint64_t EhFrameHeaderSize; // size of the EH frame header bytes
+ uint64_t MappedSize; // copied from the incoming record
+};
+
+// Flattens a timespec (seconds + nanoseconds) into one nanosecond count.
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+  const uint64_t Seconds = static_cast<uint64_t>(ts->tv_sec);
+  const uint64_t WholeSecondsInNs = Seconds * 1000000000;
+  return WholeSecondsInNs + ts->tv_nsec;
+}
+
+// Current CLOCK_MONOTONIC reading in nanoseconds, or 0 if clock_gettime
+// reports a failure.
+static inline uint64_t perf_get_timestamp() {
+  struct timespec Now;
+  const bool Failed = clock_gettime(CLOCK_MONOTONIC, &Now) != 0;
+  return Failed ? 0 : timespec_to_ns(&Now);
+}
+
+// Appends one debug info record to the dump: the fixed DIR head followed, for
+// every entry, by a DIE and the entry's name including its terminator byte.
+// Requires the global state to be initialized.
+static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
+  assert(state && "PerfState not initialized");
+  LLVM_DEBUG(dbgs() << "Writing debug record with "
+                    << DebugRecord.Entries.size() << " entries\n");
+  raw_fd_ostream &OS = *state->Dumpstream;
+
+  DIR dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
+                    DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
+          DebugRecord.CodeAddr, DebugRecord.Entries.size()};
+  OS.write(reinterpret_cast<const char *>(&dir), sizeof(dir));
+  size_t Written = sizeof(dir);
+
+  for (const auto &Entry : DebugRecord.Entries) {
+    const size_t NameBytes = Entry.Name.size() + 1; // name plus terminator
+    DIE d{Entry.Addr, Entry.Lineno, Entry.Discrim};
+    OS.write(reinterpret_cast<const char *>(&d), sizeof(d));
+    OS.write(Entry.Name.data(), NameBytes);
+    Written += sizeof(d) + NameBytes;
+  }
+  LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
+}
+
+// Appends one code load record to the dump: the fixed CLR head, the symbol
+// name including its terminator byte, and CodeSize bytes read from CodeAddr
+// in this process. Requires the global state to be initialized.
+static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
+  assert(state && "PerfState not initialized");
+  const uint32_t Tid = get_threadid();
+  LLVM_DEBUG(dbgs() << "Writing code record with code size "
+                    << CodeRecord.CodeSize << " and code index "
+                    << CodeRecord.CodeIndex << "\n");
+
+  const RecHeader Prefix{static_cast<uint32_t>(CodeRecord.Prefix.Id),
+                         CodeRecord.Prefix.TotalSize, perf_get_timestamp()};
+  CLR clr{Prefix,
+          state->Pid,
+          Tid,
+          CodeRecord.Vma,
+          CodeRecord.CodeAddr,
+          CodeRecord.CodeSize,
+          CodeRecord.CodeIndex};
+  LLVM_DEBUG(dbgs() << "wrote " << sizeof(clr) << " bytes of CLR, "
+                    << CodeRecord.Name.size() + 1 << " bytes of name, "
+                    << CodeRecord.CodeSize << " bytes of code\n");
+
+  raw_fd_ostream &OS = *state->Dumpstream;
+  OS.write(reinterpret_cast<const char *>(&clr), sizeof(clr));
+  OS.write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
+  OS.write(reinterpret_cast<const char *>(CodeRecord.CodeAddr),
+           CodeRecord.CodeSize);
+}
+
+// Appends one unwinding info record to the dump: the fixed UWR head, the EH
+// frame header and then the EH frame itself. The header bytes are read from
+// EHFrameHdrAddr when that address is non-zero and from the EHFrameHdr buffer
+// carried in the record otherwise; the EH frame is always read from
+// EHFrameAddr. Requires the global state to be initialized.
+static void
+writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
+  assert(state && "PerfState not initialized");
+  // Diagnostic output only: keep it behind LLVM_DEBUG like the other writers
+  // so that nothing is printed unless debugging of this type is enabled.
+  LLVM_DEBUG(dbgs() << "Writing unwind record with unwind data size "
+                    << UnwindRecord.UnwindDataSize
+                    << " and EH frame header size "
+                    << UnwindRecord.EHFrameHdrSize << " and mapped size "
+                    << UnwindRecord.MappedSize << "\n");
+  UWR uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
+                    UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
+          UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
+          UnwindRecord.MappedSize};
+  LLVM_DEBUG(dbgs() << "wrote " << sizeof(uwr) << " bytes of UWR, "
+                    << UnwindRecord.EHFrameHdrSize
+                    << " bytes of EH frame header, "
+                    << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
+                    << " bytes of EH frame\n");
+  state->Dumpstream->write(reinterpret_cast<const char *>(&uwr), sizeof(uwr));
+  if (UnwindRecord.EHFrameHdrAddr) {
+    state->Dumpstream->write(
+        reinterpret_cast<const char *>(UnwindRecord.EHFrameHdrAddr),
+        UnwindRecord.EHFrameHdrSize);
+  } else {
+    state->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
+                             UnwindRecord.EHFrameHdrSize);
+  }
+  // UnwindDataSize covers header + frame, so the frame is what remains.
+  state->Dumpstream->write(
+      reinterpret_cast<const char *>(UnwindRecord.EHFrameAddr),
+      UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize);
+}
+
+// Writes every record of Batch to the dump file and flushes the stream. The
+// unwinding record goes first (only if its TotalSize is non-zero), then the
+// debug info records, then the code load records. Fails if the dump state
+// has not been set up.
+static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
+  // Take the lock before looking at `state`, so that the check and the
+  // writes below see the same state and concurrent dumps do not interleave.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  if (!state) {
+    return make_error<StringError>("PerfState not initialized",
+                                   inconvertibleErrorCode());
+  }
+
+  // Serialize the batch
+  if (Batch.UnwindingRecord.Prefix.TotalSize > 0) {
+    writeUnwindRecord(Batch.UnwindingRecord);
+  }
+  for (const auto &DebugInfo : Batch.DebugInfoRecords) {
+    writeDebugRecord(DebugInfo);
+  }
+  for (const auto &CodeLoad : Batch.CodeLoadRecords) {
+    writeCodeRecord(CodeLoad);
+  }
+
+  state->Dumpstream->flush();
+
+  return Error::success();
+}
+
+struct Header { // written once at the start of the dump file
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid; // process id, taken from PerfState::Pid
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+// Maps one page of the dump file (state.DumpFd) readable and executable and
+// stores the mapping in state.MarkerAddr. On failure an error is returned and
+// MarkerAddr is left null.
+static Error OpenMarker(PerfState &state) {
+  // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+  // is captured either live (perf record running when we mmap) or in deferred
+  // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+  // file for more meta data info about the jitted code. Perf report/annotate
+  // detect this special filename and process the jitdump file.
+  //
+  // Mapping must be PROT_EXEC to ensure it is captured by perf record
+  // even when not using -d option.
+  void *Mapping =
+      ::mmap(nullptr, sys::Process::getPageSizeEstimate(),
+             PROT_READ | PROT_EXEC, MAP_PRIVATE, state.DumpFd, 0);
+
+  if (Mapping == MAP_FAILED) {
+    // Never store MAP_FAILED: CloseMarker only tests MarkerAddr for null and
+    // would otherwise try to unmap it.
+    state.MarkerAddr = nullptr;
+    return make_error<llvm::StringError>("could not mmap JIT marker",
+                                         inconvertibleErrorCode());
+  }
+  state.MarkerAddr = Mapping;
+  return Error::success();
+}
+
+// Unmaps the marker page if one is recorded in state and clears MarkerAddr;
+// does nothing when MarkerAddr is already null.
+void CloseMarker(PerfState &state) {
+  if (void *Marker = state.MarkerAddr) {
+    munmap(Marker, sys::Process::getPageSizeEstimate());
+    state.MarkerAddr = nullptr;
+  }
+}
+
+// Builds the jitdump file header for this process. The ELF machine value is
+// read from the first bytes of /proc/self/exe; an error is returned when that
+// file cannot be read or does not start with the ELF signature.
+static Expected<Header> FillMachine(PerfState &state) {
+  // Value-initialize: Pad1 and Flags are not assigned below and the whole
+  // struct is later written to the dump, so they must be zero rather than
+  // indeterminate.
+  Header hdr{};
+  hdr.Magic = LLVM_PERF_JIT_MAGIC;
+  hdr.Version = LLVM_PERF_JIT_VERSION;
+  hdr.TotalSize = sizeof(hdr);
+  hdr.Pid = state.Pid;
+  hdr.Timestamp = perf_get_timestamp();
+
+  // The 16 identification bytes, followed by the type and machine fields.
+  char id[16];
+  struct {
+    uint16_t e_type;
+    uint16_t e_machine;
+  } info;
+
+  size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
+
+  // This'll not guarantee that enough data was actually read from the
+  // underlying file. Instead the trailing part of the buffer would be
+  // zeroed. Given the ELF signature check below that seems ok though,
+  // it's unlikely that the file ends just after that, and the
+  // consequence would just be that perf wouldn't recognize the
+  // signature.
+  if (!MB) {
+    return make_error<llvm::StringError>("could not open /proc/self/exe",
+                                         MB.getError());
+  }
+
+  memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
+  memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
+
+  // check ELF signature
+  if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+    return make_error<llvm::StringError>("invalid ELF signature",
+                                         inconvertibleErrorCode());
+  }
+
+  hdr.ElfMach = info.e_machine;
+
+  return hdr;
+}
+
+// Chooses and creates the directory the dump file goes into and stores it in
+// state.JitPath. The base is $JITDUMPDIR if set, else the home directory,
+// else "."; below it ".debug/jit/" is created, and inside that a unique
+// directory whose name starts with JIT_LANG "-jit-" and the current date.
+static Error InitDebuggingDir(PerfState &state) {
+  // Formats "<What><Dir>: <message>\n" into a StringError.
+  auto DirError = [](const char *What, StringRef Dir, std::error_code EC) {
+    std::string Msg;
+    raw_string_ostream OS(Msg);
+    OS << What << Dir << ": " << EC.message() << "\n";
+    return make_error<StringError>(std::move(Msg), inconvertibleErrorCode());
+  };
+
+  // search for location to dump data to
+  SmallString<64> Path;
+  const char *BaseDir = getenv("JITDUMPDIR");
+  if (BaseDir)
+    Path.append(BaseDir);
+  else if (!sys::path::home_directory(Path))
+    Path = ".";
+
+  // create debug directory
+  Path += "/.debug/jit/";
+  if (auto EC = sys::fs::create_directories(Path))
+    return DirError("could not create jit cache directory ", Path, EC);
+
+  // create unique directory for dump data related to this process
+  time_t Now;
+  time(&Now);
+  struct tm LocalNow;
+  localtime_r(&Now, &LocalNow);
+  char DateStamp[sizeof("YYYYMMDD")];
+  strftime(DateStamp, sizeof(DateStamp), "%Y%m%d", &LocalNow);
+  Path += JIT_LANG "-jit-";
+  Path += DateStamp;
+
+  SmallString<128> UniqueDebugDir;
+  if (auto EC = sys::fs::createUniqueDirectory(Path, UniqueDebugDir))
+    return DirError("could not create unique jit cache directory ",
+                    UniqueDebugDir, EC);
+
+  state.JitPath = std::string(UniqueDebugDir.str());
+  return Error::success();
+}
+
+// Sets up jitdump output for this process: checks the clock, creates the
+// output directory and the jit-<pid>.dump file, maps the marker page, writes
+// the file header and, once all of that has succeeded, publishes the result
+// as the global state. Any failure is returned as an error and the global
+// state is left untouched.
+static Error registerJITLoaderPerfStartImpl() {
+  PerfState tentative;
+  tentative.Pid = sys::Process::getProcessId();
+  // check if clock-source is supported
+  if (!perf_get_timestamp()) {
+    return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
+                                   inconvertibleErrorCode());
+  }
+
+  if (auto err = InitDebuggingDir(tentative)) {
+    return err;
+  }
+
+  std::string Filename;
+  raw_string_ostream FilenameBuf(Filename);
+  FilenameBuf << tentative.JitPath << "/jit-" << tentative.Pid << ".dump";
+
+  // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+  // raw_fd_ostream doesn't expose the FD.
+  using sys::fs::openFileForReadWrite;
+  if (auto EC = openFileForReadWrite(FilenameBuf.str(), tentative.DumpFd,
+                                     sys::fs::CD_CreateNew, sys::fs::OF_None)) {
+    std::string errstr;
+    raw_string_ostream errstream(errstr);
+    errstream << "could not open JIT dump file " << FilenameBuf.str() << ": "
+              << EC.message() << "\n";
+    return make_error<StringError>(std::move(errstr), inconvertibleErrorCode());
+  }
+
+  // The stream takes over the descriptor and closes it when destroyed.
+  tentative.Dumpstream =
+      std::make_unique<raw_fd_ostream>(tentative.DumpFd, true);
+
+  auto header = FillMachine(tentative);
+  if (!header) {
+    return header.takeError();
+  }
+
+  // signal this process emits JIT information
+  if (auto err = OpenMarker(tentative)) {
+    return err;
+  }
+
+  tentative.Dumpstream->write(reinterpret_cast<const char *>(&header.get()),
+                              sizeof(*header));
+  // The stream buffers its output, so flush before asking whether the write
+  // went through.
+  tentative.Dumpstream->flush();
+
+  if (tentative.Dumpstream->has_error()) {
+    // Destroying a raw_fd_ostream that still has a pending error is fatal,
+    // so clear it; also drop the marker mapping made above.
+    tentative.Dumpstream->clear_error();
+    CloseMarker(tentative);
+    return make_error<StringError>("could not write JIT dump header",
+                                   inconvertibleErrorCode());
+  }
+
+  // Everything initialized, can do profiling now.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  state = std::move(tentative);
+  return Error::success();
+}
+
+// Finishes the dump: writes the JIT_CODE_CLOSE record, unmaps the marker page
+// and destroys the global state (which closes the dump file). Fails if the
+// state was never set up, or if writing the close record did not succeed.
+static Error registerJITLoaderPerfEndImpl() {
+  // Hold the lock so that tearing the state down cannot overlap with a batch
+  // that is being written.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  if (!state) {
+    return make_error<StringError>("PerfState not initialized",
+                                   inconvertibleErrorCode());
+  }
+  RecHeader close;
+  close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
+  close.TotalSize = sizeof(close);
+  close.Timestamp = perf_get_timestamp();
+  state->Dumpstream->write(reinterpret_cast<const char *>(&close),
+                           sizeof(close));
+
+  // Flush now and turn a pending stream error into a returned Error: a
+  // raw_fd_ostream destroyed with an uncleared error aborts the process.
+  state->Dumpstream->flush();
+  const bool WriteFailed = state->Dumpstream->has_error();
+  if (WriteFailed)
+    state->Dumpstream->clear_error();
+
+  // CloseMarker is a no-op when no marker page is mapped.
+  CloseMarker(*state);
+  state.reset();
+
+  if (WriteFailed) {
+    return make_error<StringError>("could not write JIT dump close record",
+                                   inconvertibleErrorCode());
+  }
+  return Error::success();
+}
+
+// C entry point: decodes a PerfJITRecordBatch from Data/Size, passes it to
+// registerJITLoaderPerfImpl and returns the serialized Error result.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError(SPSPerfJITRecordBatch)>;
+  auto Result = Wrapper::handle(Data, Size, registerJITLoaderPerfImpl);
+  return Result.release();
+}
+
+// C entry point: runs registerJITLoaderPerfStartImpl (it takes no arguments)
+// and returns the serialized Error result.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError()>;
+  auto Result = Wrapper::handle(Data, Size, registerJITLoaderPerfStartImpl);
+  return Result.release();
+}
+
+// C entry point: runs registerJITLoaderPerfEndImpl (it takes no arguments)
+// and returns the serialized Error result.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError()>;
+  auto Result = Wrapper::handle(Data, Size, registerJITLoaderPerfEndImpl);
+  return Result.release();
+}
+
+#else
+
+// The using-directives earlier in this file sit inside the __linux__ branch
+// and are therefore not in effect here; the names used below need their own.
+using namespace llvm;
+using namespace llvm::orc;
+
+// Error returned by every entry point when not built for Linux.
+static Error badOS() {
+  return make_error<StringError>(
+      "unsupported OS (perf support is only available on linux!)",
+      inconvertibleErrorCode());
+}
+
+// Same as badOS, with the signature the batch entry point's handler needs;
+// the batch itself is ignored.
+static Error badOSBatch(const PerfJITRecordBatch &Batch) { return badOS(); }
+
+// C entry point for non-Linux builds: decodes the batch and answers with the
+// "unsupported OS" error from badOSBatch.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError(SPSPerfJITRecordBatch)>;
+  auto Result = Wrapper::handle(Data, Size, badOSBatch);
+  return Result.release();
+}
+
+// C entry point for non-Linux builds: always answers with the "unsupported
+// OS" error from badOS.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError()>;
+  auto Result = Wrapper::handle(Data, Size, badOS);
+  return Result.release();
+}
+
+// C entry point for non-Linux builds: always answers with the "unsupported
+// OS" error from badOS.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+  using namespace llvm::orc::shared;
+  using Wrapper = WrapperFunction<SPSError()>;
+  auto Result = Wrapper::handle(Data, Size, badOS);
+  return Result.release();
+}
+
+#endif \ No newline at end of file