diff options
author | Prem Chintalapudi <prem.chintalapudi@gmail.com> | 2023-04-18 17:15:32 -0400 |
---|---|---|
committer | Valentin Churavy <v.churavy@gmail.com> | 2023-04-18 17:15:59 -0400 |
commit | 76e1521b0acff739c0425d0fcbb9360fc17f1af8 (patch) | |
tree | 04f440c51e2fbb50d19fcadfaaa630d897e979e7 /llvm/lib/ExecutionEngine/Orc | |
parent | d343a395431f70f63d66ef31cb69c8c4babdb21f (diff) | |
download | llvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.zip llvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.tar.gz llvm-76e1521b0acff739c0425d0fcbb9360fc17f1af8.tar.bz2 |
Non-debuginfo JITLink perf jitdump support
This patch ports PerfJITEventListener to a JITLink plugin, but adds unwind record support and drops debuginfo support temporarily. Debuginfo can be enabled in the future by providing a way to obtain a DWARFContext from a LinkGraph.
See D146060 for an experimental implementation that adds debuginfo parsing.
Reviewed By: lhames
Differential Revision: https://reviews.llvm.org/D146169
Diffstat (limited to 'llvm/lib/ExecutionEngine/Orc')
4 files changed, 784 insertions, 0 deletions
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index 1055e31..55e31b3 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -41,6 +41,7 @@ add_llvm_component_library(LLVMOrcJIT ObjectTransformLayer.cpp OrcABISupport.cpp OrcV2CBindings.cpp + PerfSupportPlugin.cpp RTDyldObjectLinkingLayer.cpp SimpleRemoteEPC.cpp Speculation.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp new file mode 100644 index 0000000..bcb445e --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp @@ -0,0 +1,317 @@ +//===----- PerfSupportPlugin.cpp --- Utils for perf support -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Handles support for registering code with perf +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h" + +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" + +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" +#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h" + +#define DEBUG_TYPE "orc" + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::jitlink; + +namespace { + +// Creates an EH frame header prepared for a 32-bit relative relocation +// to the start of the .eh_frame section. 
Absolute injects a 64-bit absolute +// address 4 bytes from the start in place of the 4-byte relocation slot +Expected<std::string> createX64EHFrameHeader(Section &EHFrame, + support::endianness endianness, + bool absolute) { + uint8_t Version = 1; + uint8_t EhFramePtrEnc = 0; + if (absolute) { + EhFramePtrEnc |= dwarf::DW_EH_PE_sdata8 | dwarf::DW_EH_PE_absptr; + } else { + EhFramePtrEnc |= dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_datarel; + } + uint8_t FDECountEnc = dwarf::DW_EH_PE_omit; + uint8_t TableEnc = dwarf::DW_EH_PE_omit; + // Zero placeholder for a relocation to the start of the .eh_frame section + // NOTE(review): previously labelled "X86_64_64", but the slot is a uint32_t + // -- confirm which relocation kind is intended here. + uint32_t EHFrameRelocation = 0; + // uint32_t FDECount = 0; + // Skip the FDE binary search table + // We'd have to reprocess the CIEs to get this information, + // which seems like more trouble than it's worth + // TODO consider implementing this. + // binary search table goes here + + // Four one-byte fields followed by either the 64-bit address (absolute) + // or the 32-bit placeholder. + size_t HeaderSize = + (sizeof(Version) + sizeof(EhFramePtrEnc) + sizeof(FDECountEnc) + + sizeof(TableEnc) + + (absolute ? 
sizeof(uint64_t) : sizeof(EHFrameRelocation))); + std::string HeaderContent(HeaderSize, '\0'); + BinaryStreamWriter Writer( + MutableArrayRef<uint8_t>( + reinterpret_cast<uint8_t *>(HeaderContent.data()), HeaderSize), + endianness); + if (auto Err = Writer.writeInteger(Version)) + return std::move(Err); + if (auto Err = Writer.writeInteger(EhFramePtrEnc)) + return std::move(Err); + if (auto Err = Writer.writeInteger(FDECountEnc)) + return std::move(Err); + if (auto Err = Writer.writeInteger(TableEnc)) + return std::move(Err); + if (absolute) { + uint64_t EHFrameAddr = SectionRange(EHFrame).getStart().getValue(); + if (auto Err = Writer.writeInteger(EHFrameAddr)) + return std::move(Err); + } else { + if (auto Err = Writer.writeInteger(EHFrameRelocation)) + return std::move(Err); + } + return HeaderContent; +} + +constexpr StringRef RegisterPerfStartSymbolName = + "llvm_orc_registerJITLoaderPerfStart"; +constexpr StringRef RegisterPerfEndSymbolName = + "llvm_orc_registerJITLoaderPerfEnd"; +constexpr StringRef RegisterPerfImplSymbolName = + "llvm_orc_registerJITLoaderPerfImpl"; + +static inline uint64_t timespec_to_ns(const struct timespec *ts) { + const uint64_t NanoSecPerSec = 1000000000; + return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; +} + +static inline uint64_t perf_get_timestamp() { +#ifdef __linux__ + struct timespec ts; + int ret; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +#else + return 0; +#endif +} + +static PerfJITCodeLoadRecord +getCodeLoadRecord(const Symbol &Sym, std::atomic<uint64_t> &CodeIndex) { + PerfJITCodeLoadRecord Record; + auto Name = Sym.getName(); + auto Addr = Sym.getAddress(); + auto Size = Sym.getSize(); + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_LOAD; + // Runtime sets PID + Record.Pid = 0; + // Runtime sets TID + Record.Tid = 0; + Record.Vma = Addr.getValue(); + Record.CodeAddr = Addr.getValue(); + Record.CodeSize = Size; + Record.CodeIndex = CodeIndex++; + 
Record.Name = Name.str(); + // Initialize last, once all the other fields are filled + Record.Prefix.TotalSize = + (2 * sizeof(uint32_t) // id, total_size + + sizeof(uint64_t) // timestamp + + 2 * sizeof(uint32_t) // pid, tid + + 4 * sizeof(uint64_t) // vma, code_addr, code_size, code_index + + Name.size() + 1 // symbol name + + Record.CodeSize // code + ); + return Record; +} + +static std::optional<PerfJITDebugInfoRecord> +getDebugInfoRecord(const Symbol &Sym, DWARFContext *DC) { + if (!DC) { + LLVM_DEBUG(dbgs() << "No debug info available\n"); + return std::nullopt; + } + auto &Section = Sym.getBlock().getSection(); + auto Addr = Sym.getAddress(); + auto Size = Sym.getSize(); + auto SAddr = object::SectionedAddress{Addr.getValue(), Section.getOrdinal()}; + LLVM_DEBUG(dbgs() << "Getting debug info for symbol " << Sym.getName() + << " at address " << Addr.getValue() << " with size " + << Size << "\n" + << "Section ordinal: " << Section.getOrdinal() << "\n"); + auto LInfo = DC->getLineInfoForAddressRange( + SAddr, Size, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); + if (LInfo.empty()) { + // No line info available + LLVM_DEBUG(dbgs() << "No line info available\n"); + return std::nullopt; + } + PerfJITDebugInfoRecord Record; + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_DEBUG_INFO; + Record.CodeAddr = Addr.getValue(); + for (const auto &Entry : LInfo) { + auto Addr = Entry.first; + // The function re-created by perf is preceded by a elf + // header. Need to adjust for that, otherwise the results are + // wrong. 
+ Addr += 0x40; + Record.Entries.push_back({Addr, Entry.second.Line, + Entry.second.Discriminator, + Entry.second.FileName}); + } + size_t EntriesBytes = (2 // record header + + 2 // record fields + ) * + sizeof(uint64_t); + for (const auto &Entry : Record.Entries) { + EntriesBytes += + sizeof(uint64_t) + 2 * sizeof(uint32_t); // Addr, Line/Discrim + EntriesBytes += Entry.Name.size() + 1; // Name + } + Record.Prefix.TotalSize = EntriesBytes; + LLVM_DEBUG(dbgs() << "Created debug info record\n" + << "Total size: " << Record.Prefix.TotalSize << "\n" + << "Nr entries: " << Record.Entries.size() << "\n"); + return Record; +} + +static Expected<PerfJITCodeUnwindingInfoRecord> +getUnwindingRecord(LinkGraph &G) { + PerfJITCodeUnwindingInfoRecord Record; + Record.Prefix.Id = PerfJITRecordType::JIT_CODE_UNWINDING_INFO; + Record.Prefix.TotalSize = 0; + auto Eh_frame = G.findSectionByName(".eh_frame"); + if (!Eh_frame) { + LLVM_DEBUG(dbgs() << "No .eh_frame section found\n"); + return Record; + } + if (!G.getTargetTriple().isOSBinFormatELF()) { + LLVM_DEBUG(dbgs() << "Not an ELF file, will not emit unwinding info\n"); + return Record; + } + auto SR = SectionRange(*Eh_frame); + auto EHFrameSize = SR.getSize(); + auto Eh_frame_hdr = G.findSectionByName(".eh_frame_hdr"); + if (!Eh_frame_hdr) { + if (G.getTargetTriple().getArch() == Triple::x86_64) { + auto Hdr = createX64EHFrameHeader(*Eh_frame, G.getEndianness(), true); + if (!Hdr) + return Hdr.takeError(); + Record.EHFrameHdr = std::move(*Hdr); + } else { + LLVM_DEBUG(dbgs() << "No .eh_frame_hdr section found\n"); + return Record; + } + Record.EHFrameHdrAddr = 0; + Record.EHFrameHdrSize = Record.EHFrameHdr.size(); + Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize; + Record.MappedSize = 0; // Because the EHFrame header was not mapped + } else { + auto SR = SectionRange(*Eh_frame_hdr); + Record.EHFrameHdrAddr = SR.getStart().getValue(); + Record.EHFrameHdrSize = SR.getSize(); + Record.UnwindDataSize = EHFrameSize + 
Record.EHFrameHdrSize; + Record.MappedSize = Record.UnwindDataSize; + } + Record.EHFrameAddr = SR.getStart().getValue(); + Record.Prefix.TotalSize = + (2 * sizeof(uint32_t) // id, total_size + + sizeof(uint64_t) // timestamp + + + 3 * sizeof(uint64_t) // unwind_data_size, eh_frame_hdr_size, mapped_size + + Record.UnwindDataSize // eh_frame_hdr, eh_frame + ); + LLVM_DEBUG(dbgs() << "Created unwind record\n" + << "Total size: " << Record.Prefix.TotalSize << "\n" + << "Unwind size: " << Record.UnwindDataSize << "\n" + << "EHFrame size: " << EHFrameSize << "\n" + << "EHFrameHdr size: " << Record.EHFrameHdrSize << "\n"); + return Record; +} + +static PerfJITRecordBatch getRecords(ExecutionSession &ES, LinkGraph &G, + DWARFContext *DC, + std::atomic<uint64_t> &CodeIndex, + bool EmitUnwindInfo) { + PerfJITRecordBatch Batch; + for (auto Sym : G.defined_symbols()) { + if (!Sym->hasName() || !Sym->isCallable()) + continue; + auto DebugInfo = getDebugInfoRecord(*Sym, DC); + if (DebugInfo) + Batch.DebugInfoRecords.push_back(std::move(*DebugInfo)); + Batch.CodeLoadRecords.push_back(getCodeLoadRecord(*Sym, CodeIndex)); + } + if (EmitUnwindInfo) { + auto UWR = getUnwindingRecord(G); + if (!UWR) { + ES.reportError(UWR.takeError()); + } else { + Batch.UnwindingRecord = std::move(*UWR); + } + } else { + Batch.UnwindingRecord.Prefix.TotalSize = 0; + } + return Batch; +} +} // namespace + +PerfSupportPlugin::PerfSupportPlugin(ExecutorProcessControl &EPC, + ExecutorAddr RegisterPerfStartAddr, + ExecutorAddr RegisterPerfEndAddr, + ExecutorAddr RegisterPerfImplAddr, + bool EmitUnwindInfo) + : EPC(EPC), RegisterPerfStartAddr(RegisterPerfStartAddr), + RegisterPerfEndAddr(RegisterPerfEndAddr), + RegisterPerfImplAddr(RegisterPerfImplAddr), CodeIndex(0), + EmitUnwindInfo(EmitUnwindInfo) { + cantFail(EPC.callSPSWrapper<void()>(RegisterPerfStartAddr)); +} +PerfSupportPlugin::~PerfSupportPlugin() { + cantFail(EPC.callSPSWrapper<void()>(RegisterPerfEndAddr)); +} + +void 
PerfSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR, + LinkGraph &G, + PassConfiguration &Config) { + Config.PostFixupPasses.push_back([this, &MR](LinkGraph &G) { + // TODO get an actual DWARFContext for line info + DWARFContext *DWC = nullptr; + auto Batch = getRecords(EPC.getExecutionSession(), G, DWC, CodeIndex, + EmitUnwindInfo); + G.allocActions().push_back( + {cantFail(shared::WrapperFunctionCall::Create< + shared::SPSArgList<shared::SPSPerfJITRecordBatch>>( + RegisterPerfImplAddr, Batch)), + {}}); + return Error::success(); + }); +} + +Expected<std::unique_ptr<PerfSupportPlugin>> +PerfSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD, + bool EmitUnwindInfo) { + if (!EPC.getTargetTriple().isOSBinFormatELF()) { + return make_error<StringError>( + "Perf support only available for ELF LinkGraphs!", + inconvertibleErrorCode()); + } + auto &ES = EPC.getExecutionSession(); + ExecutorAddr StartAddr, EndAddr, ImplAddr; + if (auto Err = lookupAndRecordAddrs( + ES, LookupKind::Static, makeJITDylibSearchOrder({&JD}), + {{ES.intern(RegisterPerfStartSymbolName), &StartAddr}, + {ES.intern(RegisterPerfEndSymbolName), &EndAddr}, + {ES.intern(RegisterPerfImplSymbolName), &ImplAddr}})) + return std::move(Err); + return std::make_unique<PerfSupportPlugin>(EPC, StartAddr, EndAddr, ImplAddr, + EmitUnwindInfo); +} diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt index d9cd7b6..f2005dc 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt @@ -5,6 +5,7 @@ endif() add_llvm_component_library(LLVMOrcTargetProcess ExecutorSharedMemoryMapperService.cpp JITLoaderGDB.cpp + JITLoaderPerf.cpp OrcRTBootstrap.cpp RegisterEHFrames.cpp SimpleExecutorDylibManager.cpp diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp new 
file mode 100644 index 0000000..cf0d29c --- /dev/null +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp @@ -0,0 +1,465 @@ +//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register objects for access by profilers via the perf JIT interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" + +#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Threading.h" + +#include <mutex> +#include <optional> + +#ifdef __linux__ + +#include <sys/mman.h> // mmap() +#include <time.h> // clock_gettime(), time(), localtime_r() */ +#include <unistd.h> // for read(), close() + +#define DEBUG_TYPE "orc" + +// language identifier (XXX: should we generate something better from debug +// info?) 
+#define JIT_LANG "llvm-IR" +#define LLVM_PERF_JIT_MAGIC \ + ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ + (uint32_t)'D') +#define LLVM_PERF_JIT_VERSION 1 + +using namespace llvm; +using namespace llvm::orc; + +struct PerfState { + // cache lookups + uint32_t Pid; + + // base directory for output data + std::string JitPath; + + // output data stream, closed via Dumpstream + int DumpFd = -1; + + // output data stream + std::unique_ptr<raw_fd_ostream> Dumpstream; + + // perf mmap marker + void *MarkerAddr = NULL; +}; + +// prevent concurrent dumps from messing up the output file +static std::mutex Mutex; +static std::optional<PerfState> state; + +struct RecHeader { + uint32_t Id; + uint32_t TotalSize; + uint64_t Timestamp; +}; + +struct DIR { + RecHeader Prefix; + uint64_t CodeAddr; + uint64_t NrEntry; +}; + +struct DIE { + uint64_t CodeAddr; + uint32_t Line; + uint32_t Discrim; +}; + +struct CLR { + RecHeader Prefix; + uint32_t Pid; + uint32_t Tid; + uint64_t Vma; + uint64_t CodeAddr; + uint64_t CodeSize; + uint64_t CodeIndex; +}; + +struct UWR { + RecHeader Prefix; + uint64_t UnwindDataSize; + uint64_t EhFrameHeaderSize; + uint64_t MappedSize; +}; + +static inline uint64_t timespec_to_ns(const struct timespec *ts) { + const uint64_t NanoSecPerSec = 1000000000; + return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; +} + +static inline uint64_t perf_get_timestamp() { + struct timespec ts; + int ret; + + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +} + +static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { + assert(state && "PerfState not initialized"); + LLVM_DEBUG(dbgs() << "Writing debug record with " + << DebugRecord.Entries.size() << " entries\n"); + size_t Written = 0; + DIR dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id), + DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, + DebugRecord.CodeAddr, DebugRecord.Entries.size()}; + 
state->Dumpstream->write(reinterpret_cast<const char *>(&dir), sizeof(dir)); + Written += sizeof(dir); + for (auto &die : DebugRecord.Entries) { + DIE d{die.Addr, die.Lineno, die.Discrim}; + state->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d)); + state->Dumpstream->write(die.Name.data(), die.Name.size() + 1); + Written += sizeof(d) + die.Name.size() + 1; + } + LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); +} + +static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { + assert(state && "PerfState not initialized"); + uint32_t Tid = get_threadid(); + LLVM_DEBUG(dbgs() << "Writing code record with code size " + << CodeRecord.CodeSize << " and code index " + << CodeRecord.CodeIndex << "\n"); + CLR clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id), + CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, + state->Pid, + Tid, + CodeRecord.Vma, + CodeRecord.CodeAddr, + CodeRecord.CodeSize, + CodeRecord.CodeIndex}; + LLVM_DEBUG(dbgs() << "wrote " << sizeof(clr) << " bytes of CLR, " + << CodeRecord.Name.size() + 1 << " bytes of name, " + << CodeRecord.CodeSize << " bytes of code\n"); + state->Dumpstream->write(reinterpret_cast<const char *>(&clr), sizeof(clr)); + state->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); + state->Dumpstream->write((const char *)CodeRecord.CodeAddr, + CodeRecord.CodeSize); +} + +// Writes the unwinding-info record header to the dump stream, followed by the +// EH frame header bytes and the EH frame bytes. +static void +writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { + assert(state && "PerfState not initialized"); + // Trace output is debug-only, matching the other record writers; an + // unconditional dbgs() would print for every unwind record written. + LLVM_DEBUG(dbgs() << "Writing unwind record with unwind data size " + << UnwindRecord.UnwindDataSize << " and EH frame header size " + << UnwindRecord.EHFrameHdrSize << " and mapped size " + << UnwindRecord.MappedSize << "\n"); + UWR uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id), + UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, + UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, + UnwindRecord.MappedSize}; + LLVM_DEBUG(dbgs() << "wrote " 
<< sizeof(uwr) << " bytes of UWR, " + << UnwindRecord.EHFrameHdrSize + << " bytes of EH frame header, " + << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize + << " bytes of EH frame\n"); + state->Dumpstream->write(reinterpret_cast<const char *>(&uwr), sizeof(uwr)); + if (UnwindRecord.EHFrameHdrAddr) { + state->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, + UnwindRecord.EHFrameHdrSize); + } else { + state->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), + UnwindRecord.EHFrameHdrSize); + } + state->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, + UnwindRecord.UnwindDataSize - + UnwindRecord.EHFrameHdrSize); +} + +static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { + if (!state) { + return make_error<StringError>("PerfState not initialized", + inconvertibleErrorCode()); + } + + // Serialize the batch + std::lock_guard<std::mutex> Lock(Mutex); + if (Batch.UnwindingRecord.Prefix.TotalSize > 0) { + writeUnwindRecord(Batch.UnwindingRecord); + } + for (const auto &DebugInfo : Batch.DebugInfoRecords) { + writeDebugRecord(DebugInfo); + } + for (const auto &CodeLoad : Batch.CodeLoadRecords) { + writeCodeRecord(CodeLoad); + } + + state->Dumpstream->flush(); + + return Error::success(); +} + +struct Header { + uint32_t Magic; // characters "JiTD" + uint32_t Version; // header version + uint32_t TotalSize; // total size of header + uint32_t ElfMach; // elf mach target + uint32_t Pad1; // reserved + uint32_t Pid; + uint64_t Timestamp; // timestamp + uint64_t Flags; // flags +}; + +static Error OpenMarker(PerfState &state) { + // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap + // is captured either live (perf record running when we mmap) or in deferred + // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump + // file for more meta data info about the jitted code. Perf report/annotate + // detect this special filename and process the jitdump file. 
+ // + // Mapping must be PROT_EXEC to ensure it is captured by perf record + // even when not using -d option. + state.MarkerAddr = + ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, + MAP_PRIVATE, state.DumpFd, 0); + + if (state.MarkerAddr == MAP_FAILED) { + return make_error<llvm::StringError>("could not mmap JIT marker", + inconvertibleErrorCode()); + } + return Error::success(); +} + +void CloseMarker(PerfState &state) { + if (!state.MarkerAddr) + return; + + munmap(state.MarkerAddr, sys::Process::getPageSizeEstimate()); + state.MarkerAddr = nullptr; +} + +static Expected<Header> FillMachine(PerfState &state) { + Header hdr; + hdr.Magic = LLVM_PERF_JIT_MAGIC; + hdr.Version = LLVM_PERF_JIT_VERSION; + hdr.TotalSize = sizeof(hdr); + hdr.Pid = state.Pid; + hdr.Timestamp = perf_get_timestamp(); + + char id[16]; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + size_t RequiredMemory = sizeof(id) + sizeof(info); + + ErrorOr<std::unique_ptr<MemoryBuffer>> MB = + MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); + + // This'll not guarantee that enough data was actually read from the + // underlying file. Instead the trailing part of the buffer would be + // zeroed. Given the ELF signature check below that seems ok though, + // it's unlikely that the file ends just after that, and the + // consequence would just be that perf wouldn't recognize the + // signature. 
+ if (!MB) { + return make_error<llvm::StringError>("could not open /proc/self/exe", + MB.getError()); + } + + memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); + memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); + + // check ELF signature + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { + return make_error<llvm::StringError>("invalid ELF signature", + inconvertibleErrorCode()); + } + + hdr.ElfMach = info.e_machine; + + return hdr; +} + +static Error InitDebuggingDir(PerfState &state) { + time_t Time; + struct tm LocalTime; + char TimeBuffer[sizeof("YYYYMMDD")]; + SmallString<64> Path; + + // search for location to dump data to + if (const char *BaseDir = getenv("JITDUMPDIR")) + Path.append(BaseDir); + else if (!sys::path::home_directory(Path)) + Path = "."; + + // create debug directory + Path += "/.debug/jit/"; + if (auto EC = sys::fs::create_directories(Path)) { + std::string errstr; + raw_string_ostream errstream(errstr); + errstream << "could not create jit cache directory " << Path << ": " + << EC.message() << "\n"; + return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); + } + + // create unique directory for dump data related to this process + time(&Time); + localtime_r(&Time, &LocalTime); + strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); + Path += JIT_LANG "-jit-"; + Path += TimeBuffer; + + SmallString<128> UniqueDebugDir; + + using sys::fs::createUniqueDirectory; + if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { + std::string errstr; + raw_string_ostream errstream(errstr); + errstream << "could not create unique jit cache directory " + << UniqueDebugDir << ": " << EC.message() << "\n"; + return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); + } + + state.JitPath = std::string(UniqueDebugDir.str()); + + return Error::success(); +} + +static Error registerJITLoaderPerfStartImpl() { + PerfState tentative; + tentative.Pid = 
sys::Process::getProcessId(); + // check if clock-source is supported + if (!perf_get_timestamp()) { + return make_error<StringError>("kernel does not support CLOCK_MONOTONIC", + inconvertibleErrorCode()); + } + + if (auto err = InitDebuggingDir(tentative)) { + return std::move(err); + } + + std::string Filename; + raw_string_ostream FilenameBuf(Filename); + FilenameBuf << tentative.JitPath << "/jit-" << tentative.Pid << ".dump"; + + // Need to open ourselves, because we need to hand the FD to OpenMarker() and + // raw_fd_ostream doesn't expose the FD. + // Name the function that is actually called; the previous declaration + // named openFileForWrite and the call below only resolved through ADL. + using sys::fs::openFileForReadWrite; + if (auto EC = openFileForReadWrite(FilenameBuf.str(), tentative.DumpFd, + sys::fs::CD_CreateNew, sys::fs::OF_None)) { + std::string errstr; + raw_string_ostream errstream(errstr); + errstream << "could not open JIT dump file " << FilenameBuf.str() << ": " + << EC.message() << "\n"; + return make_error<StringError>(std::move(errstr), inconvertibleErrorCode()); + } + + // The stream takes ownership of the descriptor (shouldClose = true). + tentative.Dumpstream = + std::make_unique<raw_fd_ostream>(tentative.DumpFd, true); + + auto header = FillMachine(tentative); + if (!header) { + return header.takeError(); + } + + // signal this process emits JIT information + if (auto err = OpenMarker(tentative)) { + return std::move(err); + } + + tentative.Dumpstream->write(reinterpret_cast<const char *>(&header.get()), + sizeof(*header)); + + // Everything initialized, can do profiling now. 
+ if (tentative.Dumpstream->has_error()) { + return make_error<StringError>("could not write JIT dump header", + inconvertibleErrorCode()); + } + state = std::move(tentative); + return Error::success(); +} + +static Error registerJITLoaderPerfEndImpl() { + if (!state) { + return make_error<StringError>("PerfState not initialized", + inconvertibleErrorCode()); + } + RecHeader close; + close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE); + close.TotalSize = sizeof(close); + close.Timestamp = perf_get_timestamp(); + state->Dumpstream->write(reinterpret_cast<const char *>(&close), + sizeof(close)); + if (state->MarkerAddr) { + CloseMarker(*state); + } + state.reset(); + return Error::success(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle( + Data, Size, registerJITLoaderPerfImpl) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError()>::handle(Data, Size, + registerJITLoaderPerfStartImpl) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError()>::handle(Data, Size, + registerJITLoaderPerfEndImpl) + .release(); +} + +#else + +static Error badOS() { + return make_error<StringError>( + "unsupported OS (perf support is only available on linux!)", + inconvertibleErrorCode()); +} + +static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, 
Size, + badOSBatch) + .release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); +} + +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { + using namespace orc::shared; + return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); +} + +#endif
\ No newline at end of file |