aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp')
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp | 465
1 files changed, 465 insertions, 0 deletions
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
new file mode 100644
index 0000000..cf0d29c
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
@@ -0,0 +1,465 @@
+//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+
+#include <mutex>
+#include <optional>
+
+#ifdef __linux__
+
+#include <sys/mman.h> // mmap()
+#include <time.h> // clock_gettime(), time(), localtime_r()
+#include <unistd.h> // for read(), close()
+
+#define DEBUG_TYPE "orc"
+
+// Language identifier; it prefixes the name of the per-process dump directory
+// (XXX: should we generate something better from debug info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D') // the characters "JiTD" packed into 32 bits, 'J' in the top byte
+#define LLVM_PERF_JIT_VERSION 1 // value stored in Header::Version
+
+using namespace llvm;
+using namespace llvm::orc;
+
+/// Bookkeeping for one active perf jitdump session of this process.
+///
+/// Every member has an initializer so that a default-constructed PerfState
+/// never exposes indeterminate values.
+struct PerfState {
+  // Cached process id; copied into the file header and into every code load
+  // record.
+  uint32_t Pid = 0;
+
+  // Directory that holds the dump file of this process.
+  std::string JitPath;
+
+  // Descriptor of the dump file. It is owned (and eventually closed) by
+  // Dumpstream; it is kept here because the marker mmap needs the raw fd.
+  int DumpFd = -1;
+
+  // Output stream over DumpFd through which all records are written.
+  std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+  // Address of the perf mmap marker, nullptr while no marker is mapped.
+  void *MarkerAddr = nullptr;
+};
+
+// Held while a batch is written so concurrent dumps cannot interleave records.
+static std::mutex Mutex;
+static std::optional<PerfState> state; // assigned by the start routine, reset by the end routine
+
+struct RecHeader { // prefix written at the start of every record in the dump
+ uint32_t Id; // record type, a PerfJITRecordType value cast to uint32_t
+ uint32_t TotalSize; // record size: the incoming Prefix.TotalSize, or sizeof(RecHeader) for the close record
+ uint64_t Timestamp; // perf_get_timestamp() taken when the record is built
+};
+
+struct DIR { // fixed-size head of a debug-info record; the DIE entries are written after it
+ RecHeader Prefix;
+ uint64_t CodeAddr; // copied from PerfJITDebugInfoRecord::CodeAddr
+ uint64_t NrEntry; // number of entries that follow (Entries.size())
+};
+
+struct DIE { // fixed-size part of one debug entry; the entry name plus one trailing byte is written right after it
+ uint64_t CodeAddr; // the entry's Addr
+ uint32_t Line; // the entry's Lineno
+ uint32_t Discrim; // the entry's Discrim
+};
+
+struct CLR { // fixed-size head of a code-load record; the name and the code bytes are written after it
+ RecHeader Prefix;
+ uint32_t Pid; // PerfState::Pid
+ uint32_t Tid; // get_threadid() of the thread writing the record
+ uint64_t Vma; // copied from PerfJITCodeLoadRecord::Vma
+ uint64_t CodeAddr; // address the code bytes are read from
+ uint64_t CodeSize; // number of code bytes written after the name
+ uint64_t CodeIndex; // copied from PerfJITCodeLoadRecord::CodeIndex
+};
+
+struct UWR { // fixed-size head of an unwinding-info record; EH frame header and EH frame bytes follow
+ RecHeader Prefix;
+ uint64_t UnwindDataSize; // EH frame header bytes plus EH frame bytes
+ uint64_t EhFrameHeaderSize; // number of EH frame header bytes written after this head
+ uint64_t MappedSize; // copied from PerfJITCodeUnwindingInfoRecord::MappedSize
+};
+
+/// Collapse \p ts (seconds plus nanoseconds) into a single nanosecond count.
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+  constexpr uint64_t NanosPerSecond = 1000000000;
+  const uint64_t WholeSeconds = static_cast<uint64_t>(ts->tv_sec);
+  return WholeSeconds * NanosPerSecond + ts->tv_nsec;
+}
+
+/// Read CLOCK_MONOTONIC and return it in nanoseconds.
+///
+/// \returns 0 when clock_gettime() reports a failure.
+static inline uint64_t perf_get_timestamp() {
+  struct timespec Now;
+  if (clock_gettime(CLOCK_MONOTONIC, &Now) != 0)
+    return 0;
+  return timespec_to_ns(&Now);
+}
+
+/// Write one debug-info record to the dump stream: a DIR head followed, for
+/// each entry of \p DebugRecord, by a DIE and the entry name plus one trailing
+/// byte.
+static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
+  assert(state && "PerfState not initialized");
+  LLVM_DEBUG(dbgs() << "Writing debug record with "
+                    << DebugRecord.Entries.size() << " entries\n");
+  raw_fd_ostream &OS = *state->Dumpstream;
+
+  const RecHeader Prefix{static_cast<uint32_t>(DebugRecord.Prefix.Id),
+                         DebugRecord.Prefix.TotalSize, perf_get_timestamp()};
+  const DIR Head{Prefix, DebugRecord.CodeAddr, DebugRecord.Entries.size()};
+  OS.write(reinterpret_cast<const char *>(&Head), sizeof(Head));
+  size_t BytesOut = sizeof(Head);
+
+  for (const auto &Entry : DebugRecord.Entries) {
+    const DIE Fixed{Entry.Addr, Entry.Lineno, Entry.Discrim};
+    // The name is written together with the byte that follows it.
+    const size_t NameBytes = Entry.Name.size() + 1;
+    OS.write(reinterpret_cast<const char *>(&Fixed), sizeof(Fixed));
+    OS.write(Entry.Name.data(), NameBytes);
+    BytesOut += sizeof(Fixed) + NameBytes;
+  }
+  LLVM_DEBUG(dbgs() << "wrote " << BytesOut << " bytes of debug info\n");
+}
+
+/// Write one code-load record to the dump stream: a CLR head, the symbol name
+/// plus one trailing byte, and CodeSize bytes read from CodeAddr.
+static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
+  assert(state && "PerfState not initialized");
+  const uint32_t ThreadId = get_threadid();
+  LLVM_DEBUG(dbgs() << "Writing code record with code size "
+                    << CodeRecord.CodeSize << " and code index "
+                    << CodeRecord.CodeIndex << "\n");
+
+  const RecHeader Prefix{static_cast<uint32_t>(CodeRecord.Prefix.Id),
+                         CodeRecord.Prefix.TotalSize, perf_get_timestamp()};
+  const CLR Head{Prefix,
+                 state->Pid,
+                 ThreadId,
+                 CodeRecord.Vma,
+                 CodeRecord.CodeAddr,
+                 CodeRecord.CodeSize,
+                 CodeRecord.CodeIndex};
+  LLVM_DEBUG(dbgs() << "wrote " << sizeof(Head) << " bytes of CLR, "
+                    << CodeRecord.Name.size() + 1 << " bytes of name, "
+                    << CodeRecord.CodeSize << " bytes of code\n");
+
+  raw_fd_ostream &OS = *state->Dumpstream;
+  OS.write(reinterpret_cast<const char *>(&Head), sizeof(Head));
+  OS.write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
+  // The code bytes are copied straight out of this process's memory.
+  OS.write(reinterpret_cast<const char *>(CodeRecord.CodeAddr),
+           CodeRecord.CodeSize);
+}
+
+/// Write the unwinding-info record to the dump stream: a UWR head, then the
+/// EH frame header bytes, then the EH frame bytes.
+///
+/// The EH frame header is read from EHFrameHdrAddr when that address is
+/// non-zero, and from the EHFrameHdr buffer carried in \p UnwindRecord
+/// otherwise. The EH frame itself is always read from EHFrameAddr.
+static void
+writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
+  assert(state && "PerfState not initialized");
+  // UnwindDataSize covers header plus frame; if it were the smaller value the
+  // subtraction below would wrap around to a huge length.
+  assert(UnwindRecord.UnwindDataSize >= UnwindRecord.EHFrameHdrSize &&
+         "EH frame header larger than the total unwind data");
+  const uint64_t EHFrameSize =
+      UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize;
+
+  // All diagnostics go through LLVM_DEBUG so that nothing is printed unless
+  // debug output was requested.
+  LLVM_DEBUG(dbgs() << "Writing unwind record with unwind data size "
+                    << UnwindRecord.UnwindDataSize
+                    << " and EH frame header size "
+                    << UnwindRecord.EHFrameHdrSize << " and mapped size "
+                    << UnwindRecord.MappedSize << "\n");
+  UWR uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
+                    UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
+          UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
+          UnwindRecord.MappedSize};
+  LLVM_DEBUG(dbgs() << "wrote " << sizeof(uwr) << " bytes of UWR, "
+                    << UnwindRecord.EHFrameHdrSize
+                    << " bytes of EH frame header, " << EHFrameSize
+                    << " bytes of EH frame\n");
+
+  raw_fd_ostream &OS = *state->Dumpstream;
+  OS.write(reinterpret_cast<const char *>(&uwr), sizeof(uwr));
+  if (UnwindRecord.EHFrameHdrAddr) {
+    OS.write(reinterpret_cast<const char *>(UnwindRecord.EHFrameHdrAddr),
+             UnwindRecord.EHFrameHdrSize);
+  } else {
+    // The inline buffer must hold at least as many bytes as are written.
+    assert(UnwindRecord.EHFrameHdr.size() >= UnwindRecord.EHFrameHdrSize &&
+           "inline EH frame header shorter than EHFrameHdrSize");
+    OS.write(UnwindRecord.EHFrameHdr.data(), UnwindRecord.EHFrameHdrSize);
+  }
+  OS.write(reinterpret_cast<const char *>(UnwindRecord.EHFrameAddr),
+           EHFrameSize);
+}
+
+/// Append every record of \p Batch to the dump file.
+///
+/// The unwinding record is written first (only when its TotalSize is
+/// non-zero), then all debug-info records, then all code-load records, and
+/// finally the stream is flushed.
+///
+/// \returns an error when no perf session is active or when the stream
+/// reports a write failure after the flush; success otherwise.
+static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
+  // Take the lock before looking at the global state: testing it first and
+  // locking afterwards would let the state change in between.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  if (!state)
+    return make_error<StringError>("PerfState not initialized",
+                                   inconvertibleErrorCode());
+
+  // Serialize the batch
+  if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
+    writeUnwindRecord(Batch.UnwindingRecord);
+  for (const auto &DebugInfo : Batch.DebugInfoRecords)
+    writeDebugRecord(DebugInfo);
+  for (const auto &CodeLoad : Batch.CodeLoadRecords)
+    writeCodeRecord(CodeLoad);
+
+  raw_fd_ostream &OS = *state->Dumpstream;
+  OS.flush();
+
+  // Report a failed write to the caller instead of dropping it. The error is
+  // cleared once it has been captured so that it is reported exactly once.
+  if (OS.has_error()) {
+    const std::error_code EC = OS.error();
+    OS.clear_error();
+    return make_error<StringError>(
+        "could not write JIT dump records: " + EC.message(), EC);
+  }
+
+  return Error::success();
+}
+
+struct Header { // file header, written once when the dump file is set up
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target (e_machine read from /proc/self/exe)
+ uint32_t Pad1; // reserved
+ uint32_t Pid; // PerfState::Pid of the process writing the dump
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+/// Map the first page of the dump file and remember the address in
+/// \p state.MarkerAddr.
+///
+/// \returns an error, leaving MarkerAddr null, when mmap() fails.
+static Error OpenMarker(PerfState &state) {
+  // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+  // is captured either live (perf record running when we mmap) or in deferred
+  // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+  // file for more meta data info about the jitted code. Perf report/annotate
+  // detect this special filename and process the jitdump file.
+  //
+  // Mapping must be PROT_EXEC to ensure it is captured by perf record
+  // even when not using -d option.
+  void *Addr =
+      ::mmap(nullptr, sys::Process::getPageSizeEstimate(),
+             PROT_READ | PROT_EXEC, MAP_PRIVATE, state.DumpFd, 0);
+
+  if (Addr == MAP_FAILED) {
+    // Never store MAP_FAILED: it is non-null, so a later null check would
+    // mistake it for a live mapping.
+    state.MarkerAddr = nullptr;
+    return make_error<llvm::StringError>("could not mmap JIT marker",
+                                         inconvertibleErrorCode());
+  }
+
+  state.MarkerAddr = Addr;
+  return Error::success();
+}
+
+/// Unmap the perf marker page of \p state, if one is mapped, and reset
+/// MarkerAddr to null.
+///
+/// A MarkerAddr of MAP_FAILED (the value a failed mmap() returns) is treated
+/// like "no mapping" rather than being passed to munmap().
+void CloseMarker(PerfState &state) {
+  void *const Addr = state.MarkerAddr;
+  state.MarkerAddr = nullptr;
+  if (!Addr || Addr == MAP_FAILED)
+    return;
+
+  ::munmap(Addr, sys::Process::getPageSizeEstimate());
+}
+
+/// Build the dump file header for this process.
+///
+/// Magic, version, size, pid and timestamp are filled in directly; the ELF
+/// machine id is taken from the e_machine field of /proc/self/exe. All
+/// remaining fields (Pad1, Flags) are zero.
+///
+/// \returns the header, or an error when /proc/self/exe cannot be read or
+/// does not start with the ELF signature.
+static Expected<Header> FillMachine(PerfState &state) {
+  // Value-initialize so that the fields not assigned below are written to the
+  // file as zeros instead of indeterminate bytes.
+  Header hdr{};
+  hdr.Magic = LLVM_PERF_JIT_MAGIC;
+  hdr.Version = LLVM_PERF_JIT_VERSION;
+  hdr.TotalSize = sizeof(hdr);
+  hdr.Pid = state.Pid;
+  hdr.Timestamp = perf_get_timestamp();
+
+  // Leading part of an ELF file header: 16 identification bytes followed by
+  // the type and machine fields.
+  char id[16];
+  struct {
+    uint16_t e_type;
+    uint16_t e_machine;
+  } info;
+
+  size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
+
+  // This'll not guarantee that enough data was actually read from the
+  // underlying file. Instead the trailing part of the buffer would be
+  // zeroed. Given the ELF signature check below that seems ok though,
+  // it's unlikely that the file ends just after that, and the
+  // consequence would just be that perf wouldn't recognize the
+  // signature.
+  if (!MB) {
+    return make_error<llvm::StringError>("could not open /proc/self/exe",
+                                         MB.getError());
+  }
+
+  // Never copy past the end of the buffer, whatever size came back.
+  if ((*MB)->getBufferSize() < RequiredMemory) {
+    return make_error<llvm::StringError>(
+        "could not read ELF header from /proc/self/exe",
+        inconvertibleErrorCode());
+  }
+
+  const char *Start = (*MB)->getBufferStart();
+  memcpy(&id, Start, sizeof(id));
+  memcpy(&info, Start + sizeof(id), sizeof(info));
+
+  // check ELF signature
+  if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+    return make_error<llvm::StringError>("invalid ELF signature",
+                                         inconvertibleErrorCode());
+  }
+
+  hdr.ElfMach = info.e_machine;
+
+  return hdr;
+}
+
+/// Create the directory that will hold this process's dump file and store its
+/// path in \p state.JitPath.
+///
+/// The base is $JITDUMPDIR when set, else the home directory, else ".".
+/// Below it ".debug/jit/" is created, and inside that a uniquely named
+/// directory whose name starts with JIT_LANG "-jit-" and the current date.
+static Error InitDebuggingDir(PerfState &state) {
+  // Shared formatting for the two failure messages below.
+  auto Fail = [](const char *What, StringRef Where,
+                 const std::error_code &Code) -> Error {
+    std::string Msg;
+    raw_string_ostream OS(Msg);
+    OS << What << Where << ": " << Code.message() << "\n";
+    return make_error<StringError>(std::move(Msg), inconvertibleErrorCode());
+  };
+
+  // search for location to dump data to
+  SmallString<64> Path;
+  const char *BaseDir = getenv("JITDUMPDIR");
+  if (BaseDir)
+    Path.append(BaseDir);
+  else if (!sys::path::home_directory(Path))
+    Path = ".";
+
+  // create debug directory
+  Path += "/.debug/jit/";
+  if (auto EC = sys::fs::create_directories(Path))
+    return Fail("could not create jit cache directory ", Path, EC);
+
+  // create unique directory for dump data related to this process
+  time_t Now;
+  time(&Now);
+  struct tm LocalNow;
+  localtime_r(&Now, &LocalNow);
+  char DateStamp[sizeof("YYYYMMDD")];
+  strftime(DateStamp, sizeof(DateStamp), "%Y%m%d", &LocalNow);
+  Path += JIT_LANG "-jit-";
+  Path += DateStamp;
+
+  SmallString<128> UniqueDebugDir;
+  if (auto EC = sys::fs::createUniqueDirectory(Path, UniqueDebugDir))
+    return Fail("could not create unique jit cache directory ",
+                UniqueDebugDir, EC);
+
+  state.JitPath = UniqueDebugDir.str().str();
+  return Error::success();
+}
+
+/// Start a perf jitdump session for this process.
+///
+/// Creates the dump directory and the file "jit-<pid>.dump" inside it, maps
+/// the perf marker page, writes the file header and, once all of that
+/// succeeded, publishes the session in the global state.
+///
+/// \returns an error describing the first step that failed; in that case the
+/// global state is left untouched.
+static Error registerJITLoaderPerfStartImpl() {
+  PerfState tentative;
+  tentative.Pid = sys::Process::getProcessId();
+  // check if clock-source is supported
+  if (!perf_get_timestamp()) {
+    return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
+                                   inconvertibleErrorCode());
+  }
+
+  if (auto err = InitDebuggingDir(tentative))
+    return err;
+
+  std::string Filename;
+  raw_string_ostream FilenameBuf(Filename);
+  FilenameBuf << tentative.JitPath << "/jit-" << tentative.Pid << ".dump";
+  FilenameBuf.flush();
+
+  // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+  // raw_fd_ostream doesn't expose the FD. The function is named with its full
+  // qualification so the call does not depend on argument-dependent lookup.
+  if (auto EC = sys::fs::openFileForReadWrite(Filename, tentative.DumpFd,
+                                              sys::fs::CD_CreateNew,
+                                              sys::fs::OF_None)) {
+    std::string errstr;
+    raw_string_ostream errstream(errstr);
+    errstream << "could not open JIT dump file " << Filename << ": "
+              << EC.message() << "\n";
+    return make_error<StringError>(std::move(errstr), inconvertibleErrorCode());
+  }
+
+  // From here on the stream owns the descriptor and closes it.
+  tentative.Dumpstream =
+      std::make_unique<raw_fd_ostream>(tentative.DumpFd, true);
+
+  auto header = FillMachine(tentative);
+  if (!header)
+    return header.takeError();
+
+  // signal this process emits JIT information
+  if (auto err = OpenMarker(tentative))
+    return err;
+
+  tentative.Dumpstream->write(reinterpret_cast<const char *>(&header.get()),
+                              sizeof(*header));
+  // Push the header out now; a failure only becomes visible on the stream
+  // once the buffered bytes have actually been written.
+  tentative.Dumpstream->flush();
+
+  if (tentative.Dumpstream->has_error()) {
+    // Clear the error before the stream is destroyed together with
+    // `tentative`, and release the marker mapping created above.
+    tentative.Dumpstream->clear_error();
+    CloseMarker(tentative);
+    return make_error<StringError>("could not write JIT dump header",
+                                   inconvertibleErrorCode());
+  }
+
+  // Everything initialized, can do profiling now. Publish under the mutex
+  // that guards the dump state.
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    state = std::move(tentative);
+  }
+  return Error::success();
+}
+
+/// End the active perf jitdump session.
+///
+/// Writes a JIT_CODE_CLOSE record, unmaps the marker page and destroys the
+/// global state.
+///
+/// \returns an error when no session is active or when the close record could
+/// not be written; the session is torn down in the latter case as well.
+static Error registerJITLoaderPerfEndImpl() {
+  // Same lock as the batch writer, so a session is never torn down while a
+  // batch is being written.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  if (!state) {
+    return make_error<StringError>("PerfState not initialized",
+                                   inconvertibleErrorCode());
+  }
+
+  RecHeader CloseRec;
+  CloseRec.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
+  CloseRec.TotalSize = sizeof(CloseRec);
+  CloseRec.Timestamp = perf_get_timestamp();
+
+  raw_fd_ostream &OS = *state->Dumpstream;
+  OS.write(reinterpret_cast<const char *>(&CloseRec), sizeof(CloseRec));
+  OS.flush();
+
+  // Capture a write failure and clear it on the stream before the stream is
+  // destroyed, so it can be handed back to the caller below.
+  std::error_code WriteEC;
+  if (OS.has_error()) {
+    WriteEC = OS.error();
+    OS.clear_error();
+  }
+
+  if (state->MarkerAddr)
+    CloseMarker(*state);
+  state.reset();
+
+  if (WriteEC)
+    return make_error<StringError>(
+        "could not write JIT dump close record: " + WriteEC.message(),
+        WriteEC);
+  return Error::success();
+}
+
+/// Wrapper-function entry point for registerJITLoaderPerfImpl: \p Data and
+/// \p Size are passed to WrapperFunction<SPSError(SPSPerfJITRecordBatch)>,
+/// and the released result is returned.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+  using namespace orc::shared;
+  using BatchCall = WrapperFunction<SPSError(SPSPerfJITRecordBatch)>;
+  auto Result = BatchCall::handle(Data, Size, registerJITLoaderPerfImpl);
+  return Result.release();
+}
+
+/// Wrapper-function entry point for registerJITLoaderPerfStartImpl: \p Data
+/// and \p Size are passed to WrapperFunction<SPSError()>, and the released
+/// result is returned.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+  using namespace orc::shared;
+  using StartCall = WrapperFunction<SPSError()>;
+  auto Result = StartCall::handle(Data, Size, registerJITLoaderPerfStartImpl);
+  return Result.release();
+}
+
+/// Wrapper-function entry point for registerJITLoaderPerfEndImpl: \p Data and
+/// \p Size are passed to WrapperFunction<SPSError()>, and the released result
+/// is returned.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+  using namespace orc::shared;
+  using EndCall = WrapperFunction<SPSError()>;
+  auto Result = EndCall::handle(Data, Size, registerJITLoaderPerfEndImpl);
+  return Result.release();
+}
+
+#else
+
+static Error badOS() {
+ return make_error<StringError>(
+ "unsupported OS (perf support is only available on linux!)",
+ inconvertibleErrorCode());
+}
+
+/// Batch-taking variant of badOS(); \p Batch is ignored. Names are fully
+/// qualified because no using-directive is in effect in this branch.
+static llvm::Error badOSBatch(llvm::orc::PerfJITRecordBatch &Batch) {
+  return badOS();
+}
+
+/// Non-Linux stub of the batch entry point: decodes the call like the real
+/// implementation would, but always answers with the badOS() error.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+  // Fully qualified: `using namespace llvm` is not in effect in this branch.
+  using namespace llvm::orc::shared;
+  return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
+                                                                  badOSBatch)
+      .release();
+}
+
+/// Non-Linux stub of the start entry point: always answers with the badOS()
+/// error.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+  // Fully qualified: `using namespace llvm` is not in effect in this branch.
+  using namespace llvm::orc::shared;
+  return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
+}
+
+/// Non-Linux stub of the end entry point: always answers with the badOS()
+/// error.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+  // Fully qualified: `using namespace llvm` is not in effect in this branch.
+  using namespace llvm::orc::shared;
+  return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
+}
+
+#endif
\ No newline at end of file