aboutsummaryrefslogtreecommitdiff
path: root/llvm/tools/llvm-profgen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-profgen')
-rw-r--r--llvm/tools/llvm-profgen/PerfReader.cpp72
-rw-r--r--llvm/tools/llvm-profgen/PerfReader.h34
-rw-r--r--llvm/tools/llvm-profgen/ProfileGenerator.cpp16
-rw-r--r--llvm/tools/llvm-profgen/ProfiledBinary.cpp40
-rw-r--r--llvm/tools/llvm-profgen/ProfiledBinary.h35
-rw-r--r--llvm/tools/llvm-profgen/llvm-profgen.cpp12
6 files changed, 192 insertions, 17 deletions
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index ad113ed..b84152e 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -6,13 +6,20 @@
//
//===----------------------------------------------------------------------===//
#include "PerfReader.h"
+#include "ErrorHandling.h"
+#include "PerfReader.h"
#include "ProfileGenerator.h"
+#include "ProfiledBinary.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/ToolOutputFile.h"
+#include <regex>
+
#define DEBUG_TYPE "perf-reader"
cl::opt<bool> SkipSymbolization("skip-symbolization",
@@ -370,6 +377,61 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
return PerfReader;
}
+// Parse a text perf trace of data-access samples and count, per sampled
+// instruction, the accesses that land in a vtable.
+//
+// The file is read line by line and two record kinds are consumed:
+//   * PERF_RECORD_MMAP2: if extractMMapEventForBinary() accepts the line and
+//     its protection flags contain no 'x', the event is registered through
+//     Binary->addMMapNonTextEvent() so later data addresses can be
+//     canonicalized.
+//   * PERF_RECORD_SAMPLE: the pid, sampled ip and data address are extracted.
+//     The data address is canonicalized and symbolized; when the symbol name
+//     starts with "_ZTV" the <ip, symbol> counter is incremented by one.
+// Every other line, and every sample line that does not match the expected
+// layout or whose numbers do not fit their integer types, is skipped.
+//
+// \param DataAccessPerfTraceFile path of the trace to read; the tool exits
+//        with an error if the file cannot be opened.
+// \param PIDFilter when set, samples whose pid differs from it are dropped.
+void PerfReaderBase::parseDataAccessPerfTraces(
+    StringRef DataAccessPerfTraceFile, std::optional<int32_t> PIDFilter) {
+  // Capture groups: 1 = pid, 2 = the id after '/' (unused here; presumably
+  // the tid), 3 = sampled ip, 4 = data address.
+  const std::regex SampleRegex(
+      R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)");
+
+  auto BufferOrErr = MemoryBuffer::getFile(DataAccessPerfTraceFile);
+  std::error_code EC = BufferOrErr.getError();
+  if (EC)
+    exitWithError("Failed to open perf trace file: " + DataAccessPerfTraceFile);
+
+  assert(!SampleCounters.empty() && "Sample counters should not be empty!");
+  // All data-access counts are attributed to the first sample counter.
+  SampleCounter &Counter = SampleCounters.begin()->second;
+
+  for (line_iterator LineIt(*BufferOrErr.get(), true); !LineIt.is_at_eof();
+       ++LineIt) {
+    StringRef Line = *LineIt;
+
+    if (Line.contains("PERF_RECORD_MMAP2")) {
+      MMapEvent MMap;
+      // NOTE(review): MMap's StringRef members look like views into this
+      // function's buffer, which is released on return; only the numeric
+      // members should be read from the stored event afterwards - confirm.
+      if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap) &&
+          !MMap.MemProtectionFlag.contains("x"))
+        Binary->addMMapNonTextEvent(MMap);
+      continue;
+    }
+
+    // Skip lines that do not contain "PERF_RECORD_SAMPLE".
+    if (!Line.contains("PERF_RECORD_SAMPLE"))
+      continue;
+
+    // Match on the line's bytes in place; no std::string copy is needed.
+    std::cmatch Matches;
+    if (!std::regex_search(Line.begin(), Line.end(), Matches, SampleRegex))
+      continue;
+
+    // View of capture group I inside Line.
+    auto Group = [&Matches](size_t I) {
+      return StringRef(Matches[I].first, Matches[I].length());
+    };
+
+    // getAsInteger() returns true on failure (e.g. the value does not fit),
+    // so a malformed number drops the sample instead of throwing the way
+    // std::stoi/std::stoull would.
+    int32_t PID = 0;
+    if (Group(1).getAsInteger(10, PID))
+      continue;
+    if (PIDFilter && *PIDFilter != PID)
+      continue;
+
+    // Radix 0 auto-detects the "0x" prefix the regex requires.
+    uint64_t DataAddress = 0;
+    if (Group(4).getAsInteger(0, DataAddress))
+      continue;
+
+    StringRef DataSymbol = Binary->symbolizeDataAddress(
+        Binary->CanonicalizeNonTextAddress(DataAddress));
+    // Only vtable symbols ("_ZTV" prefix) are recorded.
+    if (!DataSymbol.starts_with("_ZTV"))
+      continue;
+
+    uint64_t IP = 0;
+    if (Group(3).getAsInteger(0, IP))
+      continue;
+    Counter.recordDataAccessCount(Binary->canonicalizeVirtualAddress(IP),
+                                  DataSymbol, 1);
+  }
+}
+
PerfInputFile
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
PerfInputFile &File,
@@ -990,14 +1052,14 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
constexpr static const char *const MMap2Pattern =
"PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
- "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
+ "(0x[a-f0-9]+|0) .*\\]: ([-a-z]+) (.*)";
// Parse a MMap line like
// PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
// 0xffffffff81e00000]: x [kernel.kallsyms]_text
constexpr static const char *const MMapPattern =
"PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
- "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)";
+ "(0x[a-f0-9]+|0)\\]: ([-a-z]+) (.*)";
// Field 0 - whole line
// Field 1 - PID
// Field 2 - base address
@@ -1010,11 +1072,12 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
MMAPPED_ADDRESS = 2,
MMAPPED_SIZE = 3,
PAGE_OFFSET = 4,
- BINARY_PATH = 5
+ MEM_PROTECTION_FLAG = 5,
+ BINARY_PATH = 6,
};
bool R = false;
- SmallVector<StringRef, 6> Fields;
+ SmallVector<StringRef, 7> Fields;
if (Line.contains("PERF_RECORD_MMAP2 ")) {
Regex RegMmap2(MMap2Pattern);
R = RegMmap2.match(Line, &Fields);
@@ -1035,6 +1098,7 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
+ MMap.MemProtectionFlag = Fields[MEM_PROTECTION_FLAG];
MMap.BinaryPath = Fields[BINARY_PATH];
if (ShowMmapEvents) {
outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 4b3ac8f..778fc12 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -395,10 +395,13 @@ using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
+// <<inst-addr, vtable-data-symbol>, count> map for data access samples.
+using DataAccessSample = std::map<std::pair<uint64_t, StringRef>, uint64_t>;
// Wrapper for sample counters including range counter and branch counter
struct SampleCounter {
RangeSample RangeCounter;
BranchSample BranchCounter;
+ DataAccessSample DataAccessCounter;
void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
assert(Start <= End && "Invalid instruction range");
@@ -407,6 +410,10 @@ struct SampleCounter {
void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
BranchCounter[{Source, Target}] += Repeat;
}
+  // Add Repeat to the counter of the <InstAddr, DataSymbol> pair. A pair seen
+  // for the first time gets a new map entry that starts from zero.
+  void recordDataAccessCount(uint64_t InstAddr, StringRef DataSymbol,
+                             uint64_t Repeat) {
+    uint64_t &Count = DataAccessCounter[{InstAddr, DataSymbol}];
+    Count += Repeat;
+  }
};
// Sample counter with context to support context-sensitive profile
@@ -572,6 +579,13 @@ public:
// Entry of the reader to parse multiple perf traces
virtual void parsePerfTraces() = 0;
+
+ // Parse the <ip, vtable-data-symbol> from the data access perf trace file,
+ // and accumulate the data access count for each <ip, data-symbol> pair.
+ void
+ parseDataAccessPerfTraces(StringRef DataAccessPerfFile,
+ std::optional<int32_t> PIDFilter = std::nullopt);
+
const ContextSampleCounterMap &getSampleCounters() const {
return SampleCounters;
}
@@ -598,6 +612,12 @@ public:
// Entry of the reader to parse multiple perf traces
void parsePerfTraces() override;
+
+ // Parse a single line of a PERF_RECORD_MMAP event looking for a
+ // mapping between the binary name and its memory layout.
+ static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
+ MMapEvent &MMap);
+
// Generate perf script from perf data
static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
bool SkipPID, PerfInputFile &File,
@@ -611,23 +631,11 @@ public:
static SmallVector<CleanupInstaller, 2> TempFileCleanups;
protected:
- // The parsed MMap event
- struct MMapEvent {
- int64_t PID = 0;
- uint64_t Address = 0;
- uint64_t Size = 0;
- uint64_t Offset = 0;
- StringRef BinaryPath;
- };
-
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
static bool isMMapEvent(StringRef Line);
- // Parse a single line of a PERF_RECORD_MMAP event looking for a
- // mapping between the binary name and its memory layout.
- static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
- MMapEvent &MMap);
+
// Update base address based on mmap events
void updateBinaryAddress(const MMapEvent &Event);
// Parse mmap event and update binary address
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index db686c3..20c0c0e 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -540,6 +540,22 @@ void ProfileGenerator::generateLineNumBasedProfile() {
// Fill in boundary sample counts as well as call site samples for calls
populateBoundarySamplesForAllFunctions(SC.BranchCounter);
+ // For each instruction with vtable accesses, get its symbolized inline
+ // stack, and add the vtable counters to the function samples.
+ for (const auto &[IpData, Count] : SC.DataAccessCounter) {
+ uint64_t InstAddr = IpData.first;
+ const SampleContextFrameVector &FrameVec =
+ Binary->getCachedFrameLocationStack(InstAddr, false);
+ if (!FrameVec.empty()) {
+ FunctionSamples &FunctionProfile =
+ getLeafProfileAndAddTotalSamples(FrameVec, 0);
+ LineLocation Loc(
+ FrameVec.back().Location.LineOffset,
+ getBaseDiscriminator(FrameVec.back().Location.Discriminator));
+ FunctionProfile.getTypeSamplesAt(Loc)[FunctionId(IpData.second)] += Count;
+ }
+ }
+
updateFunctionSamples();
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 6847ba1..9adc203 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -336,6 +336,12 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
~(PageSize - 1U));
TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
+ } else {
+ PhdrInfo Info;
+ Info.FileOffset = Phdr.p_offset;
+ Info.FileSz = Phdr.p_filesz;
+ Info.vAddr = Phdr.p_vaddr;
+ NonTextPhdrInfo.push_back(Info);
}
}
}
@@ -344,6 +350,32 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
exitWithError("no executable segment found", FileName);
}
+// Translate a runtime data address into the virtual address recorded in the
+// binary's program headers.
+//
+// The first recorded non-text mmap event whose range [Address, Address + Size)
+// contains the input decides the file offset. That file offset is then looked
+// up in the non-text program headers, and the matching header's virtual
+// address plus the offset into it is returned.
+//
+// \param Address runtime address taken from a data-access sample.
+// \returns the canonical virtual address, or Address unchanged when no mmap
+//          event contains it or no recorded program header covers the
+//          resulting file offset.
+uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) {
+  for (const auto &Event : MMapNonTextEvents) {
+    // Compare by subtraction so Event.Address + Event.Size cannot wrap.
+    if (Address < Event.Address || Address - Event.Address >= Event.Size)
+      continue;
+
+    // A file offset of 0 is a valid result here; "found" is expressed by
+    // reaching this point, not by a sentinel value.
+    const uint64_t FileOffset = Address - Event.Address + Event.Offset;
+    for (const auto &Phdr : NonTextPhdrInfo) {
+      if (FileOffset >= Phdr.FileOffset &&
+          FileOffset - Phdr.FileOffset < Phdr.FileSz)
+        return Phdr.vAddr + (FileOffset - Phdr.FileOffset);
+    }
+
+    // Only the first containing mmap event is considered.
+    return Address;
+  }
+
+  // The address is not within any recorded mmap event.
+  return Address;
+}
+
void ProfiledBinary::setPreferredTextSegmentAddresses(const COFFObjectFile *Obj,
StringRef FileName) {
uint64_t ImageBase = Obj->getImageBase();
@@ -946,6 +978,14 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
return CallStack;
}
+// Ask the symbolizer for the global at Address and return its name. The name
+// is copied into NameStrings, so the returned reference stays usable for as
+// long as this ProfiledBinary keeps that table.
+StringRef ProfiledBinary::symbolizeDataAddress(uint64_t Address) {
+  const DIGlobal Global = unwrapOrError(
+      Symbolizer->symbolizeData(SymbolizerPath.str(), {Address, 0}),
+      SymbolizerPath);
+  // insert() yields the already-stored string when the name is a duplicate.
+  const std::string &Interned = *NameStrings.insert(Global.Name).first;
+  return StringRef(Interned);
+}
+
void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
uint64_t RangeEnd) {
InstructionPointer IP(this, RangeBegin, true);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 0588cb4..02df762 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -185,6 +185,16 @@ private:
using AddressRange = std::pair<uint64_t, uint64_t>;
+// The parsed MMap event: the fields of one PERF_RECORD_MMAP/MMAP2 trace line.
+struct MMapEvent {
+ int64_t PID = 0; // Process id of the record (regex field 1; assignment not shown in this view).
+ uint64_t Address = 0; // Start of the mapping (MMAPPED_ADDRESS field).
+ uint64_t Size = 0; // Length of the mapping; [Address, Address + Size) is the mapped range.
+ uint64_t Offset = 0; // File offset the mapping starts at (PAGE_OFFSET field).
+ StringRef MemProtectionFlag; // Protection flag text; callers test it for 'x'.
+ StringRef BinaryPath; // Path text of the mapped file. NOTE(review): both StringRefs are non-owning - confirm the parsed line outlives the event.
+};
+
class ProfiledBinary {
// Absolute path of the executable binary.
std::string Path;
@@ -276,6 +286,19 @@ class ProfiledBinary {
// String table owning function name strings created from the symbolizer.
std::unordered_set<std::string> NameStrings;
+ // MMap events for PT_LOAD segments without 'x' memory protection flag.
+ SmallVector<MMapEvent> MMapNonTextEvents;
+
+ // Records the file offset, file size and virtual address of one program header.
+ struct PhdrInfo {
+ uint64_t FileOffset; // Filled from the header's p_offset.
+ uint64_t FileSz; // Filled from the header's p_filesz.
+ uint64_t vAddr; // Filled from the header's p_vaddr.
+ };
+
+ // Program header information for non-text PT_LOAD segments.
+ SmallVector<PhdrInfo> NonTextPhdrInfo;
+
// A collection of functions to print disassembly for.
StringSet<> DisassembleFunctionSet;
@@ -363,6 +386,10 @@ public:
ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath);
~ProfiledBinary();
+ /// Symbolize an address and return the symbol name. The returned StringRef is
+ /// owned by this ProfiledBinary object.
+ StringRef symbolizeDataAddress(uint64_t Address);
+
void decodePseudoProbe();
StringRef getPath() const { return Path; }
@@ -603,6 +630,14 @@ public:
return ProbeDecoder.getInlinerDescForProbe(Probe);
}
+ void addMMapNonTextEvent(MMapEvent MMap) { // Appends MMap to MMapNonTextEvents, which CanonicalizeNonTextAddress scans in insertion order.
+ MMapNonTextEvents.push_back(MMap);
+ }
+
+ // Given a runtime address, canonicalize it to the virtual address in the
+ // binary.
+ uint64_t CanonicalizeNonTextAddress(uint64_t Address);
+
bool getTrackFuncContextSize() { return TrackFuncContextSize; }
bool getIsLoadedByMMap() { return IsLoadedByMMap; }
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 3b974e2..d8af0ac 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -67,6 +67,11 @@ static cl::opt<std::string> DebugBinPath(
"from it instead of the executable binary."),
cl::cat(ProfGenCategory));
+// Input path of the text perf trace holding data-access samples. When it is
+// non-empty, main() passes it to parseDataAccessPerfTraces(), which opens and
+// reads the file; nothing is written to this path.
+static cl::opt<std::string> DataAccessProfileFilename(
+    "data-access-profile", cl::value_desc("data-access-profile"),
+    cl::desc("Path of the data access perf trace to be parsed."),
+    cl::cat(ProfGenCategory));
+
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
extern cl::opt<bool> SkipSymbolization;
@@ -179,6 +184,13 @@ int main(int argc, const char *argv[]) {
// Parse perf events and samples
Reader->parsePerfTraces();
+ if (!DataAccessProfileFilename.empty()) {
+ // Parse the data access perf traces into <ip, data-addr> pairs, symbolize
+ // the data-addr to data-symbol. If the data-addr is a vtable, increment
+ // counters for the <ip, data-symbol> pair.
+ Reader->parseDataAccessPerfTraces(DataAccessProfileFilename, PIDFilter);
+ }
+
if (SkipSymbolization)
return EXIT_SUCCESS;