diff options
Diffstat (limited to 'llvm/tools/llvm-profgen')
-rw-r--r-- | llvm/tools/llvm-profgen/PerfReader.cpp | 72 | ||||
-rw-r--r-- | llvm/tools/llvm-profgen/PerfReader.h | 34 | ||||
-rw-r--r-- | llvm/tools/llvm-profgen/ProfileGenerator.cpp | 16 | ||||
-rw-r--r-- | llvm/tools/llvm-profgen/ProfiledBinary.cpp | 40 | ||||
-rw-r--r-- | llvm/tools/llvm-profgen/ProfiledBinary.h | 35 | ||||
-rw-r--r-- | llvm/tools/llvm-profgen/llvm-profgen.cpp | 12 |
6 files changed, 192 insertions, 17 deletions
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index ad113ed..b84152e 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -6,13 +6,20 @@ // //===----------------------------------------------------------------------===// #include "PerfReader.h" +#include "ErrorHandling.h" +#include "PerfReader.h" #include "ProfileGenerator.h" +#include "ProfiledBinary.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" #include "llvm/Support/ToolOutputFile.h" +#include <regex> + #define DEBUG_TYPE "perf-reader" cl::opt<bool> SkipSymbolization("skip-symbolization", @@ -370,6 +377,61 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, return PerfReader; } +void PerfReaderBase::parseDataAccessPerfTraces( + StringRef DataAccessPerfTraceFile, std::optional<int32_t> PIDFilter) { + std::regex logRegex( + R"(^.*?PERF_RECORD_SAMPLE\(.*?\):\s*(\d+)\/(\d+):\s*(0x[0-9a-fA-F]+)\s+period:\s*\d+\s+addr:\s*(0x[0-9a-fA-F]+)$)"); + + auto BufferOrErr = MemoryBuffer::getFile(DataAccessPerfTraceFile); + std::error_code EC = BufferOrErr.getError(); + if (EC) + exitWithError("Failed to open perf trace file: " + DataAccessPerfTraceFile); + + assert(!SampleCounters.empty() && "Sample counters should not be empty!"); + SampleCounter &Counter = SampleCounters.begin()->second; + line_iterator LineIt(*BufferOrErr.get(), true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + + MMapEvent MMap; + if (Line.contains("PERF_RECORD_MMAP2")) { + if (PerfScriptReader::extractMMapEventForBinary(Binary, Line, MMap)) { + if (!MMap.MemProtectionFlag.contains("x")) { + Binary->addMMapNonTextEvent(MMap); + } + } + continue; + } + + // Skip lines that do not contain "PERF_RECORD_SAMPLE". 
+ if (!Line.contains("PERF_RECORD_SAMPLE")) { + continue; + } + + std::smatch matches; + const std::string LineStr = Line.str(); + + if (std::regex_search(LineStr.begin(), LineStr.end(), matches, logRegex)) { + if (matches.size() != 5) + continue; + + const int32_t PID = std::stoi(matches[1].str()); + if (PIDFilter && *PIDFilter != PID) { + continue; + } + + const uint64_t DataAddress = std::stoull(matches[4].str(), nullptr, 16); + StringRef DataSymbol = Binary->symbolizeDataAddress( + Binary->CanonicalizeNonTextAddress(DataAddress)); + if (DataSymbol.starts_with("_ZTV")) { + const uint64_t IP = std::stoull(matches[3].str(), nullptr, 16); + Counter.recordDataAccessCount(Binary->canonicalizeVirtualAddress(IP), + DataSymbol, 1); + } + } + } +} + PerfInputFile PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, PerfInputFile &File, @@ -990,14 +1052,14 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary, constexpr static const char *const MMap2Pattern = "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: " "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " - "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; + "(0x[a-f0-9]+|0) .*\\]: ([-a-z]+) (.*)"; // Parse a MMap line like // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \ // 0xffffffff81e00000]: x [kernel.kallsyms]_text constexpr static const char *const MMapPattern = "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: " "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " - "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)"; + "(0x[a-f0-9]+|0)\\]: ([-a-z]+) (.*)"; // Field 0 - whole line // Field 1 - PID // Field 2 - base address @@ -1010,11 +1072,12 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary, MMAPPED_ADDRESS = 2, MMAPPED_SIZE = 3, PAGE_OFFSET = 4, - BINARY_PATH = 5 + MEM_PROTECTION_FLAG = 5, + BINARY_PATH = 6, }; bool R = false; - SmallVector<StringRef, 6> Fields; + SmallVector<StringRef, 7> Fields; if (Line.contains("PERF_RECORD_MMAP2 ")) { Regex RegMmap2(MMap2Pattern); R = RegMmap2.match(Line, 
&Fields); @@ -1035,6 +1098,7 @@ bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary, Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address); Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size); Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset); + MMap.MemProtectionFlag = Fields[MEM_PROTECTION_FLAG]; MMap.BinaryPath = Fields[BINARY_PATH]; if (ShowMmapEvents) { outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h index 4b3ac8f..778fc12 100644 --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -395,10 +395,13 @@ using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; // The counter of range samples for one function indexed by the range, // which is represented as the start and end offset pair. using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; +// <<inst-addr, vtable-data-symbol>, count> map for data access samples. 
+using DataAccessSample = std::map<std::pair<uint64_t, StringRef>, uint64_t>; // Wrapper for sample counters including range counter and branch counter struct SampleCounter { RangeSample RangeCounter; BranchSample BranchCounter; + DataAccessSample DataAccessCounter; void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) { assert(Start <= End && "Invalid instruction range"); @@ -407,6 +410,10 @@ struct SampleCounter { void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) { BranchCounter[{Source, Target}] += Repeat; } + void recordDataAccessCount(uint64_t InstAddr, StringRef DataSymbol, + uint64_t Repeat) { + DataAccessCounter[{InstAddr, DataSymbol}] += Repeat; + } }; // Sample counter with context to support context-sensitive profile @@ -572,6 +579,13 @@ public: // Entry of the reader to parse multiple perf traces virtual void parsePerfTraces() = 0; + + // Parse the <ip, vtable-data-symbol> from the data access perf trace file, + // and accumulate the data access count for each <ip, data-symbol> pair. + void + parseDataAccessPerfTraces(StringRef DataAccessPerfFile, + std::optional<int32_t> PIDFilter = std::nullopt); + const ContextSampleCounterMap &getSampleCounters() const { return SampleCounters; } @@ -598,6 +612,12 @@ public: // Entry of the reader to parse multiple perf traces void parsePerfTraces() override; + + // Parse a single line of a PERF_RECORD_MMAP event looking for a + // mapping between the binary name and its memory layout. 
+ static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line, + MMapEvent &MMap); + // Generate perf script from perf data static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, PerfInputFile &File, @@ -611,23 +631,11 @@ public: static SmallVector<CleanupInstaller, 2> TempFileCleanups; protected: - // The parsed MMap event - struct MMapEvent { - int64_t PID = 0; - uint64_t Address = 0; - uint64_t Size = 0; - uint64_t Offset = 0; - StringRef BinaryPath; - }; - // Check whether a given line is LBR sample static bool isLBRSample(StringRef Line); // Check whether a given line is MMAP event static bool isMMapEvent(StringRef Line); - // Parse a single line of a PERF_RECORD_MMAP event looking for a - // mapping between the binary name and its memory layout. - static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line, - MMapEvent &MMap); + // Update base address based on mmap events void updateBinaryAddress(const MMapEvent &Event); // Parse mmap event and update binary address diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index db686c3..20c0c0e 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -540,6 +540,22 @@ void ProfileGenerator::generateLineNumBasedProfile() { // Fill in boundary sample counts as well as call site samples for calls populateBoundarySamplesForAllFunctions(SC.BranchCounter); + // For each instruction with vtable accesses, get its symbolized inline + // stack, and add the vtable counters to the function samples. 
+ for (const auto &[IpData, Count] : SC.DataAccessCounter) { + uint64_t InstAddr = IpData.first; + const SampleContextFrameVector &FrameVec = + Binary->getCachedFrameLocationStack(InstAddr, false); + if (!FrameVec.empty()) { + FunctionSamples &FunctionProfile = + getLeafProfileAndAddTotalSamples(FrameVec, 0); + LineLocation Loc( + FrameVec.back().Location.LineOffset, + getBaseDiscriminator(FrameVec.back().Location.Discriminator)); + FunctionProfile.getTypeSamplesAt(Loc)[FunctionId(IpData.second)] += Count; + } + } + updateFunctionSamples(); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 6847ba1..9adc203 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -336,6 +336,12 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr & ~(PageSize - 1U)); TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); + } else { + PhdrInfo Info; + Info.FileOffset = Phdr.p_offset; + Info.FileSz = Phdr.p_filesz; + Info.vAddr = Phdr.p_vaddr; + NonTextPhdrInfo.push_back(Info); } } } @@ -344,6 +350,32 @@ void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, exitWithError("no executable segment found", FileName); } +uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) { + uint64_t FileOffset = 0; + for (const auto &MMapEvent : MMapNonTextEvents) { + if (MMapEvent.Address <= Address && + Address < MMapEvent.Address + MMapEvent.Size) { + // If the address is within the mmap event, return the file offset. + FileOffset = Address - MMapEvent.Address + MMapEvent.Offset; + break; + } + } + if (FileOffset == 0) { + // If the address is not within any mmap event, return the address as is. + return Address; + } + for (const auto &PhdrInfo : NonTextPhdrInfo) { + // Check if the file offset is within the non-text segment. 
+ if (PhdrInfo.FileOffset <= FileOffset && + FileOffset < PhdrInfo.FileOffset + PhdrInfo.FileSz) { + // If it is, return the virtual address of the segment. + return PhdrInfo.vAddr + (FileOffset - PhdrInfo.FileOffset); + } + } + + return Address; +} + void ProfiledBinary::setPreferredTextSegmentAddresses(const COFFObjectFile *Obj, StringRef FileName) { uint64_t ImageBase = Obj->getImageBase(); @@ -946,6 +978,14 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, return CallStack; } +StringRef ProfiledBinary::symbolizeDataAddress(uint64_t Address) { + DIGlobal DataDIGlobal = unwrapOrError( + Symbolizer->symbolizeData(SymbolizerPath.str(), {Address, 0}), + SymbolizerPath); + auto It = NameStrings.insert(DataDIGlobal.Name); + return StringRef(*It.first); +} + void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin, uint64_t RangeEnd) { InstructionPointer IP(this, RangeBegin, true); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 0588cb4..02df762 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -185,6 +185,16 @@ private: using AddressRange = std::pair<uint64_t, uint64_t>; +// The parsed MMap event +struct MMapEvent { + int64_t PID = 0; + uint64_t Address = 0; + uint64_t Size = 0; + uint64_t Offset = 0; + StringRef MemProtectionFlag; + StringRef BinaryPath; +}; + class ProfiledBinary { // Absolute path of the executable binary. std::string Path; @@ -276,6 +286,19 @@ class ProfiledBinary { // String table owning function name strings created from the symbolizer. std::unordered_set<std::string> NameStrings; + // MMap events for PT_LOAD segments without 'x' memory protection flag. + SmallVector<MMapEvent> MMapNonTextEvents; + + // Records the file offset, file size and virtual address of program headers. 
+ struct PhdrInfo { + uint64_t FileOffset; + uint64_t FileSz; + uint64_t vAddr; + }; + + // Program header information for non-text PT_LOAD segments. + SmallVector<PhdrInfo> NonTextPhdrInfo; + // A collection of functions to print disassembly for. StringSet<> DisassembleFunctionSet; @@ -363,6 +386,10 @@ public: ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath); ~ProfiledBinary(); + /// Symbolize an address and return the symbol name. The returned StringRef is + /// owned by this ProfiledBinary object. + StringRef symbolizeDataAddress(uint64_t Address); + void decodePseudoProbe(); StringRef getPath() const { return Path; } @@ -603,6 +630,14 @@ public: return ProbeDecoder.getInlinerDescForProbe(Probe); } + void addMMapNonTextEvent(MMapEvent MMap) { + MMapNonTextEvents.push_back(MMap); + } + + // Given a runtime address, canonicalize it to the virtual address in the + // binary. + uint64_t CanonicalizeNonTextAddress(uint64_t Address); + bool getTrackFuncContextSize() { return TrackFuncContextSize; } bool getIsLoadedByMMap() { return IsLoadedByMMap; } diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp index 3b974e2..d8af0ac 100644 --- a/llvm/tools/llvm-profgen/llvm-profgen.cpp +++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp @@ -67,6 +67,11 @@ static cl::opt<std::string> DebugBinPath( "from it instead of the executable binary."), cl::cat(ProfGenCategory)); +static cl::opt<std::string> DataAccessProfileFilename( + "data-access-profile", cl::value_desc("data-access-profile"), + cl::desc("Path of the data access profile to be generated."), + cl::cat(ProfGenCategory)); + extern cl::opt<bool> ShowDisassemblyOnly; extern cl::opt<bool> ShowSourceLocations; extern cl::opt<bool> SkipSymbolization; @@ -179,6 +184,13 @@ int main(int argc, const char *argv[]) { // Parse perf events and samples Reader->parsePerfTraces(); + if (!DataAccessProfileFilename.empty()) { + // Parse the data access perf traces into <ip, 
data-addr> pairs, symbolize + // the data-addr to data-symbol. If the data-addr is a vtable, increment + // counters for the <ip, data-symbol> pair. + Reader->parseDataAccessPerfTraces(DataAccessProfileFilename, PIDFilter); + } + if (SkipSymbolization) return EXIT_SUCCESS; |