diff options
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 93 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test | 88 |
2 files changed, 171 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index c03aa5a..177b940 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -178,6 +178,12 @@ static cl::opt<bool> cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden); +static cl::opt<bool> ClMemProfAttachCalleeGuids( + "memprof-attach-calleeguids", + cl::desc( + "Attach calleeguids as value profile metadata for indirect calls."), + cl::init(true), cl::Hidden); + extern cl::opt<bool> MemProfReportHintedSizes; extern cl::opt<unsigned> MinClonedColdBytePercent; extern cl::opt<unsigned> MinCallsiteColdBytePercent; @@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps, UndriftCallStack(CS.Frames); } +// Helper function to process CalleeGuids and create value profile metadata +static void addVPMetadata(Module &M, Instruction &I, + ArrayRef<GlobalValue::GUID> CalleeGuids) { + if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty()) + return; + + if (I.getMetadata(LLVMContext::MD_prof)) { + uint64_t Unused; + // TODO: When merging is implemented, increase this to a typical ICP value + // (e.g., 3-6) For now, we only need to check if existing data exists, so 1 + // is sufficient + auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget, + /*MaxNumValueData=*/1, Unused); + // We don't know how to merge value profile data yet. + if (!ExistingVD.empty()) { + return; + } + } + + SmallVector<InstrProfValueData, 4> VDs; + uint64_t TotalCount = 0; + + for (const GlobalValue::GUID CalleeGUID : CalleeGuids) { + InstrProfValueData VD; + VD.Value = CalleeGUID; + // For MemProf, we don't have actual call counts, so we assign + // a weight of 1 to each potential target. + // TODO: Consider making this weight configurable or increasing it to + // improve effectiveness for ICP. + VD.Count = 1; + VDs.push_back(VD); + TotalCount += VD.Count; + } + + if (!VDs.empty()) { + annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, + VDs.size()); + } +} + static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, @@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // Build maps of the location hash to all profile data with that leaf location // (allocation info and the callsites). std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; - // A hash function for std::unordered_set<ArrayRef<Frame>> to work. - struct CallStackHash { - size_t operator()(ArrayRef<Frame> CS) const { - return computeFullStackId(CS); + + // Helper struct for maintaining refs to callsite data. As an alternative we + // could store a pointer to the CallSiteInfo struct but we also need the frame + // index. Using ArrayRefs instead makes it a little easier to read. + struct CallSiteEntry { + // Subset of frames for the corresponding CallSiteInfo. + ArrayRef<Frame> Frames; + // Potential targets for indirect calls. + ArrayRef<GlobalValue::GUID> CalleeGuids; + + // Only compare Frame contents. + // Use pointer-based equality instead of ArrayRef's operator== which does + // element-wise comparison. We want to check if it's the same slice of the + // underlying array, not just equivalent content. + bool operator==(const CallSiteEntry &Other) const { + return Frames.data() == Other.Frames.data() && + Frames.size() == Other.Frames.size(); + } + }; + + struct CallSiteEntryHash { + size_t operator()(const CallSiteEntry &Entry) const { + return computeFullStackId(Entry.Frames); } }; + // For the callsites we need to record slices of the frame array (see comments - // below where the map entries are added). - std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>> + // below where the map entries are added) along with their CalleeGuids. + std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>> LocHashToCallSites; for (auto &AI : MemProfRec->AllocSites) { NumOfMemProfAllocContextProfiles++; @@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, unsigned Idx = 0; for (auto &StackFrame : CS.Frames) { uint64_t StackId = computeStackId(StackFrame); - LocHashToCallSites[StackId].insert( - ArrayRef<Frame>(CS.Frames).drop_front(Idx++)); + ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++); + ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids); + LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids}); + ProfileHasColumns |= StackFrame.Column; // Once we find this function, we can stop recording. if (StackFrame.Function == FuncGUID) @@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // Otherwise, add callsite metadata. If we reach here then we found the // instruction's leaf location in the callsites map and not the allocation // map. - for (auto CallStackIdx : CallSitesIter->second) { + for (const auto &CallSiteEntry : CallSitesIter->second) { // If we found and thus matched all frames on the call, create and // attach call stack metadata. - if (stackFrameIncludesInlinedCallStack(CallStackIdx, + if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames, InlinedCallStack)) { NumOfMemProfMatchedCallSites++; addCallsiteMetadata(I, InlinedCallStack, Ctx); + + // Try to attach indirect call metadata if possible. + if (!CalledFunction) + addVPMetadata(M, I, CallSiteEntry.CalleeGuids); + // Only need to find one with a matching call stack and add a single // callsite metadata. diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test new file mode 100644 index 0000000..ad83da2 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test @@ -0,0 +1,88 @@ +; RUN: split-file %s %t + +;; Basic functionality with flag toggle +; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata +; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE +; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE + +;; FDO conflict handling +; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata +; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO + +;--- basic.yaml +--- +HeapProfileRecords: + - GUID: _Z3barv + AllocSites: [] + CallSites: + - Frames: + - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false } + CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] +... + +;--- basic.ll +define dso_local void @_Z3barv() !dbg !4 { +entry: + %fp = alloca ptr, align 8 + %0 = load ptr, ptr %fp, align 8 + call void %0(), !dbg !5 +; CHECK-ENABLE: call void %0(), {{.*}} !prof !6 +; CHECK-DISABLE-NOT: !prof + ret void +} + +; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} + +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) +!1 = !DIFile(filename: "t", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0) +!5 = !DILocation(line: 4, column: 5, scope: !4) + +;--- fdo_conflict.yaml +--- +HeapProfileRecords: + - GUID: _Z3foov + AllocSites: [] + CallSites: + - Frames: + - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false } + CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] + - Frames: + - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false } + CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1] +... + +;--- fdo_conflict.ll +define dso_local void @_Z3foov() !dbg !14 { +entry: + %fp = alloca ptr, align 8 + %0 = load ptr, ptr %fp, align 8 + ; This call already has FDO value profile metadata - should NOT be modified + ; CHECK-FDO: call void %0(), {{.*}} !prof !6 + call void %0(), !dbg !15, !prof !16 + + %1 = load ptr, ptr %fp, align 8 + ; This call does NOT have existing metadata - should get MemProf annotation + ; CHECK-FDO: call void %1(), {{.*}} !prof !9 + call void %1(), !dbg !17 + ret void +} + +!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} + +; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} +; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1} + +!llvm.module.flags = !{!12, !13} + +!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11) +!11 = !DIFile(filename: "t", directory: "/") +!12 = !{i32 7, !"Dwarf Version", i32 5} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10) +!15 = !DILocation(line: 4, column: 5, scope: !14) +!17 = !DILocation(line: 6, column: 5, scope: !14) |