diff options
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 43 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll | 73 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/memprof.ll | 8 |
3 files changed, 76 insertions, 48 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 177b940..a64dfc0 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -816,13 +816,6 @@ static bool isAllocationWithHotColdVariant(const Function *Callee, } } -struct AllocMatchInfo { - uint64_t TotalSize = 0; - size_t NumFramesMatched = 0; - AllocationType AllocType = AllocationType::None; - bool Matched = false; -}; - DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, function_ref<bool(uint64_t)> IsPresentInProfile) { @@ -998,13 +991,12 @@ static void addVPMetadata(Module &M, Instruction &I, } } -static void -readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, - const TargetLibraryInfo &TLI, - std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo, - std::set<std::vector<uint64_t>> &MatchedCallSites, - DenseMap<uint64_t, LocToLocMap> &UndriftMaps, - OptimizationRemarkEmitter &ORE) { +static void readMemprof(Module &M, Function &F, + IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI, + std::set<std::vector<uint64_t>> &MatchedCallSites, + DenseMap<uint64_t, LocToLocMap> &UndriftMaps, + OptimizationRemarkEmitter &ORE) { auto &Ctx = M.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But @@ -1214,9 +1206,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, // was requested. if (ClPrintMemProfMatchInfo) { assert(FullStackId != 0); - FullStackIdToAllocMatchInfo[FullStackId] = { - AllocInfo->Info.getTotalSize(), InlinedCallStack.size(), - AllocType, /*Matched=*/true}; + errs() << "MemProf " << getAllocTypeAttributeString(AllocType) + << " context with id " << FullStackId + << " has total profiled size " + << AllocInfo->Info.getTotalSize() << " is matched with " + << InlinedCallStack.size() << " frames\n"; } } } @@ -1331,11 +1325,6 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { if (SalvageStaleProfile) UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI); - // Map from the stack has of each allocation context in the function profiles - // to the total profiled size (bytes), allocation type, and whether we matched - // it to an allocation in the IR. - std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo; - // Set of the matched call sites, each expressed as a sequence of an inline // call stack. std::set<std::vector<uint64_t>> MatchedCallSites; @@ -1346,17 +1335,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo, - MatchedCallSites, UndriftMaps, ORE); + readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps, + ORE); } if (ClPrintMemProfMatchInfo) { - for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) - errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType) - << " context with id " << Id << " has total profiled size " - << Info.TotalSize << (Info.Matched ? " is" : " not") - << " matched with " << Info.NumFramesMatched << " frames\n"; - for (const auto &CallStack : MatchedCallSites) { errs() << "MemProf callsite match for inline call stack"; for (uint64_t StackId : CallStack) diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll index b9126ac..2dcaa9d 100644 --- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll +++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll @@ -31,11 +31,41 @@ ;--- memprof-dump-matched-alloc-site.yaml --- HeapProfileRecords: + - GUID: _Z2f2v + AllocSites: + - Callstack: + - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false } + - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 3 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 0 + CallSites: + - Frames: + - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false } + - GUID: _Z2f1v + AllocSites: + - Callstack: + - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false } + - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 3 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 0 + CallSites: [] - GUID: _Z2f3v AllocSites: - Callstack: - - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true } - - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true } + - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true } + - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true } - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false } - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false } MemInfoBlock: @@ -47,32 +77,47 @@ HeapProfileRecords: # Kept empty here because this section is irrelevant for this test. ... ;--- memprof-dump-matched-alloc-site.ll -; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames +; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames +; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames +; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define ptr @_Z2f3v() { +define ptr @_Z2f1v() { entry: - %call.i.i = call ptr @_Znam(i64 0), !dbg !3 - ret ptr null + %call = call ptr @_Znam(i64 0), !dbg !3 + ret ptr %call } declare ptr @_Znam(i64) +define ptr @_Z2f2v() { +entry: + %call.i = call ptr @_Znam(i64 0), !dbg !7 + ret ptr %call.i +} + +define ptr @_Z2f3v() { +entry: + %call.i.i = call ptr @_Znam(i64 0), !dbg !10 + ret ptr %call.i.i +} + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) !1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/") !2 = !{i32 2, !"Debug Info Version", i32 3} -!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7) -!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!3 = !DILocation(line: 1, column: 21, scope: !4) +!4 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) !5 = !DISubroutineType(types: !6) !6 = !{} -!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9) -!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) -!9 = distinct !DILocation(line: 3, column: 47, scope: !10) -!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) -!11 = !DILocation(line: 6, column: 3, scope: !12) -!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!7 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !8) +!8 = distinct !DILocation(line: 2, column: 21, scope: !9) +!9 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!10 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !11) +!11 = distinct !DILocation(line: 2, column: 21, scope: !9, inlinedAt: !12) +!12 = distinct !DILocation(line: 3, column: 47, scope: !13) +!13 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index c69d031..e48da36 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -111,13 +111,13 @@ ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames -; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames -; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames ; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames -; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames -; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames ; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames ; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames +; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691 |