diff options
Diffstat (limited to 'llvm')
3 files changed, 12 insertions, 359 deletions
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 81de0a5..0164fcd 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -97,8 +97,6 @@ STATISTIC(MissingAllocForContextId, "Number of missing alloc nodes for context ids"); STATISTIC(SkippedCallsCloning, "Number of calls skipped during cloning due to unexpected operand"); -STATISTIC(MismatchedCloneAssignments, - "Number of callsites assigned to call multiple non-matching clones"); static cl::opt<std::string> DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -2062,20 +2060,6 @@ static bool isMemProfClone(const Function &F) { return F.getName().contains(MemProfCloneSuffix); } -// Return the clone number of the given function by extracting it from the -// memprof suffix. Assumes the caller has already confirmed it is a memprof -// clone. -static unsigned getMemProfCloneNum(const Function &F) { - assert(isMemProfClone(F)); - auto Pos = F.getName().find_last_of('.'); - assert(Pos > 0); - unsigned CloneNo; - bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo); - assert(!Err); - (void)Err; - return CloneNo; -} - std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, const Instruction *Call, unsigned CloneNo) const { @@ -3995,22 +3979,7 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const { void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { - auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction(); - auto NewCalleeCloneNo = CalleeFunc.cloneNo(); - if (isMemProfClone(*CurF)) { - // If we already assigned this callsite to call a specific non-default - // clone (i.e. not the original function which is clone 0), ensure that we - // aren't trying to now update it to call a different clone, which is - // indicative of a bug in the graph or function assignment. - auto CurCalleeCloneNo = getMemProfCloneNum(*CurF); - if (CurCalleeCloneNo != NewCalleeCloneNo) { - LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " - << CurCalleeCloneNo << " now " << NewCalleeCloneNo - << "\n"); - MismatchedCloneAssignments++; - } - } - if (NewCalleeCloneNo > 0) + if (CalleeFunc.cloneNo() > 0) cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func()); OREGetter(CallerCall.call()->getFunction()) .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call()) @@ -4026,19 +3995,7 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall, assert(CI && "Caller cannot be an allocation which should not have profiled calls"); assert(CI->Clones.size() > CallerCall.cloneNo()); - auto NewCalleeCloneNo = CalleeFunc.cloneNo(); - auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()]; - // If we already assigned this callsite to call a specific non-default - // clone (i.e. not the original function which is clone 0), ensure that we - // aren't trying to now update it to call a different clone, which is - // indicative of a bug in the graph or function assignment. - if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) { - LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was " - << CurCalleeCloneNo << " now " << NewCalleeCloneNo - << "\n"); - MismatchedCloneAssignments++; - } - CurCalleeCloneNo = NewCalleeCloneNo; + CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo(); } // Update the debug information attached to NewFunc to use the clone Name. Note @@ -4746,19 +4703,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // where the callers were assigned to different clones of a function. } - auto FindFirstAvailFuncClone = [&]() { - // Find first function in FuncClonesToCallMap without an assigned - // clone of this callsite Node. We should always have one - // available at this point due to the earlier cloning when the - // FuncClonesToCallMap size was smaller than the clone number. - for (auto &CF : FuncClonesToCallMap) { - if (!FuncCloneToCurNodeCloneMap.count(CF.first)) - return CF.first; - } - assert(false && - "Expected an available func clone for this callsite clone"); - }; - // See if we can use existing function clone. Walk through // all caller edges to see if any have already been assigned to // a clone of this callsite's function. If we can use it, do so. If not, @@ -4875,7 +4819,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { // clone of OrigFunc for another caller during this iteration over // its caller edges. if (!FuncCloneAssignedToCurCallsiteClone) { - FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); + // Find first function in FuncClonesToCallMap without an assigned + // clone of this callsite Node. We should always have one + // available at this point due to the earlier cloning when the + // FuncClonesToCallMap size was smaller than the clone number. + for (auto &CF : FuncClonesToCallMap) { + if (!FuncCloneToCurNodeCloneMap.count(CF.first)) { + FuncCloneAssignedToCurCallsiteClone = CF.first; + break; + } + } assert(FuncCloneAssignedToCurCallsiteClone); // Assign Clone to FuncCloneAssignedToCurCallsiteClone AssignCallsiteCloneToFuncClone( @@ -4889,31 +4842,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() { FuncCloneAssignedToCurCallsiteClone); } } - // If we didn't assign a function clone to this callsite clone yet, e.g. - // none of its callers has a non-null call, do the assignment here. - // We want to ensure that every callsite clone is assigned to some - // function clone, so that the call updates below work as expected. - // In particular if this is the original callsite, we want to ensure it - // is assigned to the original function, otherwise the original function - // will appear available for assignment to other callsite clones, - // leading to unintended effects. For one, the unknown and not updated - // callers will call into cloned paths leading to the wrong hints, - // because they still call the original function (clone 0). Also, - // because all callsites start out as being clone 0 by default, we can't - // easily distinguish between callsites explicitly assigned to clone 0 - // vs those never assigned, which can lead to multiple updates of the - // calls when invoking updateCall below, with mismatched clone values. - // TODO: Add a flag to the callsite nodes or some other mechanism to - // better distinguish and identify callsite clones that are not getting - // assigned to function clones as expected. - if (!FuncCloneAssignedToCurCallsiteClone) { - FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone(); - assert(FuncCloneAssignedToCurCallsiteClone && - "No available func clone for this callsite clone"); - AssignCallsiteCloneToFuncClone( - FuncCloneAssignedToCurCallsiteClone, Call, Clone, - /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call)); - } } if (VerifyCCG) { checkNode<DerivedCCG, FuncTy, CallTy>(Node); diff --git a/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll b/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll deleted file mode 100644 index bd35850..0000000 --- a/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll +++ /dev/null @@ -1,145 +0,0 @@ -;; Make sure we assign the original callsite to a function clone (which will be -;; the original function clone), even when we cannot update its caller (due to -;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use -;; the original function for a different clone, leading to confusion later when -;; rewriting the calls. - -;; -stats requires asserts -; REQUIRES: asserts - -; RUN: opt -thinlto-bc %s >%t.o -; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ -; RUN: -supports-hot-cold-new \ -; RUN: -r=%t.o,A,plx \ -; RUN: -r=%t.o,B,plx \ -; RUN: -r=%t.o,C,plx \ -; RUN: -r=%t.o,D,plx \ -; RUN: -r=%t.o,E,plx \ -; RUN: -r=%t.o,F,plx \ -; RUN: -r=%t.o,G,plx \ -; RUN: -r=%t.o,A1,plx \ -; RUN: -r=%t.o,B1,plx \ -; RUN: -r=%t.o,_Znwm, \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \ -; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ -; RUN: -o %t.out 2>&1 | FileCheck %s \ -; RUN: --implicit-check-not="Mismatch in call clone assignment" \ -; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones" - -; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR - -; ModuleID = '<stdin>' -source_filename = "reduced.ll" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev4-linux-gnu" - -; IR-LABEL: define dso_local void @A() -define void @A() #0 { - ; IR: call void @C() - call void @C() - ret void -} - -; IR-LABEL: define dso_local void @B() -define void @B() #0 { - ; IR: call void @C.memprof.1() - call void @C(), !callsite !1 - ret void -} - -; IR-LABEL: define dso_local void @C() -define void @C() #0 { - ; IR: call void @F() - call void @F(), !callsite !16 - ; IR: call void @D() - call void @D(), !callsite !2 - ret void -} - -; IR-LABEL: define dso_local void @D() -define void @D() #0 { - ; IR: call void @E() - call void @E(), !callsite !3 - ; IR: call void @G() - call void @G(), !callsite !17 - ret void -} - -; IR-LABEL: define dso_local void @E() -define void @E() #0 { - ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]] - %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9 - ret void -} - -; IR-LABEL: define dso_local void @F() -define void @F() #0 { - ; IR: call void @G() - call void @G(), !callsite !17 - ret void -} - -; IR-LABEL: define dso_local void @G() -define void @G() #0 { - ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD]] - %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15 - ret void -} - -; IR-LABEL: define dso_local void @A1() -define void @A1() #0 { - ; IR: call void @C() - call void @C(), !callsite !18 - ret void -} - -; IR-LABEL: define dso_local void @B1() -define void @B1() #0 { - ; IR: call void @C.memprof.1() - call void @C(), !callsite !19 - ret void -} - -; IR-LABEL: define dso_local void @C.memprof.1() - ; IR: call void @F.memprof.1() - ; IR: call void @D.memprof.1() - -; IR-LABEL: define dso_local void @D.memprof.1() - ; IR: call void @E.memprof.1() - ; IR: call void @G() - -; IR-LABEL: define dso_local void @E.memprof.1() - ; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]] - -; IR-LABEL: define dso_local void @F.memprof.1() - ; IR: call void @G.memprof.1() - -; IR-LABEL: define dso_local void @G.memprof.1() - ; IR: call ptr @_Znwm(i64 0) #[[COLD]] - -declare ptr @_Znwm(i64) - -attributes #0 = { noinline optnone } -; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } -; IR: attributes #[[COLD]] = { "memprof"="cold" } - -!0 = !{i64 123} -!1 = !{i64 234} -!2 = !{i64 345} -!3 = !{i64 456} -!4 = !{!5, !7} -!5 = !{!6, !"notcold"} -!6 = !{i64 567, i64 456, i64 345, i64 123} -!7 = !{!8, !"cold"} -!8 = !{i64 567, i64 456, i64 345, i64 234} -!9 = !{i64 567} -!10 = !{!11, !13} -!11 = !{!12, !"notcold"} -!12 = !{i64 678, i64 891, i64 789, i64 912} -!13 = !{!14, !"cold"} -!14 = !{i64 678, i64 891, i64 789, i64 812} -!15 = !{i64 678} -!16 = !{i64 789} -!17 = !{i64 891} -!18 = !{i64 912} -!19 = !{i64 812} diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll b/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll deleted file mode 100644 index 29c58c9..0000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll +++ /dev/null @@ -1,130 +0,0 @@ -;; Make sure we assign the original callsite to a function clone (which will be -;; the original function clone), even when we cannot update its caller (due to -;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use -;; the original function for a different clone, leading to confusion later when -;; rewriting the calls. - -;; -stats requires asserts -; REQUIRES: asserts - -; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats -debug \ -; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | \ -; RUN: FileCheck %s --implicit-check-not="Mismatch in call clone assignment" \ -; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones" - - -; ModuleID = '<stdin>' -source_filename = "reduced.ll" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev4-linux-gnu" - -; CHECK-LABEL: define void @A() -define void @A() { - ; CHECK: call void @C() - call void @C() - ret void -} - -; CHECK-LABEL: define void @B() -define void @B() { - ; CHECK: call void @C.memprof.1() - call void @C(), !callsite !1 - ret void -} - -; CHECK-LABEL: define void @C() -define void @C() { - ; CHECK: call void @F() - call void @F(), !callsite !16 - ; CHECK: call void @D() - call void @D(), !callsite !2 - ret void -} - -; CHECK-LABEL: define void @D() -define void @D() { - ; CHECK: call void @E() - call void @E(), !callsite !3 - ; CHECK: call void @G() - call void @G(), !callsite !17 - ret void -} - -; CHECK-LABEL: define void @E() -define void @E() { - ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]] - %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9 - ret void -} - -; CHECK-LABEL: define void @F() -define void @F() { - ; CHECK: call void @G() - call void @G(), !callsite !17 - ret void -} - -; CHECK-LABEL: define void @G() -define void @G() { - ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD]] - %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15 - ret void -} - -; CHECK-LABEL: define void @A1() -define void @A1() { - ; CHECK: call void @C() - call void @C(), !callsite !18 - ret void -} - -; CHECK-LABEL: define void @B1() -define void @B1() { - ; CHECK: call void @C.memprof.1() - call void @C(), !callsite !19 - ret void -} - -; CHECK-LABEL: define void @C.memprof.1() - ; CHECK: call void @F.memprof.1() - ; CHECK: call void @D.memprof.1() - -; CHECK-LABEL: define void @D.memprof.1() - ; CHECK: call void @E.memprof.1() - ; CHECK: call void @G() - -; CHECK-LABEL: define void @E.memprof.1() - ; CHECK: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]] - -; CHECK-LABEL: define void @F.memprof.1() - ; CHECK: call void @G.memprof.1() - -; CHECK-LABEL: define void @G.memprof.1() - ; CHECK: call ptr @_Znwm(i64 0) #[[COLD]] - -declare ptr @_Znwm(i64) - -; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } -; IR: attributes #[[COLD]] = { "memprof"="cold" } - -!0 = !{i64 123} -!1 = !{i64 234} -!2 = !{i64 345} -!3 = !{i64 456} -!4 = !{!5, !7} -!5 = !{!6, !"notcold"} -!6 = !{i64 567, i64 456, i64 345, i64 123} -!7 = !{!8, !"cold"} -!8 = !{i64 567, i64 456, i64 345, i64 234} -!9 = !{i64 567} -!10 = !{!11, !13} -!11 = !{!12, !"notcold"} -!12 = !{i64 678, i64 891, i64 789, i64 912} -!13 = !{!14, !"cold"} -!14 = !{i64 678, i64 891, i64 789, i64 812} -!15 = !{i64 678} -!16 = !{i64 789} -!17 = !{i64 891} -!18 = !{i64 912} -!19 = !{i64 812} |