Diffstat (limited to 'llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp')
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 1bf7ff4..016db55 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -122,6 +122,20 @@ static cl::opt<unsigned>
     cl::desc("Max depth to recursively search for missing "
              "frames through tail calls."));
 
+// By default enable cloning of callsites involved with recursive cycles
+static cl::opt<bool> AllowRecursiveCallsites(
+    "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
+    cl::desc("Allow cloning of callsites involved in recursive cycles"));
+
+// When disabled, try to detect and prevent cloning of recursive contexts.
+// This is only necessary until we support cloning through recursive cycles.
+// Leave on by default for now, as disabling requires a little bit of compile
+// time overhead and doesn't affect correctness, it will just inflate the cold
+// hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
+static cl::opt<bool> AllowRecursiveContexts(
+    "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
+    cl::desc("Allow cloning of contexts through recursive cycles"));
+
 namespace llvm {
 cl::opt<bool> EnableMemProfContextDisambiguation(
     "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -1236,9 +1250,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
       StackEntryIdToContextNodeMap[StackId] = StackNode;
       StackNode->OrigStackOrAllocId = StackId;
     }
-    auto Ins = StackIdSet.insert(StackId);
-    if (!Ins.second)
-      StackNode->Recursive = true;
+    // Marking a node recursive will prevent its cloning completely, even for
+    // non-recursive contexts flowing through it.
+    if (!AllowRecursiveCallsites) {
+      auto Ins = StackIdSet.insert(StackId);
+      if (!Ins.second)
+        StackNode->Recursive = true;
+    }
     StackNode->AllocTypes |= (uint8_t)AllocType;
     PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
     PrevNode = StackNode;
@@ -1375,8 +1393,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
       set_union(CallerEdgeContextIds, Edge->ContextIds);
     }
     // Node can have more context ids than callers if some contexts terminate at
-    // node and some are longer.
-    assert(NodeContextIds == CallerEdgeContextIds ||
+    // node and some are longer. If we are allowing recursive callsites but
+    // haven't disabled recursive contexts, this will be violated for
+    // incompletely cloned recursive cycles, so skip the checking in that case.
+    assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
+           NodeContextIds == CallerEdgeContextIds ||
            set_is_subset(CallerEdgeContextIds, NodeContextIds));
   }
   if (Node->CalleeEdges.size()) {
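The recursion detection that the addStackNodesForMIB hunk gates behind AllowRecursiveCallsites works by inserting each stack id of a context into a set and treating a failed insert as a repeated frame, i.e. a recursive cycle. A minimal standalone sketch of that technique, with std::unordered_set standing in for the pass's DenseSet (the function and parameter names here are illustrative, not from the patch):

    #include <cstdint>
    #include <unordered_set>
    #include <vector>

    // A failed set insert means the same stack id occurs twice within one
    // allocation context, i.e. the context passes through a recursive cycle.
    static bool contextHasRecursion(const std::vector<uint64_t> &StackIds) {
      std::unordered_set<uint64_t> Seen;
      for (uint64_t StackId : StackIds)
        if (!Seen.insert(StackId).second)
          return true; // Repeated frame id: recursive callsite.
      return false;
    }

Under the new default (AllowRecursiveCallsites is true) this marking is skipped entirely because, as the added comment notes, flagging a node Recursive suppresses its cloning even for non-recursive contexts flowing through it.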
@@ -3370,6 +3391,21 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
 
   assert(Node->AllocTypes != (uint8_t)AllocationType::None);
 
+  DenseSet<uint32_t> RecursiveContextIds;
+  // If we are allowing recursive callsites, but have also disabled recursive
+  // contexts, look for context ids that show up in multiple caller edges.
+  if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
+    DenseSet<uint32_t> AllCallerContextIds;
+    for (auto &CE : Node->CallerEdges) {
+      // Resize to the largest set of caller context ids, since we know the
+      // final set will be at least that large.
+      AllCallerContextIds.reserve(CE->getContextIds().size());
+      for (auto Id : CE->getContextIds())
+        if (!AllCallerContextIds.insert(Id).second)
+          RecursiveContextIds.insert(Id);
+    }
+  }
+
   // Iterate until we find no more opportunities for disambiguating the alloc
   // types via cloning. In most cases this loop will terminate once the Node
   // has a single allocation type, in which case no more cloning is needed.
@@ -3394,6 +3430,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
       // allocation.
       auto CallerEdgeContextsForAlloc =
           set_intersection(CallerEdge->getContextIds(), AllocContextIds);
+      if (!RecursiveContextIds.empty())
+        CallerEdgeContextsForAlloc =
+            set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
       if (CallerEdgeContextsForAlloc.empty()) {
         ++EI;
         continue;
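The two identifyClones hunks above implement the -memprof-allow-recursive-contexts=false path: a context id that reaches the node through more than one caller edge is treated as recursive, and set_difference removes those ids from each edge's id set before cloning decisions are made. A standalone sketch of the same filtering, with std::set standing in for DenseSet and llvm::set_difference (the edge representation and function names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <set>
    #include <vector>

    // Ids present in more than one caller edge's set reached the node along
    // multiple paths, which indicates a recursive context.
    static std::set<uint32_t>
    findRecursiveIds(const std::vector<std::set<uint32_t>> &CallerEdgeIds) {
      std::set<uint32_t> AllIds, Recursive;
      for (const auto &EdgeIds : CallerEdgeIds)
        for (uint32_t Id : EdgeIds)
          if (!AllIds.insert(Id).second)
            Recursive.insert(Id);
      return Recursive;
    }

    // Drop the recursive ids from one edge's set, mirroring the
    // set_difference call added in the second hunk.
    static std::set<uint32_t>
    withoutRecursive(const std::set<uint32_t> &EdgeIds,
                     const std::set<uint32_t> &Recursive) {
      std::set<uint32_t> Filtered;
      std::set_difference(EdgeIds.begin(), EdgeIds.end(), Recursive.begin(),
                          Recursive.end(),
                          std::inserter(Filtered, Filtered.begin()));
      return Filtered;
    }

Note that the reserve call in the first hunk is only a capacity hint: repeated calls never shrink the set, so it ends up sized for at least the largest caller edge, as the in-line comment says.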