diff options
author | Arthur Eubanks <aeubanks@google.com> | 2020-12-04 08:30:34 -0800 |
---|---|---|
committer | Arthur Eubanks <aeubanks@google.com> | 2020-12-04 08:30:50 -0800 |
commit | 7f6f9f4cf966c78a315d15d6e913c43cfa45c47c (patch) | |
tree | 704c28dcfca3341243afa63c91e1a75d00f50bb8 /llvm/lib/Analysis/CGSCCPassManager.cpp | |
parent | f628eef98acd24f8eb6a52d67ee887bb18f04bca (diff) | |
download | llvm-7f6f9f4cf966c78a315d15d6e913c43cfa45c47c.zip llvm-7f6f9f4cf966c78a315d15d6e913c43cfa45c47c.tar.gz llvm-7f6f9f4cf966c78a315d15d6e913c43cfa45c47c.tar.bz2 |
[NewPM] Make pass adaptors less templatey
Currently PassBuilder.cpp is by far the file that takes longest to
compile. This is due to tons of templates being instantiated per pass.
Follow PassManager by using wrappers around passes to avoid making
the adaptors templated on the pass type. This allows us to move various
adaptors' run methods into .cpp files.
This reduces the compile time of PassBuilder.cpp on my machine from 66
to 39 seconds. It also reduces the size of opt from 685M to 676M.
Reviewed By: dexonsmith
Differential Revision: https://reviews.llvm.org/D92616
Diffstat (limited to 'llvm/lib/Analysis/CGSCCPassManager.cpp')
-rw-r--r-- | llvm/lib/Analysis/CGSCCPassManager.cpp | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index 95d2ebf..fae78273 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -148,6 +148,451 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, return PA; } +PreservedAnalyses +ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) { + // Setup the CGSCC analysis manager from its proxy. + CGSCCAnalysisManager &CGAM = + AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); + + // Get the call graph for this module. + LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); + + // Get Function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager(); + + // We keep worklists to allow us to push more work onto the pass manager as + // the passes are run. + SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; + SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; + + // Keep sets for invalidated SCCs and RefSCCs that should be skipped when + // iterating off the worklists. + SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; + SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; + + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + InlinedInternalEdges; + + CGSCCUpdateResult UR = { + RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, + nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, + {}}; + + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. + PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); + + PreservedAnalyses PA = PreservedAnalyses::all(); + CG.buildRefSCCs(); + for (auto RCI = CG.postorder_ref_scc_begin(), + RCE = CG.postorder_ref_scc_end(); + RCI != RCE;) { + assert(RCWorklist.empty() && + "Should always start with an empty RefSCC worklist"); + // The postorder_ref_sccs range we are walking is lazily constructed, so + // we only push the first one onto the worklist. The worklist allows us + // to capture *new* RefSCCs created during transformations. + // + // We really want to form RefSCCs lazily because that makes them cheaper + // to update as the program is simplified and allows us to have greater + // cache locality as forming a RefSCC touches all the parts of all the + // functions within that RefSCC. + // + // We also eagerly increment the iterator to the next position because + // the CGSCC passes below may delete the current RefSCC. + RCWorklist.insert(&*RCI++); + + do { + LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); + if (InvalidRefSCCSet.count(RC)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); + continue; + } + + assert(CWorklist.empty() && + "Should always start with an empty SCC worklist"); + + LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC + << "\n"); + + // The top of the worklist may *also* be the same SCC we just ran over + // (and invalidated for). Keep track of that last SCC we processed due + // to SCC update to avoid redundant processing when an SCC is both just + // updated itself and at the top of the worklist. + LazyCallGraph::SCC *LastUpdatedC = nullptr; + + // Push the initial SCCs in reverse post-order as we'll pop off the + // back and so see this in post-order. + for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) + CWorklist.insert(&C); + + do { + LazyCallGraph::SCC *C = CWorklist.pop_back_val(); + // Due to call graph mutations, we may have invalid SCCs or SCCs from + // other RefSCCs in the worklist. The invalid ones are dead and the + // other RefSCCs should be queued above, so we just need to skip both + // scenarios here. + if (InvalidSCCSet.count(C)) { + LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); + continue; + } + if (LastUpdatedC == C) { + LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n"); + continue; + } + if (&C->getOuterRefSCC() != RC) { + LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " + "RefSCC...\n"); + continue; + } + + // Ensure we can proxy analysis updates from the CGSCC analysis manager + // into the the Function analysis manager by getting a proxy here. + // This also needs to update the FunctionAnalysisManager, as this may be + // the first time we see this SCC. + CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( + FAM); + + // Each time we visit a new SCC pulled off the worklist, + // a transformation of a child SCC may have also modified this parent + // and invalidated analyses. So we invalidate using the update record's + // cross-SCC preserved set. This preserved set is intersected by any + // CGSCC pass that handles invalidation (primarily pass managers) prior + // to marking its SCC as preserved. That lets us track everything that + // might need invalidation across SCCs without excessive invalidations + // on a single SCC. + // + // This essentially allows SCC passes to freely invalidate analyses + // of any ancestor SCC. If this becomes detrimental to successfully + // caching analyses, we could force each SCC pass to manually + // invalidate the analyses for any SCCs other than themselves which + // are mutated. However, that seems to lose the robustness of the + // pass-manager driven invalidation scheme. + CGAM.invalidate(*C, UR.CrossSCCPA); + + do { + // Check that we didn't miss any update scenario. + assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + assert(&C->getOuterRefSCC() == RC && + "Processing an SCC in a different RefSCC!"); + + LastUpdatedC = UR.UpdatedC; + UR.UpdatedRC = nullptr; + UR.UpdatedC = nullptr; + + // Check the PassInstrumentation's BeforePass callbacks before + // running the pass, skip its execution completely if asked to + // (callback returns false). + if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(*C, CGAM, CG, UR); + } + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); + else + PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); + + // Update the SCC and RefSCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + RC = UR.UpdatedRC ? UR.UpdatedRC : RC; + + if (UR.UpdatedC) { + // If we're updating the SCC, also update the FAM inside the proxy's + // result. + CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( + FAM); + } + + // If the CGSCC pass wasn't able to provide a valid updated SCC, + // the current SCC may simply need to be skipped if invalid. + if (UR.InvalidatedSCCs.count(C)) { + LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); + break; + } + // Check that we didn't miss any update scenario. + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // We handle invalidating the CGSCC analysis manager's information + // for the (potentially updated) SCC here. Note that any other SCCs + // whose structure has changed should have been invalidated by + // whatever was updating the call graph. This SCC gets invalidated + // late as it contains the nodes that were actively being + // processed. + CGAM.invalidate(*C, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + // Also intersect with the cross-SCC preserved set to capture any + // cross-SCC invalidation. + UR.CrossSCCPA.intersect(PassPA); + PA.intersect(std::move(PassPA)); + + // The pass may have restructured the call graph and refined the + // current SCC and/or RefSCC. We need to update our current SCC and + // RefSCC pointers to follow these. Also, when the current SCC is + // refined, re-run the SCC pass over the newly refined SCC in order + // to observe the most precise SCC model available. This inherently + // cannot cycle excessively as it only happens when we split SCCs + // apart, at most converging on a DAG of single nodes. + // FIXME: If we ever start having RefSCC passes, we'll want to + // iterate there too. + if (UR.UpdatedC) + LLVM_DEBUG(dbgs() + << "Re-running SCC passes after a refinement of the " + "current SCC: " + << *UR.UpdatedC << "\n"); + + // Note that both `C` and `RC` may at this point refer to deleted, + // invalid SCC and RefSCCs respectively. But we will short circuit + // the processing when we check them in the loop above. + } while (UR.UpdatedC); + } while (!CWorklist.empty()); + + // We only need to keep internal inlined edge information within + // a RefSCC, clear it to save on space and let the next time we visit + // any of these functions have a fresh start. + InlinedInternalEdges.clear(); + } while (!RCWorklist.empty()); + } + + // By definition we preserve the call garph, all SCC analyses, and the + // analysis proxies by handling them above and in any nested pass managers. + PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); + PA.preserve<LazyCallGraphAnalysis>(); + PA.preserve<CGSCCAnalysisManagerModuleProxy>(); + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + return PA; +} + +PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + PreservedAnalyses PA = PreservedAnalyses::all(); + PassInstrumentation PI = + AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); + + // The SCC may be refined while we are running passes over it, so set up + // a pointer that we can update. + LazyCallGraph::SCC *C = &InitialC; + + // Struct to track the counts of direct and indirect calls in each function + // of the SCC. + struct CallCount { + int Direct; + int Indirect; + }; + + // Put value handles on all of the indirect calls and return the number of + // direct calls for each function in the SCC. + auto ScanSCC = [](LazyCallGraph::SCC &C, + SmallMapVector<Value *, WeakTrackingVH, 16> &CallHandles) { + assert(CallHandles.empty() && "Must start with a clear set of handles."); + + SmallDenseMap<Function *, CallCount> CallCounts; + CallCount CountLocal = {0, 0}; + for (LazyCallGraph::Node &N : C) { + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; + for (Instruction &I : instructions(N.getFunction())) + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->getCalledFunction()) { + ++Count.Direct; + } else { + ++Count.Indirect; + CallHandles.insert({CB, WeakTrackingVH(CB)}); + } + } + } + + return CallCounts; + }; + + UR.IndirectVHs.clear(); + // Populate the initial call handles and get the initial call counts. + auto CallCounts = ScanSCC(*C, UR.IndirectVHs); + + for (int Iteration = 0;; ++Iteration) { + if (!PI.runBeforePass<LazyCallGraph::SCC>(*Pass, *C)) + continue; + + PreservedAnalyses PassPA = Pass->run(*C, AM, CG, UR); + + if (UR.InvalidatedSCCs.count(C)) + PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA); + else + PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA); + + // If the SCC structure has changed, bail immediately and let the outer + // CGSCC layer handle any iteration to reflect the refined structure. + if (UR.UpdatedC && UR.UpdatedC != C) { + PA.intersect(std::move(PassPA)); + break; + } + + // Check that we didn't miss any update scenario. + assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // Check whether any of the handles were devirtualized. + bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool { + if (P.second) { + if (CallBase *CB = dyn_cast<CallBase>(P.second)) { + if (CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n"); + return true; + } + } + } + return false; + }); + + // Rescan to build up a new set of handles and count how many direct + // calls remain. If we decide to iterate, this also sets up the input to + // the next iteration. + UR.IndirectVHs.clear(); + auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs); + + // If we haven't found an explicit devirtualization already see if we + // have decreased the number of indirect calls and increased the number + // of direct calls for any function in the SCC. This can be fooled by all + // manner of transformations such as DCE and other things, but seems to + // work well in practice. + if (!Devirt) + // Iterate over the keys in NewCallCounts, if Function also exists in + // CallCounts, make the check below. + for (auto &Pair : NewCallCounts) { + auto &CallCountNew = Pair.second; + auto CountIt = CallCounts.find(Pair.first); + if (CountIt != CallCounts.end()) { + const auto &CallCountOld = CountIt->second; + if (CallCountOld.Indirect > CallCountNew.Indirect && + CallCountOld.Direct < CallCountNew.Direct) { + Devirt = true; + break; + } + } + } + + if (!Devirt) { + PA.intersect(std::move(PassPA)); + break; + } + + // Otherwise, if we've already hit our max, we're done. + if (Iteration >= MaxIterations) { + maxDevirtIterationsReached(); + LLVM_DEBUG( + dbgs() << "Found another devirtualization after hitting the max " + "number of repetitions (" + << MaxIterations << ") on SCC: " << *C << "\n"); + PA.intersect(std::move(PassPA)); + break; + } + + LLVM_DEBUG( + dbgs() << "Repeating an SCC pass after finding a devirtualization in: " + << *C << "\n"); + + // Move over the new call counts in preparation for iterating. + CallCounts = std::move(NewCallCounts); + + // Update the analysis manager with each run and intersect the total set + // of preserved analyses so we're ready to iterate. + AM.invalidate(*C, PassPA); + + PA.intersect(std::move(PassPA)); + } + + // Note that we don't add any preserved entries here unlike a more normal + // "pass manager" because we only handle invalidation *between* iterations, + // not after the last iteration. + return PA; +} + +PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + // Setup the function analysis manager from its proxy. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + + SmallVector<LazyCallGraph::Node *, 4> Nodes; + for (LazyCallGraph::Node &N : C) + Nodes.push_back(&N); + + // The SCC may get split while we are optimizing functions due to deleting + // edges. If this happens, the current SCC can shift, so keep track of + // a pointer we can overwrite. + LazyCallGraph::SCC *CurrentC = &C; + + LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C << "\n"); + + PreservedAnalyses PA = PreservedAnalyses::all(); + for (LazyCallGraph::Node *N : Nodes) { + // Skip nodes from other SCCs. These may have been split out during + // processing. We'll eventually visit those SCCs and pick up the nodes + // there. + if (CG.lookupSCC(*N) != CurrentC) + continue; + + Function &F = N->getFunction(); + + PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); + if (!PI.runBeforePass<Function>(*Pass, F)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name()); + PassPA = Pass->run(F, FAM); + } + + PI.runAfterPass<Function>(*Pass, F, PassPA); + + // We know that the function pass couldn't have invalidated any other + // function's analyses (that's the contract of a function pass), so + // directly handle the function analysis manager's invalidation here. + FAM.invalidate(F, PassPA); + + // Then intersect the preserved set so that invalidation of module + // analyses will eventually occur when the module pass completes. + PA.intersect(std::move(PassPA)); + + // If the call graph hasn't been preserved, update it based on this + // function pass. This may also update the current SCC to point to + // a smaller, more refined SCC. + auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { + CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, + AM, UR, FAM); + assert(CG.lookupSCC(*N) == CurrentC && + "Current SCC not updated to the SCC containing the current node!"); + } + } + + // By definition we preserve the proxy. And we preserve all analyses on + // Functions. This precludes *any* invalidation of function analyses by the + // proxy, but that's OK because we've taken care to invalidate analyses in + // the function analysis manager incrementally above. + PA.preserveSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + + // We've also ensured that we updated the call graph along the way. + PA.preserve<LazyCallGraphAnalysis>(); + + return PA; +} + bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) { |