diff options
author | Florian Hahn <flo@fhahn.com> | 2023-01-19 18:10:51 +0000 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2023-01-19 18:11:54 +0000 |
commit | d0907ce7ed9f159562ca3f4cfd8d87e89e93febe (patch) | |
tree | d035d381ae35f5c04a28f30de7250f9e59284d8b /llvm/lib/Transforms/Utils/LoopUnroll.cpp | |
parent | 1d98861a7896236895d467f7e7ab4eadf7dffd82 (diff) | |
download | llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.zip llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.tar.gz llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.tar.bz2 |
[LoopUnroll] Directly update DT instead of DTU.
The scope of DT updates are very limited when unrolling loops: the DT
should only need updating for
* new blocks added
* exiting blocks we simplified branches
This can be done manually without too much extra work.
MergeBlockIntoPredecessor also needs to be updated to support direct
DT updates.
This fixes excessive time spent in DTU for same cases. In an internal
example, time spent in LoopUnroll with this patch goes from ~200s to 2s.
It also is slightly positive for CTMark:
* NewPM-O3: -0.13%
* NewPM-ReleaseThinLTO: -0.11%
* NewPM-ReleaseLTO-g: -0.13%
Notable improvements are mafft (~ -0.50%) and lencod (~ -0.30%), with no
workload regressed.
https://llvm-compile-time-tracker.com/compare.php?from=78a9ee7834331fb4360457cc565fa36f5452f7e0&to=687e08d011b0dc6d3edd223612761e44225c7537&stat=instructions:u
Reviewed By: kuhar
Differential Revision: https://reviews.llvm.org/D141487
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUnroll.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnroll.cpp | 53 |
1 files changed, 43 insertions, 10 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index cc96d15..f8251ee9 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -321,6 +321,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, unsigned TripMultiple; unsigned BreakoutTrip; bool ExitOnTrue; + BasicBlock *FirstExitingBlock = nullptr; SmallVector<BasicBlock *> ExitingBlocks; }; DenseMap<BasicBlock *, ExitInfo> ExitInfos; @@ -680,8 +681,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(!UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - + SmallVector<DominatorTree::UpdateType> DTUpdates; auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) { auto *Term = cast<BranchInst>(Src->getTerminator()); const unsigned Idx = ExitOnTrue ^ WillExit; @@ -695,7 +695,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, BranchInst::Create(Dest, Term); Term->eraseFromParent(); - DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}}); + DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc); }; auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j, @@ -733,28 +733,56 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Fold branches for iterations where we know that they will exit or not // exit. - for (const auto &Pair : ExitInfos) { - const ExitInfo &Info = Pair.second; + for (auto &Pair : ExitInfos) { + ExitInfo &Info = Pair.second; for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) { // The branch destination. unsigned j = (i + 1) % e; bool IsLatch = Pair.first == LatchBlock; std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch); - if (!KnownWillExit) + if (!KnownWillExit) { + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks[i]; continue; + } // We don't fold known-exiting branches for non-latch exits here, // because this ensures that both all loop blocks and all exit blocks // remain reachable in the CFG. // TODO: We could fold these branches, but it would require much more // sophisticated updates to LoopInfo. - if (*KnownWillExit && !IsLatch) + if (*KnownWillExit && !IsLatch) { + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks[i]; continue; + } SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue); } } + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + DomTreeUpdater *DTUToUse = &DTU; + if (ExitingBlocks.size() == 1) { + // Manually update the DT if there's a single exiting node. In that case + // there's a single exit node and it is sufficient to update the nodes + // immediately dominated by the original exiting block. They will become + // dominated by the first exiting block that leaves the loop after + // unrolling. Note that the CFG inside the loop does not change, so there's + // no need to update the DT inside the unrolled loop. + DTUToUse = nullptr; + auto &[OriginalExit, Info] = *ExitInfos.begin(); + if (!Info.FirstExitingBlock) + Info.FirstExitingBlock = Info.ExitingBlocks.back(); + for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) { + if (L->contains(C->getBlock())) + continue; + C->setIDom(DT->getNode(Info.FirstExitingBlock)); + } + } else { + DTU.applyUpdates(DTUpdates); + } + // When completely unrolling, the last latch becomes unreachable. if (!LatchIsExiting && CompletelyUnroll) { // There is no need to update the DT here, because there must be a unique @@ -774,16 +802,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (Term && Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); BasicBlock *Fold = Dest->getUniquePredecessor(); - if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { + if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI, + /*MSSAU=*/nullptr, /*MemDep=*/nullptr, + /*PredecessorWithTwoSuccessors=*/false, + DTUToUse ? nullptr : DT)) { // Dest has been folded into Fold. Update our worklists accordingly. std::replace(Latches.begin(), Latches.end(), Dest, Fold); llvm::erase_value(UnrolledLoopBlocks, Dest); } } } - // Apply updates to the DomTree. - DT = &DTU.getDomTree(); + if (DTUToUse) { + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); + } assert(!UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); |