diff options
author | Florian Hahn <flo@fhahn.com> | 2023-01-19 18:10:51 +0000 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2023-01-19 18:11:54 +0000 |
commit | d0907ce7ed9f159562ca3f4cfd8d87e89e93febe (patch) | |
tree | d035d381ae35f5c04a28f30de7250f9e59284d8b /llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | |
parent | 1d98861a7896236895d467f7e7ab4eadf7dffd82 (diff) | |
download | llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.zip llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.tar.gz llvm-d0907ce7ed9f159562ca3f4cfd8d87e89e93febe.tar.bz2 |
[LoopUnroll] Directly update DT instead of DTU.
The scope of DT updates are very limited when unrolling loops: the DT
should only need updating for
* new blocks added
* exiting blocks we simplified branches
This can be done manually without too much extra work.
MergeBlockIntoPredecessor also needs to be updated to support direct
DT updates.
This fixes excessive time spent in DTU for same cases. In an internal
example, time spent in LoopUnroll with this patch goes from ~200s to 2s.
It also is slightly positive for CTMark:
* NewPM-O3: -0.13%
* NewPM-ReleaseThinLTO: -0.11%
* NewPM-ReleaseLTO-g: -0.13%
Notable improvements are mafft (~ -0.50%) and lencod (~ -0.30%), with no
workload regressed.
https://llvm-compile-time-tracker.com/compare.php?from=78a9ee7834331fb4360457cc565fa36f5452f7e0&to=687e08d011b0dc6d3edd223612761e44225c7537&stat=instructions:u
Reviewed By: kuhar
Differential Revision: https://reviews.llvm.org/D141487
Diffstat (limited to 'llvm/lib/Transforms/Utils/BasicBlockUtils.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index d14e5b8..8e49edb 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -179,7 +179,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI, bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, MemoryDependenceResults *MemDep, - bool PredecessorWithTwoSuccessors) { + bool PredecessorWithTwoSuccessors, + DominatorTree *DT) { if (BB->hasAddressTaken()) return false; @@ -232,10 +233,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, FoldSingleEntryPHINodes(BB, MemDep); } + if (DT) { + assert(!DTU && "cannot use both DT and DTU for updates"); + DomTreeNode *PredNode = DT->getNode(PredBB); + DomTreeNode *BBNode = DT->getNode(BB); + if (PredNode) { + assert(BBNode && "PredNode unreachable but BBNode reachable?"); + for (DomTreeNode *C : to_vector(BBNode->children())) + C->setIDom(PredNode); + } + } // DTU update: Collect all the edges that exit BB. // These dominator edges will be redirected from Pred. std::vector<DominatorTree::UpdateType> Updates; if (DTU) { + assert(!DT && "cannot use both DT and DTU for updates"); // To avoid processing the same predecessor more than once. SmallPtrSet<BasicBlock *, 8> SeenSuccs; SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB), @@ -311,6 +323,12 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, if (DTU) DTU->applyUpdates(Updates); + if (DT) { + assert(succ_empty(BB) && + "successors should have been transferred to PredBB"); + DT->eraseNode(BB); + } + // Finally, erase the old block and update dominator info. DeleteDeadBlock(BB, DTU); |