From 6f87b162e636b0cfe590758621a606d2bc68424f Mon Sep 17 00:00:00 2001 From: Roman Tereshin Date: Sun, 23 Feb 2020 21:53:19 -0800 Subject: [MachineVerifier] Doing ::calcRegsPassed in RPO: ~35% faster MV, NFC Depending on the target, test suite, pipeline config and perhaps other factors machine verifier when forced on with -verify-machineinstrs can increase compile time 2-2.5 times over (Release, Asserts On), taking up ~60% of the time. An invaluable tool, it significantly slows down machine verifier-enabled testing. Nearly 75% of its time MachineVerifier spends in the calcRegsPassed method. It's a classic forward dataflow analysis executed over sets, but visiting MBBs in arbitrary order. We switch that to RPO here. This speeds up MachineVerifier by about 35%, decreasing the overall compile time with -verify-machineinstrs by 20-25% or so. calcRegsPassed itself gets 2x faster here. All measured on a large suite of shaders targeting a number of GPUs. Reviewers: bogner, stoklund, rudkx, qcolombet Reviewed By: bogner Tags: #llvm Differential Revision: https://reviews.llvm.org/D75032 --- llvm/lib/CodeGen/MachineVerifier.cpp | 45 +++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'llvm/lib/CodeGen/MachineVerifier.cpp') diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 8d1c2fe..af3b8fd 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallPtrSet.h" @@ -2126,34 +2127,46 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. void MachineVerifier::calcRegsPassed() { + // This is a forward dataflow, doing it in RPO. A standard map serves as a + // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO + // number to MBB mapping all at once. + std::map RPOWorklist; + DenseMap RPONumbers; + if (MF->empty()) { + // ReversePostOrderTraversal doesn't handle empty functions. + return; + } + for (const MachineBasicBlock *MBB : + ReversePostOrderTraversal(MF)) { + // Careful with the evaluation order, fetch next number before allocating. + unsigned Number = RPONumbers.size(); + RPONumbers[MBB] = Number; + } // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - SmallPtrSet todo; - for (const auto &MBB : *MF) { + for (const MachineBasicBlock &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; if (!MInfo.reachable) continue; - for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), - SuE = MBB.succ_end(); SuI != SuE; ++SuI) { - BBInfo &SInfo = MBBInfoMap[*SuI]; + for (const MachineBasicBlock *Succ : MBB.successors()) { + BBInfo &SInfo = MBBInfoMap[Succ]; if (SInfo.addPassed(MInfo.regsLiveOut)) - todo.insert(*SuI); + RPOWorklist.emplace(RPONumbers[Succ], Succ); } } - // Iteratively push vregsPassed to successors. This will converge to the same - // final state regardless of DenseSet iteration order. - while (!todo.empty()) { - const MachineBasicBlock *MBB = *todo.begin(); - todo.erase(MBB); + // Iteratively push vregsPassed to successors. + while (!RPOWorklist.empty()) { + auto Next = RPOWorklist.begin(); + const MachineBasicBlock *MBB = Next->second; + RPOWorklist.erase(Next); BBInfo &MInfo = MBBInfoMap[MBB]; - for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), - SuE = MBB->succ_end(); SuI != SuE; ++SuI) { - if (*SuI == MBB) + for (const MachineBasicBlock *Succ : MBB->successors()) { + if (Succ == MBB) continue; - BBInfo &SInfo = MBBInfoMap[*SuI]; + BBInfo &SInfo = MBBInfoMap[Succ]; if (SInfo.addPassed(MInfo.vregsPassed)) - todo.insert(*SuI); + RPOWorklist.emplace(RPONumbers[Succ], Succ); } } } -- cgit v1.1