aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineCombiner.cpp
diff options
context:
space:
mode:
authorFlorian Hahn <florian.hahn@arm.com>2017-09-07 12:49:39 +0000
committerFlorian Hahn <florian.hahn@arm.com>2017-09-07 12:49:39 +0000
commitd39b8a35335f591edba19e4e0f8a336dee3dbc06 (patch)
treedf6ec7102f861e6f89f3e19395b84734f54a515c /llvm/lib/CodeGen/MachineCombiner.cpp
parent2f5cbc449a6371486982c20648975ee70894f71d (diff)
downloadllvm-d39b8a35335f591edba19e4e0f8a336dee3dbc06.zip
llvm-d39b8a35335f591edba19e4e0f8a336dee3dbc06.tar.gz
llvm-d39b8a35335f591edba19e4e0f8a336dee3dbc06.tar.bz2
[MachineCombiner] Update instruction depths incrementally for large BBs.
Summary: For large basic blocks with lots of combinable instructions, the MachineTraceMetrics computations in MachineCombiner can dominate the compile time, as computing the trace information is quadratic in the number of instructions in a BB and it's relevant successors/predecessors. In most cases, knowing the instruction depth should be enough to make combination decisions. As we already iterate over all instructions in a basic block, the instruction depth can be computed incrementally. This reduces the cost of machine-combine drastically in cases where lots of instructions are combined. The major drawback is that AFAIK, computing the critical path length cannot be done incrementally. Therefore we only compute instruction depths incrementally, for basic blocks with more instructions than inc_threshold. The -machine-combiner-inc-threshold option can be used to set the threshold and allows for easier experimenting and checking if using incremental updates for all basic blocks has any impact on the performance. Reviewers: sanjoy, Gerolf, MatzeB, efriedma, fhahn Reviewed By: fhahn Subscribers: kiranchandramohan, javed.absar, efriedma, llvm-commits Differential Revision: https://reviews.llvm.org/D36619 llvm-svn: 312719
Diffstat (limited to 'llvm/lib/CodeGen/MachineCombiner.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp105
1 files changed, 82 insertions, 23 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index e6f80db..8586cf9 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -34,6 +35,11 @@ using namespace llvm;
STATISTIC(NumInstCombined, "Number of machineinst combined");
+static cl::opt<unsigned>
+inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
+ cl::desc("Incremental depth computation will be used for basic "
+ "blocks with more instructions."), cl::init(500));
+
namespace {
class MachineCombiner : public MachineFunctionPass {
const TargetInstrInfo *TII;
@@ -73,7 +79,7 @@ private:
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern);
+ MachineCombinerPattern Pattern, bool SlackIsAccurate);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -247,7 +253,8 @@ bool MachineCombiner::improvesCriticalPathLen(
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineCombinerPattern Pattern) {
+ MachineCombinerPattern Pattern,
+ bool SlackIsAccurate) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
"Missing machine model\n");
// NewRoot is the last instruction in the \p InsInstrs vector.
@@ -258,7 +265,7 @@ bool MachineCombiner::improvesCriticalPathLen(
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
dbgs() << " RootDepth: " << RootDepth << "\n");
@@ -281,17 +288,18 @@ bool MachineCombiner::improvesCriticalPathLen(
RootLatency += TSchedModel.computeInstrLatency(I);
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount = RootDepth + RootLatency +
+ (SlackIsAccurate ? RootSlack : 0);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
dbgs() << " RootLatency: " << RootLatency << "\n";
- dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
+ << SlackIsAccurate << "\n";
dbgs() << " NewRootDepth + NewRootLatency = "
- << NewRootDepth + NewRootLatency << "\n";
+ << NewCycleCount << "\n";
dbgs() << " RootDepth + RootLatency + RootSlack = "
- << RootDepth + RootLatency + RootSlack << "\n";);
-
- unsigned NewCycleCount = NewRootDepth + NewRootLatency;
- unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+ << OldCycleCount << "\n";
+ );
return NewCycleCount <= OldCycleCount;
}
@@ -354,17 +362,44 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
+/// depths if requested.
+///
+/// \param MBB basic block to insert instructions in
+/// \param MI current machine instruction
+/// \param InsInstrs new instructions to insert in \p MBB
+/// \param DelInstrs instruction to delete from \p MBB
+/// \param MinInstr is a pointer to the machine trace information
+/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param IncrementalUpdate if true, compute instruction depths incrementally,
+/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> InsInstrs,
SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics *Traces) {
+ MachineTraceMetrics::Ensemble *MinInstr,
+ SparseSet<LiveRegUnit> &RegUnits,
+ bool IncrementalUpdate) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
+
+ for (auto *InstrPtr : DelInstrs) {
InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
- ++NumInstCombined;
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ // Erase all LiveRegs defined by the removed instruction
+ for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
+ if (I->MI == InstrPtr)
+ I = RegUnits.erase(I);
+ else
+ I++;
+ }
+ }
+
+ if (IncrementalUpdate)
+ for (auto *InstrPtr : InsInstrs)
+ MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+ else
+ MinInstr->invalidate(MBB);
+
+ NumInstCombined++;
}
/// Substitute a slow code sequence with a faster one by
@@ -378,9 +413,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool Changed = false;
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+ bool IncrementalUpdate = false;
auto BlockIter = MBB->begin();
+ auto LastUpdate = BlockIter;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(TRI->getNumRegUnits());
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -419,9 +461,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
unsigned NewInstCount = InsInstrs.size();
@@ -436,23 +475,41 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
+ if (IncrementalUpdate) {
+ // Update depths since the last incremental update.
+ MinInstr->updateDepths(LastUpdate, std::next(BlockIter), RegUnits);
+ LastUpdate = BlockIter;
+ }
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
} else {
- // Calculating the trace metrics may be expensive,
- // so only do this when necessary.
+ // For big basic blocks, we only compute the full trace the first time
+ // we hit this. We do not invalidate the trace, but instead update the
+ // instruction depths incrementally.
+ // NOTE: Only the instruction depths up to MI are accurate. All other
+ // trace information is not updated.
MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
- InstrIdxForVirtReg, P) &&
+ InstrIdxForVirtReg, P,
+ !IncrementalUpdate) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ if (MBB->size() > inc_threshold)
+ // Use incremental depth updates for basic blocks above treshold
+ IncrementalUpdate = true;
+
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, IncrementalUpdate);
+
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -467,6 +524,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
}
+ if (Changed && IncrementalUpdate)
+ Traces->invalidate(MBB);
return Changed;
}