aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachineBlockPlacement.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MachineBlockPlacement.cpp')
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp41
1 files changed, 37 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 783d22f..1729670 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty(
cl::init(2),
cl::Hidden);
+// Heuristic for tail duplication if profile count is used in cost model.
+static cl::opt<unsigned> TailDupProfilePercentThreshold(
+ "tail-dup-profile-percent-threshold",
+ cl::desc("If profile count information is used in tail duplication cost "
+ "model, the gained fall through number from tail duplication "
+ "should be at least this percent of hot count."),
+ cl::init(50), cl::Hidden);
+
// Heuristic for triangle chains.
static cl::opt<unsigned> TriangleChainCount(
"triangle-chain-count",
@@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Partial tail duplication threshold.
BlockFrequency DupThreshold;
+ /// True: use block profile count to compute tail duplication cost.
+ /// False: use block frequency to compute tail duplication cost.
+ bool UseProfileCount;
+
/// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif
+ /// Get block profile count or frequency according to UseProfileCount.
+ /// The return value is used to model tail duplication cost.
+ BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
+ if (UseProfileCount) {
+ auto Count = MBFI->getMBFI().getBlockProfileCount(BB);
+ if (Count)
+ return *Count;
+ else
+ return 0;
+ } else
+ return MBFI->getBlockFreq(BB);
+ }
+
/// Scale the DupThreshold according to basic block size.
BlockFrequency scaleThreshold(MachineBasicBlock *BB);
void initDupThreshold();
@@ -3120,7 +3145,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
// Compute the number of reduced taken branches if Pred falls through to BB
// instead of another successor. Then compare it with threshold.
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
BlockFrequency Gain = PredFreq * (BBProb - BestProb);
return Gain > scaleThreshold(BB);
}
@@ -3194,7 +3219,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
// it. But it can beneficially fall through to BB, and duplicate BB into other
// predecessors.
for (MachineBasicBlock *Pred : Preds) {
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
if (!TailDup.canTailDuplicate(BB, Pred)) {
// BB can't be duplicated into Pred, but it is possible to be layout
@@ -3243,6 +3268,15 @@ void MachineBlockPlacement::initDupThreshold() {
if (!F->getFunction().hasProfileData())
return;
+ // We prefer to use prifile count.
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
+ if (HotThreshold != UINT64_MAX) {
+ UseProfileCount = true;
+ DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ return;
+ }
+
+ // Profile count is not available, we can use block frequency instead.
BlockFrequency MaxFreq = 0;
for (MachineBasicBlock &MBB : *F) {
BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
@@ -3250,10 +3284,9 @@ void MachineBlockPlacement::initDupThreshold() {
MaxFreq = Freq;
}
- // FIXME: we may use profile count instead of frequency,
- // and need more fine tuning.
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
DupThreshold = MaxFreq * ThresholdProb;
+ UseProfileCount = false;
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {