diff options
author | Oliver Stannard <oliver.stannard@linaro.org> | 2019-10-10 09:58:28 +0000 |
---|---|---|
committer | Oliver Stannard <oliver.stannard@linaro.org> | 2019-10-10 09:58:28 +0000 |
commit | 4f454b227545016665e4dbdc237ffe8e49f240aa (patch) | |
tree | 645abce1d4420555cf13fb444cb17b248f090440 /llvm/lib/CodeGen/IfConversion.cpp | |
parent | c92a75fec0adb8569e81c35817e0e27a1cdb7959 (diff) | |
download | llvm-4f454b227545016665e4dbdc237ffe8e49f240aa.zip llvm-4f454b227545016665e4dbdc237ffe8e49f240aa.tar.gz llvm-4f454b227545016665e4dbdc237ffe8e49f240aa.tar.bz2 |
[IfCvt][ARM] Optimise diamond if-conversion for code size
Currently, the heuristics the if-conversion pass uses for diamond if-conversion
are based on execution time, with no consideration for code size. This adds a
new set of heuristics to be used when optimising for code size.
This is mostly target-independent, because the if-conversion pass can
see the code size of the instructions which it is removing. For Thumb,
there are a few passes (insertion of IT instructions, selection of
narrow branches, and selection of CBZ instructions) which are run after
if-conversion and affect these heuristics, so I've added target hooks to
better predict the code-size effect of a proposed if-conversion.
Differential revision: https://reviews.llvm.org/D67350
llvm-svn: 374301
Diffstat (limited to 'llvm/lib/CodeGen/IfConversion.cpp')
-rw-r--r-- | llvm/lib/CodeGen/IfConversion.cpp | 133 |
1 files changed, 116 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index e503c568..d9caa56 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -285,14 +285,113 @@ namespace { Prediction); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, - unsigned TCycle, unsigned TExtra, - MachineBasicBlock &FBB, - unsigned FCycle, unsigned FExtra, - BranchProbability Prediction) const { - return TCycle > 0 && FCycle > 0 && - TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction); + bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo, + MachineBasicBlock &CommBB, unsigned Dups, + BranchProbability Prediction, bool Forked) const { + const MachineFunction &MF = *TBBInfo.BB->getParent(); + if (MF.getFunction().hasMinSize()) { + MachineBasicBlock::iterator TIB = TBBInfo.BB->begin(); + MachineBasicBlock::iterator FIB = FBBInfo.BB->begin(); + MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); + MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); + + unsigned Dups1, Dups2; + if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TBBInfo.BB, *FBBInfo.BB, + /*SkipUnconditionalBranches*/ true)) + llvm_unreachable("should already have been checked by ValidDiamond"); + + unsigned BranchBytes = 0; + unsigned CommonBytes = 0; + + // Count common instructions at the start of the true and false blocks. + for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + + // Count instructions at the end of the true and false blocks, after + // the ones we plan to predicate. Analyzable branches will be removed + // (unless this is a forked diamond), and all other instructions are + // common between the two blocks. 
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) { + if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : make_range(FIE, FBBInfo.BB->end())) { + if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : CommBB.terminators()) { + if (I.isBranch()) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } + } + + // The common instructions in one branch will be eliminated, halving + // their code size. + CommonBytes /= 2; + + // Count the instructions which we need to predicate. + unsigned NumPredicatedInstructions = 0; + for (auto &I : make_range(TIB, TIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + for (auto &I : make_range(FIB, FIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + + // Even though we're optimising for size at the expense of performance, + // avoid creating really long predicated blocks. + if (NumPredicatedInstructions > 15) + return false; + + // Some targets (e.g. Thumb2) need to insert extra instructions to + // start predicated blocks. 
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions( + MF, NumPredicatedInstructions); + + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes + << ", CommonBytes=" << CommonBytes + << ", NumPredicatedInstructions=" + << NumPredicatedInstructions + << ", ExtraPredicateBytes=" << ExtraPredicateBytes + << ")\n"); + return (BranchBytes + CommonBytes) > ExtraPredicateBytes; + } else { + unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups; + unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups; + bool Res = TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt( + *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB, + FCycle, FBBInfo.ExtraCost2, Prediction); + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle + << ", FCycle=" << FCycle + << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra=" + << FBBInfo.ExtraCost2 << ") = " << Res << "\n"); + return Res; + } } /// Returns true if Block ends without a terminator. @@ -842,6 +941,8 @@ bool IfConverter::ValidForkedDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; @@ -899,6 +1000,8 @@ bool IfConverter::ValidDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; // The size is used to decide whether to if-convert, and the shared portions @@ -1186,13 +1289,9 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; - auto feasibleDiamond = [&]() { - bool MeetsSize = MeetIfcvtSizeLimit( - *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, - *FalseBBI.BB, 
(FalseBBICalc.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, - Prediction); + auto feasibleDiamond = [&](bool Forked) { + bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB, + Dups + Dups2, Prediction, Forked); bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, /* IsTriangle */ false, /* RevCond */ false, /* hasCommonTail */ true); @@ -1204,7 +1303,7 @@ void IfConverter::AnalyzeBlock( if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(false)) { // Diamond: // EBB // / \_ @@ -1220,7 +1319,7 @@ void IfConverter::AnalyzeBlock( } } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(true)) { // ForkedDiamond: // if TBB and FBB have a common tail that includes their conditional // branch instructions, then we can If Convert this pattern. |