diff options
author | Hongtao Yu <hoy@fb.com> | 2020-07-04 20:24:11 -0700 |
---|---|---|
committer | Hongtao Yu <hoy@fb.com> | 2020-07-15 12:33:29 -0700 |
commit | f3731d34faa7432462c877714af235e9787c9b30 (patch) | |
tree | e57c1ccc117e502a0adb1ee1a28bb0c1f4fc10cd /llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | |
parent | 41d0af00740ac5140f11c7f37157fc6e6dd1b016 (diff) | |
download | llvm-f3731d34faa7432462c877714af235e9787c9b30.zip llvm-f3731d34faa7432462c877714af235e9787c9b30.tar.gz llvm-f3731d34faa7432462c877714af235e9787c9b30.tar.bz2 |
[LoopUnroll] Update branch weight for remainder loop
Unrolling a loop whose trip count is unknown at compile time results in a remainder loop. The remainder loop executes the remaining iterations of the original loop when the original trip count is not a multiple of the unroll factor. To better maintain profile counts throughout the optimization pipeline, I'm assigning an artificial weight to the latch branch of the remainder loop.
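A remainder loop runs at most (unroll factor - 1) times. Therefore I'm assigning the maximum possible trip count as the back edge weight: when the original loop's latch branch carries branch weights, the remainder loop's back edge weight is set to (unroll factor - 1) times the original exit weight, and its exit weight is kept equal to the original exit weight. This should be more accurate than the default non-profile weight, which assumes the back edge runs much more frequently than the exit edge.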
Differential Revision: https://reviews.llvm.org/D83187
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 2515b16..ebcd820 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -505,6 +506,32 @@ static bool canProfitablyUnrollMultiExitLoop( // know of kinds of multiexit loops that would benefit from unrolling. } +// Assign the maximum possible trip count as the back edge weight for the +// remainder loop if the original loop comes with a branch weight. +static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, + Loop *RemainderLoop, + uint64_t UnrollFactor) { + uint64_t TrueWeight, FalseWeight; + BranchInst *LatchBR = + cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()); + if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader() + ? FalseWeight + : TrueWeight; + assert(UnrollFactor > 1); + uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight; + BasicBlock *Header = RemainderLoop->getHeader(); + BasicBlock *Latch = RemainderLoop->getLoopLatch(); + auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator()); + unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1); + MDBuilder MDB(RemainderLatchBR->getContext()); + MDNode *WeightNode = + HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) + : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + } +} + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. 
/// @@ -788,6 +815,11 @@ bool llvm::UnrollRuntimeLoopRemainder( InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + // Assign the maximum possible trip count as the back edge weight for the + // remainder loop if the original loop comes with a branch weight. + if (remainderLoop && !UnrollRemainder) + updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count); + // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), F->getBasicBlockList(), |