diff options
author | Madhur Amilkanthwar <madhura@nvidia.com> | 2025-01-23 10:41:54 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-23 10:41:54 +0530 |
commit | d15f3e828d3d3335aa9b92b9013a590b71e56b92 (patch) | |
tree | f7678cbb6485b8025d68fa534bf32d8f1e32db9b /llvm/lib/Transforms/Scalar/LoopInterchange.cpp | |
parent | de209fa11b5455155228bcdba012b6074388b917 (diff) | |
download | llvm-d15f3e828d3d3335aa9b92b9013a590b71e56b92.zip llvm-d15f3e828d3d3335aa9b92b9013a590b71e56b92.tar.gz llvm-d15f3e828d3d3335aa9b92b9013a590b71e56b92.tar.bz2 |
[LoopInterchange] Constrain LI within supported loop nest depth (#118656)
This patch is an extension to #115128.
After profiling the LLVM test-suite, I see many loop nests deeper than
`MaxLoopNestDepth`, which is 10. Exiting early for them saves compile
time, as it avoids computing DependenceInfo and CacheCost.
Please see the 'bound-max-depth' branch on compile-time-tracker.
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopInterchange.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 42 |
1 files changed, 29 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 5bcc5e4..d366e74 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -74,8 +74,15 @@ using CharMatrix = std::vector<std::vector<char>>; } // end anonymous namespace +// Minimum loop depth supported. +static cl::opt<unsigned int> MinLoopNestDepth( + "loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, + cl::desc("Minimum depth of loop nest considered for the transform")); + // Maximum loop depth supported. -static const unsigned MaxLoopNestDepth = 10; +static cl::opt<unsigned int> MaxLoopNestDepth( + "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, + cl::desc("Maximum depth of loop nest considered for the transform")); #ifndef NDEBUG static void printDepMatrix(CharMatrix &DepMatrix) { @@ -244,10 +251,22 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) { LoopList.push_back(CurrentLoop); } -static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) { +static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList, + OptimizationRemarkEmitter &ORE) { unsigned LoopNestDepth = LoopList.size(); - if (LoopNestDepth < 2) { - LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); + if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) { + LLVM_DEBUG(dbgs() << "Unsupported depth of loop nest " << LoopNestDepth + << ", the supported range is [" << MinLoopNestDepth + << ", " << MaxLoopNestDepth << "].\n"); + Loop **OuterLoop = LoopList.begin(); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedLoopNestDepth", + (*OuterLoop)->getStartLoc(), + (*OuterLoop)->getHeader()) + << "Unsupported depth of loop nest, the supported range is [" + << std::to_string(MinLoopNestDepth) << ", " + << std::to_string(MaxLoopNestDepth) << "].\n"; + }); return false; } return true; @@ -435,15 +454,11 @@ 
struct LoopInterchange { bool processLoopList(SmallVectorImpl<Loop *> &LoopList) { bool Changed = false; - // Ensure minimum loop nest depth. - assert(hasMinimumLoopDepth(LoopList) && "Loop nest does not meet minimum depth."); + // Ensure proper loop nest depth. + assert(hasSupportedLoopDepth(LoopList, *ORE) && + "Unsupported depth of loop nest."); unsigned LoopNestDepth = LoopList.size(); - if (LoopNestDepth > MaxLoopNestDepth) { - LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " - << MaxLoopNestDepth << "\n"); - return false; - } if (!isComputableLoopNest(LoopList)) { LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); return false; @@ -1735,14 +1750,15 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1"); return PreservedAnalyses::all(); } + OptimizationRemarkEmitter ORE(&F); // Ensure minimum depth of the loop nest to do the interchange. - if (!hasMinimumLoopDepth(LoopList)) + if (!hasSupportedLoopDepth(LoopList, ORE)) return PreservedAnalyses::all(); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); - OptimizationRemarkEmitter ORE(&F); + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); U.markLoopNestChanged(true); |