diff options
author | Tiehu Zhang <zhangtiehu@huawei.com> | 2022-05-19 23:24:14 +0800 |
---|---|---|
committer | Mindong Chen <chenmindong1@huawei.com> | 2022-05-19 23:29:00 +0800 |
commit | 3ed9f603fd59cafe3ab864d8bb77349ed418d384 (patch) | |
tree | 8a64e2f24874c768bbe2a279e506a121fc70abdc | |
parent | 94a2bd5a270bca1c539a71d0e1ca88c02a023ebd (diff) | |
download | llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.zip llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.tar.gz llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.tar.bz2 |
[LoopVectorize] Don't interleave when the number of runtime checks exceeds the threshold
The runtime check threshold should also restrict the interleave count.
Otherwise, too many runtime checks will be generated in some cases.
Reviewed By: fhahn, dmgreen
Differential Revision: https://reviews.llvm.org/D122126
3 files changed, 25 insertions, 25 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index f99b728..9cbbf96 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -320,6 +320,9 @@ public: getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate, VFRange &Range); + /// Check if the number of runtime checks exceeds the threshold. + bool requiresTooManyRuntimeChecks() const; + protected: /// Collect the instructions from the original loop that would be trivially /// dead in the vectorized loop if generated. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 880274e..0669f9a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7492,6 +7492,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { return VectorizationFactor::Disabled(); } +bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const { + unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); + return (NumRuntimePointerChecks > + VectorizerParams::RuntimeMemoryCheckThreshold && + !Hints.allowReordering()) || + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; +} + Optional<VectorizationFactor> LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { assert(OrigLoop->isInnermost() && "Inner loop expected."); @@ -7560,30 +7568,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VectorizationFactor::Disabled(); // Select the optimal vectorization factor. - auto SelectedVF = CM.selectVectorizationFactor(VFCandidates); - - // Check if it is profitable to vectorize with runtime checks. 
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); - if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) { - bool PragmaThresholdReached = - NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; - bool ThresholdReached = - NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || - PragmaThresholdReached) { - ORE->emit([&]() { - return OptimizationRemarkAnalysisAliasing( - DEBUG_TYPE, "CantReorderMemOps", OrigLoop->getStartLoc(), - OrigLoop->getHeader()) - << "loop not vectorized: cannot prove it is safe to reorder " - "memory operations"; - }); - LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - Hints.emitRemarkWithHints(); - return VectorizationFactor::Disabled(); - } - } - return SelectedVF; + return CM.selectVectorizationFactor(VFCandidates); } VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { @@ -10499,6 +10484,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned IC = 1; if (MaybeVF) { + if (LVP.requiresTooManyRuntimeChecks()) { + ORE->emit([&]() { + return OptimizationRemarkAnalysisAliasing( + DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(), + L->getHeader()) + << "loop not vectorized: cannot prove it is safe to reorder " + "memory operations"; + }); + LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + Hints.emitRemarkWithHints(); + return false; + } VF = *MaybeVF; // Select the interleave count. 
IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue()); diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll index e3a76c4..5370a2c 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll @@ -8,7 +8,7 @@ ; (e.g., runtime-memory-check-threshold, default 8). ; CHECK-LABEL: @eddy_diff_caleddy_ -; CHECK: vector.memcheck +; CHECK-NOT: vector.memcheck define fastcc void @eddy_diff_caleddy_(i64* %wet_cl, i64 %0, i32 %ncol.cast.val) { entry: |