about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tiehu Zhang <zhangtiehu@huawei.com> 2022-05-19 23:24:14 +0800
committer Mindong Chen <chenmindong1@huawei.com> 2022-05-19 23:29:00 +0800
commit 3ed9f603fd59cafe3ab864d8bb77349ed418d384 (patch)
tree 8a64e2f24874c768bbe2a279e506a121fc70abdc
parent 94a2bd5a270bca1c539a71d0e1ca88c02a023ebd (diff)
download llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.zip
llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.tar.gz
llvm-3ed9f603fd59cafe3ab864d8bb77349ed418d384.tar.bz2
[LoopVectorize] Don't interleave when the number of runtime checks exceeds the threshold
The runtime check threshold should also restrict interleave count. Otherwise, too many runtime checks will be generated for some cases.

Reviewed By: fhahn, dmgreen

Differential Revision: https://reviews.llvm.org/D122126
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h 3
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 45
-rw-r--r-- llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll 2
3 files changed, 25 insertions, 25 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f99b728..9cbbf96 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -320,6 +320,9 @@ public:
getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
VFRange &Range);
+ /// Check if the number of runtime checks exceeds the threshold.
+ bool requiresTooManyRuntimeChecks() const;
+
protected:
/// Collect the instructions from the original loop that would be trivially
/// dead in the vectorized loop if generated.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 880274e..0669f9a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7492,6 +7492,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
+bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const {
+ unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
+ return (NumRuntimePointerChecks >
+ VectorizerParams::RuntimeMemoryCheckThreshold &&
+ !Hints.allowReordering()) ||
+ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+}
+
Optional<VectorizationFactor>
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -7560,30 +7568,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- auto SelectedVF = CM.selectVectorizationFactor(VFCandidates);
-
- // Check if it is profitable to vectorize with runtime checks.
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
- if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) {
- bool PragmaThresholdReached =
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
- bool ThresholdReached =
- NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
- if ((ThresholdReached && !Hints.allowReordering()) ||
- PragmaThresholdReached) {
- ORE->emit([&]() {
- return OptimizationRemarkAnalysisAliasing(
- DEBUG_TYPE, "CantReorderMemOps", OrigLoop->getStartLoc(),
- OrigLoop->getHeader())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "memory operations";
- });
- LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- Hints.emitRemarkWithHints();
- return VectorizationFactor::Disabled();
- }
- }
- return SelectedVF;
+ return CM.selectVectorizationFactor(VFCandidates);
}
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -10499,6 +10484,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned IC = 1;
if (MaybeVF) {
+ if (LVP.requiresTooManyRuntimeChecks()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysisAliasing(
+ DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "memory operations";
+ });
+ LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ Hints.emitRemarkWithHints();
+ return false;
+ }
VF = *MaybeVF;
// Select the interleave count.
IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
index e3a76c4..5370a2c 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/interleaved-pointer-runtime-check-unprofitable.ll
@@ -8,7 +8,7 @@
; (e.g., runtime-memory-check-threshold, default 8).
; CHECK-LABEL: @eddy_diff_caleddy_
-; CHECK: vector.memcheck
+; CHECK-NOT: vector.memcheck
define fastcc void @eddy_diff_caleddy_(i64* %wet_cl, i64 %0, i32 %ncol.cast.val) {
entry: