author     Florian Hahn <flo@fhahn.com>    2025-08-01 14:18:07 +0100
committer  GitHub <noreply@github.com>     2025-08-01 14:18:07 +0100
commit     2ae996cbbe92f006d711eeeb8f29e0946fc1c1e8 (patch)
tree       9016795ce5fd22d4c66fa7a255238a75b5258231
parent     6da1a0908a975a55adb5eae293b37ae0a850b21d (diff)
[LAA] Support assumptions in evaluatePtrAddRecAtMaxBTCWillNotWrap (#147047)
This patch extends the logic added in https://github.com/llvm/llvm-project/pull/128061 to also support dereferenceability information from assumptions. Unfortunately, both the assumption cache and the dominator tree need to be threaded through multiple layers to make them available where needed.

PR: https://github.com/llvm/llvm-project/pull/147047
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h                            | 29
-rw-r--r--  llvm/lib/Analysis/Loads.cpp                                                |  2
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp                                   | 76
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFlatten.cpp                                 |  3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp                          |  2
-rw-r--r--  llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll         |  8
-rw-r--r--  llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll  | 33
-rw-r--r--  llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp                       |  3
8 files changed, 111 insertions(+), 45 deletions(-)
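
For orientation before the diff: the pattern this patch teaches LAA to exploit is a "dereferenceable" operand bundle on an llvm.assume call, as used by the updated tests further down. Below is a minimal sketch, with an illustrative function name and sizes not taken from the patch. The nosync/nofree attributes matter: they are what make Value::canBeFreed() return false for %A, a precondition the new code checks before consuming the assumption.

; Hypothetical sketch: 500 iterations of 4-byte i32 accesses stay within the
; 2000 bytes the assumption marks dereferenceable, so LAA can bound the range.
define void @deref_via_assumption_sketch(ptr %A) nosync nofree {
entry:
  call void @llvm.assume(i1 true) [ "align"(ptr %A, i64 4), "dereferenceable"(ptr %A, i64 2000) ]
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
  store i32 0, ptr %gep.A, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 500
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

declare void @llvm.assume(i1)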
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index af6e534..92304ed 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -180,10 +180,12 @@ public:
const SmallVectorImpl<Instruction *> &Instrs) const;
};
- MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
+ MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC,
+ DominatorTree *DT, const Loop *L,
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
unsigned MaxTargetVectorWidthInBits)
- : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
+ : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L),
+ SymbolicStrides(SymbolicStrides),
MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
/// Register the location (instructions are given increasing numbers)
@@ -288,6 +290,15 @@ public:
return PointerBounds;
}
+ DominatorTree *getDT() const {
+ assert(DT && "requested DT, but it is not available");
+ return DT;
+ }
+ AssumptionCache *getAC() const {
+ assert(AC && "requested AC, but it is not available");
+ return AC;
+ }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -296,6 +307,10 @@ private:
/// example we might assume a unit stride for a pointer in order to prove
/// that a memory access is strided and doesn't wrap.
PredicatedScalarEvolution &PSE;
+
+ AssumptionCache *AC;
+ DominatorTree *DT;
+
const Loop *InnermostLoop;
/// Reference to map of pointer values to
@@ -670,7 +685,7 @@ public:
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
- DominatorTree *DT, LoopInfo *LI,
+ DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC,
bool AllowPartial = false);
/// Return true if we can analyze the memory accesses in the loop and there are
@@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
- std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+ DominatorTree *DT, AssumptionCache *AC);
class LoopAccessInfoManager {
/// The cache.
@@ -935,12 +951,13 @@ class LoopAccessInfoManager {
LoopInfo &LI;
TargetTransformInfo *TTI;
const TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache *AC;
public:
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
LoopInfo &LI, TargetTransformInfo *TTI,
- const TargetLibraryInfo *TLI)
- : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
+ const TargetLibraryInfo *TLI, AssumptionCache *AC)
+ : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {}
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 393f264..6fc81d787 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -342,7 +342,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
: SE.getConstantMaxBackedgeTakenCount(L);
}
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 14be385..a553533 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -23,6 +23,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
/// Return true if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed to be inbounds of the accessed object.
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
- const SCEV *MaxBTC,
- const SCEV *EltSize,
- ScalarEvolution &SE,
- const DataLayout &DL) {
+static bool
+evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+ const SCEV *MaxBTC, const SCEV *EltSize,
+ ScalarEvolution &SE, const DataLayout &DL,
+ DominatorTree *DT, AssumptionCache *AC) {
auto *PointerBase = SE.getPointerBase(AR->getStart());
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
if (!StartPtr)
return false;
+ const Loop *L = AR->getLoop();
bool CheckForNonNull, CheckForFreed;
- uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+ Value *StartPtrV = StartPtr->getValue();
+ uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
DL, CheckForNonNull, CheckForFreed);
- if (CheckForNonNull || CheckForFreed)
+ if (DerefBytes && (CheckForNonNull || CheckForFreed))
return false;
const SCEV *Step = AR->getStepRecurrence(SE);
+ Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+ const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
+
+  // Check if we have a suitable dereferenceable assumption we can use.
+ if (!StartPtrV->canBeFreed()) {
+ RetainedKnowledge DerefRK = getKnowledgeValidInContext(
+ StartPtrV, {Attribute::Dereferenceable}, *AC,
+ L->getLoopPredecessor()->getTerminator(), DT);
+ if (DerefRK) {
+ DerefBytesSCEV = SE.getUMaxExpr(
+ DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
+ }
+ }
+
+ if (DerefBytesSCEV->isZero())
+ return false;
+
bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
return false;
- Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
Step = SE.getNoopOrSignExtend(Step, WiderTy);
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
@@ -256,8 +276,7 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
if (!EndBytes)
return false;
- return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
- SE.getConstant(WiderTy, DerefBytes));
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
}
// For negative steps check if
@@ -265,15 +284,15 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
// * StartOffset <= DerefBytes.
assert(SE.isKnownNegative(Step) && "must be known negative");
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
- SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
- SE.getConstant(WiderTy, DerefBytes));
+ SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
}
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+ DominatorTree *DT, AssumptionCache *AC) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
if (PointerBounds) {
auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
// separately checks that accesses cannot wrap, so unsigned max
// represents an upper bound.
- if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
- DL)) {
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
+ DT, AC)) {
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
} else {
ScEnd = SE->getAddExpr(
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
bool NeedsFreeze) {
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const SCEV *BTC = PSE.getBackedgeTakenCount();
- const auto &[ScStart, ScEnd] =
- getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
- PSE.getSE(), &DC.getPointerBounds());
+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &DC.getPointerBounds(), DC.getDT(), DC.getAC());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
const SCEV *BTC = PSE.getBackedgeTakenCount();
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
ScalarEvolution &SE = *PSE.getSE();
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+ const auto &[SrcStart_, SrcEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
+ &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
return false;
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+ const auto &[SinkStart_, SinkEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
+ &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
isa<SCEVCouldNotCompute>(SinkEnd_))
return false;
@@ -3002,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
DominatorTree *DT, LoopInfo *LI,
- bool AllowPartial)
+ AssumptionCache *AC, bool AllowPartial)
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
@@ -3012,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
MaxTargetVectorWidthInBits =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
- MaxTargetVectorWidthInBits);
+ DepChecker = std::make_unique<MemoryDepChecker>(
+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop())
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3082,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
// or if it was created with a different value of AllowPartial.
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
- &LI, AllowPartial);
+ &LI, AC, AllowPartial);
return *It->second;
}
@@ -3125,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
auto &LI = FAM.getResult<LoopAnalysis>(F);
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
+ auto &AC = FAM.getResult<AssumptionAnalysis>(F);
+ return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
}
AnalysisKey LoopAccessAnalysis::Key;
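
To illustrate the !StartPtrV->canBeFreed() guard in the hunks above, a hedged counter-example (function name illustrative, not from the patch): without nosync/nofree on the function, the object behind %A could be freed between the assume and the loop's accesses, so the dereferenceability knowledge from the assumption must not be used, and the new code deliberately skips it.

; Sketch of the rejected case: canBeFreed() is true for %A here, so the
; "dereferenceable" bundle below is intentionally not consumed by LAA.
define void @deref_assumption_ignored_may_be_freed(ptr %A) {
entry:
  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 2000) ]
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
  store i32 0, ptr %gep.A, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 500
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

declare void @llvm.assume(i1)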
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index f3e992c..04039b8 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -1009,7 +1009,8 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
- LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
+ LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr,
+ &AR.AC);
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 4f2bfb0..448dc2b 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
const Function *F = L.getHeader()->getParent();
OptimizationRemarkEmitter ORE(F);
- LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
+ LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, &LAR.AC);
if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index 1dc8d4a7..207a44d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -505,7 +505,7 @@ e.1:
ret i32 1
}
-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
@@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: (Low: %B High: (2000 + %B))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: (Low: %A High: (2000 + %A))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -565,7 +565,7 @@ e.2:
ret void
}
-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
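
A worked reading of the improved bounds in the first test above, assuming (per the test name) at most 500 iterations and assume bundles providing 2000 dereferenceable bytes; the _too_small variant presumably provides fewer bytes than the loop needs, so its bounds stay conservative.

; Positive-step case with Step = 4 bytes (i32), MaxBTC = 499, StartOffset = 0:
;   EndBytes = StartOffset + MaxBTC * Step + EltSize = 499 * 4 + 4 = 2000
; which is <= the 2000 dereferenceable bytes known from the assumption, so
; evaluatePtrAddRecAtMaxBTCWillNotWrap returns true and the High bound becomes
; (2000 + %B) instead of the conservative inttoptr (i64 -1 to ptr) sentinel.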
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index e79995f..f329a18f 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -7,21 +7,46 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[LOOP_END:%.*]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
index 118bf67..7471355 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -41,7 +41,8 @@ protected:
AARes.reset(new AAResults(*TLI));
AARes->addAAResult(*BasicAA);
PSE.reset(new PredicatedScalarEvolution(*SE, *L));
- LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI));
+ LAI.reset(
+ new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC));
IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
IAI->analyzeInterleaving(false);
return {Plan, *IAI};