aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2024-04-12 22:07:29 +0100
committerGitHub <noreply@github.com>2024-04-12 22:07:29 +0100
commit0f82469314f34a086669dfcd190a9f89260fbee5 (patch)
tree9e705e4477681396a91b9f16a74a2c27b4e2402d
parent8a4b7de91dc334c828674aa2cad927c6ffb9cf37 (diff)
downloadllvm-0f82469314f34a086669dfcd190a9f89260fbee5.zip
llvm-0f82469314f34a086669dfcd190a9f89260fbee5.tar.gz
llvm-0f82469314f34a086669dfcd190a9f89260fbee5.tar.bz2
[Passes] Run SimpleLoopUnswitch after introducing invariant branches. (#81271)
IndVars may be able to replace a loop dependent condition with a loop invariant one, but loop-unswitch runs before IndVars, so the invariant check remains in the loop. For an example, consider a read-only loop with a bounds check: https://godbolt.org/z/8cdj4qhbG This patch uses a approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9cfa4c4a0e8678c7). It introduces a new ShouldRunExtraSimpleLoopUnswitch analysis marker, which IndVars can use to indicate that extra unswitching is beneficial. ExtraSimpleLoopUnswitchPassManager uses this analysis to determine whether to run its passes on a loop. Compile-time impact (geomean) ranges from +0.0% to 0.02% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=19e6e99eeb280d426907ea73a21b139ba7225627&stat=instructions%3Au Compile-time impact (geomean) of unconditionally running SimpleLoopUnswitch ranges from +0.05% - +0.16% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=2930dfd5accdce2e6f8d5146ae4d626add2065a2&stat=instructions:u Unconditionally running SimpleLoopUnswitch seems to indicate that there are multiple other scenarios where we fail to run unswitching when opportunities remain. Fixes https://github.com/llvm/llvm-project/issues/85551. PR: https://github.com/llvm/llvm-project/pull/81271
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h35
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h17
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp7
-rw-r--r--llvm/lib/Passes/PassRegistry.def3
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp17
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp20
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll106
8 files changed, 169 insertions, 37 deletions
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index b97ee23..a40b0a2 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
@@ -20,6 +21,40 @@ class Loop;
class StringRef;
class raw_ostream;
+struct ShouldRunExtraSimpleLoopUnswitch
+ : public AnalysisInfoMixin<ShouldRunExtraSimpleLoopUnswitch> {
+ static AnalysisKey Key;
+ struct Result {
+ bool invalidate(Loop &L, const PreservedAnalyses &PA,
+ LoopAnalysisManager::Invalidator &) {
+ // Check whether the analysis has been explicitly invalidated. Otherwise,
+ // it remains preserved.
+ auto PAC = PA.getChecker<ShouldRunExtraSimpleLoopUnswitch>();
+ return !PAC.preservedWhenStateless();
+ }
+ };
+
+ Result run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR) {
+ return Result();
+ }
+
+ static bool isRequired() { return true; }
+};
+
+struct ExtraSimpleLoopUnswitchPassManager : public LoopPassManager {
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+ auto PA = PreservedAnalyses::all();
+ if (AM.getCachedResult<ShouldRunExtraSimpleLoopUnswitch>(L))
+ PA.intersect(LoopPassManager::run(L, AM, AR, U));
+ PA.abandon<ShouldRunExtraSimpleLoopUnswitch>();
+ return PA;
+ }
+
+ static bool isRequired() { return true; }
+};
+
/// This pass transforms loops that contain branches or switches on loop-
/// invariant conditions to have multiple loops. For example, it turns the left
/// into the right code:
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index ff60811..bd1718d 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -15,6 +15,8 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
+#include <utility>
+
namespace llvm {
class Type;
@@ -46,11 +48,16 @@ public:
};
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
-/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, const TargetTransformInfo *TTI,
- SmallVectorImpl<WeakTrackingVH> &Dead,
- SCEVExpander &Rewriter, IVVisitor *V = nullptr);
+/// by using ScalarEvolution to analyze the IV's recurrence. Returns a pair
+/// where the first entry indicates that the function makes changes and the
+/// second entry indicates that it introduced new opportunities for loop
+/// unswitching.
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+ DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
+ SCEVExpander &Rewriter,
+ IVVisitor *V = nullptr);
/// SimplifyLoopIVs - Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index cb892e3..3bb2ce0 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -651,6 +651,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
+ {
+ ExtraSimpleLoopUnswitchPassManager ExtraPasses;
+ ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+ OptimizationLevel::O3));
+ LPM2.addPass(std::move(ExtraPasses));
+ }
+
invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d15f58d..2fbc7f7 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -587,6 +587,9 @@ LOOP_ANALYSIS("ddg", DDGAnalysis())
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
+ ShouldRunExtraSimpleLoopUnswitch())
+
#undef LOOP_ANALYSIS
#ifndef LOOP_PASS
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38104afa..ba392e1 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -137,6 +138,8 @@ class IndVarSimplify {
SmallVector<WeakTrackingVH, 16> DeadInsts;
bool WidenIndVars;
+ bool RunUnswitching = false;
+
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -170,6 +173,8 @@ public:
}
bool run(Loop *L);
+
+ bool runUnswitching() const { return RunUnswitching; }
};
} // end anonymous namespace
@@ -615,9 +620,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
- &Visitor);
+ const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
+ Rewriter, &Visitor);
+ Changed |= C;
+ RunUnswitching |= U;
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
}
@@ -1874,6 +1881,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (OldCond->use_empty())
DeadInsts.emplace_back(OldCond);
Changed = true;
+ RunUnswitching = true;
}
return Changed;
@@ -2059,6 +2067,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (IVS.runUnswitching()) {
+ AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
+ PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
+ }
+
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 64b850a..d763b1e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -133,6 +133,7 @@ static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"not-taken 1/<this option> times or less."),
cl::init(16));
+AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
BranchInst *Term;
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index f6d7226..440fe07 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -60,6 +60,7 @@ namespace {
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
bool Changed = false;
+ bool RunUnswitching = false;
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -72,6 +73,7 @@ namespace {
}
bool hasChanged() const { return Changed; }
+ bool runUnswitching() const { return RunUnswitching; }
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
@@ -233,6 +235,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
+ RunUnswitching = true;
return true;
}
@@ -993,14 +996,18 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
-bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, const TargetTransformInfo *TTI,
- SmallVectorImpl<WeakTrackingVH> &Dead,
- SCEVExpander &Rewriter, IVVisitor *V) {
+/// Returns a pair where the first entry indicates that the function makes
+/// changes and the second entry indicates that it introduced new opportunities
+/// for loop unswitching.
+std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE,
+ DominatorTree *DT, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ SmallVectorImpl<WeakTrackingVH> &Dead,
+ SCEVExpander &Rewriter, IVVisitor *V) {
SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI,
Rewriter, Dead);
SIV.simplifyUsers(CurrIV, V);
- return SIV.hasChanged();
+ return {SIV.hasChanged(), SIV.runUnswitching()};
}
/// Simplify users of induction variables within this
@@ -1014,8 +1021,9 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
#endif
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- Changed |=
+ const auto &[C, _] =
simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, TTI, Dead, Rewriter);
+ Changed |= C;
}
return Changed;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index a140e17a..55dd28b 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -14,24 +14,50 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i32 [[TMP1]], [[COUNT]]
+; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[FOR_BODY_PREHEADER10:%.*]], label [[IF_THEN:%.*]]
+; CHECK: for.body.preheader10:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
+; CHECK: for.body.preheader13:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
-; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD]], [[IF_END]] ]
-; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: tail call void @llvm.trap()
-; CHECK-NEXT: unreachable
-; CHECK: if.end:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP2]], [[SUM_07]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @llvm.trap()
+; CHECK-NEXT: unreachable
;
entry:
%array.addr = alloca ptr, align 8
@@ -103,24 +129,50 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]]
+; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[ENTRY:%.*]], label [[COND_FALSE_I:%.*]], !prof [[PROF4:![0-9]+]]
+; CHECK: for.body.preheader8:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD10]], [[VEC_PHI9]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]]
+; CHECK: for.body.preheader11:
+; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
+; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
+; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
; CHECK: for.cond.cleanup:
-; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ]
-; CHECK-NEXT: ret i32 [[RET_0_LCSSA]]
+; CHECK-NEXT: [[RET_0_LCSSA1:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ [[ADD1:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT: ret i32 [[RET_0_LCSSA1]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[COND_FALSE_I:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK: cond.false.i:
-; CHECK-NEXT: tail call void @llvm.trap()
-; CHECK-NEXT: unreachable
-; CHECK: span_checked_access.exit:
+; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY1]] ], [ [[I_07_PH]], [[FOR_BODY_PREHEADER11]] ]
+; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD1]], [[FOR_BODY1]] ], [ [[RET_0_LCSSA]], [[FOR_BODY_PREHEADER11]] ]
; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[I_07]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[RET_06]]
+; CHECK-NEXT: [[ADD1]] = add nsw i32 [[TMP7]], [[RET_06]]
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: cond.false.i:
+; CHECK-NEXT: tail call void @llvm.trap()
+; CHECK-NEXT: unreachable
;
entry:
%s = alloca %"class.std::__1::span", align 8
@@ -176,7 +228,7 @@ define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_acces
; CHECK-NEXT: [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], [[__IDX]]
-; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF0]]
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF4]]
; CHECK: cond.false:
; CHECK-NEXT: tail call void @llvm.trap()
; CHECK-NEXT: unreachable
@@ -237,5 +289,11 @@ declare void @llvm.trap()
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
;.
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
;.