From 0f82469314f34a086669dfcd190a9f89260fbee5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 12 Apr 2024 22:07:29 +0100 Subject: [Passes] Run SimpleLoopUnswitch after introducing invariant branches. (#81271) IndVars may be able to replace a loop dependent condition with a loop invariant one, but loop-unswitch runs before IndVars, so the invariant check remains in the loop. For an example, consider a read-only loop with a bounds check: https://godbolt.org/z/8cdj4qhbG This patch uses a approach similar to the way extra cleanup passes are run on demand after vectorization (added in acea6e9cfa4c4a0e8678c7). It introduces a new ShouldRunExtraSimpleLoopUnswitch analysis marker, which IndVars can use to indicate that extra unswitching is beneficial. ExtraSimpleLoopUnswitchPassManager uses this analysis to determine whether to run its passes on a loop. Compile-time impact (geomean) ranges from +0.0% to 0.02% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=19e6e99eeb280d426907ea73a21b139ba7225627&stat=instructions%3Au Compile-time impact (geomean) of unconditionally running SimpleLoopUnswitch ranges from +0.05% - +0.16% https://llvm-compile-time-tracker.com/compare.php?from=138c0beb109ffe47f75a0fe8c4dc2cdabe8a6532&to=2930dfd5accdce2e6f8d5146ae4d626add2065a2&stat=instructions:u Unconditionally running SimpleLoopUnswitch seems to indicate that there are multiple other scenarios where we fail to run unswitching when opportunities remain. Fixes https://github.com/llvm/llvm-project/issues/85551. PR: https://github.com/llvm/llvm-project/pull/81271 --- .../llvm/Transforms/Scalar/SimpleLoopUnswitch.h | 35 +++++++ .../include/llvm/Transforms/Utils/SimplifyIndVar.h | 17 +++- llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++ llvm/lib/Passes/PassRegistry.def | 3 + llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 17 +++- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 1 + llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 20 ++-- .../PhaseOrdering/AArch64/hoist-runtime-checks.ll | 106 ++++++++++++++++----- 8 files changed, 169 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h index b97ee23..a40b0a2 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h +++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -12,6 +12,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { @@ -20,6 +21,40 @@ class Loop; class StringRef; class raw_ostream; +struct ShouldRunExtraSimpleLoopUnswitch + : public AnalysisInfoMixin { + static AnalysisKey Key; + struct Result { + bool invalidate(Loop &L, const PreservedAnalyses &PA, + LoopAnalysisManager::Invalidator &) { + // Check whether the analysis has been explicitly invalidated. Otherwise, + // it remains preserved. + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless(); + } + }; + + Result run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return Result(); + } + + static bool isRequired() { return true; } +}; + +struct ExtraSimpleLoopUnswitchPassManager : public LoopPassManager { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + auto PA = PreservedAnalyses::all(); + if (AM.getCachedResult(L)) + PA.intersect(LoopPassManager::run(L, AM, AR, U)); + PA.abandon(); + return PA; + } + + static bool isRequired() { return true; } +}; + /// This pass transforms loops that contain branches or switches on loop- /// invariant conditions to have multiple loops. For example, it turns the left /// into the right code: diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h index ff60811..bd1718d 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -15,6 +15,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H +#include + namespace llvm { class Type; @@ -46,11 +48,16 @@ public: }; /// simplifyUsersOfIV - Simplify instructions that use this induction variable -/// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, const TargetTransformInfo *TTI, - SmallVectorImpl &Dead, - SCEVExpander &Rewriter, IVVisitor *V = nullptr); +/// by using ScalarEvolution to analyze the IV's recurrence. Returns a pair +/// where the first entry indicates that the function makes changes and the +/// second entry indicates that it introduced new opportunities for loop +/// unswitching. +std::pair simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, + DominatorTree *DT, LoopInfo *LI, + const TargetTransformInfo *TTI, + SmallVectorImpl &Dead, + SCEVExpander &Rewriter, + IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs. diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index cb892e3..3bb2ce0 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -651,6 +651,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(IndVarSimplifyPass()); + { + ExtraSimpleLoopUnswitchPassManager ExtraPasses; + ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == + OptimizationLevel::O3)); + LPM2.addPass(std::move(ExtraPasses)); + } + invokeLateLoopOptimizationsEPCallbacks(LPM2, Level); LPM2.addPass(LoopDeletionPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d15f58d..2fbc7f7 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -587,6 +587,9 @@ LOOP_ANALYSIS("ddg", DDGAnalysis()) LOOP_ANALYSIS("iv-users", IVUsersAnalysis()) LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch", + ShouldRunExtraSimpleLoopUnswitch()) + #undef LOOP_ANALYSIS #ifndef LOOP_PASS diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 38104afa..ba392e1 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -70,6 +70,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -137,6 +138,8 @@ class IndVarSimplify { SmallVector DeadInsts; bool WidenIndVars; + bool RunUnswitching = false; + bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -170,6 +173,8 @@ public: } bool run(Loop *L); + + bool runUnswitching() const { return RunUnswitching; } }; } // end anonymous namespace @@ -615,9 +620,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, - &Visitor); + const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, + Rewriter, &Visitor); + Changed |= C; + RunUnswitching |= U; if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); } @@ -1874,6 +1881,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { if (OldCond->use_empty()) DeadInsts.emplace_back(OldCond); Changed = true; + RunUnswitching = true; } return Changed; @@ -2059,6 +2067,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet(); + if (IVS.runUnswitching()) { + AM.getResult(L, AR); + PA.preserve(); + } + if (AR.MSSA) PA.preserve(); return PA; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 64b850a..d763b1e 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -133,6 +133,7 @@ static cl::opt InjectInvariantConditionHotnesThreshold( "not-taken 1/ times or less."), cl::init(16)); +AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key; namespace { struct CompareDesc { BranchInst *Term; diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index f6d7226..440fe07 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -60,6 +60,7 @@ namespace { SmallVectorImpl &DeadInsts; bool Changed = false; + bool RunUnswitching = false; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, @@ -72,6 +73,7 @@ namespace { } bool hasChanged() const { return Changed; } + bool runUnswitching() const { return RunUnswitching; } /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies @@ -233,6 +235,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, ICmp->setPredicate(InvariantPredicate); ICmp->setOperand(0, NewLHS); ICmp->setOperand(1, NewRHS); + RunUnswitching = true; return true; } @@ -993,14 +996,18 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, const TargetTransformInfo *TTI, - SmallVectorImpl &Dead, - SCEVExpander &Rewriter, IVVisitor *V) { +/// Returns a pair where the first entry indicates that the function makes +/// changes and the second entry indicates that it introduced new opportunities +/// for loop unswitching. +std::pair simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, + DominatorTree *DT, LoopInfo *LI, + const TargetTransformInfo *TTI, + SmallVectorImpl &Dead, + SCEVExpander &Rewriter, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, Rewriter, Dead); SIV.simplifyUsers(CurrIV, V); - return SIV.hasChanged(); + return {SIV.hasChanged(), SIV.runUnswitching()}; } /// Simplify users of induction variables within this @@ -1014,8 +1021,9 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, #endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= + const auto &[C, _] = simplifyUsersOfIV(cast(I), SE, DT, LI, TTI, Dead, Rewriter); + Changed |= C; } return Changed; } diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll index a140e17a..55dd28b 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll @@ -14,24 +14,50 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i32 [[TMP1]], [[COUNT]] +; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[FOR_BODY_PREHEADER10:%.*]], label [[IF_THEN:%.*]] +; CHECK: for.body.preheader10: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]] +; CHECK: for.body.preheader13: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD]], [[IF_END]] ] -; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: tail call void @llvm.trap() -; CHECK-NEXT: unreachable -; CHECK: if.end: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP2]], [[SUM_07]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable ; entry: %array.addr = alloca ptr, align 8 @@ -103,24 +129,50 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1 ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 ; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]] +; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[ENTRY:%.*]], label [[COND_FALSE_I:%.*]], !prof [[PROF4:![0-9]+]] +; CHECK: for.body.preheader8: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD10]], [[VEC_PHI9]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]] +; CHECK: for.body.preheader11: +; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ] +; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ] -; CHECK-NEXT: ret i32 [[RET_0_LCSSA]] +; CHECK-NEXT: [[RET_0_LCSSA1:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ [[ADD1:%.*]], [[FOR_BODY1]] ] +; CHECK-NEXT: ret i32 [[RET_0_LCSSA1]] ; CHECK: for.body: -; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[COND_FALSE_I:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK: cond.false.i: -; CHECK-NEXT: tail call void @llvm.trap() -; CHECK-NEXT: unreachable -; CHECK: span_checked_access.exit: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY1]] ], [ [[I_07_PH]], [[FOR_BODY_PREHEADER11]] ] +; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD1]], [[FOR_BODY1]] ], [ [[RET_0_LCSSA]], [[FOR_BODY_PREHEADER11]] ] ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[I_07]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[RET_06]] +; CHECK-NEXT: [[ADD1]] = add nsw i32 [[TMP7]], [[RET_06]] ; CHECK-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: cond.false.i: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable ; entry: %s = alloca %"class.std::__1::span", align 8 @@ -176,7 +228,7 @@ define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_acces ; CHECK-NEXT: [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], [[__IDX]] -; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF0]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF4]] ; CHECK: cond.false: ; CHECK-NEXT: tail call void @llvm.trap() ; CHECK-NEXT: unreachable @@ -237,5 +289,11 @@ declare void @llvm.trap() declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 2000, i32 1} +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[PROF4]] = !{!"branch_weights", i32 2000, i32 1} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} ;. -- cgit v1.1