aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVigneshwar Jayakumar <vigneshwar.jayakumar@amd.com>2025-10-30 09:23:04 -0500
committerGitHub <noreply@github.com>2025-10-30 09:23:04 -0500
commit469702c5d5cc4fa18c3a962afb971950a084f373 (patch)
treea07fe3c6f11731fa55bd08a2443200e55f933606
parent8e6ef2d51b639a20b7cc29113d1eb38c81ea84d1 (diff)
downloadllvm-469702c5d5cc4fa18c3a962afb971950a084f373.zip
llvm-469702c5d5cc4fa18c3a962afb971950a084f373.tar.gz
llvm-469702c5d5cc4fa18c3a962afb971950a084f373.tar.bz2
[LICM] Sink unused loop-invariant loads in preheader. (#157559)
Unused loop-invariant loads were not sunk from the preheader to the exit block, increasing their live range. This commit moves the sinkUnusedInvariants logic from IndVarSimplify to LICM and also adds functionality to sink unused loads that are not clobbered by the loop body.
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp85
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp87
-rw-r--r--llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll2
-rw-r--r--llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll100
-rw-r--r--llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll2
-rw-r--r--llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll22
-rw-r--r--llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll4
-rw-r--r--llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll2
-rw-r--r--llvm/test/Transforms/IndVarSimplify/exit-count-select.ll14
-rw-r--r--llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll6
-rw-r--r--llvm/test/Transforms/IndVarSimplify/pr116483.ll8
-rw-r--r--llvm/test/Transforms/IndVarSimplify/pr24783.ll2
-rw-r--r--llvm/test/Transforms/IndVarSimplify/pr39673.ll2
-rw-r--r--llvm/test/Transforms/IndVarSimplify/pr63763.ll6
-rw-r--r--llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll21
-rw-r--r--llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll8
-rw-r--r--llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll14
-rw-r--r--llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll4
-rw-r--r--llvm/test/Transforms/IndVarSimplify/sentinel.ll14
-rw-r--r--llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll32
-rw-r--r--llvm/test/Transforms/IndVarSimplify/sink-trapping.ll19
-rw-r--r--llvm/test/Transforms/IndVarSimplify/zext-nuw.ll2
-rw-r--r--llvm/test/Transforms/LICM/scalar-promote.ll6
-rw-r--r--llvm/test/Transforms/LICM/sink-alloca.ll (renamed from llvm/test/Transforms/IndVarSimplify/sink-alloca.ll)6
-rw-r--r--llvm/test/Transforms/LICM/sink-from-preheader.ll185
-rw-r--r--llvm/test/Transforms/LICM/sink-trapping.ll28
-rw-r--r--llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll2
-rw-r--r--llvm/test/Transforms/LoopDistribute/laa-invalidation.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll2
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll2
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll4
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll2
-rw-r--r--llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll20
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll6
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll49
36 files changed, 453 insertions, 323 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc21..4ba4ba3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -162,8 +162,6 @@ class IndVarSimplify {
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
- bool sinkUnusedInvariants(Loop *L);
-
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1079,85 +1077,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
return true;
}
-//===----------------------------------------------------------------------===//
-// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
-//===----------------------------------------------------------------------===//
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return false;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return false;
-
- bool MadeAnyChanges = false;
- for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
-
- // Skip BB Terminator.
- if (Preheader->getTerminator() == &I)
- continue;
-
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I.mayHaveSideEffects() || I.mayReadFromMemory())
- continue;
-
- // Skip debug or pseudo instructions.
- if (I.isDebugOrPseudoInst())
- continue;
-
- // Skip eh pad instructions.
- if (I.isEHPad())
- continue;
-
- // Don't sink alloca: we never want to sink static alloca's out of the
- // entry block, and correctly sinking dynamic alloca's requires
- // checks for stacksave/stackrestore intrinsics.
- // FIXME: Refactor this check somehow?
- if (isa<AllocaInst>(&I))
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- BasicBlock *UseBB = User->getParent();
- if (PHINode *P = dyn_cast<PHINode>(User)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(U.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- I.moveBefore(ExitBlock->getFirstInsertionPt());
- SE->forgetValue(&I);
- MadeAnyChanges = true;
- }
-
- return MadeAnyChanges;
-}
-
static void replaceExitCond(BranchInst *BI, Value *NewCond,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
auto *OldCond = BI->getCondition();
@@ -2065,10 +1984,6 @@ bool IndVarSimplify::run(Loop *L) {
// The Rewriter may not be used from this point on.
- // Loop-invariant instructions in the preheader that aren't used in the
- // loop may be sunk below the loop to reduce register pressure.
- Changed |= sinkUnusedInvariants(L);
-
// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b2c526b..d13b990 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -211,9 +211,15 @@ static Instruction *cloneInstructionInExitBlock(
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);
-static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
- ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
+static void moveInstructionBefore(
+ Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
+
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
@@ -471,6 +477,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
MSSAU, &SafetyInfo, Flags, ORE);
+
+ // sink pre-header defs that are unused in-loop into the unique exit to reduce
+ // pressure.
+ Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
+ SE, DT, Flags, ORE);
+
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1456,19 +1468,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU,
- ScalarEvolution *SE) {
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
+ MemorySSA::InsertionPlace Point) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
I.moveBefore(*Dest->getParent(), Dest);
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
- MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
- MemorySSA::BeforeTerminator);
+ MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
if (SE)
SE->forgetBlockAndLoopDispositions(&I);
}
+// If there's a single exit block, sink any loop-invariant values that were
+// defined in the preheader but not used inside the loop into the exit block
+// to reduce register pressure in the loop.
+static bool sinkUnusedInvariantsFromPreheaderToExit(
+ Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
+ SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock)
+ return false;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ bool MadeAnyChanges = false;
+
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+
+ // Skip terminator.
+ if (Preheader->getTerminator() == &I)
+ continue;
+
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Note that
+ // it's okay if the instruction might have undefined behavior: LoopSimplify
+ // guarantees that the preheader dominates the exit block.
+ if (I.mayHaveSideEffects())
+ continue;
+
+ if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoopOrPreheader = false;
+ for (Use &U : I.uses()) {
+ auto *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = UserI->getParent();
+ if (auto *PN = dyn_cast<PHINode>(UserI)) {
+ UseBB = PN->getIncomingBlock(U);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoopOrPreheader = true;
+ break;
+ }
+ }
+ if (UsedInLoopOrPreheader)
+ continue;
+
+ moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
+ MSSAU, SE, MemorySSA::Beginning);
+ MadeAnyChanges = true;
+ }
+
+ return MadeAnyChanges;
+}
+
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index c573253..48ed5c4 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
}
; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
-; GFX908: NumSgprs: 64
-; GFX908-GCNTRACKERS: NumSgprs: 64
+; GFX908: NumSgprs: 56
+; GFX908-GCNTRACKERS: NumSgprs: 56
; GFX908: NumVgprs: 43
-; GFX908-GCNTRACKERS: NumVgprs: 39
+; GFX908-GCNTRACKERS: NumVgprs: 40
; GFX908: Occupancy: 5
; GFX908-GCNTRACKERS: Occupancy: 6
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index db49339..9c16b3c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -22,8 +22,6 @@
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
; OFFREG is offset system SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 640
diff --git a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
index 00a77f9..530169f 100644
--- a/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
@@ -212,37 +212,33 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill
; CHECK-NEXT: andi. r3, r3, 1
; CHECK-NEXT: li r3, -1
+; CHECK-NEXT: li r4, 0
; CHECK-NEXT: li r30, 0
; CHECK-NEXT: crmove 4*cr2+lt, gt
; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB3_1: # %if.end116
; CHECK-NEXT: #
; CHECK-NEXT: bl callee
; CHECK-NEXT: nop
; CHECK-NEXT: mr r3, r29
-; CHECK-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-NEXT: lwz r29, 0(r3)
-; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: extsw r4, r29
-; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: addi r5, r5, -1
-; CHECK-NEXT: cmpwi r5, 0
-; CHECK-NEXT: bgt cr0, .LBB3_3
-; CHECK-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB3_2: # %while.body5.i
; CHECK-NEXT: #
+; CHECK-NEXT: addi r4, r4, -1
+; CHECK-NEXT: cmpwi r4, 0
+; CHECK-NEXT: bgt cr0, .LBB3_2
+; CHECK-NEXT: # %bb.3: # %while.cond12.preheader.i
+; CHECK-NEXT: #
+; CHECK-NEXT: lwz r29, 0(r3)
; CHECK-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-NEXT: #
+; CHECK-NEXT: extsw r4, r29
; CHECK-NEXT: ld r5, 0(r3)
-; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: stw r3, 0(r3)
+; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: stwx r30, r5, r4
; CHECK-NEXT: b .LBB3_1
;
@@ -256,37 +252,33 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-BE-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: andi. r3, r3, 1
; CHECK-BE-NEXT: li r3, -1
+; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: li r30, 0
; CHECK-BE-NEXT: crmove 4*cr2+lt, gt
; CHECK-BE-NEXT: std r29, 56(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: b .LBB3_2
-; CHECK-BE-NEXT: .p2align 4
; CHECK-BE-NEXT: .LBB3_1: # %if.end116
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: bl callee
; CHECK-BE-NEXT: nop
; CHECK-BE-NEXT: mr r3, r29
-; CHECK-BE-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-BE-NEXT: # =>This Loop Header: Depth=1
-; CHECK-BE-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-BE-NEXT: lwz r29, 0(r3)
-; CHECK-BE-NEXT: li r5, 0
-; CHECK-BE-NEXT: extsw r4, r29
-; CHECK-BE-NEXT: .p2align 5
-; CHECK-BE-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-BE-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-BE-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-BE-NEXT: addi r5, r5, -1
-; CHECK-BE-NEXT: cmpwi r5, 0
-; CHECK-BE-NEXT: bgt cr0, .LBB3_3
-; CHECK-BE-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-BE-NEXT: li r4, 0
+; CHECK-BE-NEXT: .p2align 4
+; CHECK-BE-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: addi r4, r4, -1
+; CHECK-BE-NEXT: cmpwi r4, 0
+; CHECK-BE-NEXT: bgt cr0, .LBB3_2
+; CHECK-BE-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: lwz r29, 0(r3)
; CHECK-BE-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-BE-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-BE-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: extsw r4, r29
; CHECK-BE-NEXT: ld r5, 0(r3)
-; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: stw r3, 0(r3)
+; CHECK-BE-NEXT: sldi r4, r4, 2
; CHECK-BE-NEXT: stwx r30, r5, r4
; CHECK-BE-NEXT: b .LBB3_1
;
@@ -300,32 +292,28 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-P9-NEXT: std r0, 80(r1)
; CHECK-P9-NEXT: std r30, 48(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: li r3, -1
+; CHECK-P9-NEXT: li r4, 0
; CHECK-P9-NEXT: li r30, 0
; CHECK-P9-NEXT: std r29, 40(r1) # 8-byte Folded Spill
; CHECK-P9-NEXT: crmove 4*cr2+lt, gt
; CHECK-P9-NEXT: b .LBB3_2
-; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB3_1: # %if.end116
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: bl callee
; CHECK-P9-NEXT: nop
; CHECK-P9-NEXT: mr r3, r29
-; CHECK-P9-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-P9-NEXT: # =>This Loop Header: Depth=1
-; CHECK-P9-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-P9-NEXT: lwz r29, 0(r3)
; CHECK-P9-NEXT: li r4, 0
-; CHECK-P9-NEXT: .p2align 5
-; CHECK-P9-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-P9-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-P9-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-P9-NEXT: .p2align 4
+; CHECK-P9-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: addi r4, r4, -1
; CHECK-P9-NEXT: cmpwi r4, 0
-; CHECK-P9-NEXT: bgt cr0, .LBB3_3
-; CHECK-P9-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-P9-NEXT: bgt cr0, .LBB3_2
+; CHECK-P9-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-P9-NEXT: #
+; CHECK-P9-NEXT: lwz r29, 0(r3)
; CHECK-P9-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-P9-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-P9-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: ld r4, 0(r3)
; CHECK-P9-NEXT: extswsli r5, r29, 2
@@ -343,32 +331,28 @@ define hidden void @testCaller(i1 %incond) local_unnamed_addr align 2 nounwind {
; CHECK-P9-BE-NEXT: std r0, 96(r1)
; CHECK-P9-BE-NEXT: std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P9-BE-NEXT: li r3, -1
+; CHECK-P9-BE-NEXT: li r4, 0
; CHECK-P9-BE-NEXT: li r30, 0
; CHECK-P9-BE-NEXT: std r29, 56(r1) # 8-byte Folded Spill
; CHECK-P9-BE-NEXT: crmove 4*cr2+lt, gt
; CHECK-P9-BE-NEXT: b .LBB3_2
-; CHECK-P9-BE-NEXT: .p2align 4
; CHECK-P9-BE-NEXT: .LBB3_1: # %if.end116
; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: bl callee
; CHECK-P9-BE-NEXT: nop
; CHECK-P9-BE-NEXT: mr r3, r29
-; CHECK-P9-BE-NEXT: .LBB3_2: # %cond.end.i.i
-; CHECK-P9-BE-NEXT: # =>This Loop Header: Depth=1
-; CHECK-P9-BE-NEXT: # Child Loop BB3_3 Depth 2
-; CHECK-P9-BE-NEXT: lwz r29, 0(r3)
; CHECK-P9-BE-NEXT: li r4, 0
-; CHECK-P9-BE-NEXT: .p2align 5
-; CHECK-P9-BE-NEXT: .LBB3_3: # %while.body5.i
-; CHECK-P9-BE-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-P9-BE-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-P9-BE-NEXT: .p2align 4
+; CHECK-P9-BE-NEXT: .LBB3_2: # %while.body5.i
+; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: addi r4, r4, -1
; CHECK-P9-BE-NEXT: cmpwi r4, 0
-; CHECK-P9-BE-NEXT: bgt cr0, .LBB3_3
-; CHECK-P9-BE-NEXT: # %bb.4: # %while.cond12.preheader.i
+; CHECK-P9-BE-NEXT: bgt cr0, .LBB3_2
+; CHECK-P9-BE-NEXT: # %bb.3: # %while.cond12.preheader.i
; CHECK-P9-BE-NEXT: #
+; CHECK-P9-BE-NEXT: lwz r29, 0(r3)
; CHECK-P9-BE-NEXT: bc 12, 4*cr2+lt, .LBB3_1
-; CHECK-P9-BE-NEXT: # %bb.5: # %for.cond99.preheader
+; CHECK-P9-BE-NEXT: # %bb.4: # %for.cond99.preheader
; CHECK-P9-BE-NEXT: #
; CHECK-P9-BE-NEXT: ld r4, 0(r3)
; CHECK-P9-BE-NEXT: extswsli r5, r29, 2
diff --git a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
index 08dcf1d..8e932e0 100644
--- a/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AMDGPU/addrspace-7-doesnt-crash.ll
@@ -7,11 +7,11 @@ define void @f(ptr addrspace(7) %arg) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr addrspace(7) [[ARG:%.*]]) {
; CHECK-NEXT: bb:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB1]]
; CHECK: bb2:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr addrspace(7) [[SCEVGEP]], align 4
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
index 2003b1a..3c6535d 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll
@@ -4,33 +4,31 @@
define i32 @remove_loop(i32 %size) #0 {
; CHECK-V8M-LABEL: @remove_loop(
-; CHECK-V8M-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-V8M-NEXT: entry:
-; CHECK-V8M-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK-V8M: while.cond:
-; CHECK-V8M-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK-V8M: while.end:
-; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-V8M-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-V8M-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-V8M: while.cond:
+; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK-V8M: while.end:
; CHECK-V8M-NEXT: ret i32 [[TMP4]]
;
; CHECK-V8A-LABEL: @remove_loop(
-; CHECK-V8A-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-V8A-NEXT: entry:
-; CHECK-V8A-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK-V8A: while.cond:
-; CHECK-V8A-NEXT: br i1 false, label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK-V8A: while.end:
-; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-V8A-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-V8A-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK-V8A: while.cond:
+; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK-V8A: while.end:
; CHECK-V8A-NEXT: ret i32 [[TMP4]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
index 2261423..382f026 100644
--- a/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ARM/indvar-unroll-imm-cost.ll
@@ -77,6 +77,8 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
; CHECK-NEXT: [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
; CHECK-NEXT: br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
; CHECK: for.body29.preheader:
+; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
+; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: br label [[FOR_BODY29:%.*]]
; CHECK: for.body29:
; CHECK-NEXT: [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
@@ -100,8 +102,6 @@ define dso_local arm_aapcscc void @test(ptr nocapture %pDest, ptr nocapture read
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]]
; CHECK: for.end40.loopexit:
-; CHECK-NEXT: [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
-; CHECK-NEXT: [[SCEVGEP93:%.*]] = getelementptr i16, ptr [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, ptr [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: [[SCEVGEP94:%.*]] = getelementptr i32, ptr [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT: br label [[FOR_END40]]
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
index 0fa6e34..0eb9deb 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/inner-loop-by-latch-cond.ll
@@ -14,6 +14,7 @@ define void @test(i64 %a) {
; CHECK: outer_header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 21, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 20, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[OUTER_LATCH]] ]
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
; CHECK: inner_header:
; CHECK-NEXT: [[J:%.*]] = phi i64 [ 1, [[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], [[INNER_HEADER]] ]
@@ -22,7 +23,6 @@ define void @test(i64 %a) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[J_NEXT]], [[INDVARS_IV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER_HEADER]], label [[OUTER_LATCH]]
; CHECK: outer_latch:
-; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND2:%.*]] = icmp ne i64 [[I_NEXT]], 40
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[COND2]], label [[OUTER_HEADER]], label [[RETURN:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 1592b84..829092f 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
+; RUN: opt < %s -passes='require<scalar-evolution>,indvars,loop-mssa(licm)' -S | FileCheck %s
define i32 @logical_and_2ops(i32 %n, i32 %m) {
; CHECK-LABEL: @logical_and_2ops(
@@ -56,10 +56,10 @@ define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT: [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
@@ -84,10 +84,10 @@ define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[K:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[M:%.*]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
-; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT: [[N:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[N]])
+; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N1:%.*]])
; CHECK-NEXT: ret i32 [[UMIN1]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
index e006d9f..f798eb28 100644
--- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
+++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll
@@ -932,6 +932,9 @@ for.end: ; preds = %for.body, %entry
define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
; CHECK-LABEL: @ult_multiuse_profit(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[START:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254)
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
@@ -940,9 +943,6 @@ define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[START:%.*]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i16
-; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP2]], i16 254)
; CHECK-NEXT: ret i16 [[UMAX]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
index 093e25a..e9e0d22 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr116483.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -4,16 +4,16 @@
define i32 @test() {
; CHECK-LABEL: define i32 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
-; CHECK: [[LOOP_BODY]]:
-; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
-; CHECK: [[EXIT]]:
; CHECK-NEXT: [[XOR:%.*]] = xor i32 0, 3
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[XOR]], 329
; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[MUL]] to i16
; CHECK-NEXT: [[SEXT:%.*]] = shl i16 [[CONV]], 8
; CHECK-NEXT: [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
; CHECK-NEXT: [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[CONV3]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/pr24783.ll b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
index c521bca..37ecf42 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr24783.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr24783.ll
@@ -7,11 +7,11 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @f(ptr %end.s, ptr %loc, i32 %p) {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]]
; CHECK: while.body.i:
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[WHILE_BODY_I]]
; CHECK: loop.exit:
-; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, ptr [[END_S:%.*]], i32 [[P:%.*]]
; CHECK-NEXT: store ptr [[END]], ptr [[LOC:%.*]], align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/IndVarSimplify/pr39673.ll b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
index 7b093b3..3cee1ab 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr39673.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr39673.ll
@@ -148,6 +148,7 @@ loop2.end: ; preds = %loop2
define i16 @neg_loop_carried(i16 %arg) {
; CHECK-LABEL: @neg_loop_carried(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop1:
; CHECK-NEXT: [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ]
@@ -155,7 +156,6 @@ define i16 @neg_loop_carried(i16 %arg) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]]
; CHECK: loop2.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[ARG:%.*]], 2
; CHECK-NEXT: br label [[LOOP2:%.*]]
; CHECK: loop2:
; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[TMP0]], [[LOOP2_PREHEADER]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/pr63763.ll b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
index 427db1e..a5fde67 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr63763.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr63763.ll
@@ -16,13 +16,13 @@ define i32 @test(i1 %c) {
; CHECK-NEXT: [[CONV2:%.*]] = ashr exact i32 [[SEXT]], 24
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 7, [[CONV2]]
; CHECK-NEXT: call void @use(i32 [[INVARIANT_OP]])
+; CHECK-NEXT: [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
+; CHECK-NEXT: [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
+; CHECK-NEXT: [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[SEXT_US:%.*]] = shl i32 [[SEL]], 24
-; CHECK-NEXT: [[CONV2_US:%.*]] = ashr exact i32 [[SEXT_US]], 24
-; CHECK-NEXT: [[INVARIANT_OP_US:%.*]] = sub nsw i32 7, [[CONV2_US]]
; CHECK-NEXT: ret i32 [[INVARIANT_OP_US]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
index b3162de..7cdc98a 100644
--- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
+++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll
@@ -4,22 +4,21 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define i32 @remove_loop(i32 %size) {
-; CHECK-LABEL: define i32 @remove_loop(
-; CHECK-SAME: i32 [[SIZE:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[WHILE_COND:.*]]
-; CHECK: [[WHILE_COND]]:
-; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], %[[ENTRY]] ], [ [[SUB:%.*]], %[[WHILE_COND]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
-; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
-; CHECK-NEXT: br i1 [[CMP]], label %[[WHILE_COND]], label %[[WHILE_END:.*]]
-; CHECK: [[WHILE_END]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], 31
+; CHECK-LABEL: @remove_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31
+; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]]
+; CHECK: while.end:
; CHECK-NEXT: ret i32 [[TMP4]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 84ae79d..41fce36 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -76,6 +76,10 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-LABEL: @narow_canonical_iv_wide_multiplied_iv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -84,10 +88,6 @@ define i64 @narow_canonical_iv_wide_multiplied_iv(i32 %x, i64 %y, ptr %0) {
; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SMAX]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[Y:%.*]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
index 14e06fe..aca553e 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll
@@ -23,8 +23,8 @@ define void @test1(i8 %x, ptr %ptr) {
; CHECK-NEXT: br label [[WHILE_COND192:%.*]]
; CHECK: while.cond192:
; CHECK-NEXT: switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT: i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
+; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT: i8 10, label [[IF_END224_LOOPEXIT1:%.*]]
; CHECK-NEXT: ]
; CHECK: while.cond215.preheader:
; CHECK-NEXT: br label [[WHILE_COND215:%.*]]
@@ -103,8 +103,8 @@ define void @test2(i16 %x) {
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: switch i16 [[X:%.*]], label [[RETURN_LOOPEXIT1:%.*]] [
-; CHECK-NEXT: i16 41, label [[FOR_END:%.*]]
-; CHECK-NEXT: i16 43, label [[FOR_COND]]
+; CHECK-NEXT: i16 41, label [[FOR_END:%.*]]
+; CHECK-NEXT: i16 43, label [[FOR_COND]]
; CHECK-NEXT: ]
; CHECK: for.end:
; CHECK-NEXT: [[I_0_LCSSA2:%.*]] = phi i32 [ 0, [[FOR_COND]] ]
@@ -336,6 +336,7 @@ if.end1824: ; preds = %for.end1326
define void @test5(ptr %header, i32 %conv, i8 %n) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br label [[FOR_INNER:%.*]]
@@ -358,7 +359,6 @@ define void @test5(ptr %header, i32 %conv, i8 %n) {
; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[WHILE_COND_PREHEADER:%.*]]
; CHECK: while.cond.preheader:
; CHECK-NEXT: [[ADD85_LCSSA:%.*]] = phi i32 [ [[ADD85]], [[FOR_INC]] ]
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV:%.*]], 2
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[POS_8:%.*]] = phi i32 [ [[INC114:%.*]], [[WHILE_BODY:%.*]] ], [ [[ADD85_LCSSA]], [[WHILE_COND_PREHEADER]] ]
@@ -427,8 +427,8 @@ define void @test6(i8 %x) {
; CHECK-NEXT: br label [[WHILE_COND192:%.*]]
; CHECK: while.cond192:
; CHECK-NEXT: switch i8 [[X:%.*]], label [[WHILE_BODY205:%.*]] [
-; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
-; CHECK-NEXT: i8 10, label [[IF_END224:%.*]]
+; CHECK-NEXT: i8 59, label [[WHILE_COND215_PREHEADER:%.*]]
+; CHECK-NEXT: i8 10, label [[IF_END224:%.*]]
; CHECK-NEXT: ]
; CHECK: while.cond215.preheader:
; CHECK-NEXT: [[I_7_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_COND192]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
index a92d328..ad69812 100644
--- a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
+++ b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll
@@ -46,12 +46,12 @@ for.end106: ; preds = %for.cond
define i32 @test_pr58439(i32 %a) {
; CHECK-LABEL: @test_pr58439(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[C_EXT_LCSSA:%.*]] = phi i32 [ 0, [[LOOP]] ]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 1
; CHECK-NEXT: [[RES:%.*]] = add i32 [[C_EXT_LCSSA]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
@@ -76,6 +76,7 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
+; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[C_05_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[INNER]] ], [ 0, [[OUTER_HEADER]] ]
@@ -86,7 +87,6 @@ define i8 @l(i32 %inc, i1 %tobool.not.i) {
; CHECK: outer.latch:
; CHECK-NEXT: [[C_05_I_LCSSA:%.*]] = phi i32 [ [[C_05_I]], [[INNER]] ]
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[INNER]] ]
-; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[AND]] to i8
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C_05_I_LCSSA]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0]], [[TMP1]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sentinel.ll b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
index 5234141..4f12308 100644
--- a/llvm/test/Transforms/IndVarSimplify/sentinel.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sentinel.ll
@@ -9,19 +9,19 @@ define void @test(i1 %arg) personality ptr @snork {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i32 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP6:%.*]], [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMAX:%.*]]
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB2:%.*]], label [[BB4]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP1]], [[BB1:%.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP1:%.*]], [[BB1:%.*]] ]
; CHECK-NEXT: ret void
; CHECK: bb4:
-; CHECK-NEXT: [[INDVARS_IV]] = phi i32 [ [[INDVARS_IV_NEXT]], [[BB1]] ], [ undef, [[BB:%.*]] ]
-; CHECK-NEXT: [[SMAX]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
-; CHECK-NEXT: [[TMP6]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[BB1]] ], [ undef, [[BB:%.*]] ]
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INDVARS_IV]], i32 36)
+; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 @quux() [ "deopt"(i32 0, i32 0, i32 0, i32 180, i32 0, i32 25, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 [[INDVARS_IV]], i32 3, i32 undef, i32 7, ptr null, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 3, i32 undef, i32 4, double undef, i32 7, ptr null, i32 4, i64 undef, i32 7, ptr null, i32 0, ptr addrspace(1) undef, i32 3, i32 undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 0, ptr addrspace(1) undef, i32 7, ptr null) ]
; CHECK-NEXT: to label [[BB7:%.*]] unwind label [[BB15:%.*]]
; CHECK: bb7:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP6]], [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1]] = sub i32 [[TMP0]], [[SMAX]]
; CHECK-NEXT: br label [[BB9:%.*]]
; CHECK: bb9:
; CHECK-NEXT: br i1 true, label [[BB1]], label [[BB9]]
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
deleted file mode 100644
index 89583f9..0000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -indvars-predicate-loops=0 -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin10.0"
-
-; We make sinking here, Changed flag should be set properly.
-define i32 @test(i32 %a, i32 %b, i32 %N) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
-; CHECK: exit:
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: ret i32 [[ADD]]
-;
-entry:
- %add = add i32 %a, %b
- br label %loop
-
-loop:
- %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
- %iv.next = add i32 %iv, 1
- %cmp = icmp slt i32 %iv.next, %N
- br i1 %cmp, label %loop, label %exit
-
-exit:
- ret i32 %add
-}
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll b/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
deleted file mode 100644
index d2478be..0000000
--- a/llvm/test/Transforms/IndVarSimplify/sink-trapping.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
-
-declare i1 @b()
-
-define i32 @a(i32 %x) nounwind {
-for.body.preheader:
- %y = sdiv i32 10, %x
- br label %for.body
-
-for.body:
- %cmp = call i1 @b()
- br i1 %cmp, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:
- ret i32 %y
-}
-; CHECK: for.end.loopexit:
-; CHECK: sdiv
-; CHECK: ret
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 17921af..abe7a3e 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -24,13 +24,13 @@ define void @_Z3fn1v() {
; CHECK-NEXT: [[X8:%.*]] = icmp ult i32 0, 4
; CHECK-NEXT: br i1 [[X8]], label [[DOTPREHEADER_LR_PH:%.*]], label [[X22]]
; CHECK: .preheader.lr.ph:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
; CHECK-NEXT: br label [[DOTPREHEADER:%.*]]
; CHECK: .preheader:
; CHECK-NEXT: br label [[X17:%.*]]
; CHECK: x17:
; CHECK-NEXT: br i1 false, label [[DOTPREHEADER]], label [[DOT_CRIT_EDGE_8:%.*]]
; CHECK: ._crit_edge.8:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[K_09]], i64 [[TMP5]]
; CHECK-NEXT: br label [[X22]]
; CHECK: x22:
; CHECK-NEXT: [[K_1_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[DOT_CRIT_EDGE_8]] ], [ [[K_09]], [[DOTPREHEADER4]] ]
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index 3af65df..e6cc457 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -43,9 +43,9 @@ define void @test2(i32 %i) {
; CHECK-LABEL: define void @test2(
; CHECK-SAME: i32 [[I:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[X1:%.*]] = getelementptr i32, ptr @X, i64 1
; CHECK-NEXT: [[X2:%.*]] = getelementptr i32, ptr @X, i64 1
-; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X1]], align 4
+; CHECK-NEXT: [[X3:%.*]] = getelementptr i32, ptr @X, i64 1
+; CHECK-NEXT: [[X1_PROMOTED:%.*]] = load i32, ptr [[X2]], align 4
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[A1:%.*]] = phi i32 [ [[V:%.*]], %[[LOOP]] ], [ [[X1_PROMOTED]], %[[ENTRY]] ]
@@ -53,7 +53,7 @@ define void @test2(i32 %i) {
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X1]], align 4
+; CHECK-NEXT: store i32 [[V_LCSSA]], ptr [[X2]], align 4
; CHECK-NEXT: ret void
;
Entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll b/llvm/test/Transforms/LICM/sink-alloca.ll
index 0997bf6..2bf9350 100644
--- a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/llvm/test/Transforms/LICM/sink-alloca.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -passes=indvars -S | FileCheck %s
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
; PR4775
-; Indvars shouldn't sink the alloca out of the entry block, even though
+; LICM shouldn't sink the alloca out of the entry block, even though
; it's not used until after the loop.
define i32 @main() nounwind {
; CHECK: entry:
@@ -25,7 +25,7 @@ while.end: ; preds = %while.cond
declare i32 @bar()
; <rdar://problem/10352360>
-; Indvars shouldn't sink the first alloca between the stacksave and stackrestore
+; LICM shouldn't sink the first alloca between the stacksave and stackrestore
; intrinsics.
declare ptr @a(...)
declare ptr @llvm.stacksave() nounwind
diff --git a/llvm/test/Transforms/LICM/sink-from-preheader.ll b/llvm/test/Transforms/LICM/sink-from-preheader.ll
new file mode 100644
index 0000000..bbe3d3b
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-from-preheader.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
+
+; We perform sinking here, Changed flag should be set properly.
+define i32 @test(i32 %a, i32 %b, i32 %N) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %add = add i32 %a, %b
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %load = load i32, ptr %b
+ %add = add i32 %a, %load
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_modified_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[SMAX:%.*]] = phi i32 [ [[IV_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: store i32 [[SMAX]], ptr [[B]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A]], [[LOAD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %load = load i32, ptr %b
+ %add = add i32 %a, %load
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, %a
+ store i32 %iv.next, ptr %b
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+; Volatile loads must not be sunk.
+define i32 @test_with_volatile_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_volatile_load_no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load volatile i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load volatile i32, ptr %b, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+; Ordered/atomic loads must not be sunk.
+define i32 @test_with_atomic_load_no_sink(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_atomic_load_no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load atomic i32, ptr [[B:%.*]] acquire, align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load atomic i32, ptr %b acquire, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
+
+declare void @clobber(ptr)
+
+; Calls that may write memory in the loop should prevent sinking the load.
+define i32 @test_with_unused_load_clobbered_by_call(i32 %a, ptr %b, i32 %N) {
+; CHECK-LABEL: @test_with_unused_load_clobbered_by_call(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: call void @clobber(ptr [[B]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LD]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %ld = load i32, ptr %b, align 4
+ %add = add i32 %a, %ld
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ call void @clobber(ptr %b)
+ %cmp = icmp slt i32 %iv.next, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %add
+}
diff --git a/llvm/test/Transforms/LICM/sink-trapping.ll b/llvm/test/Transforms/LICM/sink-trapping.ll
new file mode 100644
index 0000000..f4d260d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/sink-trapping.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=licm -verify-memoryssa -S | FileCheck %s
+
+declare i1 @b()
+
+define i32 @a(i32 %x) nounwind {
+; CHECK-LABEL: define i32 @a(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[FOR_BODY_PREHEADER:.*:]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[CMP:%.*]] = call i1 @b()
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 10, [[X]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
+for.body.preheader:
+ %y = sdiv i32 10, %x
+ br label %for.body
+
+for.body:
+ %cmp = call i1 @b()
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+ ret i32 %y
+}
diff --git a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
index bdd51c2..6c19aaa 100644
--- a/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
+++ b/llvm/test/Transforms/LoopDeletion/invalidate-scev-after-hoisting.ll
@@ -84,13 +84,13 @@ define i32 @scev_invalidation_after_deleting(ptr %src) {
; CHECK: inner.2.preheader:
; CHECK-NEXT: br label [[INNER_3_PH:%.*]]
; CHECK: inner.3.ph:
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: br label [[INNER_3:%.*]]
; CHECK: inner.3:
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[OUTER_LATCH]], label [[INNER_3]]
; CHECK: outer.latch:
; CHECK-NEXT: [[L_LCSSA:%.*]] = phi i32 [ [[L]], [[INNER_3]] ]
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i32 [[L_LCSSA]], [[TRUNC]]
; CHECK-NEXT: br label [[OUTER_HEADER]]
;
diff --git a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
index 62c5627..4a55c0e 100644
--- a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
+++ b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
@@ -4,11 +4,11 @@
define void @test_pr50940(ptr %A, ptr %B) {
; CHECK-LABEL: @test_pr50940(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: br i1 false, label [[OUTER_LATCH:%.*]], label [[INNER_PH:%.*]]
; CHECK: inner.ph:
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 3
; CHECK-NEXT: br label [[INNER_LVER_CHECK:%.*]]
; CHECK: inner.lver.check:
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index eea2237..abed18a 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -380,7 +380,6 @@ define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], 8589934588
-; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[VAR2]], i64 [[TMP4]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -396,6 +395,7 @@ define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
index 8d20a3b..d311f54 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
@@ -43,7 +43,6 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
-; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -64,6 +63,7 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER13]]
; CHECK: for.body.preheader14:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
index 2dceb27..f2ae327 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
@@ -1040,7 +1040,6 @@ define void @saxpy_5(i64 %n, float %a, ptr readonly %x, ptr noalias %y) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_PREHEADER11:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775806
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 5
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[A]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <10 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -1058,10 +1057,11 @@ define void @saxpy_5(i64 %n, float %a, ptr readonly %x, ptr noalias %y) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[N_VEC]], 5
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT]], label %[[LOOP_PREHEADER11]]
; CHECK: [[LOOP_PREHEADER11]]:
-; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[I1_PH:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[A]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[LOOP:.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
index a3b8736..338d925 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll
@@ -9,7 +9,6 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ]
-; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -27,6 +26,7 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: [[COERCE_VAL_IP:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 256
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true)
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 5127b7d..7c349fb 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -18,22 +18,15 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER15:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
-; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
-; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[PSRCA:%.*]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRCB:%.*]], i32 [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA:%.*]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX13]]
+; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[OFFSET_IDX13]]
; CHECK-NEXT: [[OFFSET_IDX15:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[OFFSET_IDX15]]
+; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[PSRCB:%.*]], i32 [[OFFSET_IDX15]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i16>, ptr [[NEXT_GEP16]], align 2
@@ -47,6 +40,13 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
+; CHECK-NEXT: [[TMP13:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP14]]
+; CHECK-NEXT: [[TMP12:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[TMP12]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[BLOCKSIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER15]]
; CHECK: while.body.preheader15:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
index dcfebe3..6e95b63 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
@@ -46,7 +46,6 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 124
; AVX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[BB12_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]]
; AVX2: vector.ph:
-; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
; AVX2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775776
; AVX2-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX2: vector.body:
@@ -80,6 +79,7 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; AVX2: middle.block:
+; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; AVX2: vec.epilog.iter.check:
@@ -90,8 +90,6 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2: vec.epilog.ph:
; AVX2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; AVX2-NEXT: [[N_VEC10:%.*]] = and i64 [[TMP3]], 9223372036854775800
-; AVX2-NEXT: [[TMP21:%.*]] = shl i64 [[N_VEC10]], 2
-; AVX2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP21]]
; AVX2-NEXT: br label [[BB12:%.*]]
; AVX2: vec.epilog.vector.body:
; AVX2-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[BB12_PREHEADER11]] ], [ [[INDEX_NEXT16:%.*]], [[BB12]] ]
@@ -106,6 +104,8 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
; AVX2-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC10]]
; AVX2-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[BB12]], !llvm.loop [[LOOP4:![0-9]+]]
; AVX2: vec.epilog.middle.block:
+; AVX2-NEXT: [[TMP27:%.*]] = shl i64 [[N_VEC10]], 2
+; AVX2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
; AVX2-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]]
; AVX2-NEXT: br i1 [[CMP_N17]], label [[EXIT]], label [[BB12_PREHEADER1]]
; AVX2: bb12.preheader:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index bfb8554..4562072 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -16,8 +16,8 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-SAME: ptr writeonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], double [[A:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
-; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
-; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br i1 [[CMP1]], label %[[ITER_CHECK:.*]], label %[[FOR_END:.*]]
+; CHECK: [[ITER_CHECK]]:
; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X]] to i64
; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y]] to i64
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
@@ -25,12 +25,11 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
-; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]]
-; CHECK: [[VECTOR_PH1]]:
-; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
@@ -40,7 +39,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64
@@ -65,13 +64,14 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER9]], label %[[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER]], label %[[VEC_EPILOG_PH]], !prof [[PROF10:![0-9]+]]
; CHECK: [[VEC_EPILOG_PH]]:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ]
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer
@@ -86,12 +86,12 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]]
-; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9]]
-; CHECK: [[FOR_BODY_PREHEADER9]]:
-; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
@@ -110,13 +110,13 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]:
-; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ]
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8
-; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9_NEW:.*]]
-; CHECK: [[FOR_BODY_PREHEADER9_NEW]]:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
+; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
; CHECK-NEXT: [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]]
@@ -127,7 +127,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]]
@@ -177,7 +177,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[DOUBLE_TBAA3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
@@ -232,8 +232,9 @@ attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]}
-; CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"}
-; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META8]]}
+; CHECK: [[PROF10]] = !{!"branch_weights", i32 4, i32 12}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META8]], [[META9]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]]}
+; CHECK: [[META13]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META8]]}
;.