aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp92
-rw-r--r--llvm/test/Transforms/LICM/lnicm.ll234
-rw-r--r--llvm/test/Transforms/LoopInterchange/call-instructions.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/currentLimitation.ll7
-rw-r--r--llvm/test/Transforms/LoopInterchange/debuginfo.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll31
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll8
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchangeable.ll5
-rw-r--r--llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll23
-rw-r--r--llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll4
-rw-r--r--llvm/test/Transforms/LoopInterchange/lcssa.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll10
-rw-r--r--llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll23
-rw-r--r--llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll21
-rw-r--r--llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/phi-ordering.ll7
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43326.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr48212.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/profitability.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll3
-rw-r--r--llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll2
-rw-r--r--llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll2
36 files changed, 307 insertions, 213 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index ced4396..1d3023d 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
@@ -358,8 +359,10 @@ public:
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loop interchange is profitable.
- bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
- CharMatrix &DepMatrix);
+ bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
+ unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap);
private:
int getInstrOrderCost();
@@ -410,13 +413,15 @@ struct LoopInterchange {
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
+ std::unique_ptr<CacheCost> CC = nullptr;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
- DominatorTree *DT, OptimizationRemarkEmitter *ORE)
- : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
+ DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
+ OptimizationRemarkEmitter *ORE)
+ : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
bool run(Loop *L) {
if (L->getParentLoop())
@@ -499,6 +504,21 @@ struct LoopInterchange {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
+ // Obtain the loop vector returned from loop cache analysis beforehand,
+ // and put each <Loop, index> pair into a map for constant time query
+ // later. Indices in loop vector reprsent the optimal order of the
+ // corresponding loop, e.g., given a loopnest with depth N, index 0
+ // indicates the loop should be placed as the outermost loop and index N
+ // indicates the loop should be placed as the innermost loop.
+ //
+ // For the old pass manager CacheCost would be null.
+ DenseMap<const Loop *, unsigned> CostMap;
+ if (CC != nullptr) {
+ const auto &LoopCosts = CC->getLoopCosts();
+ for (unsigned i = 0; i < LoopCosts.size(); i++) {
+ CostMap[LoopCosts[i].first] = i;
+ }
+ }
// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fasion. We start from
// the innermost loop, move it outwards to the best possible position
@@ -507,7 +527,7 @@ struct LoopInterchange {
bool ChangedPerIter = false;
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
- DependencyMatrix);
+ DependencyMatrix, CostMap);
if (!Interchanged)
continue;
// Loops interchanged, update LoopList accordingly.
@@ -531,7 +551,8 @@ struct LoopInterchange {
bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId,
- std::vector<std::vector<char>> &DependencyMatrix) {
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
@@ -541,7 +562,8 @@ struct LoopInterchange {
}
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
- if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -1135,21 +1157,33 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
return !DepMatrix.empty();
}
-bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
- unsigned OuterLoopId,
- CharMatrix &DepMatrix) {
- // TODO: Add better profitability checks.
- // e.g
- // 1) Construct dependency matrix and move the one with no loop carried dep
- // inside to enable vectorization.
-
- // This is rough cost estimation algorithm. It counts the good and bad order
- // of induction variables in the instruction and allows reordering if number
- // of bad orders is more than good.
- int Cost = getInstrOrderCost();
- LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
- if (Cost < -LoopInterchangeCostThreshold)
- return true;
+bool LoopInterchangeProfitability::isProfitable(
+ const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
+ unsigned OuterLoopId, CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ // TODO: Remove the legacy cost model.
+
+ // This is the new cost model returned from loop cache analysis.
+ // A smaller index means the loop should be placed an outer loop, and vice
+ // versa.
+ if (CostMap.find(InnerLoop) != CostMap.end() &&
+ CostMap.find(OuterLoop) != CostMap.end()) {
+ unsigned InnerIndex = 0, OuterIndex = 0;
+ InnerIndex = CostMap.find(InnerLoop)->second;
+ OuterIndex = CostMap.find(OuterLoop)->second;
+ LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
+ << ", OuterIndex = " << OuterIndex << "\n");
+ if (InnerIndex < OuterIndex)
+ return true;
+ } else {
+ // Legacy cost model: this is rough cost estimation algorithm. It counts the
+ // good and bad order of induction variables in the instruction and allows
+ // reordering if number of bad orders is more than good.
+ int Cost = getInstrOrderCost();
+ LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < -LoopInterchangeCostThreshold)
+ return true;
+ }
// It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
@@ -1160,10 +1194,8 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
InnerLoop->getStartLoc(),
InnerLoop->getHeader())
- << "Interchanging loops is too costly (cost="
- << ore::NV("Cost", Cost) << ", threshold="
- << ore::NV("Threshold", LoopInterchangeCostThreshold)
- << ") and it does not improve parallelism.";
+ << "Interchanging loops is too costly and it does not improve "
+ "parallelism.";
});
return false;
}
@@ -1709,8 +1741,8 @@ struct LoopInterchangeLegacyPass : public LoopPass {
auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
+ std::unique_ptr<CacheCost> CC = nullptr;
+ return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
}
};
} // namespace
@@ -1737,8 +1769,10 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
Function &F = *LN.getParent();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ std::unique_ptr<CacheCost> CC =
+ CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
OptimizationRemarkEmitter ORE(&F);
- if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
diff --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll
index 1ef0c1f..814f964 100644
--- a/llvm/test/Transforms/LICM/lnicm.ll
+++ b/llvm/test/Transforms/LICM/lnicm.ll
@@ -1,12 +1,13 @@
-; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes INTC
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LICM,CHECK
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes INTC
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
; This test represents the following function:
-; void test(int x[10][10], int y[10], int *z) {
-; for (int k = 0; k < 10; k++) {
+; void test(int n, int m, int x[m][n], int y[n], int *z) {
+; for (int k = 0; k < n; k++) {
; int tmp = *z;
-; for (int i = 0; i < 10; i++)
+; for (int i = 0; i < m; i++)
; x[i][k] += y[k] + tmp;
; }
; }
@@ -16,82 +17,187 @@
; perfect loop nest (e.g. loop-interchange) to perform.
-define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[Z:%.*]] = load i32, i32* %z, align 4
-; CHECK-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
-; LNICM: for.body.preheader:
-; LICM-NOT: for.body.preheader:
-; INTC-NOT: for.body.preheader:
-; LNICM-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; LNICM-NEXT: [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
-; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
-; LICM: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
-; LNICM: for.body3.preheader:
-; LICM-NOT: for.body3.preheader:
-; INTC-NOT: for.body3.preheader:
-; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
-; CHECK: for.body3:
-; LNICM-NEXT: [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
-; LNICM-NEXT: br label [[FOR_BODY_PREHEADER:%.*]]
-; LNICM: for.body3.split1:
-; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
-; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
-; LNICM-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
-; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
-; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
-; LNICM-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
-; LNICM-NEXT: [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
-; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
-; LNICM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
-; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
-; LNICM-NEXT: store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
-; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
-; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
-; LNICM-NEXT: br label [[FOR_END:%.*]]
-; LNICM: for.body3.split:
-; LICM-NOT: for.body3.split:
-; INTC-NOT: for.body3.split:
-; LNICM-NEXT: [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
-; LNICM-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
-; LNICM-NEXT: br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
-; LNICM: for.end:
-; LNICM-NEXT: [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
-; LNICM-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
-; LNICM-NEXT: br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
-; LNICM: for.end11:
-; LNICM-NEXT: ret void
+define dso_local void @test(i64 %n, i64 %m, ptr noalias %x, ptr noalias readonly %y, ptr readonly %z) {
+; The loopnest is not interchanged when we only run loop interchange.
+; INTC-LABEL: @test(
+; INTC-NEXT: gurad:
+; INTC-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
+; INTC-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
+; INTC-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
+; INTC: for.cond1.preheader.lr.ph:
+; INTC-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
+; INTC: for.i.preheader:
+; INTC-NEXT: br label [[ENTRY:%.*]]
+; INTC: entry:
+; INTC-NEXT: br label [[FOR_BODY:%.*]]
+; INTC: for.body:
+; INTC-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
+; INTC-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
+; INTC-NEXT: br label [[FOR_BODY3:%.*]]
+; INTC: for.body3:
+; INTC-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
+; INTC-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
+; INTC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
+; INTC-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; INTC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; INTC-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
+; INTC-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
+; INTC-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
+; INTC-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
+; INTC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; INTC-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; INTC-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
+; INTC-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
+; INTC-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
+; INTC-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
+; INTC-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
+; INTC: for.end:
+; INTC-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
+; INTC-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
+; INTC-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
+; INTC-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; INTC: for.end11.loopexit:
+; INTC-NEXT: br label [[FOR_END11]]
+; INTC: for.end11:
+; INTC-NEXT: ret void
+;
+; The loopnest is interchanged when we run lnicm and loop interchange.
+; LNICM-LABEL: @test(
+; LNICM-NEXT: gurad:
+; LNICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
+; LNICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
+; LNICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
+; LNICM: for.cond1.preheader.lr.ph:
+; LNICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
+; LNICM: for.i.preheader:
+; LNICM-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
+; LNICM: entry:
+; LNICM-NEXT: br label [[FOR_BODY:%.*]]
+; LNICM: for.body:
+; LNICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
+; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
+; LNICM: for.body3.preheader:
+; LNICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
+; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
+; LNICM: for.body3:
+; LNICM-NEXT: [[I_01:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
+; LNICM-NEXT: br label [[ENTRY]]
+; LNICM: for.body3.split1:
+; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
+; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
+; LNICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
+; LNICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
+; LNICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
+; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
+; LNICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; LNICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
+; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I_01]], 1
+; LNICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
+; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
+; LNICM-NEXT: br label [[FOR_END]]
+; LNICM: for.body3.split:
+; LNICM-NEXT: [[TMP3]] = add nsw i32 [[I_01]], 1
+; LNICM-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; LNICM-NEXT: [[TMP5:%.*]] = icmp slt i64 [[TMP4]], [[M]]
+; LNICM-NEXT: br i1 [[TMP5]], label [[FOR_BODY3]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; LNICM: for.end:
+; LNICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
+; LNICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
+; LNICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
+; LNICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; LNICM: for.end11.loopexit:
+; LNICM-NEXT: br label [[FOR_END11]]
+; LNICM: for.end11:
+; LNICM-NEXT: ret void
+;
+; The loopnest is not interchanged when we run licm and loop interchange.
+; LICM-LABEL: @test(
+; LICM-NEXT: gurad:
+; LICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
+; LICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
+; LICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
+; LICM: for.cond1.preheader.lr.ph:
+; LICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
+; LICM: for.i.preheader:
+; LICM-NEXT: br label [[ENTRY:%.*]]
+; LICM: entry:
+; LICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
+; LICM-NEXT: br label [[FOR_BODY:%.*]]
+; LICM: for.body:
+; LICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
+; LICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
+; LICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
+; LICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; LICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; LICM-NEXT: br label [[FOR_BODY3:%.*]]
+; LICM: for.body3:
+; LICM-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
+; LICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
+; LICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
+; LICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
+; LICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
+; LICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; LICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; LICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
+; LICM-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
+; LICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
+; LICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
+; LICM-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
+; LICM: for.end:
+; LICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
+; LICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
+; LICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
+; LICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; LICM: for.end11.loopexit:
+; LICM-NEXT: br label [[FOR_END11]]
+; LICM: for.end11:
+; LICM-NEXT: ret void
+;
-entry:
+gurad:
+ %cmp23 = icmp sgt i64 %m, 0
+ %cmp32 = icmp sgt i64 %n, 0
+ br i1 %cmp23, label %for.cond1.preheader.lr.ph, label %for.end11
+
+for.cond1.preheader.lr.ph: ; preds = %gurad
+ br i1 %cmp32, label %for.i.preheader, label %for.end11
+
+for.i.preheader: ; preds = %for.cond1.preheader.lr.ph
+ br label %entry
+
+entry: ; preds = %for.i.preheader
br label %for.body
for.body:
%k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
- %0 = load i32, i32* %z, align 4
+ %0 = load i32, ptr %z, align 4
br label %for.body3
for.body3:
%i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
%idxprom = sext i32 %k.02 to i64
- %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
- %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom
+ %1 = load i32, ptr %arrayidx, align 4
%add = add nsw i32 %1, %0
%idxprom4 = sext i32 %i.01 to i64
- %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
- %idxprom6 = sext i32 %k.02 to i64
- %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
- %2 = load i32, i32* %arrayidx7, align 4
+ %index0 = mul i64 %idxprom4, %n
+ %index1 = add i64 %index0, %idxprom
+ %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %index1
+ %2 = load i32, ptr %arrayidx7, align 4
%add8 = add nsw i32 %2, %add
- store i32 %add8, i32* %arrayidx7, align 4
+ store i32 %add8, ptr %arrayidx7, align 4
%inc = add nsw i32 %i.01, 1
- %cmp2 = icmp slt i32 %inc, 10
+ %inc.ext = sext i32 %inc to i64
+ %cmp2 = icmp slt i64 %inc.ext, %m
br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
for.end:
%inc10 = add nsw i32 %k.02, 1
- %cmp = icmp slt i32 %inc10, 10
+ %inc10.ext = sext i32 %inc10 to i64
+ %cmp = icmp slt i64 %inc10.ext, %n
br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
for.end11:
diff --git a/llvm/test/Transforms/LoopInterchange/call-instructions.ll b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
index d945fe6..40f5bf4 100644
--- a/llvm/test/Transforms/LoopInterchange/call-instructions.ll
+++ b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
@@ -1,10 +1,9 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -stats 2>&1 | FileCheck -check-prefix=STATS %s
; RUN: FileCheck --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
index 82c1655..f2b4cab 100644
--- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
@@ -1,15 +1,14 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' \
; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' \
; RUN: -da-disable-delinearization-checks -pass-remarks-output=%t \
; RUN: -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
+
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x [100 x i32]]] zeroinitializer
@C = common global [100 x [100 x i64]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/debuginfo.ll b/llvm/test/Transforms/LoopInterchange/debuginfo.ll
index e2187b9..6a55a4f 100644
--- a/llvm/test/Transforms/LoopInterchange/debuginfo.ll
+++ b/llvm/test/Transforms/LoopInterchange/debuginfo.ll
@@ -1,10 +1,9 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info | FileCheck %s
; RUN: FileCheck -check-prefix=REMARK --input-file=%t %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i64]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll b/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
index 5e3c59e..936591f 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
@A = common global [100 x [100 x i64]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
index d0eac6d..af84da4 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
diff --git a/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll b/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
index 65f1290..2bf1237 100644
--- a/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
+++ b/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
@a = common global i32 0, align 4
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
index 23f7d91..9997f41 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
@@ -1,9 +1,8 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
index fa2f112..0e5e69c 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
diff --git a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
index f8e2d6b..8ea2e47b 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt < %s -loop-interchange -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -pass-remarks-output=%t \
+; RUN: opt < %s -loop-interchange -cache-line-size=64 -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -pass-remarks-output=%t \
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -stats -S 2>&1 \
; RUN: | FileCheck -check-prefix=STATS %s
; RUN: FileCheck -input-file %t %s
@@ -34,35 +34,6 @@ exit: ; preds = %for1.inc
}
-; Only the inner loop induction variable is used for memory accesses.
-; Interchanging is not beneficial.
-; CHECK: Name: InterchangeNotProfitable
-; CHECK-NEXT: Function: no_bad_order
-define i32 @no_bad_order(i32* %Arr) {
-entry:
- br label %for1.header
-
-for1.header: ; preds = %entry, %for1.inc
- %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
- br label %for2
-
-for2: ; preds = %for1.header, %for2
- %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
- %arrayidx6 = getelementptr inbounds i32, i32* %Arr, i64 %indvars.iv
- store i32 0, i32* %arrayidx6, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 1024
- br i1 %exitcond, label %for2, label %for1.inc
-
-for1.inc:
- %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
- %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
- br i1 %exitcond21, label %for1.header, label %exit
-
-exit: ; preds = %for1.inc
- ret i32 0
-}
-
; No memory access using any induction variables, interchanging not beneficial.
; CHECK: Name: InterchangeNotProfitable
; CHECK-NEXT: Function: no_mem_instrs
diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
index 830d0a2..38abcc5 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
@b = common dso_local local_unnamed_addr global [200 x [200 x i32]] zeroinitializer, align 4
@a = common dso_local local_unnamed_addr global i32 0, align 4
@@ -139,7 +139,7 @@ define void @test2() {
; CHECK-NEXT: [[INDVAR1_NEXT:%.*]] = add nsw i32 [[INDVAR1]], -2
; CHECK-NEXT: [[INDVAR1_NEXT_EXT:%.*]] = sext i32 [[INDVAR1_NEXT]] to i64
; CHECK-NEXT: [[INDVARS_ADD:%.*]] = add nsw i64 [[INDVAR0_NEXT]], [[INDVAR1_NEXT_EXT]]
-; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i64 [[INDVARS_ADD]], 0
+; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp sle i64 [[INDVARS_ADD]], 0
; CHECK-NEXT: br label [[FOR_INC7]]
; CHECK: for.body3.split:
; CHECK-NEXT: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ]
@@ -147,7 +147,7 @@ define void @test2() {
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2]] = add nsw i64 [[INDVAR0]], -1
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i64 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_COND_FOR_END8_CRIT_EDGE:%.*]], label [[FOR_BODY3]]
; CHECK: for.inc7:
; CHECK-NEXT: [[INDVARS_OUTER_NEXT]] = add nsw i64 [[INDVARS_OUTER]], 1
@@ -182,7 +182,7 @@ for.body3: ; preds = %for.body, %for.body
%indvar1.next = add nsw i32 %indvar1, -2
%indvar1.next.ext = sext i32 %indvar1.next to i64
%indvars.add = add nsw i64 %indvar0.next, %indvar1.next.ext
- %tobool2 = icmp eq i64 %indvars.add, 0
+ %tobool2 = icmp sle i64 %indvars.add, 0
br i1 %tobool2, label %for.inc7, label %for.body3
for.inc7: ; preds = %for.body3
diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
index 3b12cf4..6148dc5 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s --basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s --basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
@b = constant [200 x [100 x i32]] zeroinitializer, align 4
@a = constant i32 0, align 4
diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable.ll b/llvm/test/Transforms/LoopInterchange/interchangeable.ll
index 00261aa..74c9533 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable.ll
@@ -1,9 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i64]] zeroinitializer
@B = common global [100 x i64] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
index fa5db55..a33b0a8 100644
--- a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
@@ -1,9 +1,8 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
@@ -24,31 +23,31 @@ entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc15, %entry
- %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
+ %i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
- %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
+ %j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
br label %for.body6
for.body6: ; preds = %for.body6, %for.cond4.preheader
- %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
- %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %k.026, i32 %j.027, i32 %i.028
+ %k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i64 0, i64 %k.026, i64 %j.027, i64 %i.028
%0 = load i32, i32* %arrayidx8
%add = add nsw i32 %0, %t
store i32 %add, i32* %arrayidx8
- %inc = add nuw nsw i32 %k.026, 1
- %exitcond = icmp eq i32 %inc, 100
+ %inc = add nuw nsw i64 %k.026, 1
+ %exitcond = icmp eq i64 %inc, 100
br i1 %exitcond, label %for.inc12, label %for.body6
for.inc12: ; preds = %for.body6
- %inc13 = add nuw nsw i32 %j.027, 1
- %exitcond29 = icmp eq i32 %inc13, 100
+ %inc13 = add nuw nsw i64 %j.027, 1
+ %exitcond29 = icmp eq i64 %inc13, 100
br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
for.inc15: ; preds = %for.inc12
- %inc16 = add nuw nsw i32 %i.028, 1
- %exitcond30 = icmp eq i32 %inc16, 100
+ %inc16 = add nuw nsw i64 %i.028, 1
+ %exitcond30 = icmp eq i64 %inc16, 100
br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
for.end17: ; preds = %for.inc15
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
index ab616ee..a188f0e 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck %s
-; RUN: opt < %s -basic-aa -loop-interchange -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll
index 9809689..92db586 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -1,8 +1,7 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
; RUN: FileCheck --input-file %t --check-prefix REMARK %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x [100 x i32]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index 388f237..9ef2582 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -1,11 +1,11 @@
; Test optimization remarks generated by the LoopInterchange pass.
;
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
; RUN: -pass-remarks='loop-interchange' -S
; RUN: cat %t | FileCheck %s
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
; RUN: cat %t | FileCheck --check-prefix=DELIN %s
@@ -71,11 +71,7 @@ for.end19:
; DELIN-NEXT: Name: InterchangeNotProfitable
; DELIN-NEXT: Function: test01
; DELIN-NEXT: Args:
-; DELIN-NEXT: - String: 'Interchanging loops is too costly (cost='
-; DELIN-NEXT: - Cost: '2'
-; DELIN-NEXT: - String: ', threshold='
-; DELIN-NEXT: - Threshold: '0'
-; DELIN-NEXT: - String: ') and it does not improve parallelism.'
+; DELIN-NEXT: - String: Interchanging loops is too costly and it does not improve parallelism.
; DELIN-NEXT: ...
;;--------------------------------------Test case 02------------------------------------
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
index 3bf1e21..c827a8d 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
@@ -1,9 +1,8 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
index 46aa4b5..876f001 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
@@ -1,9 +1,8 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
@@ -24,31 +23,31 @@ entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc15, %entry
- %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
+ %i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
- %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
+ %j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
br label %for.body6
for.body6: ; preds = %for.body6, %for.cond4.preheader
- %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
- %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
+ %k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i64 %i.028, i64 %k.026, i64 %j.027
%0 = load i32, i32* %arrayidx8
%add = add nsw i32 %0, %t
store i32 %add, i32* %arrayidx8
- %inc = add nuw nsw i32 %k.026, 1
- %exitcond = icmp eq i32 %inc, 100
+ %inc = add nuw nsw i64 %k.026, 1
+ %exitcond = icmp eq i64 %inc, 100
br i1 %exitcond, label %for.inc12, label %for.body6
for.inc12: ; preds = %for.body6
- %inc13 = add nuw nsw i32 %j.027, 1
- %exitcond29 = icmp eq i32 %inc13, 100
+ %inc13 = add nuw nsw i64 %j.027, 1
+ %exitcond29 = icmp eq i64 %inc13, 100
br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
for.inc15: ; preds = %for.inc12
- %inc16 = add nuw nsw i32 %i.028, 1
- %exitcond30 = icmp eq i32 %inc16, 100
+ %inc16 = add nuw nsw i64 %i.028, 1
+ %exitcond30 = icmp eq i64 %inc16, 100
br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
for.end17: ; preds = %for.inc15
diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
index 82f6615..f09c679 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
@@ -1,9 +1,8 @@
; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x i32] zeroinitializer
@@ -108,13 +107,13 @@ for.end12:
;; The outer loop header does not branch to the inner loop preheader, or the
;; inner loop header, or the outer loop latch.
; CHECK: Not interchanging loops. Cannot prove legality.
-define void @interchange_07(i32 %k, i32 %N, i32 %ny) {
+define void @interchange_07(i32 %k, i32 %N, i64 %ny) {
entry:
br label %for1.header
for1.header:
- %j23 = phi i32 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
- %cmp21 = icmp slt i32 0, %ny
+ %j23 = phi i64 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
+ %cmp21 = icmp slt i64 0, %ny
br label %singleSucc
singleSucc:
@@ -124,18 +123,18 @@ preheader.j:
br label %for2
for2:
- %j = phi i32 [ %j.next, %for2 ], [ 0, %preheader.j ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i32 0, i32 %j, i32 %j23
+ %j = phi i64 [ %j.next, %for2 ], [ 0, %preheader.j ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %j, i64 %j23
%lv = load i32, i32* %arrayidx5
%add = add nsw i32 %lv, %k
store i32 %add, i32* %arrayidx5
- %j.next = add nuw nsw i32 %j, 1
- %exitcond = icmp eq i32 %j, 99
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond = icmp eq i64 %j, 99
br i1 %exitcond, label %for1.inc10, label %for2
for1.inc10:
- %j.next24 = add nuw nsw i32 %j23, 1
- %exitcond26 = icmp eq i32 %j23, 99
+ %j.next24 = add nuw nsw i64 %j23, 1
+ %exitcond26 = icmp eq i64 %j23, 99
br i1 %exitcond26, label %for.end12, label %for1.header
for.end12:
diff --git a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
index 3a7c82b..bdf46ca 100644
--- a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
+++ b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
+; RUN: opt -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
@a = common global i32 0, align 4
diff --git a/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
index 7e9db5a..3dd291a 100644
--- a/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
diff --git a/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
index 7754602..653c97d 100644
--- a/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-interchange -loop-interchange-threshold=-100 -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -loop-interchange -cache-line-size=64 -loop-interchange-threshold=-100 -verify-loop-lcssa -S | FileCheck %s
; Test case for PR41725. The induction variables in the latches escape the
; loops and we must move some PHIs around.
diff --git a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
index 78e333b..c6fb18d 100644
--- a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
+++ b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
@@ -1,10 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s
;; Checks the order of the inner phi nodes does not cause havoc.
;; The inner loop has a reduction into c. The IV is not the first phi.
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-target triple = "armv8--linux-gnueabihf"
@@ -31,7 +30,7 @@ define void @test(i32 %T, [90 x i32]* noalias nocapture %C, [90 x [90 x i16]]* n
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for3.split1:
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* [[A:%.*]], i32 [[ADD]], i32 [[J]], i32 [[I]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* [[A:%.*]], i32 [[ADD]], i32 [[I]], i32 [[J]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]
@@ -70,7 +69,7 @@ for2.header: ; preds = %for2.inc16, %for1.heade
for3: ; preds = %for3, %for2.header
%k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
%add = add nsw i32 %k, %mul
- %arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %j, i32 %i
+ %arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %i, i32 %j
%0 = load i16, i16* %arrayidx, align 2
%add15 = add nsw i16 %0, 1
store i16 %add15, i16* %arrayidx
diff --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
index ef0c5ad6..ff3e6bc3 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
@b = external dso_local global [5 x i32], align 16
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index 443f61f..48773d0 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll
index 68bcc4e..bf0da36 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
diff --git a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
index 9ae5f42..5f598e3 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -S < %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -S < %s | FileCheck %s
; Test cases for PR43473.
diff --git a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
index 445fffe..fcb5ca6 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -verify-loop-lcssa -S %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -verify-loop-lcssa -S %s | FileCheck %s
; Tests for PR43797.
diff --git a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
index 7336477..2cc6ce9 100644
--- a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -S %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -S %s | FileCheck %s
@global = external local_unnamed_addr global [400 x [400 x i32]], align 16
diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll
index b6894bc..fb18ece 100644
--- a/llvm/test/Transforms/LoopInterchange/pr48212.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
diff --git a/llvm/test/Transforms/LoopInterchange/profitability.ll b/llvm/test/Transforms/LoopInterchange/profitability.ll
index 79706d8..983bbd6e 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability.ll
@@ -1,11 +1,10 @@
-; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -loop-interchange -cache-line-size=64 -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
; RUN: FileCheck -input-file %t %s
;; We test profitability model in these test cases.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index 89b734d..225b4bb 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -1,10 +1,9 @@
-; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
diff --git a/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
index 3f17844..5599a09 100644
--- a/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
+++ b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -S %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -S %s | FileCheck %s
@global = external dso_local global [1000 x [1000 x i32]], align 16
diff --git a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
index 3bc69c3..587b6386 100644
--- a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
+++ b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -loop-interchange-threshold=-10 -S %s | FileCheck %s
+; RUN: opt -loop-interchange -cache-line-size=64 -loop-interchange-threshold=-10 -S %s | FileCheck %s
; The test contains a GEP with an operand that is not SCEV-able. Make sure
; loop-interchange does not crash.