about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrei Elovikov <andrei.elovikov@sifive.com> 2026-03-20 13:43:04 -0700
committer Andrei Elovikov <andrei.elovikov@sifive.com> 2026-03-23 11:49:01 -0700
commit 4974e0d5ce2d5b93da3e3b5e76e2ee290f18aa06 (patch)
tree dd7c2a9733c254f28eb35abbf93264c70547b764
parent aa4e85a2ecfff002f0505c1ed15a4ec80999c41b (diff)
downloadllvm-users/eas/laa-detect-waw-hazard-1.tar.gz
llvm-users/eas/laa-detect-waw-hazard-1.tar.bz2
llvm-users/eas/laa-detect-waw-hazard-1.zip
[LAA] Detect cross-iteration WAW when writing to the same pointer
branch: users/eas/laa-detect-waw-hazard-1
Fixes https://github.com/llvm/llvm-project/issues/187402.
-rw-r--r-- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 51
-rw-r--r-- llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll | 53
-rw-r--r-- llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll | 40
3 files changed, 97 insertions, 47 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5f4f305506d4..202665ff6bde 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -872,7 +872,7 @@ public:
/// Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
- void buildDependenceSets();
+ void buildDependenceSets(const MemoryDepChecker &DepChecker);
/// Initial processing of memory accesses determined that we need to
/// perform dependency checking.
@@ -1520,7 +1520,16 @@ bool AccessAnalysis::canCheckPtrAtRT(
return CanDoRTIfNeeded;
}
-void AccessAnalysis::buildDependenceSets() {
+static bool isInvariant(Value *V, const Loop *TheLoop, ScalarEvolution *SE) {
+ if (TheLoop->isLoopInvariant(V))
+ return true;
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ const SCEV *S = SE->getSCEV(V);
+ return SE->isLoopInvariant(S, TheLoop);
+}
+
+void AccessAnalysis::buildDependenceSets(const MemoryDepChecker &DepChecker) {
// We process the set twice: first we process read-write pointers, last we
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
@@ -1602,7 +1611,31 @@ void AccessAnalysis::buildDependenceSets() {
// this is a read only check other writes for conflicts (but only if
// there is no other write to the ptr - this is an optimization to
// catch "a[i] = a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
+ //
+ // If there are multiple writes into the same pointer we need to make
+ // sure that there are no cross-iteration dependencies between those
+ // writes to avoid the following scenario:
+ //
+ // code:
+ // if (RT_COND0) *p = x;
+ // if (RT_COND1) *p = y;
+ //
+ // execution:
+ // Iter0 | Iter1
+ // no store | *p = 2
+ // *p = 1 | no store
+ //
+ // Scalar loop would leave `*p == 2`, yet two vectorized scatters
+ // would result in `*p == 1` which is wrong.
+ //
+ // NOTE: Known invariant stores are handled separately in both this
+ // file and LoopVectorizationLegality to support the case when
+ // reduction wasn't completely transformed into SSA form.
+ bool MultipleNonInvariantStoresToPtrExist =
+ DepChecker.getOrderForAccess(Ptr, true).size() > 1 &&
+ !::isInvariant(Ptr, TheLoop, PSE.getSE());
+ if ((IsWrite || IsReadOnlyPtr) &&
+ (AliasSetHasWrite || MultipleNonInvariantStoresToPtrExist)) {
CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
@@ -2775,14 +2808,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// If we write (or read-write) to a single destination and there are no other
// reads in this loop then is it safe to vectorize: the vectorized stores
// preserve ordering via replication or order-preserving @llvm.masked.scatter.
- if (NumReadWrites == 1 && NumReads == 0) {
+ if (NumReadWrites == 1 && NumReads == 0 && Stores.size() == 1) {
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
return true;
}
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
- Accesses.buildDependenceSets();
+ Accesses.buildDependenceSets(getDepChecker());
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
@@ -2955,13 +2988,7 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
}
bool LoopAccessInfo::isInvariant(Value *V) const {
- auto *SE = PSE->getSE();
- if (TheLoop->isLoopInvariant(V))
- return true;
- if (!SE->isSCEVable(V->getType()))
- return false;
- const SCEV *S = SE->getSCEV(V);
- return SE->isLoopInvariant(S, TheLoop);
+ return ::isInvariant(V, TheLoop, PSE->getSE());
}
/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
index 0a71d4a3c63c..bdfee12db528 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll
@@ -6,8 +6,13 @@
define void @waw_no_mask(ptr %p, i64 %stride, i64 %n) {
; CHECK-LABEL: 'waw_no_mask'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -40,8 +45,13 @@ exit:
define void @waw_mask(ptr %p, i64 %stride, i64 %n, i64 %n0, i64 %n1) {
; CHECK-LABEL: 'waw_mask'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -87,8 +97,13 @@ exit:
define void @waw_no_mask_unknown_stride(ptr %p, i64 %stride, i64 %n) {
; CHECK-LABEL: 'waw_no_mask_unknown_stride'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -121,8 +136,13 @@ exit:
define void @waw_mask_unknown_stride(ptr %p, i64 %stride, i64 %n0, i64 %n1) {
; CHECK-LABEL: 'waw_mask_unknown_stride'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -168,8 +188,13 @@ exit:
define void @no_cross_iter_dependency(ptr %p, i8 %a, i64 %n, i64 %n0, i64 %n1) {
; CHECK-LABEL: 'no_cross_iter_dependency'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -219,6 +244,10 @@ define void @const_stride(ptr %p, i64 %n, i64 %n0, i64 %n1) {
; CHECK-NEXT: header:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -295,8 +324,13 @@ exit:
define void @indirect_no_mask(ptr noalias %p, i64 %n) {
; CHECK-LABEL: 'indirect_no_mask'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -328,8 +362,13 @@ exit:
define void @indirect_mask(ptr noalias %p, i64 %n, i64 %n0, i64 %n1) {
; CHECK-LABEL: 'indirect_mask'
; CHECK-NEXT: header:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
index fabab210fb85..3ea068440ce2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -162,35 +162,19 @@ exit:
define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2, ptr %p3, i64 %N) {
; CHECK-LABEL: @store_to_addr_generated_from_invariant_addr(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P0:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1:%.*]], i64 [[IV]]
+; CHECK-NEXT: store ptr [[P0:%.*]], ptr [[ARRAYIDX11]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: store i8 0, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -250,7 +234,7 @@ define i8 @mixed_gather_scatters(ptr %A, ptr %B, ptr %C) #0 {
; RVA23-NEXT: [[TMP14]] = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> [[TMP13]], <vscale x 2 x i8> [[VEC_PHI]], i32 [[TMP0]])
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
; RVA23-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
-; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; RVA23: middle.block:
; RVA23-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> [[TMP14]])
; RVA23-NEXT: br label [[EXIT:%.*]]
@@ -291,7 +275,7 @@ define i8 @mixed_gather_scatters(ptr %A, ptr %B, ptr %C) #0 {
; RVA23ZVL1024B-NEXT: [[TMP14]] = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i8> [[TMP13]], <vscale x 1 x i8> [[VEC_PHI]], i32 [[TMP0]])
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
; RVA23ZVL1024B-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
-; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; RVA23ZVL1024B: middle.block:
; RVA23ZVL1024B-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> [[TMP14]])
; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]]