diff options
| author | Andrei Elovikov <andrei.elovikov@sifive.com> | 2026-03-20 13:43:04 -0700 |
|---|---|---|
| committer | Andrei Elovikov <andrei.elovikov@sifive.com> | 2026-03-23 11:49:01 -0700 |
| commit | 4974e0d5ce2d5b93da3e3b5e76e2ee290f18aa06 (patch) | |
| tree | dd7c2a9733c254f28eb35abbf93264c70547b764 | |
| parent | aa4e85a2ecfff002f0505c1ed15a4ec80999c41b (diff) | |
| download | llvm-users/eas/laa-detect-waw-hazard-1.tar.gz llvm-users/eas/laa-detect-waw-hazard-1.tar.bz2 llvm-users/eas/laa-detect-waw-hazard-1.zip | |
[LAA] Detect cross-iteration WAW when writing to the same pointer (branch: users/eas/laa-detect-waw-hazard-1)
Fixes https://github.com/llvm/llvm-project/issues/187402.
3 files changed, 97 insertions, 47 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 5f4f305506d4..202665ff6bde 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -872,7 +872,7 @@ public: /// Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. - void buildDependenceSets(); + void buildDependenceSets(const MemoryDepChecker &DepChecker); /// Initial processing of memory accesses determined that we need to /// perform dependency checking. @@ -1520,7 +1520,16 @@ bool AccessAnalysis::canCheckPtrAtRT( return CanDoRTIfNeeded; } -void AccessAnalysis::buildDependenceSets() { +static bool isInvariant(Value *V, const Loop *TheLoop, ScalarEvolution *SE) { + if (TheLoop->isLoopInvariant(V)) + return true; + if (!SE->isSCEVable(V->getType())) + return false; + const SCEV *S = SE->getSCEV(V); + return SE->isLoopInvariant(S, TheLoop); +} + +void AccessAnalysis::buildDependenceSets(const MemoryDepChecker &DepChecker) { // We process the set twice: first we process read-write pointers, last we // process read-only pointers. This allows us to skip dependence tests for // read-only pointers. @@ -1602,7 +1611,31 @@ void AccessAnalysis::buildDependenceSets() { // this is a read only check other writes for conflicts (but only if // there is no other write to the ptr - this is an optimization to // catch "a[i] = a[i] + " without having to do a dependence check). 
- if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) { + // + // If there are multiple writes into the same pointer we need to make + // sure that there are no cross-iteration dependencies between those + // writes to avoid the following scenario: + // + // code: + // if (RT_COND0) *p = x; + // if (RT_COND1) *p = y; + // + // execution: + // Iter0 | Iter1 + // no store | *p = 2 + // *p = 1 | no store + // + // Scalar loop would leave `*p == 2`, yet two vectorized scatter's + // would result in `*p == 1` which is wrong. + // + // NOTE: Known invariant stores are handled separately in both this + // file and LoopVectorizationLegality to support the case when + // reduction wasn't completely transformed into SSA form. + bool MultipleNonInvariantStoresToPtrExist = + DepChecker.getOrderForAccess(Ptr, true).size() > 1 && + !::isInvariant(Ptr, TheLoop, PSE.getSE()); + if ((IsWrite || IsReadOnlyPtr) && + (AliasSetHasWrite || MultipleNonInvariantStoresToPtrExist)) { CheckDeps.push_back(Access); IsRTCheckAnalysisNeeded = true; } @@ -2775,14 +2808,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, // If we write (or read-write) to a single destination and there are no other // reads in this loop then is it safe to vectorize: the vectorized stores // preserve ordering via replication or order-preserving @llvm.masked.scatter. - if (NumReadWrites == 1 && NumReads == 0) { + if (NumReadWrites == 1 && NumReads == 0 && Stores.size() == 1) { LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); return true; } // Build dependence sets and check whether we need a runtime pointer bounds // check. - Accesses.buildDependenceSets(); + Accesses.buildDependenceSets(getDepChecker()); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
@@ -2955,13 +2988,7 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) { } bool LoopAccessInfo::isInvariant(Value *V) const { - auto *SE = PSE->getSE(); - if (TheLoop->isLoopInvariant(V)) - return true; - if (!SE->isSCEVable(V->getType())) - return false; - const SCEV *S = SE->getSCEV(V); - return SE->isLoopInvariant(S, TheLoop); + return ::isInvariant(V, TheLoop, PSE->getSE()); } /// If \p Ptr is a GEP, which has a loop-variant operand, return that operand. diff --git a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll index 0a71d4a3c63c..bdfee12db528 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/multiple_stores_to_same_addr.ll @@ -6,8 +6,13 @@ define void @waw_no_mask(ptr %p, i64 %stride, i64 %n) { ; CHECK-LABEL: 'waw_no_mask' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -40,8 +45,13 @@ exit: define void @waw_mask(ptr %p, i64 %stride, i64 %n, i64 %n0, i64 %n1) { ; CHECK-LABEL: 'waw_mask' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -87,8 +97,13 @@ exit: define void @waw_no_mask_unknown_stride(ptr %p, i64 %stride, i64 %n) { ; CHECK-LABEL: 'waw_no_mask_unknown_stride' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -121,8 +136,13 @@ exit: define void @waw_mask_unknown_stride(ptr %p, i64 %stride, i64 %n0, i64 %n1) { ; CHECK-LABEL: 'waw_mask_unknown_stride' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -168,8 +188,13 @@ exit: define void @no_cross_iter_dependency(ptr %p, i8 %a, i64 %n, i64 %n0, i64 %n1) { ; CHECK-LABEL: 'no_cross_iter_dependency' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. 
Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -219,6 +244,10 @@ define void @const_stride(ptr %p, i64 %n, i64 %n0, i64 %n1) { ; CHECK-NEXT: header: ; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Forward: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -295,8 +324,13 @@ exit: define void @indirect_no_mask(ptr noalias %p, i64 %n) { ; CHECK-LABEL: 'indirect_no_mask' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: @@ -328,8 +362,13 @@ exit: define void @indirect_mask(ptr noalias %p, i64 %n, i64 %n0, i64 %n1) { ; CHECK-LABEL: 'indirect_mask' ; CHECK-NEXT: header: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: store i64 %iv, ptr %gep, align 4 -> +; CHECK-NEXT: store i64 %iv.next, ptr %gep, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index fabab210fb85..3ea068440ce2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -162,35 +162,19 @@ exit: define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2, ptr %p3, i64 %N) { ; CHECK-LABEL: @store_to_addr_generated_from_invariant_addr( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP4]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale 
x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P1:%.*]], <vscale x 2 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT1]], <vscale x 2 x ptr> align 8 [[TMP5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1:%.*]], i64 [[IV]] +; CHECK-NEXT: store ptr [[P0:%.*]], ptr [[ARRAYIDX11]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[P2:%.*]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P3:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: 
store i8 0, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -250,7 +234,7 @@ define i8 @mixed_gather_scatters(ptr %A, ptr %B, ptr %C) #0 { ; RVA23-NEXT: [[TMP14]] = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> [[TMP13]], <vscale x 2 x i8> [[VEC_PHI]], i32 [[TMP0]]) ; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]] ; RVA23-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 -; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23: middle.block: ; RVA23-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> [[TMP14]]) ; RVA23-NEXT: br label [[EXIT:%.*]] @@ -291,7 +275,7 @@ define i8 @mixed_gather_scatters(ptr %A, ptr %B, ptr %C) #0 { ; RVA23ZVL1024B-NEXT: [[TMP14]] = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i8> [[TMP13]], <vscale x 1 x i8> [[VEC_PHI]], i32 [[TMP0]]) ; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]] ; RVA23ZVL1024B-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 -; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23ZVL1024B: middle.block: ; RVA23ZVL1024B-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> [[TMP14]]) ; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]] |
