aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrei Elovikov <andrei.elovikov@sifive.com> 2026-03-11 14:47:09 -0700
committer Andrei Elovikov <andrei.elovikov@sifive.com> 2026-03-23 15:19:09 -0700
commit 71a0b9fda2f07910efbcdb633932ab2d23bb609f (patch)
tree eefcc68046a296346829a147a97afd5ab0af0a7a
parent 6f66cdb0fcdc4f763d834a56a404afbf63fe8384 (diff)
download llvm-users/eas/laa-read-write-rt-stride.tar.gz
llvm-users/eas/laa-read-write-rt-stride.tar.bz2
llvm-users/eas/laa-read-write-rt-stride.zip
[LAA] Allow vectorizing `A[NonZeroNonConstantStride*I] += 1` (users/eas/laa-read-write-rt-stride)
This patch only does that when we can statically prove that the non-constant stride is non-zero and the resulting index doesn't overflow. That can later be extended to introduce a run-time check when this is not provable at compile time. My main motivation for this is to move unit-strideness speculation to a VPlan-based transformation. However, that cannot be done right now because such speculation sometimes affects legality, and we simply avoid vectorizing the loop if it's not done. As such, we need to extend LAA to properly support dependence analysis/RT checks for strided accesses without speculating that the stride is one. This PR is expected to be the first one on that journey.
-rw-r--r-- llvm/lib/Analysis/LoopAccessAnalysis.cpp 25
-rw-r--r-- llvm/test/Analysis/LoopAccessAnalysis/single_strided_readwrite.ll 95
2 files changed, 102 insertions, 18 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 63a3878ce7f5..ae1f0d94448d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2754,14 +2754,27 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// allows us to vectorize expressions such as A[i] += x; Because the address
// of A[i] is a read-write pointer. This only works if the index of A[i] is
// strictly monotonic, which we approximate (conservatively) via
- // getPtrStride. If the address is unknown (e.g. A[B[i]]) then we may read,
- // modify, and write overlapping words. Note that "zero stride" is unsafe
- // and is being handled below.
+ // getPtrStrideScev. If the address is unknown (e.g. A[B[i]]) then we may
+ // read, modify, and write overlapping words. Note that "zero stride" is
+ // unsafe and is being handled below.
bool IsReadOnlyPtr = false;
Type *AccessTy = getLoadStoreType(LD);
- if (Seen.insert({Ptr, AccessTy}).second ||
- !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
- true)) {
+ auto IsSafeReadWrite = [&] {
+ const SCEV *Stride = getPtrStrideScev(*PSE, AccessTy, Ptr, TheLoop, *DT,
+ SymbolicStrides, false, true);
+ if (!Stride)
+ return false;
+
+ // Statically known invariant address, preserve old behavior for the
+ // LoopDistributePass. For LoopVectorizer we will detect a load from the
+ // uniform store pointer and bail out further below.
+ if (Stride->isZero())
+ return true;
+
+ auto *SE = PSE->getSE();
+ return SE->isKnownPositive(SE->getAbsExpr(Stride, false));
+ };
+ if (Seen.insert({Ptr, AccessTy}).second || !IsSafeReadWrite()) {
++NumReads;
IsReadOnlyPtr = true;
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/single_strided_readwrite.ll b/llvm/test/Analysis/LoopAccessAnalysis/single_strided_readwrite.ll
index 390e694c0b34..43133aeb2053 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/single_strided_readwrite.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/single_strided_readwrite.ll
@@ -4,13 +4,8 @@
define void @known_safe(ptr %p, i8 %a) {
; CHECK-LABEL: 'known_safe'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -44,13 +39,8 @@ exit:
define void @known_safe_byte_gep(ptr %p, i8 %a) {
; CHECK-LABEL: 'known_safe_byte_gep'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -241,3 +231,84 @@ header:
exit:
ret void
}
+
+; Not too important to actually support for now, the priority is to handle the
+; one below correctly.
+define void @known_safe_varying_stride(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_safe_varying_stride'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %mul = mul nsw nuw i64 %iv.next, %iv.next
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %mul
+ %ld = load i64, ptr %gep
+ %add = add i64 %ld, %iv
+ store i64 %add, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsafe_varying_stride(ptr %p) {
+; CHECK-LABEL: 'unsafe_varying_stride'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %mul = mul nsw nuw i64 %iv.next, %iv.next
+
+ ; idx = (iv + 1)^2 - iv: 1, 3, 7, ...
+ %idx = sub nsw nuw i64 %mul, %iv
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ %ld = load i64, ptr %gep
+ %add = add i64 %ld, %iv
+ store i64 %add, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}