diff options
author | vaibhav <73255802+mrdaybird@users.noreply.github.com> | 2025-05-08 01:32:21 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-07 21:02:21 +0100 |
commit | 384a5b00a7c8fffa72f7fe7021863d00da842a19 (patch) | |
tree | 7157ec403da61711578cbb951c1f8be44150975c /llvm/lib/Analysis/LoopAccessAnalysis.cpp | |
parent | 1a7cd92c8607bbad5c212f474a1e46043a8016cd (diff) | |
download | llvm-384a5b00a7c8fffa72f7fe7021863d00da842a19.zip llvm-384a5b00a7c8fffa72f7fe7021863d00da842a19.tar.gz llvm-384a5b00a7c8fffa72f7fe7021863d00da842a19.tar.bz2 |
[LAA] Use MaxStride instead of CommonStride to calculate MaxVF (#98142)
Currently, we bail out of the MaxVF calculation if the source and sink strides are not the same.
In that case we would have to depend on runtime checks, which are not yet implemented.
Instead, we can use MaxStride (the maximum of the source and sink strides) to conservatively compute an upper bound.
This handles cases like the following:
```c
// Reproducer for accesses with different strides: inside the inner loop the
// store index advances by one element per iteration of j while the load index
// advances by four, so the two accesses do not share a common stride.
#define LEN 256 * 256
// Single array that is both read and written by the loop nest below.
float a[LEN];
void gather() {
for (int i = 0; i < LEN - 1024 - 255; i++) {
// The pragmas ask clang not to interleave or unroll the loop that follows.
#pragma clang loop interleave(disable)
#pragma clang loop unroll(disable)
for (int j = 0; j < 256; j++)
// Store to a[i + j + 1024] (step 1 in j); load from a[j * 4 + i] (step 4 in j).
a[i + j + 1024] += a[j * 4 + i];
}
}
```
---------
Co-authored-by: Florian Hahn <flo@fhahn.com>
Diffstat (limited to 'llvm/lib/Analysis/LoopAccessAnalysis.cpp')
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7ec9bdb..f222a99 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2148,10 +2148,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, "different type sizes\n"); return Dependence::Unknown; } - - if (!CommonStride) - return Dependence::Unknown; - // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); @@ -2162,7 +2158,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // It's not vectorizable if the distance is smaller than the minimum distance // needed for a vectroized/unrolled version. Vectorizing one iteration in - // front needs CommonStride. Vectorizing the last iteration needs TypeByteSize + // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize. // (No need to plus the last gap distance). // // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. @@ -2186,11 +2182,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. + // + // We use MaxStride (maximum of src and sink strides) to get a conservative + // lower bound on the MinDistanceNeeded in case of different strides. // We know that Dist is positive, but it may not be constant. Use the signed // minimum for computations below, as this ensures we compute the closest // possible dependence distance. 
- uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1) + TypeByteSize; + uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) { if (!ConstDist) { // For non-constant distances, we checked the lower bound of the @@ -2236,7 +2235,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; - uint64_t MaxVF = MinDepDistBytes / *CommonStride; + uint64_t MaxVF = MinDepDistBytes / MaxStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); |