aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorPhilip Reames <preames@rivosinc.com>2025-07-22 15:50:17 -0700
committerGitHub <noreply@github.com>2025-07-22 15:50:17 -0700
commitdbd9eae95a5ad113d4559c07839306bf68f1925c (patch)
tree8e1f2e23009e73f5e6ce31f0d5c0dbd03fbe3211 /llvm/lib/CodeGen
parent5edb845fcaafd56110670fb2f6eeac22cbb0afff (diff)
downloadllvm-dbd9eae95a5ad113d4559c07839306bf68f1925c.zip
llvm-dbd9eae95a5ad113d4559c07839306bf68f1925c.tar.gz
llvm-dbd9eae95a5ad113d4559c07839306bf68f1925c.tar.bz2
[IA] Support vp.store in lowerInterleavedStore (#149605)
Follow up to 28417e64, and the whole line of work started with 4b81dc7. This change merges the handling for VPStore - currently in lowerInterleavedVPStore - into the existing dedicated routine used in the shuffle lowering path. This removes the last use of the dedicated lowerInterleavedVPStore and thus we can remove it. This contains two functional changes. First, like in 28417e64, merging support for vp.store exposes the strided store optimization for code using vp.store. Second, it seems the strided store case had a significant missed optimization. We were performing the strided store at the full unit strided store type width (i.e. LMUL) rather than reducing it to match the input width. This became obvious when I tried to use the mask created by the helper routine as it caused a type incompatibility. Normally, I'd try not to include an optimization in an API rework, but structuring the code to both be correct for vp.store and not optimize the existing case turned out to be more involved than seemed worthwhile. I could pull this part out as a pre-change, but it's a bit awkward on its own as it turns out to be somewhat of a half step on the possible optimization; the full optimization is complex with the old code structure. --------- Co-authored-by: Craig Topper <craig.topper@sifive.com>
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp38
1 files changed, 9 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1298aea..1b69188 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -507,46 +507,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
assert(NumStoredElements % Factor == 0 &&
"number of stored element should be a multiple of Factor");
+ Value *Mask = nullptr;
if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
unsigned LaneMaskLen = NumStoredElements / Factor;
- Value *LaneMask = getMask(VPStore->getMaskParam(), Factor,
- ElementCount::getFixed(LaneMaskLen));
- if (!LaneMask)
+ Mask = getMask(VPStore->getMaskParam(), Factor,
+ ElementCount::getFixed(LaneMaskLen));
+ if (!Mask)
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
<< "\n");
- IRBuilder<> Builder(VPStore);
- // We need to effectively de-interleave the shufflemask
- // because lowerInterleavedVPStore expects individual de-interleaved
- // values.
- SmallVector<Value *, 10> NewShuffles;
- SmallVector<int, 16> NewShuffleMask(LaneMaskLen);
- auto ShuffleMask = SVI->getShuffleMask();
-
- for (unsigned i = 0; i < Factor; i++) {
- for (unsigned j = 0; j < LaneMaskLen; j++)
- NewShuffleMask[j] = ShuffleMask[i + Factor * j];
-
- NewShuffles.push_back(Builder.CreateShuffleVector(
- SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask));
- }
-
- // Try to create target specific intrinsics to replace the vp.store and
- // shuffle.
- if (!TLI->lowerInterleavedVPStore(VPStore, LaneMask, NewShuffles))
- // We already created new shuffles.
- return true;
} else {
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
-
- // Try to create target specific intrinsics to replace the store and
- // shuffle.
- if (!TLI->lowerInterleavedStore(cast<StoreInst>(Store), SVI, Factor))
- return false;
}
+ // Try to create target specific intrinsics to replace the store and
+ // shuffle.
+ if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor))
+ return false;
+
// Already have a new target specific interleaved store. Erase the old store.
DeadInsts.insert(Store);
DeadInsts.insert(SVI);