From dbd9eae95a5ad113d4559c07839306bf68f1925c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 22 Jul 2025 15:50:17 -0700 Subject: [IA] Support vp.store in lowerInterleavedStore (#149605) Follow up to 28417e64, and the whole line of work started with 4b81dc7. This change merges the handling for VPStore - currently in lowerInterleavedVPStore - into the existing dedicated routine used in the shuffle lowering path. This removes the last use of the dedicated lowerInterleavedVPStore and thus we can remove it. This contains two changes which are functional. First, like in 28417e64, merging support for vp.store exposes the strided store optimization for code using vp.store. Second, it seems the strided store case had a significant missed optimization. We were performing the strided store at the full unit strided store type width (i.e. LMUL) rather than reducing it to match the input width. This became obvious when I tried to use the mask created by the helper routine as it caused a type incompatibility. Normally, I'd try not to include an optimization in an API rework, but structuring the code to both be correct for vp.store and not optimize the existing case turned out to be more involved than seemed worthwhile. I could pull this part out as a pre-change, but it's a bit awkward on its own as it turns out to be somewhat of a half step on the possible optimization; the full optimization is complex with the old code structure. 
--------- Co-authored-by: Craig Topper --- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 38 +++++++----------------------- 1 file changed, 9 insertions(+), 29 deletions(-) (limited to 'llvm/lib/CodeGen') diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1298aea..1b69188 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -507,46 +507,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore( assert(NumStoredElements % Factor == 0 && "number of stored element should be a multiple of Factor"); + Value *Mask = nullptr; if (auto *VPStore = dyn_cast(Store)) { unsigned LaneMaskLen = NumStoredElements / Factor; - Value *LaneMask = getMask(VPStore->getMaskParam(), Factor, - ElementCount::getFixed(LaneMaskLen)); - if (!LaneMask) + Mask = getMask(VPStore->getMaskParam(), Factor, + ElementCount::getFixed(LaneMaskLen)); + if (!Mask) return false; LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store << "\n"); - IRBuilder<> Builder(VPStore); - // We need to effectively de-interleave the shufflemask - // because lowerInterleavedVPStore expects individual de-interleaved - // values. - SmallVector NewShuffles; - SmallVector NewShuffleMask(LaneMaskLen); - auto ShuffleMask = SVI->getShuffleMask(); - - for (unsigned i = 0; i < Factor; i++) { - for (unsigned j = 0; j < LaneMaskLen; j++) - NewShuffleMask[j] = ShuffleMask[i + Factor * j]; - - NewShuffles.push_back(Builder.CreateShuffleVector( - SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask)); - } - - // Try to create target specific intrinsics to replace the vp.store and - // shuffle. - if (!TLI->lowerInterleavedVPStore(VPStore, LaneMask, NewShuffles)) - // We already created new shuffles. - return true; } else { LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); - - // Try to create target specific intrinsics to replace the store and - // shuffle. 
- if (!TLI->lowerInterleavedStore(cast(Store), SVI, Factor)) - return false; } + // Try to create target specific intrinsics to replace the store and + // shuffle. + if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor)) + return false; + // Already have a new target specific interleaved store. Erase the old store. DeadInsts.insert(Store); DeadInsts.insert(SVI); -- cgit v1.1