diff options
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/InterleavedAccessPass.cpp | 73 |
1 files changed, 20 insertions, 53 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 9559983..d2b2edf 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -367,34 +367,23 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( bool BinOpShuffleChanged = replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load); + Value *Mask = nullptr; if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) { - Value *LaneMask = - getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); - if (!LaneMask) + Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); + if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n"); - - // Sometimes the number of Shuffles might be less than Factor, we have to - // fill the gaps with null. Also, lowerInterleavedVPLoad - // expects them to be sorted. - SmallVector<Value *, 4> ShuffleValues(Factor, nullptr); - for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices)) - ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx]; - if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues)) - // If Extracts is not empty, tryReplaceExtracts made changes earlier. - return !Extracts.empty() || BinOpShuffleChanged; } else { LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); - - // Try to create target specific intrinsics to replace the load and - // shuffles. - if (!TLI->lowerInterleavedLoad(cast<LoadInst>(Load), Shuffles, Indices, - Factor)) - // If Extracts is not empty, tryReplaceExtracts made changes earlier. - return !Extracts.empty() || BinOpShuffleChanged; } + // Try to create target specific intrinsics to replace the load and + // shuffles. + if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles, + Indices, Factor)) + // If Extracts is not empty, tryReplaceExtracts made changes earlier. + return !Extracts.empty() || BinOpShuffleChanged; + DeadInsts.insert_range(Shuffles); DeadInsts.insert(Load); @@ -618,29 +607,13 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); assert(Factor && "unexpected deinterleave intrinsic"); - SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr); - Value *LastFactor = nullptr; - for (auto *User : DI->users()) { - auto *Extract = dyn_cast<ExtractValueInst>(User); - if (!Extract || Extract->getNumIndices() != 1) - return false; - unsigned Idx = Extract->getIndices()[0]; - if (DeinterleaveValues[Idx]) - return false; - DeinterleaveValues[Idx] = Extract; - LastFactor = Extract; - } - - if (!LastFactor) - return false; - Value *Mask = nullptr; if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) { if (VPLoad->getIntrinsicID() != Intrinsic::vp_load) return false; // Check mask operand. Handle both all-true/false and interleaved mask. Value *WideMask = VPLoad->getOperand(1); - Mask = getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType())); + Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); if (!Mask) return false; @@ -657,12 +630,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( // Try and match this with target specific intrinsics. if (!TLI->lowerDeinterleaveIntrinsicToLoad(cast<Instruction>(LoadedVal), Mask, - DeinterleaveValues)) + DI)) return false; - for (Value *V : DeinterleaveValues) - if (V) - DeadInsts.insert(cast<Instruction>(V)); DeadInsts.insert(DI); // We now have a target-specific load, so delete the old one. DeadInsts.insert(cast<Instruction>(LoadedVal)); @@ -681,23 +651,19 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID()); assert(Factor && "unexpected interleave intrinsic"); + Value *Mask = nullptr; if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) { if (VPStore->getIntrinsicID() != Intrinsic::vp_store) return false; Value *WideMask = VPStore->getOperand(2); - Value *Mask = getMask(WideMask, Factor, - cast<VectorType>(InterleaveValues[0]->getType())); + Mask = getMask(WideMask, Factor, + cast<VectorType>(InterleaveValues[0]->getType())); if (!Mask) return false; LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic " << *II << " and factor = " << Factor << "\n"); - - // Since lowerInterleavedStore expects Shuffle and StoreInst, use special - // TLI function to emit target-specific interleaved instruction. - if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues)) - return false; } else { auto *SI = cast<StoreInst>(StoredBy); if (!SI->isSimple()) @@ -705,12 +671,13 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II << " and factor = " << Factor << "\n"); - - // Try and match this with target specific intrinsics. - if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues)) - return false; } + // Try and match this with target specific intrinsics. + if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask, + InterleaveValues)) + return false; + // We now have a target-specific store, so delete the old one. DeadInsts.insert(cast<Instruction>(StoredBy)); DeadInsts.insert(II); |