diff options
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/InterleavedAccessPass.cpp | 153 |
1 files changed, 74 insertions, 79 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index d2b2edf..1b69188 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -268,17 +268,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( if (isa<ScalableVectorType>(Load->getType())) return false; - if (auto *LI = dyn_cast<LoadInst>(Load)) { - if (!LI->isSimple()) - return false; - } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) { - assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load); - // Require a constant mask. - if (!isa<ConstantVector>(VPLoad->getMaskParam())) - return false; - } else { - llvm_unreachable("unsupported load operation"); - } + if (auto *LI = dyn_cast<LoadInst>(Load); + LI && !LI->isSimple()) + return false; // Check if all users of this load are shufflevectors. If we encounter any // users that are extractelement instructions or binary operators, we save @@ -497,9 +489,6 @@ bool InterleavedAccessImpl::lowerInterleavedStore( StoredValue = SI->getValueOperand(); } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { assert(VPStore->getIntrinsicID() == Intrinsic::vp_store); - // Require a constant mask. - if (!isa<ConstantVector>(VPStore->getMaskParam())) - return false; StoredValue = VPStore->getArgOperand(0); } else { llvm_unreachable("unsupported store operation"); @@ -518,46 +507,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore( assert(NumStoredElements % Factor == 0 && "number of stored element should be a multiple of Factor"); + Value *Mask = nullptr; if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) { unsigned LaneMaskLen = NumStoredElements / Factor; - Value *LaneMask = getMask(VPStore->getMaskParam(), Factor, - ElementCount::getFixed(LaneMaskLen)); - if (!LaneMask) + Mask = getMask(VPStore->getMaskParam(), Factor, + ElementCount::getFixed(LaneMaskLen)); + if (!Mask) return false; LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store << "\n"); - IRBuilder<> Builder(VPStore); - // We need to effectively de-interleave the shufflemask - // because lowerInterleavedVPStore expects individual de-interleaved - // values. - SmallVector<Value *, 10> NewShuffles; - SmallVector<int, 16> NewShuffleMask(LaneMaskLen); - auto ShuffleMask = SVI->getShuffleMask(); - - for (unsigned i = 0; i < Factor; i++) { - for (unsigned j = 0; j < LaneMaskLen; j++) - NewShuffleMask[j] = ShuffleMask[i + Factor * j]; - - NewShuffles.push_back(Builder.CreateShuffleVector( - SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask)); - } - - // Try to create target specific intrinsics to replace the vp.store and - // shuffle. - if (!TLI->lowerInterleavedVPStore(VPStore, LaneMask, NewShuffles)) - // We already created new shuffles. - return true; } else { LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n"); - - // Try to create target specific intrinsics to replace the store and - // shuffle. - if (!TLI->lowerInterleavedStore(cast<StoreInst>(Store), SVI, Factor)) - return false; } + // Try to create target specific intrinsics to replace the store and + // shuffle. + if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor)) + return false; + // Already have a new target specific interleaved store. Erase the old store. DeadInsts.insert(Store); DeadInsts.insert(SVI); @@ -600,87 +569,113 @@ static Value *getMask(Value *WideMask, unsigned Factor, bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) { - Value *LoadedVal = DI->getOperand(0); - if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal)) + Instruction *LoadedVal = dyn_cast<Instruction>(DI->getOperand(0)); + if (!LoadedVal || !LoadedVal->hasOneUse()) + return false; + + auto *LI = dyn_cast<LoadInst>(LoadedVal); + auto *II = dyn_cast<IntrinsicInst>(LoadedVal); + if (!LI && !II) return false; const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); assert(Factor && "unexpected deinterleave intrinsic"); Value *Mask = nullptr; - if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) { - if (VPLoad->getIntrinsicID() != Intrinsic::vp_load) + if (LI) { + if (!LI->isSimple()) return false; + + LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI + << " and factor = " << Factor << "\n"); + } else { + assert(II); + // Check mask operand. Handle both all-true/false and interleaved mask. - Value *WideMask = VPLoad->getOperand(1); - Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); - if (!Mask) + Value *WideMask; + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::vp_load: + WideMask = II->getOperand(1); + break; + case Intrinsic::masked_load: + WideMask = II->getOperand(2); + break; + } - LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic " - << *DI << " and factor = " << Factor << "\n"); - } else { - auto *LI = cast<LoadInst>(LoadedVal); - if (!LI->isSimple()) + Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); + if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI - << " and factor = " << Factor << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave" + << " intrinsic " << *DI << " and factor = " + << Factor << "\n"); } // Try and match this with target specific intrinsics. - if (!TLI->lowerDeinterleaveIntrinsicToLoad(cast<Instruction>(LoadedVal), Mask, - DI)) + if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI)) return false; DeadInsts.insert(DI); // We now have a target-specific load, so delete the old one. - DeadInsts.insert(cast<Instruction>(LoadedVal)); + DeadInsts.insert(LoadedVal); return true; } bool InterleavedAccessImpl::lowerInterleaveIntrinsic( - IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) { - if (!II->hasOneUse()) + IntrinsicInst *IntII, SmallSetVector<Instruction *, 32> &DeadInsts) { + if (!IntII->hasOneUse()) return false; - Value *StoredBy = II->user_back(); - if (!isa<StoreInst, VPIntrinsic>(StoredBy)) + Instruction *StoredBy = dyn_cast<Instruction>(IntII->user_back()); + if (!StoredBy) + return false; + auto *SI = dyn_cast<StoreInst>(StoredBy); + auto *II = dyn_cast<IntrinsicInst>(StoredBy); + if (!SI && !II) return false; - SmallVector<Value *, 8> InterleaveValues(II->args()); - const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID()); + SmallVector<Value *, 8> InterleaveValues(IntII->args()); + const unsigned Factor = getInterleaveIntrinsicFactor(IntII->getIntrinsicID()); assert(Factor && "unexpected interleave intrinsic"); Value *Mask = nullptr; - if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) { - if (VPStore->getIntrinsicID() != Intrinsic::vp_store) + if (II) { + // Check mask operand. Handle both all-true/false and interleaved mask. + Value *WideMask; + switch (II->getIntrinsicID()) { + default: return false; - - Value *WideMask = VPStore->getOperand(2); + case Intrinsic::vp_store: + WideMask = II->getOperand(2); + break; + case Intrinsic::masked_store: + WideMask = II->getOperand(3); + break; + } Mask = getMask(WideMask, Factor, cast<VectorType>(InterleaveValues[0]->getType())); if (!Mask) return false; - LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic " - << *II << " and factor = " << Factor << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found a vp.store or masked.store with interleave" + << " intrinsic " << *IntII << " and factor = " + << Factor << "\n"); } else { - auto *SI = cast<StoreInst>(StoredBy); if (!SI->isSimple()) return false; - LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II - << " and factor = " << Factor << "\n"); + LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " + << *IntII << " and factor = " << Factor << "\n"); } // Try and match this with target specific intrinsics. - if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask, - InterleaveValues)) + if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues)) return false; // We now have a target-specific store, so delete the old one. - DeadInsts.insert(cast<Instruction>(StoredBy)); - DeadInsts.insert(II); + DeadInsts.insert(StoredBy); + DeadInsts.insert(IntII); return true; } |