diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 49 |
1 files changed, 27 insertions, 22 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4f13a14..f026726 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { } } - return true; + EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType(); + return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 || + PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64; } unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { @@ -17155,7 +17157,7 @@ static Function *getStructuredStoreFunction(Module *M, unsigned Factor, /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 bool AArch64TargetLowering::lowerInterleavedLoad( - LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, + Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); @@ -17163,6 +17165,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad( assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); + auto *LI = dyn_cast<LoadInst>(Load); + if (!LI) + return false; + assert(!Mask && "Unexpected mask on a load"); + const DataLayout &DL = LI->getDataLayout(); VectorType *VTy = Shuffles[0]->getType(); @@ -17486,9 +17493,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, } bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( - Instruction *Load, Value *Mask, - ArrayRef<Value *> DeinterleavedValues) const { - unsigned Factor = DeinterleavedValues.size(); + Instruction *Load, Value *Mask, IntrinsicInst *DI) const { + const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); if (Factor != 2 && Factor != 4) { LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n"); return false; @@ -17498,9 +17504,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( return false; assert(!Mask && "Unexpected mask on a load\n"); - Value *FirstActive = *llvm::find_if(DeinterleavedValues, - [](Value *V) { return V != nullptr; }); - VectorType *VTy = cast<VectorType>(FirstActive->getType()); + VectorType *VTy = getDeinterleavedVectorType(DI); const DataLayout &DL = LI->getModule()->getDataLayout(); bool UseScalable; @@ -17528,6 +17532,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue()); Value *BaseAddr = LI->getPointerOperand(); + Value *Result = nullptr; if (NumLoads > 1) { // Create multiple legal small ldN. SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy)); @@ -17548,35 +17553,35 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( } LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump()); } - // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 - for (unsigned J = 0; J < Factor; ++J) { - if (DeinterleavedValues[J]) - DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]); - } + + // Merge the values from different factors. + Result = PoisonValue::get(DI->getType()); + for (unsigned J = 0; J < Factor; ++J) + Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J); } else { - Value *Result; if (UseScalable) Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN"); else Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN"); - // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 - for (unsigned I = 0; I < Factor; I++) { - if (DeinterleavedValues[I]) { - Value *NewExtract = Builder.CreateExtractValue(Result, I); - DeinterleavedValues[I]->replaceAllUsesWith(NewExtract); - } - } } + + // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 + DI->replaceAllUsesWith(Result); return true; } bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore( - StoreInst *SI, ArrayRef<Value *> InterleavedValues) const { + Instruction *Store, Value *Mask, + ArrayRef<Value *> InterleavedValues) const { unsigned Factor = InterleavedValues.size(); if (Factor != 2 && Factor != 4) { LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n"); return false; } + StoreInst *SI = dyn_cast<StoreInst>(Store); + if (!SI) + return false; + assert(!Mask && "Unexpected mask on plain store"); VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType()); const DataLayout &DL = SI->getModule()->getDataLayout(); |