diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 197aae6..8729ed3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4922,11 +4922,36 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps)) return InstructionCost::getInvalid(); - if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { + unsigned NumLoadStores = 1; + InstructionCost ShuffleCost = 0; + bool isInterleaveWithShuffle = false; + unsigned MaxSupportedFactor = TLI->getMaxSupportedInterleaveFactor(); + + auto *SubVecTy = + VectorType::get(VecVTy->getElementType(), + VecVTy->getElementCount().divideCoefficientBy(Factor)); + + if (TLI->isProfitableToInterleaveWithGatherScatter() && + Opcode == Instruction::Store && (0 == Factor % MaxSupportedFactor) && + Factor > MaxSupportedFactor) { + isInterleaveWithShuffle = true; + SmallVector<int, 16> Mask; + // preparing interleave Mask. + for (unsigned i = 0; i < VecVTy->getElementCount().getKnownMinValue() / 2; + i++) { + for (unsigned j = 0; j < 2; j++) + Mask.push_back(j * Factor + i); + } + + NumLoadStores = Factor / MaxSupportedFactor; + ShuffleCost = + (Factor * getShuffleCost(TargetTransformInfo::SK_Splice, VecVTy, VecVTy, + Mask, CostKind, 0, SubVecTy)); + } + + if (!UseMaskForGaps && + (Factor <= MaxSupportedFactor || isInterleaveWithShuffle)) { unsigned MinElts = VecVTy->getElementCount().getKnownMinValue(); - auto *SubVecTy = - VectorType::get(VecVTy->getElementType(), - VecVTy->getElementCount().divideCoefficientBy(Factor)); // ldN/stN only support legal vector types of size 64 or 128 in bits. // Accesses having vector types that are a multiple of 128 bits can be @@ -4934,7 +4959,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost( bool UseScalable; if (MinElts % Factor == 0 && TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable)) - return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable); + return (Factor * + TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable) * + NumLoadStores) + + ShuffleCost; } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
