aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp38
1 files changed, 33 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 197aae6..8729ed3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4922,11 +4922,36 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
return InstructionCost::getInvalid();
- if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ unsigned NumLoadStores = 1;
+ InstructionCost ShuffleCost = 0;
+ bool isInterleaveWithShuffle = false;
+ unsigned MaxSupportedFactor = TLI->getMaxSupportedInterleaveFactor();
+
+ auto *SubVecTy =
+ VectorType::get(VecVTy->getElementType(),
+ VecVTy->getElementCount().divideCoefficientBy(Factor));
+
+ if (TLI->isProfitableToInterleaveWithGatherScatter() &&
+ Opcode == Instruction::Store && (0 == Factor % MaxSupportedFactor) &&
+ Factor > MaxSupportedFactor) {
+ isInterleaveWithShuffle = true;
+ SmallVector<int, 16> Mask;
+ // preparing interleave Mask.
+ for (unsigned i = 0; i < VecVTy->getElementCount().getKnownMinValue() / 2;
+ i++) {
+ for (unsigned j = 0; j < 2; j++)
+ Mask.push_back(j * Factor + i);
+ }
+
+ NumLoadStores = Factor / MaxSupportedFactor;
+ ShuffleCost =
+ (Factor * getShuffleCost(TargetTransformInfo::SK_Splice, VecVTy, VecVTy,
+ Mask, CostKind, 0, SubVecTy));
+ }
+
+ if (!UseMaskForGaps &&
+ (Factor <= MaxSupportedFactor || isInterleaveWithShuffle)) {
unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
- auto *SubVecTy =
- VectorType::get(VecVTy->getElementType(),
- VecVTy->getElementCount().divideCoefficientBy(Factor));
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
@@ -4934,7 +4959,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
bool UseScalable;
if (MinElts % Factor == 0 &&
TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
- return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
+ return (Factor *
+ TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable) *
+ NumLoadStores) +
+ ShuffleCost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,