aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp24
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 808f48e..c19bcfc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1650,6 +1650,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
return MatchingTypes ? TTI::TCC_Free : SubLT.first;
}
+ // Attempt to match MOVSS (Idx == 0) or INSERTPS pattern. This will have
+ // been matched by improveShuffleKindFromMask as a SK_InsertSubvector of
+ // v1f32 (legalised to f32) into a v4f32.
+ if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
+ SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
+ return 1;
+
// If the insertion isn't aligned, treat it like a 2-op shuffle.
Kind = TTI::SK_PermuteTwoSrc;
}
@@ -1698,8 +1705,7 @@ InstructionCost X86TTIImpl::getShuffleCost(
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
- if ((Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_PermuteTwoSrc) &&
- LT.first != 1) {
+ if (LT.first != 1) {
MVT LegalVT = LT.second;
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
@@ -2227,9 +2233,18 @@ InstructionCost X86TTIImpl::getShuffleCost(
{ TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
};
- if (ST->hasSSE1())
+ if (ST->hasSSE1()) {
+ if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
+ // SHUFPS: both pairs must come from the same source register.
+ auto MatchSHUFPS = [](int X, int Y) {
+ return X < 0 || Y < 0 || ((X & 4) == (Y & 4));
+ };
+ if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
+ return 1;
+ }
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
+ }
return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
}
@@ -4789,9 +4804,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
MVT MScalarTy = LT.second.getScalarType();
auto IsCheapPInsrPExtrInsertPS = [&]() {
// Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
+ // Inserting f32 into index0 is just movss.
// Also, assume insertps is relatively cheap on all >= SSE41 targets.
return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
(MScalarTy.isInteger() && ST->hasSSE41()) ||
+ (MScalarTy == MVT::f32 && ST->hasSSE1() && Index == 0 &&
+ Opcode == Instruction::InsertElement) ||
(MScalarTy == MVT::f32 && ST->hasSSE41() &&
Opcode == Instruction::InsertElement);
};