diff options
author | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:49:54 +0900 |
---|---|---|
committer | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:49:54 +0900 |
commit | e2810c9a248f4c7fbfae84bb32b6f7e01027458b (patch) | |
tree | ae0b02a8491b969a1cee94ea16ffe42c559143c5 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | fa04eb4af95c1ca7377279728cb004bcd2324d01 (diff) | |
parent | bdcf47e4bcb92889665825654bb80a8bbe30379e (diff) | |
download | llvm-users/chapuni/cov/single/switch.zip llvm-users/chapuni/cov/single/switch.tar.gz llvm-users/chapuni/cov/single/switch.tar.bz2 |
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/switch (branch: users/chapuni/cov/single/switch)
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 808f48e..c19bcfc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1650,6 +1650,13 @@ InstructionCost X86TTIImpl::getShuffleCost( return MatchingTypes ? TTI::TCC_Free : SubLT.first; } + // Attempt to match MOVSS (Idx == 0) or INSERTPS pattern. This will have + // been matched by improveShuffleKindFromMask as a SK_InsertSubvector of + // v1f32 (legalised to f32) into a v4f32. + if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 && + SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41())) + return 1; + // If the insertion isn't aligned, treat it like a 2-op shuffle. Kind = TTI::SK_PermuteTwoSrc; } @@ -1698,8 +1705,7 @@ InstructionCost X86TTIImpl::getShuffleCost( // We are going to permute multiple sources and the result will be in multiple // destinations. Providing an accurate cost only for splits where the element // type remains the same. - if ((Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_PermuteTwoSrc) && - LT.first != 1) { + if (LT.first != 1) { MVT LegalVT = LT.second; if (LegalVT.isVector() && LegalVT.getVectorElementType().getSizeInBits() == @@ -2227,9 +2233,18 @@ InstructionCost X86TTIImpl::getShuffleCost( { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps }; - if (ST->hasSSE1()) + if (ST->hasSSE1()) { + if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) { + // SHUFPS: both pairs must come from the same source register. 
+ auto MatchSHUFPS = [](int X, int Y) { + return X < 0 || Y < 0 || ((X & 4) == (Y & 4)); + }; + if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3])) + return 1; + } if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; + } return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp); } @@ -4789,9 +4804,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, MVT MScalarTy = LT.second.getScalarType(); auto IsCheapPInsrPExtrInsertPS = [&]() { // Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets. + // Inserting f32 into index0 is just movss. // Also, assume insertps is relatively cheap on all >= SSE41 targets. return (MScalarTy == MVT::i16 && ST->hasSSE2()) || (MScalarTy.isInteger() && ST->hasSSE41()) || + (MScalarTy == MVT::f32 && ST->hasSSE1() && Index == 0 && + Opcode == Instruction::InsertElement) || (MScalarTy == MVT::f32 && ST->hasSSE41() && Opcode == Instruction::InsertElement); }; |