diff options
author | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-10 19:25:56 +0900 |
---|---|---|
committer | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-10 19:25:56 +0900 |
commit | 63f5dc16d6bfca0512fb034052b41d13c3751e20 (patch) | |
tree | e70266be1fda941e0974e71e3d2c1cf080081311 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | 9e5734688ed3d5f6b3fb76a26b3d90a736d60781 (diff) | |
parent | 397ac44f623f891d8f05d6673a95984ac0a26671 (diff) | |
download | llvm-users/chapuni/cov/single/unify.zip llvm-users/chapuni/cov/single/unify.tar.gz llvm-users/chapuni/cov/single/unify.tar.bz2 |
Merge branch 'main' into users/chapuni/cov/single/unify
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7a7554c..c19bcfc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1650,6 +1650,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
         return MatchingTypes ? TTI::TCC_Free : SubLT.first;
       }
 
+      // Attempt to match MOVSS (Idx == 0) or INSERTPS pattern. This will have
+      // been matched by improveShuffleKindFromMask as a SK_InsertSubvector of
+      // v1f32 (legalised to f32) into a v4f32.
+      if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
+          SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
+        return 1;
+
       // If the insertion isn't aligned, treat it like a 2-op shuffle.
       Kind = TTI::SK_PermuteTwoSrc;
     }
@@ -4797,9 +4804,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     MVT MScalarTy = LT.second.getScalarType();
     auto IsCheapPInsrPExtrInsertPS = [&]() {
       // Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
+      // Inserting f32 into index0 is just movss.
       // Also, assume insertps is relatively cheap on all >= SSE41 targets.
       return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
              (MScalarTy.isInteger() && ST->hasSSE41()) ||
+             (MScalarTy == MVT::f32 && ST->hasSSE1() && Index == 0 &&
+              Opcode == Instruction::InsertElement) ||
              (MScalarTy == MVT::f32 && ST->hasSSE41() &&
               Opcode == Instruction::InsertElement);
     };