aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
author NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-10 19:25:56 +0900
committer NAKAMURA Takumi <geek4civic@gmail.com> 2025-01-10 19:25:56 +0900
commit 63f5dc16d6bfca0512fb034052b41d13c3751e20 (patch)
tree e70266be1fda941e0974e71e3d2c1cf080081311 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent 9e5734688ed3d5f6b3fb76a26b3d90a736d60781 (diff)
parent 397ac44f623f891d8f05d6673a95984ac0a26671 (diff)
download llvm-users/chapuni/cov/single/unify.zip
llvm-users/chapuni/cov/single/unify.tar.gz
llvm-users/chapuni/cov/single/unify.tar.bz2
Merge branch 'main' into users/chapuni/cov/single/unify [branch: users/chapuni/cov/single/unify]
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp 10
1 files changed, 10 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7a7554c..c19bcfc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1650,6 +1650,13 @@ InstructionCost X86TTIImpl::getShuffleCost(
return MatchingTypes ? TTI::TCC_Free : SubLT.first;
}
+ // Attempt to match MOVSS (Idx == 0) or INSERTPS pattern. This will have
+ // been matched by improveShuffleKindFromMask as a SK_InsertSubvector of
+ // v1f32 (legalised to f32) into a v4f32.
+ if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
+ SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
+ return 1;
+
// If the insertion isn't aligned, treat it like a 2-op shuffle.
Kind = TTI::SK_PermuteTwoSrc;
}
@@ -4797,9 +4804,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
MVT MScalarTy = LT.second.getScalarType();
auto IsCheapPInsrPExtrInsertPS = [&]() {
// Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
+ // Inserting f32 into index0 is just movss.
// Also, assume insertps is relatively cheap on all >= SSE41 targets.
return (MScalarTy == MVT::i16 && ST->hasSSE2()) ||
(MScalarTy.isInteger() && ST->hasSSE41()) ||
+ (MScalarTy == MVT::f32 && ST->hasSSE1() && Index == 0 &&
+ Opcode == Instruction::InsertElement) ||
(MScalarTy == MVT::f32 && ST->hasSSE41() &&
Opcode == Instruction::InsertElement);
};