diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-05-03 11:53:55 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-05-03 12:17:18 +0100 |
commit | fc7e74e879f37301edd9450d3bbf0fec620338a6 (patch) | |
tree | 05560350312aa02145620a10df7cfd50b989c9fe /llvm/lib | |
parent | bcdbd0bf50a3845130c5db9e3284f056233f12d7 (diff) | |
download | llvm-fc7e74e879f37301edd9450d3bbf0fec620338a6.zip llvm-fc7e74e879f37301edd9450d3bbf0fec620338a6.tar.gz llvm-fc7e74e879f37301edd9450d3bbf0fec620338a6.tar.bz2 |
[CostModel][X86] getCastInstrCost - improve CostKind adjustment when splitting src/dst types
Noticed in #90883 review - for non-Throughput costs, we weren't applying the split count to the '0 or 1' cost value.
This still doesn't work well, as many of the type legalizations are hidden so we don't have the split count; really we need to move to a CostKindCosts-based cost table, but that's going to be a lot of work :/
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 27 |
1 files changed, 14 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index aac3557..6c174f1 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2121,10 +2121,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, assert(ISD && "Invalid opcode"); // TODO: Allow non-throughput costs that aren't binary. - auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost { + auto AdjustCost = [&CostKind](InstructionCost Cost, + InstructionCost N = 1) -> InstructionCost { if (CostKind != TTI::TCK_RecipThroughput) - return Cost == 0 ? 0 : 1; - return Cost; + return Cost == 0 ? 0 : N; + return Cost * N; }; // The cost tables include both specific, custom (non-legal) src/dst type @@ -3004,53 +3005,53 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (ST->hasBWI()) if (const auto *Entry = ConvertCostTableLookup( AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasDQI()) if (const auto *Entry = ConvertCostTableLookup( AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasAVX512()) if (const auto *Entry = ConvertCostTableLookup( AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); } if (ST->hasBWI()) if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if 
(ST->hasDQI()) if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasAVX512()) if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasAVX2()) if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasAVX()) if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasSSE41()) if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); if (ST->hasSSE2()) if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + return AdjustCost(Entry->Cost, std::max(LTSrc.first, LTDest.first)); // Fallback, for i8/i16 sitofp/uitofp cases we need to extend to i32 for // sitofp. |