diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86PassRegistry.def | 44 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 131 |
4 files changed, 122 insertions, 95 deletions
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8213e51..d7671ed 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -4803,7 +4803,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) { getStreamer().initSections(false, getSTI()); Section = getStreamer().getCurrentSectionOnly(); } - if (Section->useCodeAlign()) + if (getContext().getAsmInfo()->useCodeAlign(*Section)) getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0); else getStreamer().emitValueToAlignment(Align(2), 0, 1, 0); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index e213923..7f9d474 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -388,36 +388,6 @@ static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) { return false; } -/// Check if the instruction to be emitted is right after any data. -static bool -isRightAfterData(MCFragment *CurrentFragment, - const std::pair<MCFragment *, size_t> &PrevInstPosition) { - MCFragment *F = CurrentFragment; - // Since data is always emitted into a DataFragment, our check strategy is - // simple here. - // - If the fragment is a DataFragment - // - If it's empty (section start or data after align), return false. - // - If it's not the fragment where the previous instruction is, - // returns true. - // - If it's the fragment holding the previous instruction but its - // size changed since the previous instruction was emitted into - // it, returns true. - // - Otherwise returns false. - // - If the fragment is not a DataFragment, returns false. - if (F->getKind() == MCFragment::FT_Data) - return F->getFixedSize() && (F != PrevInstPosition.first || - F->getFixedSize() != PrevInstPosition.second); - - return false; -} - -/// \returns the fragment size if it has instructions, otherwise returns 0. -static size_t getSizeForInstFragment(const MCFragment *F) { - if (!F || !F->hasInstructions()) - return 0; - return F->getSize(); -} - /// Return true if we can insert NOP or prefixes automatically before the /// the instruction to be emitted. bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { @@ -441,9 +411,11 @@ bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { // semantic. return false; - if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) - // If this instruction follows any data, there is no clear - // instruction boundary, inserting a nop/prefix would change semantic. + // If this instruction follows any data, there is no clear instruction + // boundary, inserting a nop/prefix would change semantic. + auto Offset = OS.getCurFragSize(); + if (Offset && (OS.getCurrentFragment() != PrevInstPosition.first || + Offset != PrevInstPosition.second)) return false; return true; @@ -552,7 +524,7 @@ void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, // Update PrevInstOpcode here, canPadInst() reads that. MCFragment *CF = OS.getCurrentFragment(); PrevInstOpcode = Inst.getOpcode(); - PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); + PrevInstPosition = std::make_pair(CF, OS.getCurFragSize()); if (!canPadBranches(OS)) return; diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def index 620526ff..3f2a433 100644 --- a/llvm/lib/Target/X86/X86PassRegistry.def +++ b/llvm/lib/Target/X86/X86PassRegistry.def @@ -12,8 +12,52 @@ // NOTE: NO INCLUDE GUARD DESIRED! +#ifndef DUMMY_FUNCTION_PASS +#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS) +#endif +DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this)) +DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this)) +DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction()) +DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass()) +#undef DUMMY_FUNCTION_PASS + #ifndef MACHINE_FUNCTION_PASS #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) #endif MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this)) #undef MACHINE_FUNCTION_PASS + +#ifndef DUMMY_MACHINE_FUNCTION_PASS +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME) +#endif +DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-SFB", X86AvoidSFBPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-cf-opt", X86CallFrameOptimization()) +DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS()) +DUMMY_MACHINE_FUNCTION_PASS("x86-compress-evex", CompressEVEXPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment()) +DUMMY_MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpander()) +DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix()) +DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-LEAs", FixupLEAPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-setcc", X86FixupSetCCPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-vector-constants", X86FixupVectorConstantsPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopy()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-optimize-LEAs", X86OptimizeLEAPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-pseudo", X86ExpandPseudo()) +DUMMY_MACHINE_FUNCTION_PASS("x86-return-thunks", X86ReturnThunks()) +DUMMY_MACHINE_FUNCTION_PASS("x86-seses", X86SpeculativeExecutionSideEffectSuppression()) +DUMMY_MACHINE_FUNCTION_PASS("x86-slh", X86SpeculativeLoadHardeningPass()) +DUMMY_MACHINE_FUNCTION_PASS("x86-suppress-apx-for-relocation", X86SuppressAPXForRelocationPass()) +DUMMY_MACHINE_FUNCTION_PASS("tile-pre-config", X86PreTileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("tileconfig", X86TileConfig()) +DUMMY_MACHINE_FUNCTION_PASS("x86-wineh-unwindv2", X86WinEHUnwindV2()) +DUMMY_MACHINE_FUNCTION_PASS("x86argumentstackrebase", X86ArgumentStackSlotPass()) +#undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 37a7b37..90791fc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1838,14 +1838,15 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX512BWShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb + { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb - { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw - { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw + { TTI::SK_Reverse, MVT::v32i16, { 2, 6, 2, 4 } }, // vpermw + { TTI::SK_Reverse, MVT::v32f16, { 2, 6, 2, 4 } }, // vpermw { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw - { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2 + { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, { 2, 9, 2, 3 } }, // pshufb + vshufi64x2 { TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw { TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw @@ -1874,18 +1875,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX512ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd - {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss - {TTI::SK_Broadcast, MVT::v8i64, { 1, 1, 1, 1 } }, // vpbroadcastq - {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd - {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb - - {TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd - {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps - {TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq - {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd + {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd + {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd + {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss + {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss + {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq + {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq + {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd + {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd + {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb + {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb + + {TTI::SK_Reverse, MVT::v8f64, { 1, 5, 2, 3 } }, // vpermpd + {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 2, 3 } }, // vpermps + {TTI::SK_Reverse, MVT::v8i64, { 1, 5, 2, 3 } }, // vpermq + {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 2, 3 } }, // vpermd {TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca {TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca {TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca @@ -1973,21 +1981,24 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX2ShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd - { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps - { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq - { TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd - { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw - { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb - - { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd - { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps - { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq - { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd - { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb - { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb - { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb + { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd + { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps + { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq + { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd + { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb + { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb + + { TTI::SK_Reverse, MVT::v4f64, { 1, 6, 1, 2 } }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, { 2, 7, 2, 4 } }, // vpermps + { TTI::SK_Reverse, MVT::v4i64, { 1, 6, 1, 2 } }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, { 2, 7, 2, 4 } }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v16f16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb { TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb { TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb @@ -2077,23 +2088,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry AVX1ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128 - {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128 - {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128 - - {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd - {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps - {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd + {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps + {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd + {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps + {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128 + {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128 + {TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128 + + {TTI::SK_Reverse, MVT::v4f64, {2,6,2,2}}, // vperm2f128 + vpermilpd + {TTI::SK_Reverse, MVT::v8f32, {2,7,2,4}}, // vperm2f128 + vpermilps + {TTI::SK_Reverse, MVT::v4i64, {2,6,2,2}}, // vperm2f128 + vpermilpd + {TTI::SK_Reverse, MVT::v8i32, {2,7,2,4}}, // vperm2f128 + vpermilps + {TTI::SK_Reverse, MVT::v16i16, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 - {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Reverse, MVT::v16f16, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 - {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb + {TTI::SK_Reverse, MVT::v32i8, {2,9,5,5}}, // vextractf128 + 2*pshufb // + vinsertf128 {TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd @@ -2156,13 +2167,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * *KindCost; static const CostKindTblEntry SSSE3ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb + {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 2, 2}}, // pshufb + {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb + {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb - {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb - {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb + {TTI::SK_Reverse, MVT::v8i16, {1, 2, 1, 2}}, // pshufb + {TTI::SK_Reverse, MVT::v8f16, {1, 2, 1, 2}}, // pshufb + {TTI::SK_Reverse, MVT::v16i8, {1, 2, 1, 2}}, // pshufb {TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por {TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por @@ -2192,16 +2203,16 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd {TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd - {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd - {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd - {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8i16, {1, 2, 2, 2}}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8f16, {1, 2, 2, 2}}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v16i8, {2, 3, 3, 4}}, // unpck + pshuflw + pshufd {TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd {TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd {TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd - {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd - {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd - {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw + {TTI::SK_Reverse, MVT::v8i16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v8f16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v16i8, {5, 6,11,11}}, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus {TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd |