diff options
Diffstat (limited to 'llvm/lib/Target/Hexagon')
20 files changed, 2678 insertions, 94 deletions
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index fb0928b8..ede8463 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion", ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, ExtensionHVXV73, ExtensionHVXV75]>; +def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion", + "Hexagon::ArchEnum::V81", "Hexagon HVX instructions", + [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, + ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>; + def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", @@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">, AssemblerPredicate<(all_of ExtensionHVXV75)>; def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">, AssemblerPredicate<(all_of ExtensionHVXV79)>; +def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">, + AssemblerPredicate<(all_of ExtensionHVXV81)>; def UseAudio : Predicate<"HST->useAudioOps()">, AssemblerPredicate<(all_of ExtensionAudio)>; def UseZReg : Predicate<"HST->useZRegOps()">, @@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79, ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; +def : Proc<"hexagonv81", HexagonModelV81, + [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73, + ArchV75, ArchV79, ArchV81, + FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; // Need to update the correct features for tiny core. 
// Disable NewValueJumps since the packetizer is unable to handle a packet with diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 8984534..9bf4034 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -29,7 +29,8 @@ enum class ArchEnum { V71, V73, V75, - V79 + V79, + V81 }; inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { @@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) { .Case("hexagonv73", Hexagon::ArchEnum::V73) .Case("hexagonv75", Hexagon::ArchEnum::V75) .Case("hexagonv79", Hexagon::ArchEnum::V79) + .Case("hexagonv81", Hexagon::ArchEnum::V81) .Default(std::nullopt); } } // namespace Hexagon diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td index 8ec1d93..f623fd0 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.td +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td @@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", "Hexagon::ArchEnum::V def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>; def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">; def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>; +def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">; +def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index 93696e0..f4e36fa7 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -7222,3 +7222,595 @@ class DepHVXItinV79 { [Hex_FWD, Hex_FWD, HVX_FWD]> ]; } + +class DepHVXItinV81 { + list<InstrItinData> DepHVXItinV81_list = [ + InstrItinData <tc_0390c1ca, 
/*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, 
HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, 
HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_37820f4c, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3904b926, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_3ce09744, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, 
CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_4942646a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_531b383c, /*SLOT0123*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_540c3da3, /*SLOT0,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData 
<tc_56e64202, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_649072c2, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, 
SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7177e272, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_72e2b393, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_73efe966, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_7417e785, /*SLOT0123,VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_8772086c, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, 
HVX_FWD]>, + + InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_946013d8, /*SLOT0123,VP*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, 
[CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a19b9305, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ab23f776, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac4046bc, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b091f1c6, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], 
[9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c127de3a, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_c4edf264, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_cda936da, 
/*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_dcca380f, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_e2fdd6e6, /*SLOT0123*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e699ae41, /*SLOT01,ZW*/ + [InstrStage<1, [SLOT0, SLOT1], 0>, + InstrStage<1, 
[CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData <tc_f175e046, /*SLOT23,VX*/ + [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/ + [InstrStage<1, [SLOT2], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/ + [InstrStage<1, [SLOT0], 0>, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index 7a1ad3e..48b665c 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -13740,3 +13740,891 @@ class DepScalarItinV79 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV81 { + list<InstrItinData> DepScalarItinV81_list = [ + InstrItinData <tc_011e0e9d, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01d44cb2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_01e1be3b, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_02fe1c65, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0655b949, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_075c8dd8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a195f2c, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0a43be35, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0a6c20ae, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0ba0d5da, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_0dfac0a7, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_0fac1eb8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_112d30d6, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData 
<tc_1242dc2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_1248597c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_139ef484, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_14ab4f41, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_151bf368, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_158aa3f7, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_197dce51, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1981450d, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_1c2c7a4a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1c7522a8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1d41f8b7, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fcb8495, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_1fe4ab69, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_20131976, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2237d952, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_23708a21, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_2471c1c8, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24e109c7, 
/*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_24f426ab, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_27106296, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_280f7fe1, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_28e55c6f, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c13e7f5, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_2c3e17fc, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_2f573607, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_33e7e673, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_362b0be2, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_38382228, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_388f9897, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_38e0bae9, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3d14a17b, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3edca78f, /*tc_2*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_3fbf1042, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_407e96f9, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_40d64c94, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 1], + [Hex_FWD, Hex_FWD]>, + + 
InstrItinData <tc_4222e6bf, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_42ff66ba, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_442395f3, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_449acf79, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44d5a428, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_44fffc58, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_45791fb8, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_45f9d1be, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_46c18ecf, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_49fdfd4b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4a55d03c, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4abdbdc6, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4ac61d92, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_4bf903b0, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_503ce0f3, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_512b1653, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_53c851ab, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
<tc_54f0cee2, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_5502c366, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55255f2b, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_556f6577, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55a9a350, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_55b33fda, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_56a124a7, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_57a55b54, /*tc_1*/ + [InstrStage<1, [SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5944960d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_59a7822c, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a222e89, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5a4b5e58, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5b347363, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5ceb2f9e, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5da50c4b, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5deb5e47, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5e4cf0e8, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/ + [InstrStage<1, 
[SLOT0, SLOT1]>], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_60e324ff, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_63567288, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4], + [Hex_FWD]>, + + InstrItinData <tc_64b00d8a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_651cbe02, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65279839, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_65cbd974, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_69bfb303, /*tc_3*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6aa823ab, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6ae3426b, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6d861a95, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6e20402a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6f42bc60, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fb52018, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_6fc5dbea, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_711c805f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_713b66bf, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7401744f, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData <tc_7476d766, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_74a42bda, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_759e57be, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_76bb5435, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7d6a2568, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_77f94a5e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_788b1d09, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_78f87ed3, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_7af3a37e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7b9187d3, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c28bd7e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3], + [Hex_FWD]>, + + InstrItinData <tc_7c31e19a, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7c6d32e4, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7dc63b5c, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f58404a, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [], + []>, + + InstrItinData <tc_7f7f45f5, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_7f8ae742, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8035e91f, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
<tc_822c3c68, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_829d8a86, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_838c4d7a, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_84a7500d, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_86173609, /*tc_2latepred*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_887d1bb7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a6d0d94, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8a825db2, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8b5bd4f5, /*tc_2*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8e82e8ca, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_8f36a2fd, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9124c04f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_92240447, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_934753bb, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_937dd41c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [], + []>, + + InstrItinData <tc_9406230a, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95a33176, /*tc_2*/ + [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_95f43c5e, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_96ef76ef, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_975a4e54, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9783714b, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b20a062, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9b34f5e0, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_9b3c0462, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9bcfb2ee, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9c52f549, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e27f2f9, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9e72dc89, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edb7c77, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9edefe01, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_9f6cd987, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a08b630b, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a1297125, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a154b476, 
/*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a2b365d2, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a3070909, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a32e03e7, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a38c45dc, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4e22bbd, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a4ee89db, /*tc_2early*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_a724463d, /*tc_3stall*/ + [InstrStage<1, [SLOT0]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7a13fac, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a7bdb22c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_a9edeffa, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_abfd9a6d, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ac65613f, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_addc37a8, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ae5babd7, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_aee6250c, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_af6af259, /*tc_ld*/ + [InstrStage<1, [SLOT0, 
SLOT1]>], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b1ae5f67, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_b2196a3f, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b3d46584, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_b4dc7630, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b7c4062a, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_b837298f, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], + []>, + + InstrItinData <tc_b9bec29e, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [], + []>, + + InstrItinData <tc_ba9255a6, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb07f2c5, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb78483e, /*tc_3stall*/ + [InstrStage<1, [SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bb831a7c, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_bf2ffc0f, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c20701f0, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c21d7447, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c57d9f39, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_c818ff7f, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [], + []>, + + InstrItinData <tc_ce59038e, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3], + [Hex_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_cfa0e29b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d03278fd, /*tc_st*/ + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d234b61a, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d33e5eee, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d3632d88, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d45ba9cd, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_d57d649c, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_d61dfdc3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d68dca5c, /*tc_3stall*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d71ea8fa, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_d7718fbe, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_db596beb, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_db96aa6b, /*tc_st*/ + [InstrStage<1, [SLOT0]>], [1], + [Hex_FWD]>, + + InstrItinData <tc_dc51281d, /*tc_3*/ + [InstrStage<1, [SLOT2]>], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_decdde8a, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_df5d53f9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e3d699e3, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_e60def48, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_e9170fb7, /*tc_ld*/ + 
[InstrStage<1, [SLOT0, SLOT1]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed03645c, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ed3f8d2a, /*tc_ld*/ + [InstrStage<1, [SLOT0]>], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eed07714, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_eeda4109, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_ef921005, /*tc_1*/ + [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f098b237, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0cdeccf, /*tc_3x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f0e8e832, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f34c1c21, /*tc_2*/ + [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f38f92e1, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f529831b, /*tc_latepredstaia*/ + [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f6e2aff9, /*tc_newvjump*/ + [InstrStage<1, [SLOT0]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f7569068, /*tc_4x*/ + [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_f97707c1, /*tc_1*/ + [InstrStage<1, [SLOT2]>], [2], + [Hex_FWD]>, + + InstrItinData <tc_f999c66e, /*tc_1*/ + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fae9dfa5, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData <tc_fedb7e19, /*tc_ld*/ + [InstrStage<1, [SLOT0, SLOT1]>], [4, 
2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +}
\ No newline at end of file diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index ae96753..f8f1c2a 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -39178,6 +39178,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_hf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_qf16 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -39269,6 +39282,19 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_sf_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011010000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_sf_sf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -41116,6 +41142,17 @@ let hasNewValue = 1; let opNewValue = 0; let isSolo = 1; } +def Y2_tlbpp : HInst< +(outs IntRegs:$Rd32), +(ins DoubleRegs:$Rss32), +"$Rd32 = tlbp($Rss32)", +tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01101100011; +let hasNewValue = 1; +let opNewValue = 0; +let isSolo = 1; +} def Y2_tlbr : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 17cb96c..23f4b3a 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2), (V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>; + +// V81 HVX Instructions. + +def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e285e04..7ee280d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { IntNo == Intrinsic::hexagon_V6_vgathermh || IntNo == Intrinsic::hexagon_V6_vgathermh_128B || IntNo == Intrinsic::hexagon_V6_vgathermhw || - IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) { + IntNo == Intrinsic::hexagon_V6_vgathermhw_128B || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh || + IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) { SelectV65Gather(N); return; } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index c7a4f68..3cc146b 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { case Intrinsic::hexagon_V6_vgathermhw_128B: Opcode = Hexagon::V6_vgathermhw_pseudo; break; + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: + Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo; + break; } SDVTList VTs = CurDAG->getVTList(MVT::Other); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 9f7f434..526b4de 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::hexagon_V6_vgathermhq: case Intrinsic::hexagon_V6_vgathermhq_128B: case Intrinsic::hexagon_V6_vgathermhwq: - case Intrinsic::hexagon_V6_vgathermhwq_128B: { + case Intrinsic::hexagon_V6_vgathermhwq_128B: + case Intrinsic::hexagon_V6_vgather_vscattermh: + case Intrinsic::hexagon_V6_vgather_vscattermh_128B: { const Module &M = *I.getParent()->getParent()->getParent(); Info.opc = ISD::INTRINSIC_W_CHAIN; Type *VecTy = I.getArgOperand(1)->getType(); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 939841a..47726d6 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const { MachineBasicBlock::iterator First; switch (Opc) { - case Hexagon::V6_vgathermh_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); 
- return First.getInstrIterator(); - - case Hexagon::V6_vgathermw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhw_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); - - case Hexagon::V6_vgathermhwq_pseudo: - First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(4)) - .add(MI.getOperand(5)); - BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) - .add(MI.getOperand(0)) - .addImm(MI.getOperand(1).getImm()) - .addReg(Hexagon::VTMP); - MBB.erase(MI); - return First.getInstrIterator(); + case 
Hexagon::V6_vgather_vscatter_mh_pseudo: + // This is mainly a placeholder; the expansion will be extended later. + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + case Hexagon::V6_vgathermh_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhw_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermwq_pseudo: + First = BuildMI(MBB, 
MI, DL, get(Hexagon::V6_vgathermwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); + + case Hexagon::V6_vgathermhwq_pseudo: + First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return First.getInstrIterator(); } return MI.getIterator(); @@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::V6_vL32b_nt_tmp_npred_ai: case Hexagon::V6_vS32Ub_npred_ai: case Hexagon::V6_vgathermh_pseudo: + case Hexagon::V6_vgather_vscatter_mh_pseudo: case Hexagon::V6_vgathermw_pseudo: case Hexagon::V6_vgathermhw_pseudo: case Hexagon::V6_vgathermhq_pseudo: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td index f927f9b..42393d0 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td @@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>; defm V6_vgathermw_pseudo : vgathermw<HvxVR>; defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>; + +multiclass vgather_scatter_mh<RegisterClass RC> { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, + mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, s4_0Imm:$Ii, + IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>; + multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> { let 
isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index b8a9cf3..9bcd4bf 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td" include "HexagonScheduleV73.td" include "HexagonScheduleV75.td" include "HexagonScheduleV79.td" +include "HexagonScheduleV81.td"
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
new file mode 100644
index 0000000..dd5f5a0
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
@@ -0,0 +1,31 @@
+//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def HexagonV81ItinList : DepScalarItinV81, ScalarItin,
+                         DepHVXItinV81, HVXItin, PseudoItin {
+  list<InstrItinData> ItinList =
+    !listconcat(DepScalarItinV81_list, ScalarItin_list,
+                DepHVXItinV81_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV81 :
+      ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+                            CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+                            CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+                            CVI_ALL_NOMEM, CVI_ZW],
+                            [Hex_FWD, HVX_FWD],
+                            HexagonV81ItinList.ItinList>;
+
+def HexagonModelV81 : SchedMachineModel {
+  // Max issue per cycle == bundle width.
+  let IssueWidth = 4;
+  let Itineraries = HexagonItinerariesV81;
+  let LoadLatency = 1;
+  let CompleteModel = 0;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 7430567..995f66d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -224,6 +224,15 @@ public:
   bool useHVXV79Ops() const {
     return HexagonHVXVersion >= Hexagon::ArchEnum::V79;
   }
+  bool hasV81Ops() const {
+    return getHexagonArchVersion() >= Hexagon::ArchEnum::V81;
+  }
+  bool hasV81OpsOnly() const {
+    return getHexagonArchVersion() == Hexagon::ArchEnum::V81;
+  }
+  bool useHVXV81Ops() const {
+    return HexagonHVXVersion >= Hexagon::ArchEnum::V81;
+  }
 
   bool useAudioOps() const { return UseAudioOps; }
   bool useCompound() const { return UseCompound; }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 171e294..e925e04 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -31,6 +31,10 @@ using namespace llvm;
 static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
     cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
 
+cl::opt<bool> HexagonAllowScatterGatherHVX(
+    "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden,
+    cl::desc("Allow auto-generation of HVX scatter-gather"));
+
 static cl::opt<bool> EnableV68FloatAutoHVX(
     "force-hvx-float", cl::Hidden,
     cl::desc("Enable auto-vectorization of floatint point types on v68."));
@@ -354,6 +358,67 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
   return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
 }
 
+// Returns true when a masked gather producing Ty can be lowered to HVX.
+// Requires -hexagon-allow-scatter-gather-hvx and an HVX vector of integers.
+bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const {
+  // For now assume we can not deal with all HVX datatypes.
+  if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+      !HexagonAllowScatterGatherHVX)
+    return false;
+  // HexagonVectorCombine asserts/casts the element type to IntegerType, so
+  // only vectors of integers can be rewritten; reject everything else here.
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+  // This must be in sync with HexagonVectorCombine pass.
+  switch (Ty->getScalarSizeInBits()) {
+  case 8:
+    return (getTypeNumElements(Ty) == 128);
+  case 16:
+    if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
+      return (Alignment >= 2);
+    break;
+  case 32:
+    if (getTypeNumElements(Ty) == 32)
+      return (Alignment >= 4);
+    break;
+  default:
+    break;
+  }
+  return false;
+}
+
+// Returns true when a masked scatter of a value of type Ty can be lowered to
+// HVX. Requires -hexagon-allow-scatter-gather-hvx and an HVX vector of
+// integers.
+bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const {
+  if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+      !HexagonAllowScatterGatherHVX)
+    return false;
+  // HexagonVectorCombine asserts that the scattered value has an integer
+  // element type; reject everything else here.
+  if (!Ty->isIntOrIntVectorTy())
+    return false;
+  // This must be in sync with HexagonVectorCombine pass.
+  switch (Ty->getScalarSizeInBits()) {
+  case 8:
+    return (getTypeNumElements(Ty) == 128);
+  case 16:
+    if (getTypeNumElements(Ty) == 64)
+      return (Alignment >= 2);
+    break;
+  case 32:
+    if (getTypeNumElements(Ty) == 32)
+      return (Alignment >= 4);
+    break;
+  default:
+    break;
+  }
+  return false;
+}
+
+bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy,
+                                                Align Alignment) const {
+  return !isLegalMaskedGather(VTy, Alignment);
+}
+
+bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy,
+                                                 Align Alignment) const {
+  return !isLegalMaskedScatter(VTy, Alignment);
+}
+
 /// --- Vector TTI end ---
 
 unsigned HexagonTTIImpl::getPrefetchDistance() const {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index dbf16c9..cec2bf9 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -169,6 +169,12 @@ public:
                           unsigned AddressSpace) const override;
   bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                          unsigned AddressSpace) const override;
+  bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
+  bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override;
+  bool forceScalarizeMaskedGather(VectorType *VTy,
+                                  Align Alignment) const override;
+  bool forceScalarizeMaskedScatter(VectorType *VTy,
+                                   Align Alignment) const override;
 
   /// @}
 
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 9ab5202..5c50ec2 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -57,6 +57,11 @@
 
 #define DEBUG_TYPE "hexagon-vc"
 
+// This is a const that represents default HVX VTCM page size.
+// It is boot time configurable, so we probably want an API to
+// read it, but for now assume 128KB
+#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
+
 using namespace llvm;
 
 namespace {
@@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
 
 class HvxIdioms {
 public:
+  enum DstQualifier {
+    Undefined = 0,
+    Arithmetic,
+    LdSt,
+    LLVM_Gather,
+    LLVM_Scatter,
+    HEX_Gather_Scatter,
+    HEX_Gather,
+    HEX_Scatter,
+    Call
+  };
+
   HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
     auto *Int32Ty = HVC.getIntTy(32);
     HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
@@ -473,6 +490,11 @@ private:
   auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
                      Signedness SgnX, ArrayRef<Value *> WordY,
                      Signedness SgnY) const -> SmallVector<Value *>;
+  // Vector manipulations for Ripple
+  bool matchScatter(Instruction &In) const;
+  bool matchGather(Instruction &In) const;
+  Value *processVScatter(Instruction &In) const;
+  Value *processVGather(Instruction &In) const;
 
   VectorType *HvxI32Ty;
   VectorType *HvxP32Ty;
@@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
 }
 
 auto AlignVectors::run() -> bool {
-  LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
+  LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName()
                     << '\n');
   if (!createAddressGroups())
     return false;
@@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp 
&Op) const
   return Ext;
 }
 
+// True when In is a call to the llvm.masked.scatter intrinsic.
+inline bool HvxIdioms::matchScatter(Instruction &In) const {
+  if (auto *Call = dyn_cast<IntrinsicInst>(&In))
+    return Call->getIntrinsicID() == Intrinsic::masked_scatter;
+  return false;
+}
+
+// True when In is a call to the llvm.masked.gather intrinsic.
+inline bool HvxIdioms::matchGather(Instruction &In) const {
+  if (auto *Call = dyn_cast<IntrinsicInst>(&In))
+    return Call->getIntrinsicID() == Intrinsic::masked_gather;
+  return false;
+}
+
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual);
+
+// Opcode filter for the binary instructions that are accepted as users of a
+// gather/scatter: add, sub, mul, and, or, xor, the three shifts and udiv.
+inline bool isArithmetic(unsigned Opc) {
+  switch (Opc) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl:
+  case Instruction::UDiv:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// TODO: Maybe use MemoryLocation for this. See getLocOrNone above.
+// Resolve the address behind Ptr:
+//  * alloca, function argument or global value: the value itself;
+//  * load or store: its pointer operand;
+//  * llvm.masked.store: operand 1.
+// Any other value yields nullptr.
+inline Value *getPointer(Value *Ptr) {
+  assert(Ptr && "Unable to extract pointer");
+  if (isa<AllocaInst, Argument, GlobalValue>(Ptr))
+    return Ptr;
+  if (isa<LoadInst, StoreInst>(Ptr))
+    return getLoadStorePointerOperand(Ptr);
+  auto *Call = dyn_cast<IntrinsicInst>(Ptr);
+  if (Call && Call->getIntrinsicID() == Intrinsic::masked_store)
+    return Call->getOperand(1);
+  return nullptr;
+}
+
+// Classify a single user In of a gather/scatter. When In itself is an
+// acceptable destination it is returned and Qual is set to its kind. Casts
+// and GEPs are looked through by searching their own users with
+// locateDestination(). Unrecognised intrinsics and unhandled instructions
+// yield nullptr.
+static Instruction *selectDestination(Instruction *In,
+                                      HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  // Accept In itself as the destination, tagged with qualifier Q.
+  auto Accept = [&](HvxIdioms::DstQualifier Q) -> Instruction * {
+    Qual = Q;
+    return In;
+  };
+
+  if (isa<StoreInst>(In))
+    return Accept(HvxIdioms::LdSt);
+
+  if (auto *Call = dyn_cast<IntrinsicInst>(In)) {
+    switch (Call->getIntrinsicID()) {
+    case Intrinsic::masked_gather:
+      return Accept(HvxIdioms::LLVM_Gather);
+    case Intrinsic::masked_scatter:
+      return Accept(HvxIdioms::LLVM_Scatter);
+    case Intrinsic::masked_store:
+      return Accept(HvxIdioms::LdSt);
+    case Intrinsic::hexagon_V6_vgather_vscattermh:
+      return Accept(HvxIdioms::HEX_Gather_Scatter);
+    case Intrinsic::hexagon_V6_vscattermh_128B:
+      return Accept(HvxIdioms::HEX_Scatter);
+    case Intrinsic::hexagon_V6_vgathermh_128B:
+      return Accept(HvxIdioms::HEX_Gather);
+    default:
+      // Any other intrinsic is not a destination we know how to handle.
+      return nullptr;
+    }
+  }
+
+  // Casts (zext included) and GEPs only forward the value: keep searching
+  // among their users.
+  if (isa<CastInst, GetElementPtrInst>(In))
+    return locateDestination(In, Qual);
+
+  if (isa<CallInst>(In))
+    return Accept(HvxIdioms::Call);
+
+  if (isArithmetic(In->getOpcode()))
+    return Accept(HvxIdioms::Arithmetic);
+
+  LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
+  return nullptr;
+}
+
+// 
This method attempts to find the destination (user) of a given intrinsic.
+// Given that these are produced only by Ripple, the number of options is
+// limited. The simplest case is an explicit store, which in fact is redundant
+// (since the HVX gather creates its own store during packetization).
+// Nevertheless we need to figure out the address we are storing to. Other
+// cases are more complicated, but still few.
+//
+// Every Instruction user of In is classified with selectDestination(). The
+// first classified user for which getPointer() yields an address is
+// preferred; otherwise the classification result of the last user is returned
+// (which may be nullptr). On a non-null return Qual holds the qualifier that
+// belongs to the returned instruction; on a null return Qual is left as
+// passed in.
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  // Keep the qualifier next to each candidate. selectDestination() overwrites
+  // its out-parameter on every successful classification, so sharing Qual
+  // across users would pair the returned instruction with the qualifier of
+  // whichever user happened to be classified last.
+  struct Candidate {
+    Instruction *Inst;
+    HvxIdioms::DstQualifier Qual;
+  };
+  SmallVector<Candidate> Candidates;
+  Instruction *Last = nullptr;
+  HvxIdioms::DstQualifier LastQual = Qual;
+
+  // Iterate over the uses of the instruction.
+  for (auto &U : In->uses()) {
+    auto *UI = dyn_cast<Instruction>(U.getUser());
+    if (!UI)
+      continue;
+    HvxIdioms::DstQualifier UserQual = Qual;
+    Last = selectDestination(UI, UserQual);
+    if (Last) {
+      LastQual = UserQual;
+      Candidates.push_back({Last, UserQual});
+    }
+  }
+
+  // Now see which of the users (if any) is a memory destination.
+  for (const Candidate &C : Candidates) {
+    if (getPointer(C.Inst)) {
+      Qual = C.Qual;
+      return C.Inst;
+    }
+  }
+
+  // No memory destination: fall back to the outcome for the last user.
+  if (Last)
+    Qual = LastQual;
+  return Last;
+}
+
+// The two intrinsics we handle here have GEP in a different position:
+// operand 0 for llvm.masked.gather, operand 1 otherwise (llvm.masked.scatter).
+// Returns nullptr when that operand is not a GEP instruction, or when In is
+// not an intrinsic call at all.
+inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) {
+  assert(In && "Bad instruction");
+  IntrinsicInst *IIn = dyn_cast<IntrinsicInst>(In);
+  assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather ||
+                  IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
+         "Not a gather Intrinsic");
+  // The assert above is compiled out of release builds; never dereference a
+  // failed cast there.
+  if (!IIn)
+    return nullptr;
+  unsigned PtrOpIdx =
+      IIn->getIntrinsicID() == Intrinsic::masked_gather ? 0 : 1;
+  return dyn_cast<GetElementPtrInst>(IIn->getOperand(PtrOpIdx));
+}
+
+// Given the intrinsic find its GEP argument and extract base address it uses.
+// The method relies on the way Ripple typically forms the GEP for
+// scatter/gather.
+static Value *locateAddressFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *Gep = locateGepFromIntrinsic(In);
+  if (!Gep) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  Value *Base = Gep->getPointerOperand();
+  // Base produced directly by a load.
+  if (isa<LoadInst>(Base))
+    return Base;
+
+  if (auto *ZExt = dyn_cast<ZExtInst>(Base)) {
+    // Base is a zero-extension of a load, or of another masked gather whose
+    // own base is then searched recursively.
+    Value *Narrow = ZExt->getOperand(0);
+    if (isa<LoadInst>(Narrow))
+      return Narrow;
+    auto *Inner = dyn_cast<IntrinsicInst>(Narrow);
+    if (Inner && Inner->getIntrinsicID() == Intrinsic::masked_gather)
+      return locateAddressFromIntrinsic(Inner);
+  } else if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Base)) {
+    // Base is a shuffle of a load, or of an insertelement whose inserted
+    // scalar is a load, alloca, function argument or global.
+    Value *Vec = Shuffle->getOperand(0);
+    if (isa<LoadInst>(Vec))
+      return Vec;
+    if (auto *Insert = dyn_cast<InsertElementInst>(Vec)) {
+      Value *Scalar = Insert->getOperand(1);
+      if (isa<LoadInst, AllocaInst, Argument, GlobalValue>(Scalar))
+        return Scalar;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
+  return nullptr;
+}
+
+// Type of the data moved by In: the accessed type of a load/store, the result
+// type of llvm.masked.load, the stored operand's type of llvm.masked.store,
+// and the value's own type for anything else. Null input gives nullptr.
+static Type *getIndexType(Value *In) {
+  if (!In)
+    return nullptr;
+
+  if (isa<LoadInst, StoreInst>(In))
+    return getLoadStoreType(In);
+
+  if (auto *Call = dyn_cast<IntrinsicInst>(In)) {
+    switch (Call->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+      return Call->getType();
+    case Intrinsic::masked_store:
+      return Call->getOperand(0)->getType();
+    default:
+      break;
+    }
+  }
+  return In->getType();
+}
+
+// Walk from a GEP index operand back to the value that supplies the indexes.
+// zext, sext and shufflevector are looked through via operand 0,
+// insertelement via operand 1. The walk stops at a load, llvm.masked.load,
+// llvm.masked.gather or a ConstantDataVector (returned as is) or at a GEP
+// (its operand 0 is returned). Anything else gives nullptr.
+static Value *locateIndexesFromGEP(Value *In) {
+  Value *Cur = In;
+  while (Cur) {
+    if (isa<LoadInst>(Cur))
+      return Cur;
+    if (auto *Call = dyn_cast<IntrinsicInst>(Cur)) {
+      Intrinsic::ID ID = Call->getIntrinsicID();
+      if (ID == Intrinsic::masked_load || ID == Intrinsic::masked_gather)
+        return Cur;
+    }
+    if (isa<ZExtInst, SExtInst, ShuffleVectorInst>(Cur)) {
+      Cur = cast<Instruction>(Cur)->getOperand(0);
+      continue;
+    }
+    if (auto *Insert = dyn_cast<InsertElementInst>(Cur)) {
+      Cur = Insert->getOperand(1);
+      continue;
+    }
+    if (isa<ConstantDataVector>(Cur))
+      return Cur;
+    if (auto *Gep = dyn_cast<GetElementPtrInst>(Cur))
+      return Gep->getOperand(0);
+    return nullptr;
+  }
+  return nullptr;
+}
+
+// Given the intrinsic find its GEP argument and extract offsets from the base
+// address it uses.
+static Value *locateIndexesFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *Gep = locateGepFromIntrinsic(In);
+  if (!Gep) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  Value *Found = locateIndexesFromGEP(Gep->getOperand(1));
+  if (!Found) {
+    LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
+  }
+  return Found;
+}
+
+// Because of the awkward definition of many Hex intrinsics we often have to
+// reinterpret HVX native <64 x i16> as <32 x i32>, which in practice is a NOP
+// for all use cases, so this only exists to make the IR builder happy.
+// Shared worker for the reinterpreting casts below: run I through an identity
+// shuffle over lanes 0..NumElts-1, then bitcast the result to the single
+// (non-pair) HVX vector of i32.
+static Value *reinterpretAsHvxI32(const HexagonVectorCombine &HVC,
+                                  IRBuilderBase &Builder, LLVMContext &Ctx,
+                                  Value *I, unsigned NumElts,
+                                  const char *CastName) {
+  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+  std::vector<unsigned> shuffleMask;
+  shuffleMask.reserve(NumElts);
+  for (unsigned i = 0; i < NumElts; ++i)
+    shuffleMask.push_back(i);
+  Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask);
+  Value *CastShuffle =
+      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+  return Builder.CreateBitCast(CastShuffle, NT, CastName);
+}
+
+// Recast <64 x i16> as <32 x i32>
+inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC,
+                                               IRBuilderBase &Builder,
+                                               LLVMContext &Ctx, Value *I) {
+  assert(I && "Unable to reinterprete cast");
+  return reinterpretAsHvxI32(HVC, Builder, Ctx, I, 64, "cst64_i16_to_32_i32");
+}
+
+// Recast <128 x i8> as <32 x i32>
+inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC,
+                                              IRBuilderBase &Builder,
+                                              LLVMContext &Ctx, Value *I) {
+  assert(I && "Unable to reinterprete cast");
+  return reinterpretAsHvxI32(HVC, Builder, Ctx, I, 128, "cst128_i8_to_32_i32");
+}
+
+// Create <32 x i32> mask reinterpreted as <128 x i1> with a given pattern.
+// The pattern word is replicated 32 times and fed, together with an all-ones
+// scalar, to V6_vandvrt.
+inline Value *get_i32_Mask(const HexagonVectorCombine &HVC,
+                           IRBuilderBase &Builder, LLVMContext &Ctx,
+                           unsigned int pattern) {
+  std::vector<unsigned int> byteMask(32, pattern);
+
+  return Builder.CreateIntrinsic(
+      HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
+      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
+      nullptr);
+}
+
+// Rewrite an llvm.masked.scatter call In into HVX scatter intrinsics, inserted
+// right before In. Returns the last intrinsic call created, or nullptr when
+// the pattern is not handled, namely when:
+//  * the element type is not an integer of 1, 2 or 4 bytes,
+//  * the value does not fill exactly one HVX vector, or
+//  * base address / indexes cannot be recovered from the GEP.
+// 1-byte elements are emitted as two masked halfword scatters (hi and lo).
+// The mask operand of the intrinsic is not consulted here; every lane is
+// scattered. NOTE(review): confirm callers only reach this with all-true
+// masks.
+Value *HvxIdioms::processVScatter(Instruction &In) const {
+  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+  assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
+  unsigned InpSize = HVC.getSizeOf(InpTy);
+  auto *F = In.getFunction();
+  LLVMContext &Ctx = F->getContext();
+  auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
+  // Bail out instead of asserting only: with asserts compiled out a
+  // non-integer element type would otherwise be dereferenced as null below.
+  if (!ElemTy) {
+    LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+    return nullptr;
+  }
+  unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
+  LLVM_DEBUG({
+    unsigned Elements = HVC.length(InpTy);
+    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
+    dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
+           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
+           << ElemWidth << ")\n";
+  });
+
+  // Reject unsupported element widths before any IR is created, so that a
+  // failed match leaves the function untouched.
+  if (ElemWidth != 1 && ElemWidth != 2 && ElemWidth != 4) {
+    LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+    return nullptr;
+  }
+
+  IRBuilder Builder(In.getParent(), In.getIterator(),
+                    InstSimplifyFolder(HVC.DL));
+
+  auto *ValueToScatter = In.getOperand(0);
+  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
+
+  if (HVC.HST.getVectorLength() != InpSize) {
+    LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
+                      << ") for vscatter\n");
+    return nullptr;
+  }
+
+  // Base address of indexes.
+  auto *IndexLoad = locateAddressFromIntrinsic(&In);
+  if (!IndexLoad)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+  // Address of destination. Must be in VTCM.
+  auto *Ptr = getPointer(IndexLoad);
+  if (!Ptr)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+  // Indexes/offsets
+  auto *Indexes = locateIndexesFromIntrinsic(&In);
+  if (!Indexes)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+  // The HVX scatter intrinsics take the base address as an i32.
+  Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx),
+                                                    "cst_ptr_to_i32");
+  LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
+  // Adjust Indexes
+  auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+  Value *CastIndex = nullptr;
+  if (cstDataVector) {
+    // Our indexes are represented as a constant. We need it in a reg.
+    // NOTE(review): the alloca is created at the current insertion point, not
+    // in the entry block - confirm this is intended.
+    AllocaInst *IndexesAlloca =
+        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
+    [[maybe_unused]] auto *StoreIndexes =
+        Builder.CreateStore(cstDataVector, IndexesAlloca);
+    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+    CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(),
+                                   IndexesAlloca, "reload_index");
+  } else if (ElemWidth == 2) {
+    CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+  } else {
+    CastIndex = Indexes;
+  }
+  LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+  if (ElemWidth == 1) {
+    // v128i8 There is no native instruction for this.
+    // Do this as two Hi/Lo scatters with masking.
+    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+    // Extend indexes. We assume that indexes are in 128i8 format - need to
+    // expand them to Hi/Lo 64i16
+    Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
+    auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+    auto *UnpackedIndexes = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
+
+    auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+    auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+    Value *IndexHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+    Value *IndexLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+    LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+    // Now unpack values to scatter
+    Value *CastSrc =
+        getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
+                      << ")\n");
+
+    Value *UVSHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
+    Value *UVSLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
+    LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
+
+    // Create the mask for individual bytes
+    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+    LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+    [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexHi, UVSHi},
+        nullptr);
+    LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexLo, UVSLo},
+        nullptr);
+  }
+
+  if (ElemWidth == 2) {
+    Value *CastSrc =
+        getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
+        {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+         CastSrc},
+        nullptr);
+  }
+
+  // ElemWidth == 4: the early width check leaves no other possibility.
+  return Builder.CreateIntrinsic(
+      Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
+      {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+       ValueToScatter},
+      nullptr);
+}
+
+Value *HvxIdioms::processVGather(Instruction &In) const {
+  [[maybe_unused]] auto *InpTy =
+      dyn_cast<VectorType>(In.getOperand(0)->getType());
+  assert(InpTy && "Cannot handle no vector type for llvm.gather");
+  [[maybe_unused]] auto *ElemTy = +
dyn_cast<PointerType>(InpTy->getElementType()); + assert(ElemTy && "llvm.gather needs vector of ptr argument"); + auto *F = In.getFunction(); + LLVMContext &Ctx = F->getContext(); + LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n" + << *In.getParent() << "\n"); + LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements(" + << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy) + << ") type(" << *ElemTy << ") Access alignment(" + << *In.getOperand(1) << ") AddressSpace(" + << ElemTy->getAddressSpace() << ")\n"); + + // TODO: Handle masking of elements. + assert(dyn_cast<VectorType>(In.getOperand(2)->getType()) && + "llvm.gather needs vector for mask"); + IRBuilder Builder(In.getParent(), In.getIterator(), + InstSimplifyFolder(HVC.DL)); + + // See who is using the result. The difference between LLVM and HVX vgather + // Intrinsic makes it impossible to handle all cases with temp storage. Alloca + // in VTCM is not yet supported, so for now we just bail out for those cases. + HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined; + Instruction *Dst = locateDestination(&In, Qual); + if (!Dst) { + LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n"); + return nullptr; + } + LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual + << ")\n"); + + // Address of destination. Must be in VTCM. + auto *Ptr = getPointer(Dst); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n"); + return nullptr; + } + + // Result type. Assume it is a vector type. 
+ auto *DstType = cast<VectorType>(getIndexType(Dst)); + assert(DstType && "Cannot handle non vector dst type for llvm.gather"); + + // Base address for sources to be loaded + auto *IndexLoad = locateAddressFromIntrinsic(&In); + if (!IndexLoad) + return nullptr; + LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n"); + + // Gather indexes/offsets + auto *Indexes = locateIndexesFromIntrinsic(&In); + if (!Indexes) + return nullptr; + LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n"); + + Instruction *Gather = nullptr; + Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false); + if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) { + // We fully assume the address space is in VTCM. We also assume that all + // pointers in Operand(0) have the same base(!). + // This is the most basic case of all the above. + unsigned OutputSize = HVC.getSizeOf(DstType); + auto *DstElemTy = cast<IntegerType>(DstType->getElementType()); + unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy); + LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType() + << " Address space (" + << Ptr->getType()->getPointerAddressSpace() << ")\n" + << " Result type : " << *DstType + << "\n Size in bytes : " << OutputSize + << " element type(" << *DstElemTy + << ")\n ElemWidth : " << ElemWidth << " bytes\n"); + + auto *IndexType = cast<VectorType>(getIndexType(Indexes)); + assert(IndexType && "Cannot handle non vector index type for llvm.gather"); + unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType()); + LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n"); + + // Intrinsic takes i32 instead of pointer so cast. + Value *CastedPtr = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...] + // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty] + // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty] + // int_hexagon_V6_vgathermhw [... 
, llvm_v32i32_ty] + // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty] + // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty] + // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty] + if (HVC.HST.getVectorLength() == OutputSize) { + if (ElemWidth == 1) { + // v128i8 There is no native instruction for this. + // Do this as two Hi/Lo gathers with masking. + // Unpack indexes. We assume that indexes are in 128i8 format - need to + // expand them to Hi/Lo 64i16 + Value *CastIndexes = + Builder.CreateBitCast(Indexes, NT, "cast_to_32i32"); + auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub); + auto *UnpackedIndexes = + Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true), + V6_vunpack, CastIndexes, nullptr); + LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes + << ")\n"); + + auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi); + auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo); + [[maybe_unused]] Value *IndexHi = + HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes); + [[maybe_unused]] Value *IndexLo = + HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes); + LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n"); + LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n"); + // Create the mask for individual bytes + auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff); + LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n"); + // We use our destination allocation as a temp storage + // This is unlikely to work properly for masked gather. 
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq); + [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, QByteMask, CastedPtr, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi}, + nullptr); + LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n"); + // Rematerialize the result + [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi"); + LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n"); + // Same for the low part. Here we use Gather to return non-NULL result + // from this function and continue to iterate. We also are deleting Dst + // store below. + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, QByteMask, CastedPtr, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo}, + nullptr); + LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n"); + Value *LoadedResultLo = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo"); + LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n"); + // Now we have properly sized bytes in every other position + // B b A a c a A b B c f F g G h H is presented as + // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H + // Use vpack to gather them + auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb); + [[maybe_unused]] auto Res = Builder.CreateIntrinsic( + NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr); + LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n"); + [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr); + LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n"); + } else if (ElemWidth == 2) { + // v32i16 + if (IndexWidth == 2) { + // Reinterprete 64i16 as 32i32. Only needed for syntactic IR match. 
+ Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes); + LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n"); + // shift all i16 left by 1 to match short addressing mode instead of + // byte. + auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)}); + LLVM_DEBUG(dbgs() + << " Shifted half index: " << *AdjustedIndex << ")\n"); + + auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh); + // The 3rd argument is the size of the region to gather from. Probably + // want to set it to max VTCM size. + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), V6_vgather, + {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + for (auto &U : Dst->uses()) { + if (auto *UI = dyn_cast<Instruction>(U.getUser())) + dbgs() << " dst used by: " << *UI << "\n"; + } + for (auto &U : In.uses()) { + if (auto *UI = dyn_cast<Instruction>(U.getUser())) + dbgs() << " In used by : " << *UI << "\n"; + } + // Create temp load from result in case the result is used by any + // other instruction. 
+ Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + In.replaceAllUsesWith(LoadedResult); + } else { + dbgs() << " Unhandled index type for vgather\n"; + return nullptr; + } + } else if (ElemWidth == 4) { + if (IndexWidth == 4) { + // v32i32 + auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)}); + LLVM_DEBUG(dbgs() + << " Shifted word index: " << *AdjustedIndex << ")\n"); + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B, + {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + } else { + LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n"); + return nullptr; + } + } else { + LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n"); + return nullptr; + } + } else if (HVC.HST.getVectorLength() == OutputSize * 2) { + // This is half of the reg width, duplicate low in high + LLVM_DEBUG(dbgs() << " Unhandled half of register size\n"); + return nullptr; + } else if (HVC.HST.getVectorLength() * 2 == OutputSize) { + LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n"); + return nullptr; + } + // Erase the original intrinsic and store that consumes it. + // HVX will create a pseudo for gather that is expanded to gather + store + // during packetization. + Dst->eraseFromParent(); + } else if (Qual == HvxIdioms::LLVM_Scatter) { + // Gather feeds directly into scatter. 
+ LLVM_DEBUG({ + auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType()); + assert(DstInpTy && "Cannot handle no vector type for llvm.scatter"); + unsigned DstInpSize = HVC.getSizeOf(DstInpTy); + unsigned DstElements = HVC.length(DstInpTy); + auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType()); + assert(DstElemTy && "llvm.scatter needs vector of ptr argument"); + dbgs() << " Gather feeds into scatter\n Values to scatter : " + << *Dst->getOperand(0) << "\n"; + dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements + << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy + << ") Access alignment(" << *Dst->getOperand(2) << ")\n"; + }); + // Address of source + auto *Src = getPointer(IndexLoad); + if (!Src) + return nullptr; + LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n"); + + if (!isa<PointerType>(Src->getType())) { + LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n"); + return nullptr; + } + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n"); + + auto *DstLoad = locateAddressFromIntrinsic(Dst); + if (!DstLoad) { + LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n"); + return nullptr; + } + LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n"); + + Value *Ptr = getPointer(DstLoad); + if (!Ptr) + return nullptr; + LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n"); + Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad); + LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n"); + // Shift all i16 left by 1 to match short addressing mode instead of + // byte. 
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh); + Value *AdjustedIndex = HVC.createHvxIntrinsic( + Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)}); + LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n"); + + return Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + AdjustedIndex}, + nullptr); + } else if (Qual == HvxIdioms::HEX_Gather_Scatter) { + // Gather feeds into previously inserted pseudo intrinsic. + // These could not be in the same packet, so we need to generate another + // pseudo that is expanded to .tmp + store V6_vgathermh_pseudo + // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt, + // ModRegs:$Mu, HvxVR:$Vv) + if (isa<AllocaInst>(IndexLoad)) { + auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes); + if (cstDataVector) { + // Our indexes are represented as a constant. We need THEM in a reg. + // This most likely will not work properly since alloca gives us DDR + // stack location. This will be fixed once we teach compiler about VTCM. 
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT); + [[maybe_unused]] auto *StoreIndexes = + Builder.CreateStore(cstDataVector, IndexesAlloca); + LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n"); + Value *LoadedIndex = Builder.CreateLoad( + IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index"); + AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n"); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n"); + In.replaceAllUsesWith(LoadedResult); + } + } else { + // Address of source + auto *Src = getPointer(IndexLoad); + if (!Src) + return nullptr; + LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n"); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n"); + + auto *DstLoad = locateAddressFromIntrinsic(Dst); + if (!DstLoad) + return nullptr; + LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n"); + auto *Ptr = getPointer(DstLoad); + if (!Ptr) + return nullptr; + LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh, + {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + Indexes}, + nullptr); + } + return Gather; + } else if (Qual == HvxIdioms::HEX_Scatter) { + // This is the case when result of a gather 
is used as an argument to + // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it + // ourselves. We have to create alloca, store to it, and replace all uses + // with that. + AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + Value *CastIndex = + getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes); + LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), + CastIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + In.replaceAllUsesWith(LoadedResult); + } else if (Qual == HvxIdioms::HEX_Gather) { + // Gather feeds to another gather but already replaced with + // hexagon_V6_vgathermh_128B + if (isa<AllocaInst>(IndexLoad)) { + auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes); + if (cstDataVector) { + // Our indexes are represented as a constant. We need it in a reg. 
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT); + + [[maybe_unused]] auto *StoreIndexes = + Builder.CreateStore(cstDataVector, IndexesAlloca); + LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n"); + Value *LoadedIndex = Builder.CreateLoad( + IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index"); + AllocaInst *ResultAlloca = Builder.CreateAlloca(NT); + LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca + << "\n AddressSpace: " + << ResultAlloca->getAddressSpace() << "\n";); + + Value *CastedSrc = Builder.CreateBitOrPointerCast( + IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32"); + LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n"); + + Gather = Builder.CreateIntrinsic( + Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B, + {ResultAlloca, CastedSrc, + HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex}, + nullptr); + Value *LoadedResult = Builder.CreateLoad( + HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result"); + LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n"); + LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n"); + In.replaceAllUsesWith(LoadedResult); + } + } + } else if (Qual == HvxIdioms::LLVM_Gather) { + // Gather feeds into another gather + errs() << " Underimplemented vgather to vgather sequence\n"; + return nullptr; + } else + llvm_unreachable("Unhandled Qual enum"); + + return Gather; +} + auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In, const FxpOp &Op) const -> Value * { assert(Op.X.Val->getType() == Op.Y.Val->getType()); @@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool { It = StartOver ? B.rbegin() : cast<Instruction>(New)->getReverseIterator(); Changed = true; + } else if (matchGather(*It)) { + Value *New = processVGather(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n"); + // We replace original intrinsic with a new pseudo call. 
+ It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; + } else if (matchScatter(*It)) { + Value *New = processVScatter(*It); + if (!New) + continue; + LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n"); + // We replace original intrinsic with a new pseudo call. + It->eraseFromParent(); + It = cast<Instruction>(New)->getReverseIterator(); + RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI); + Changed = true; } } } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 6455757..2f59b7c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) { case Hexagon::ArchV79: case Hexagon::ExtensionHVXV79: return 79; + case Hexagon::ArchV81: + case Hexagon::ExtensionHVXV81: + return 81; } llvm_unreachable("Expected valid arch feature"); return 0; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 6b48a21..b8075bd 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"), cl::init(false)); cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"), cl::init(false)); +cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"), + cl::init(false)); } // namespace static cl::opt<Hexagon::ArchEnum> EnableHVX( @@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX( clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"), clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"), clEnumValN(Hexagon::ArchEnum::V79, 
"v79", "Build for HVX v79"), + clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"), // Sentinel for no value specified. clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. @@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() { return "hexagonv75"; if (MV79) return "hexagonv79"; + if (MV81) + return "hexagonv81"; return ""; } @@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { case Hexagon::ArchEnum::V79: Result.push_back("+hvxv79"); break; + case Hexagon::ArchEnum::V81: + Result.push_back("+hvxv81"); + break; case Hexagon::ArchEnum::Generic: { Result.push_back(StringSwitch<StringRef>(CPU) @@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv71t", "+hvxv71") .Case("hexagonv73", "+hvxv73") .Case("hexagonv75", "+hvxv75") - .Case("hexagonv79", "+hvxv79")); + .Case("hexagonv79", "+hvxv79") + .Case("hexagonv81", "+hvxv81")); break; } case Hexagon::ArchEnum::NoArch: @@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { FeatureBitset FB = S; unsigned CpuArch = ArchV5; for (unsigned F : - {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66, - ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { + {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, + ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, - ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) { + ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // HasHvxVer is false, and UseHvx is 
true. switch (CpuArch) { + case ArchV81: + FB.set(ExtensionHVXV81); + [[fallthrough]]; case ArchV79: FB.set(ExtensionHVXV79); [[fallthrough]]; @@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) { std::optional<unsigned> Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { - for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75, - Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71, - Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68, - Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66, - Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62, - Hexagon::ExtensionHVXV60}) + for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79, + Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73, + Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69, + Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67, + Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65, + Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60}) if (Features.test(Arch)) return Arch; return {}; @@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) { unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) { for (auto Arch : - {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71, - Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66, - Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55, - Hexagon::ArchV5}) + {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, + Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, + Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, + Hexagon::ArchV55, Hexagon::ArchV5}) if (Features.test(Arch)) return Arch; - llvm_unreachable("Expected arch v5-v79"); + llvm_unreachable("Expected arch v5-v81"); return 0; } @@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { .Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T) 
.Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73) .Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75) - .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79); + .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79) + .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81); } llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { |
