about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/Hexagon
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/Hexagon')
-rw-r--r-- llvm/lib/Target/Hexagon/Hexagon.td 13
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepArch.h 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepArch.td 2
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepIICHVX.td 592
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepIICScalar.td 888
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td 37
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td 11
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp 4
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp 162
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonPatternsV65.td 13
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonSchedule.td 1
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonScheduleV81.td 31
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonSubtarget.h 9
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp 59
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h 6
-rw-r--r-- llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp 884
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp 3
-rw-r--r-- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp 45
20 files changed, 2678 insertions, 94 deletions
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index fb0928b8..ede8463 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion",
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
ExtensionHVXV73, ExtensionHVXV75]>;
+def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion", // Feature "hvxv81": sets the HexagonHVXVersion field.
+ "Hexagon::ArchEnum::V81", "Hexagon HVX instructions", // Value stored in that field, then the description string.
+ [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, // Implied features: the earlier HVX version features, V65 through V79.
+ ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71, // NOTE(review): ExtensionHVX (used by hvx-length64b below) is not listed directly;
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>; // presumably it is implied transitively via the older versions - confirm.
+
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
@@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV75)>;
def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV79)>;
+def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">, // Predicate whose condition string is HST->useHVXV81Ops().
+ AssemblerPredicate<(all_of ExtensionHVXV81)>; // Assembler-side condition: requires the ExtensionHVXV81 feature.
def UseAudio : Predicate<"HST->useAudioOps()">,
AssemblerPredicate<(all_of ExtensionAudio)>;
def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79,
ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv81", HexagonModelV81, // Anonymous processor record: CPU name "hexagonv81" paired with HexagonModelV81.
+ [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73, // Architecture features, oldest first...
+ ArchV75, ArchV79, ArchV81, // ...through the new ArchV81.
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, // Same Feature* set as the hexagonv79 entry directly above.
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
// Need to update the correct features for tiny core.
// Disable NewValueJumps since the packetizer is unable to handle a packet with
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 8984534..9bf4034 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -29,7 +29,8 @@ enum class ArchEnum {
V71,
V73,
V75,
- V79
+ V79,
+ V81
};
inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
@@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
.Case("hexagonv73", Hexagon::ArchEnum::V73)
.Case("hexagonv75", Hexagon::ArchEnum::V75)
.Case("hexagonv79", Hexagon::ArchEnum::V79)
+ .Case("hexagonv81", Hexagon::ArchEnum::V81)
.Default(std::nullopt);
}
} // namespace Hexagon
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 8ec1d93..f623fd0 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>;
def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">;
def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>;
+def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">; // Feature "v81": sets HexagonArchVersion to ArchEnum::V81.
+def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>; // Condition string HST->hasV81Ops(); the assembler form requires ArchV81.
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index 93696e0..f4e36fa7 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -7222,3 +7222,595 @@ class DepHVXItinV79 {
[Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
+
+class DepHVXItinV81 { // HVX itinerary table for V81; follows the DepHVXItinV79 class that ends just above.
+ list<InstrItinData> DepHVXItinV81_list = [ // One InstrItinData per tc_* class: InstrStage list, a numeric list (presumably operand cycles - confirm), and a *_FWD list.
+ InstrItinData <tc_0390c1ca, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_37820f4c, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_531b383c, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_72e2b393, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_dcca380f, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2fdd6e6, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 7a1ad3e..48b665c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -13740,3 +13740,891 @@ class DepScalarItinV79 {
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
+
+class DepScalarItinV81 {
+ list<InstrItinData> DepScalarItinV81_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2471c1c8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_33e7e673, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4bf903b0, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_7af3a37e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8f36a2fd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95f43c5e, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af6af259, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e60def48, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f97707c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index ae96753..f8f1c2a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -39178,6 +39178,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_hf_mix : HInst< // Mixed-format HVX vsub: per the asm string, an .hf operand minus a .qf16 operand, result in .qf16.
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { // Gated on both the UseHVXV81 and UseHVXQFloat predicates.
+let Inst{7-5} = 0b100; // V6_vsub_sf_mix sets these bits to 0b000; its other Inst fields match this def.
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_qf16 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -39269,6 +39282,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_sf_mix : HInst< // Mixed-format HVX vsub: per the asm string, an .sf operand minus a .qf32 operand, result in .qf32.
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> { // Gated on both the UseHVXV81 and UseHVXQFloat predicates.
+let Inst{7-5} = 0b000; // V6_vsub_hf_mix sets these bits to 0b100; its other Inst fields match this def.
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_sf_sf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -41116,6 +41142,17 @@ let hasNewValue = 1;
let opNewValue = 0;
let isSolo = 1;
}
+def Y2_tlbpp : HInst< // tlbp form whose source is a DoubleRegs operand ($Rss32) and whose result is an IntRegs operand ($Rd32).
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = tlbp($Rss32)",
+tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> { // Gated on the HasV81 predicate.
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
def Y2_tlbr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 17cb96c..23f4b3a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>;
+
+// V81 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2), // Plain intrinsic name: selected under UseHVX64B.
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2), // _128B intrinsic: same instruction, selected under UseHVX128B.
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2), // Plain intrinsic name: selected under UseHVX64B.
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2), // _128B intrinsic: same instruction, selected under UseHVX128B.
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e285e04..7ee280d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
IntNo == Intrinsic::hexagon_V6_vgathermh ||
IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
IntNo == Intrinsic::hexagon_V6_vgathermhw ||
- IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
+ IntNo == Intrinsic::hexagon_V6_vgathermhw_128B ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) {
SelectV65Gather(N);
return;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index c7a4f68..3cc146b 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
case Intrinsic::hexagon_V6_vgathermhw_128B:
Opcode = Hexagon::V6_vgathermhw_pseudo;
break;
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B:
+ Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo;
+ break;
}
SDVTList VTs = CurDAG->getVTList(MVT::Other);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 9f7f434..526b4de 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::hexagon_V6_vgathermhq:
case Intrinsic::hexagon_V6_vgathermhq_128B:
case Intrinsic::hexagon_V6_vgathermhwq:
- case Intrinsic::hexagon_V6_vgathermhwq_128B: {
+ case Intrinsic::hexagon_V6_vgathermhwq_128B:
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B: {
const Module &M = *I.getParent()->getParent()->getParent();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Type *VecTy = I.getArgOperand(1)->getType();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 939841a..47726d6 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
MachineBasicBlock::iterator First;
switch (Opc) {
- case Hexagon::V6_vgathermh_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
+ // This is mainly a placeholder. It will be extended.
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
}
return MI.getIterator();
@@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vL32b_nt_tmp_npred_ai:
case Hexagon::V6_vS32Ub_npred_ai:
case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
case Hexagon::V6_vgathermw_pseudo:
case Hexagon::V6_vgathermhw_pseudo:
case Hexagon::V6_vgathermhq_pseudo:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
index f927f9b..42393d0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>;
defm V6_vgathermw_pseudo : vgathermw<HvxVR>;
defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>;
+
+multiclass vgather_scatter_mh<RegisterClass RC> { // Codegen-only pseudo for the gather+scatter halfword operation; RC is the register class of $Vv.
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in // Marked as both a load and a store.
+ def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii, // Operands 0 and 1; the expansion in HexagonInstrInfo::expandVGatherPseudo in this patch does not forward them yet.
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv), // Operands 2-4; forwarded to both V6_vgathermh and V6_vscattermh by that expansion.
+ ".error \"should not emit\" ", // Asm string is an .error directive, so printing this pseudo as assembly is flagged.
+ []>;
+}
+
+defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>; // Instantiates the pseudo with RC = HvxVR.
+
multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> {
let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td
index b8a9cf3..9bcd4bf 100644
--- a/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td"
include "HexagonScheduleV73.td"
include "HexagonScheduleV75.td"
include "HexagonScheduleV79.td"
+include "HexagonScheduleV81.td"
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
new file mode 100644
index 0000000..dd5f5a0
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
@@ -0,0 +1,31 @@
+//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Single itinerary list for V81: concatenates the V81-specific scalar and HVX
+// itinerary lists with the common scalar, HVX and pseudo itinerary lists.
+def HexagonV81ItinList : DepScalarItinV81, ScalarItin,
+ DepHVXItinV81, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV81_list, ScalarItin_list,
+ DepHVXItinV81_list, HVXItin_list, PseudoItin_list);
+}
+
+// Processor itineraries for V81. The three arguments are, in order: the
+// functional units (scalar slots, end-loop slot and the CVI_* units), the
+// bypasses (Hex_FWD, HVX_FWD), and the combined V81 itinerary list.
+def HexagonItinerariesV81 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV81ItinList.ItinList>;
+
+// Scheduling machine model for V81, driven by the V81 itineraries.
+def HexagonModelV81 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV81;
+ let LoadLatency = 1;
+ // The model is explicitly marked as not complete.
+ let CompleteModel = 0;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 7430567..995f66d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -224,6 +224,15 @@ public:
bool useHVXV79Ops() const {
return HexagonHVXVersion >= Hexagon::ArchEnum::V79;
}
+ /// True when the selected Hexagon architecture version is V81 or newer.
+ bool hasV81Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V81;
+ }
+ /// True only when the selected Hexagon architecture version is exactly V81.
+ bool hasV81OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V81;
+ }
+ /// True when the selected HVX version is V81 or newer. This tests the HVX
+ /// version, not the scalar architecture version.
+ bool useHVXV81Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V81;
+ }
bool useAudioOps() const { return UseAudioOps; }
bool useCompound() const { return UseCompound; }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 171e294..e925e04 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -31,6 +31,10 @@ using namespace llvm;
static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+// Hidden command-line switch, off by default, that gates automatic generation
+// of HVX scatter/gather. Unlike the neighbouring options it is not declared
+// static, so it is visible to other translation units.
+cl::opt<bool> HexagonAllowScatterGatherHVX(
+ "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden,
+ cl::desc("Allow auto-generation of HVX scatter-gather"));
+
static cl::opt<bool> EnableV68FloatAutoHVX(
"force-hvx-float", cl::Hidden,
cl::desc("Enable auto-vectorization of floatint point types on v68."));
@@ -354,6 +358,61 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}
+/// Decide whether a masked gather producing \p Ty with the given access
+/// \p Alignment may be kept as an intrinsic for the HVX lowering.
+///
+/// The answer is always false unless the hidden
+/// -hexagon-allow-scatter-gather-hvx option is set. Otherwise only integer
+/// vectors that are HVX types are accepted, in these shapes:
+///   - 128 x i8,
+///   - 64 x i16 or 32 x i16 with at least 2-byte alignment,
+///   - 32 x i32 with at least 4-byte alignment.
+bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const {
+  // For now assume we can not deal with all HVX datatypes.
+  if (!HexagonAllowScatterGatherHVX || !Ty->isVectorTy() ||
+      !ST.isTypeForHVX(Ty))
+    return false;
+  // The HexagonVectorCombine lowering casts the element type to IntegerType,
+  // so a same-sized non-integer element (e.g. float) must not be reported as
+  // legal: it could not be lowered afterwards.
+  if (!Ty->getScalarType()->isIntegerTy())
+    return false;
+  // This must be in sync with HexagonVectorCombine pass.
+  const auto NumElems = getTypeNumElements(Ty);
+  switch (Ty->getScalarSizeInBits()) {
+  case 8:
+    return NumElems == 128;
+  case 16:
+    return (NumElems == 64 || NumElems == 32) && Alignment >= 2;
+  case 32:
+    return NumElems == 32 && Alignment >= 4;
+  default:
+    return false;
+  }
+}
+
+/// Decide whether a masked scatter of a value of type \p Ty with the given
+/// access \p Alignment may be kept as an intrinsic for the HVX lowering.
+///
+/// The answer is always false unless the hidden
+/// -hexagon-allow-scatter-gather-hvx option is set. Otherwise only integer
+/// vectors that are HVX types are accepted, in these shapes:
+///   - 128 x i8,
+///   - 64 x i16 with at least 2-byte alignment,
+///   - 32 x i32 with at least 4-byte alignment.
+bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const {
+  if (!HexagonAllowScatterGatherHVX || !Ty->isVectorTy() ||
+      !ST.isTypeForHVX(Ty))
+    return false;
+  // The HexagonVectorCombine scatter lowering requires IntegerType elements,
+  // so a same-sized non-integer element (e.g. float) must not be reported as
+  // legal: it could not be lowered afterwards.
+  if (!Ty->getScalarType()->isIntegerTy())
+    return false;
+  // This must be in sync with HexagonVectorCombine pass.
+  const auto NumElems = getTypeNumElements(Ty);
+  switch (Ty->getScalarSizeInBits()) {
+  case 8:
+    return NumElems == 128;
+  case 16:
+    return NumElems == 64 && Alignment >= 2;
+  case 32:
+    return NumElems == 32 && Alignment >= 4;
+  default:
+    return false;
+  }
+}
+
+/// A masked gather of \p VTy is forced to be scalarized exactly when
+/// isLegalMaskedGather rejects it for the same type and alignment.
+bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy,
+                                                Align Alignment) const {
+  const bool Legal = isLegalMaskedGather(VTy, Alignment);
+  return !Legal;
+}
+
+/// A masked scatter of \p VTy is forced to be scalarized exactly when
+/// isLegalMaskedScatter rejects it for the same type and alignment.
+bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy,
+                                                 Align Alignment) const {
+  const bool Legal = isLegalMaskedScatter(VTy, Alignment);
+  return !Legal;
+}
+
/// --- Vector TTI end ---
unsigned HexagonTTIImpl::getPrefetchDistance() const {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index dbf16c9..cec2bf9 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -169,6 +169,12 @@ public:
unsigned AddressSpace) const override;
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
unsigned AddressSpace) const override;
+ bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
+ bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override;
+ bool forceScalarizeMaskedGather(VectorType *VTy,
+ Align Alignment) const override;
+ bool forceScalarizeMaskedScatter(VectorType *VTy,
+ Align Alignment) const override;
/// @}
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 9ab5202..5c50ec2 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -57,6 +57,11 @@
#define DEBUG_TYPE "hexagon-vc"
+// Default HVX VTCM page size, in bytes. The real value is configurable at
+// boot time, so we probably want an API to read it; for now assume
+// 128 KiB (131072 bytes).
+#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
+
using namespace llvm;
namespace {
@@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
class HvxIdioms {
public:
+ // Classification of the instruction that consumes a gather result, as
+ // assigned by selectDestination().
+ enum DstQualifier {
+ Undefined = 0, // No destination has been classified yet.
+ Arithmetic, // One of the binary opcodes accepted by isArithmetic().
+ LdSt, // A plain store or an llvm.masked.store.
+ LLVM_Gather, // llvm.masked.gather.
+ LLVM_Scatter, // llvm.masked.scatter.
+ HEX_Gather_Scatter, // hexagon_V6_vgather_vscattermh.
+ HEX_Gather, // hexagon_V6_vgathermh_128B.
+ HEX_Scatter, // hexagon_V6_vscattermh_128B.
+ Call // A call that is not an intrinsic.
+ };
+
HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
auto *Int32Ty = HVC.getIntTy(32);
HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
@@ -473,6 +490,11 @@ private:
auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
Signedness SgnX, ArrayRef<Value *> WordY,
Signedness SgnY) const -> SmallVector<Value *>;
+ // Vector manipulations for Ripple
+ bool matchScatter(Instruction &In) const;
+ bool matchGather(Instruction &In) const;
+ Value *processVScatter(Instruction &In) const;
+ Value *processVGather(Instruction &In) const;
VectorType *HvxI32Ty;
VectorType *HvxP32Ty;
@@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
}
auto AlignVectors::run() -> bool {
- LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
+ LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName()
<< '\n');
if (!createAddressGroups())
return false;
@@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
return Ext;
}
+/// Return true when \p In is a call to the llvm.masked.scatter intrinsic.
+inline bool HvxIdioms::matchScatter(Instruction &In) const {
+  if (auto *Call = dyn_cast<IntrinsicInst>(&In))
+    return Call->getIntrinsicID() == Intrinsic::masked_scatter;
+  return false;
+}
+
+/// Return true when \p In is a call to the llvm.masked.gather intrinsic.
+inline bool HvxIdioms::matchGather(Instruction &In) const {
+  if (auto *Call = dyn_cast<IntrinsicInst>(&In))
+    return Call->getIntrinsicID() == Intrinsic::masked_gather;
+  return false;
+}
+
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual);
+
+// Return true when opcode \p Opc is one of the binary operations accepted as
+// a user of gather/scatter: add, sub, mul, and, or, xor, the three shifts and
+// unsigned division. Every other opcode yields false.
+inline bool isArithmetic(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl:
+  case Instruction::UDiv:
+    return true;
+  }
+}
+
+// Extract the address associated with \p Ptr, or return nullptr if there is
+// none we know how to extract:
+//   - an alloca, function argument or global value is returned as is;
+//   - a load or store yields its pointer operand;
+//   - an llvm.masked.store yields its operand 1.
+// TODO: Maybe use MemoryLocation for this. See getLocOrNone above.
+inline Value *getPointer(Value *Ptr) {
+  assert(Ptr && "Unable to extract pointer");
+  if (isa<AllocaInst, Argument, GlobalValue>(Ptr))
+    return Ptr;
+  if (isa<LoadInst, StoreInst>(Ptr))
+    return getLoadStorePointerOperand(Ptr);
+  auto *Call = dyn_cast<IntrinsicInst>(Ptr);
+  const bool IsMaskedStore =
+      Call && Call->getIntrinsicID() == Intrinsic::masked_store;
+  return IsMaskedStore ? Call->getOperand(1) : nullptr;
+}
+
+// Classify a single user \p In of a gather/scatter value.
+//
+// When \p In is itself an acceptable destination it is returned and \p Qual
+// is set to its classification. Casts and GEPs are looked through by
+// continuing the search among their own users via locateDestination().
+// Returns nullptr for a null \p In, for an intrinsic that is not one of the
+// recognised ones, and for any other unhandled instruction.
+static Instruction *selectDestination(Instruction *In,
+                                      HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  // Accept In itself as the destination and record how it was classified.
+  auto Accept = [&](HvxIdioms::DstQualifier Kind) -> Instruction * {
+    Qual = Kind;
+    return In;
+  };
+
+  if (isa<StoreInst>(In))
+    return Accept(HvxIdioms::LdSt);
+
+  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_gather:
+      return Accept(HvxIdioms::LLVM_Gather);
+    case Intrinsic::masked_scatter:
+      return Accept(HvxIdioms::LLVM_Scatter);
+    case Intrinsic::masked_store:
+      return Accept(HvxIdioms::LdSt);
+    case Intrinsic::hexagon_V6_vgather_vscattermh:
+      return Accept(HvxIdioms::HEX_Gather_Scatter);
+    case Intrinsic::hexagon_V6_vscattermh_128B:
+      return Accept(HvxIdioms::HEX_Scatter);
+    case Intrinsic::hexagon_V6_vgathermh_128B:
+      return Accept(HvxIdioms::HEX_Gather);
+    default:
+      // Any other intrinsic is rejected; it is deliberately not treated as a
+      // generic call.
+      return nullptr;
+    }
+  }
+
+  // Casts (zext included) and address computations only forward the value;
+  // the real destination is one of their users.
+  if (isa<CastInst>(In) || isa<GetElementPtrInst>(In))
+    return locateDestination(In, Qual);
+
+  if (isa<CallInst>(In))
+    return Accept(HvxIdioms::Call);
+
+  if (isArithmetic(In->getOpcode()))
+    return Accept(HvxIdioms::Arithmetic);
+
+  LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
+  return nullptr;
+}
+
+// This method attempts to find the destination (user) for a given intrinsic.
+// Given that these are produced only by Ripple, the number of options is
+// limited. The simplest case is an explicit store, which in fact is redundant
+// (since HVX gather creates its own store during packetization). Nevertheless
+// we need to figure out the address we are storing to. Other cases are more
+// complicated, but still few.
+//
+// Every user of \p In is classified with selectDestination(). A user for
+// which getPointer() finds an address (a memory destination) is preferred;
+// otherwise the last accepted user is returned. \p Qual is written only on
+// success and always describes the instruction that is actually returned.
+// Returns nullptr when \p In is null or no user is an acceptable destination.
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) {
+  if (!In)
+    return nullptr;
+
+  // Keep each accepted user together with its own qualifier. Classifying
+  // straight into Qual would let a later user overwrite the qualifier of the
+  // instruction we end up returning.
+  SmallVector<std::pair<Instruction *, HvxIdioms::DstQualifier>> Candidates;
+  for (auto &U : In->uses()) {
+    auto *UI = dyn_cast<Instruction>(U.getUser());
+    if (!UI)
+      continue;
+    HvxIdioms::DstQualifier UserQual = HvxIdioms::Undefined;
+    if (Instruction *Dest = selectDestination(UI, UserQual))
+      Candidates.emplace_back(Dest, UserQual);
+  }
+  if (Candidates.empty())
+    return nullptr;
+
+  // Now see which of the users (if any) is a memory destination.
+  for (const auto &[Dest, DestQual] : Candidates) {
+    if (getPointer(Dest)) {
+      Qual = DestQual;
+      return Dest;
+    }
+  }
+
+  // No memory destination: fall back to the last accepted user.
+  Qual = Candidates.back().second;
+  return Candidates.back().first;
+}
+
+// Return the GEP that feeds the pointer-vector operand of a masked
+// gather/scatter, or nullptr if that operand is not a GEP. The pointer vector
+// is operand 0 of llvm.masked.gather and operand 1 of llvm.masked.scatter.
+inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) {
+  assert(In && "Bad instruction");
+  auto *Call = dyn_cast<IntrinsicInst>(In);
+  assert((Call && (Call->getIntrinsicID() == Intrinsic::masked_gather ||
+                   Call->getIntrinsicID() == Intrinsic::masked_scatter)) &&
+         "Not a gather Intrinsic");
+  const bool IsGather = Call->getIntrinsicID() == Intrinsic::masked_gather;
+  Value *Pointers = Call->getOperand(IsGather ? 0 : 1);
+  return dyn_cast<GetElementPtrInst>(Pointers);
+}
+
+// Starting from a masked gather/scatter, find the value that provides the
+// base address used by its GEP. This relies on the shapes Ripple typically
+// produces for the GEP base:
+//   - a load;
+//   - a zext of a load, or a zext of another masked gather (searched
+//     recursively);
+//   - a shufflevector whose first operand is a load, or an insertelement
+//     whose inserted scalar is a load, alloca, argument or global.
+// Returns nullptr when none of these shapes matches.
+static Value *locateAddressFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *GEP = locateGepFromIntrinsic(In);
+  if (!GEP) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  Value *Base = GEP->getPointerOperand();
+  if (isa<LoadInst>(Base))
+    return Base;
+
+  if (auto *ZExt = dyn_cast<ZExtInst>(Base)) {
+    Value *Widened = ZExt->getOperand(0);
+    if (isa<LoadInst>(Widened))
+      return Widened;
+    // A gather feeding the extension: its own base is what we are after. The
+    // recursive result is returned as is, even when it is null.
+    if (auto *Inner = dyn_cast<IntrinsicInst>(Widened))
+      if (Inner->getIntrinsicID() == Intrinsic::masked_gather)
+        return locateAddressFromIntrinsic(Inner);
+  } else if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Base)) {
+    Value *Splatted = Shuffle->getOperand(0);
+    if (isa<LoadInst>(Splatted))
+      return Splatted;
+    if (auto *Insert = dyn_cast<InsertElementInst>(Splatted)) {
+      Value *Scalar = Insert->getOperand(1);
+      if (isa<LoadInst>(Scalar) || isa<AllocaInst>(Scalar) ||
+          isa<Argument>(Scalar) || isa<GlobalValue>(Scalar))
+        return Scalar;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
+  return nullptr;
+}
+
+// Return the type of the data associated with \p In: the accessed type for a
+// load or store, the result type for llvm.masked.load, the type of the stored
+// value for llvm.masked.store, and the value's own type for anything else.
+// Returns nullptr for a null \p In.
+static Type *getIndexType(Value *In) {
+  if (!In)
+    return nullptr;
+
+  if (isa<LoadInst, StoreInst>(In))
+    return getLoadStoreType(In);
+
+  if (auto *Call = dyn_cast<IntrinsicInst>(In)) {
+    switch (Call->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+      return Call->getType();
+    case Intrinsic::masked_store:
+      return Call->getOperand(0)->getType();
+    default:
+      break;
+    }
+  }
+  return In->getType();
+}
+
+// Walk up from a GEP index operand to the value that actually provides the
+// indexes. Zero/sign extensions, shufflevectors (first operand) and
+// insertelements (inserted scalar) are looked through. The walk stops at a
+// load, an llvm.masked.load, an llvm.masked.gather or a constant data vector,
+// which is returned. For a GEP, its operand 0 is returned. Anything else, or
+// a null \p In, gives nullptr.
+static Value *locateIndexesFromGEP(Value *In) {
+  Value *Cur = In;
+  while (Cur) {
+    if (isa<LoadInst>(Cur))
+      return Cur;
+    if (auto *Call = dyn_cast<IntrinsicInst>(Cur)) {
+      const auto ID = Call->getIntrinsicID();
+      if (ID == Intrinsic::masked_load || ID == Intrinsic::masked_gather)
+        return Cur;
+    }
+
+    // Transparent wrappers: step to the wrapped value and try again.
+    if (auto *ZExt = dyn_cast<ZExtInst>(Cur)) {
+      Cur = ZExt->getOperand(0);
+      continue;
+    }
+    if (auto *SExt = dyn_cast<SExtInst>(Cur)) {
+      Cur = SExt->getOperand(0);
+      continue;
+    }
+    if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Cur)) {
+      Cur = Shuffle->getOperand(0);
+      continue;
+    }
+    if (auto *Insert = dyn_cast<InsertElementInst>(Cur)) {
+      Cur = Insert->getOperand(1);
+      continue;
+    }
+
+    if (isa<ConstantDataVector>(Cur))
+      return Cur;
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(Cur))
+      return GEP->getOperand(0);
+    return nullptr;
+  }
+  return nullptr;
+}
+
+// Given a masked gather/scatter, find its GEP argument and extract the
+// offsets from the base address it uses. The search starts at operand 1 of
+// the GEP. Returns nullptr when there is no GEP or the offsets cannot be
+// traced back to a recognised source.
+static Value *locateIndexesFromIntrinsic(Instruction *In) {
+  GetElementPtrInst *GEP = locateGepFromIntrinsic(In);
+  if (!GEP) {
+    LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+    return nullptr;
+  }
+
+  Value *Found = locateIndexesFromGEP(GEP->getOperand(1));
+  if (!Found) {
+    LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
+  }
+  return Found;
+}
+
+// Because of the awkward definition of many Hexagon intrinsics we often have
+// to reinterpret an HVX native <64 x i16> as <32 x i32>. In practice this is
+// a NOP for all use cases, so it only exists to make the IR builder happy.
+// The value first goes through a 64-lane identity shuffle and is then bitcast
+// to the single-register HVX i32 vector type.
+inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC,
+                                               IRBuilderBase &Builder,
+                                               LLVMContext &Ctx, Value *I) {
+  assert(I && "Unable to reinterprete cast");
+  // Lane selector 0, 1, ..., 63.
+  std::vector<unsigned> Lanes(64);
+  for (unsigned Lane = 0; Lane != 64; ++Lane)
+    Lanes[Lane] = Lane;
+  Constant *Identity = llvm::ConstantDataVector::get(Ctx, Lanes);
+  Value *Shuffled =
+      Builder.CreateShuffleVector(I, I, Identity, "identity_shuffle");
+  Type *HvxI32 = HVC.getHvxTy(HVC.getIntTy(32), false);
+  return Builder.CreateBitCast(Shuffled, HvxI32, "cst64_i16_to_32_i32");
+}
+
+// Recast <128 x i8> as <32 x i32>: the value goes through a 128-lane identity
+// shuffle and is then bitcast to the single-register HVX i32 vector type.
+inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC,
+                                              IRBuilderBase &Builder,
+                                              LLVMContext &Ctx, Value *I) {
+  assert(I && "Unable to reinterprete cast");
+  // Lane selector 0, 1, ..., 127.
+  std::vector<unsigned> Lanes(128);
+  for (unsigned Lane = 0; Lane != 128; ++Lane)
+    Lanes[Lane] = Lane;
+  Constant *Identity = llvm::ConstantDataVector::get(Ctx, Lanes);
+  Value *Shuffled =
+      Builder.CreateShuffleVector(I, I, Identity, "identity_shuffle");
+  Type *HvxI32 = HVC.getHvxTy(HVC.getIntTy(32), false);
+  return Builder.CreateBitCast(Shuffled, HvxI32, "cst128_i8_to_32_i32");
+}
+
+// Create a <32 x i32> mask reinterpreted as <128 x i1>. Every one of the 32
+// words holds \p pattern; the constant vector is turned into a predicate by
+// the V6_vandvrt intrinsic with an all-ones scalar operand.
+inline Value *get_i32_Mask(const HexagonVectorCombine &HVC,
+                           IRBuilderBase &Builder, LLVMContext &Ctx,
+                           unsigned int pattern) {
+  const std::vector<unsigned int> Words(32, pattern);
+  Constant *WordVec = llvm::ConstantDataVector::get(Ctx, Words);
+  Value *AllOnes = HVC.getConstInt(~0);
+  const auto VandVrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt);
+  return Builder.CreateIntrinsic(HVC.getBoolTy(128), VandVrt,
+                                 {WordVec, AllOnes}, nullptr);
+}
+
+/// Lower an llvm.masked.scatter call \p In to HVX vscatter intrinsics.
+///
+/// The value to scatter is operand 0 of \p In. The destination base address
+/// and the per-element offsets are recovered from the GEP that feeds the
+/// intrinsic. Only integer vectors that fill exactly one HVX register are
+/// handled, with 1-, 2- or 4-byte elements:
+///   - 4-byte elements map to V6_vscattermw,
+///   - 2-byte elements map to V6_vscattermh,
+///   - 1-byte elements have no native instruction and are emitted as two
+///     byte-masked halfword scatters (V6_vscattermhq) on the unpacked
+///     high/low halves.
+/// The mask operand of the original intrinsic is not consulted here.
+///
+/// \returns the (last) scatter intrinsic call created, or nullptr when the
+/// pattern is not handled. Every bail-out happens before any IR is created.
+Value *HvxIdioms::processVScatter(Instruction &In) const {
+  // Reject unsupported shapes gracefully instead of asserting: an assert
+  // disappears in release builds and would leave a null dereference behind.
+  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+  if (!InpTy) {
+    LLVM_DEBUG(dbgs() << "Unhandled non-vector type for vscatter\n");
+    return nullptr;
+  }
+  auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
+  if (!ElemTy) {
+    LLVM_DEBUG(dbgs() << "Unhandled non-integer element type for vscatter\n");
+    return nullptr;
+  }
+  unsigned InpSize = HVC.getSizeOf(InpTy);
+  auto *F = In.getFunction();
+  LLVMContext &Ctx = F->getContext();
+  unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
+  LLVM_DEBUG({
+    unsigned Elements = HVC.length(InpTy);
+    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
+    dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
+           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
+           << ElemWidth << ")\n";
+  });
+
+  IRBuilder Builder(In.getParent(), In.getIterator(),
+                    InstSimplifyFolder(HVC.DL));
+
+  auto *ValueToScatter = In.getOperand(0);
+  LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
+
+  if (HVC.HST.getVectorLength() != InpSize) {
+    LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
+                      << ") for vscatter\n");
+    return nullptr;
+  }
+
+  // Check the element width up front so that no IR is emitted for a scatter
+  // we are going to give up on.
+  if (ElemWidth != 1 && ElemWidth != 2 && ElemWidth != 4) {
+    LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+    return nullptr;
+  }
+
+  // Base address of indexes.
+  auto *IndexLoad = locateAddressFromIntrinsic(&In);
+  if (!IndexLoad)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+  // Address of destination. Must be in VTCM.
+  auto *Ptr = getPointer(IndexLoad);
+  if (!Ptr)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+  // Indexes/offsets
+  auto *Indexes = locateIndexesFromIntrinsic(&In);
+  if (!Indexes)
+    return nullptr;
+  LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+
+  // From here on IR is created; there are no further bail-outs.
+  // The intrinsics take the destination as i32 rather than as a pointer.
+  Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx),
+                                                    "cst_ptr_to_i32");
+  LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
+  // Adjust Indexes
+  auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+  Value *CastIndex = nullptr;
+  if (cstDataVector) {
+    // Our indexes are represented as a constant. We need it in a reg.
+    AllocaInst *IndexesAlloca =
+        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
+    [[maybe_unused]] auto *StoreIndexes =
+        Builder.CreateStore(cstDataVector, IndexesAlloca);
+    LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+    CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(),
+                                   IndexesAlloca, "reload_index");
+  } else {
+    if (ElemWidth == 2)
+      CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+    else
+      CastIndex = Indexes;
+  }
+  LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+  if (ElemWidth == 1) {
+    // v128i8 There is no native instruction for this.
+    // Do this as two Hi/Lo scatters with masking.
+    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+    // Extend indexes. We assume that indexes are in 128i8 format - need to
+    // expand them to Hi/Lo 64i16
+    Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
+    auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+    auto *UnpackedIndexes = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
+
+    auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+    auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+    Value *IndexHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+    Value *IndexLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+    LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+    // Now unpack values to scatter
+    Value *CastSrc =
+        getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
+        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
+    LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
+                      << ")\n");
+
+    Value *UVSHi =
+        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
+    Value *UVSLo =
+        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
+    LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
+    LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
+
+    // Create the mask for individual bytes
+    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+    LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+    // The high half is emitted for its side effect only; the low half is
+    // the value handed back to the caller.
+    [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexHi, UVSHi},
+        nullptr);
+    LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+         IndexLo, UVSLo},
+        nullptr);
+  }
+
+  if (ElemWidth == 2) {
+    Value *CastSrc =
+        getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter);
+    LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+    return Builder.CreateIntrinsic(
+        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
+        {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+         CastSrc},
+        nullptr);
+  }
+
+  // Only the word case is left; other widths were rejected above.
+  assert(ElemWidth == 4 && "Unexpected element width for vscatter");
+  return Builder.CreateIntrinsic(
+      Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
+      {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+       ValueToScatter},
+      nullptr);
+}
+
+Value *HvxIdioms::processVGather(Instruction &In) const {
+ [[maybe_unused]] auto *InpTy =
+ dyn_cast<VectorType>(In.getOperand(0)->getType());
+ assert(InpTy && "Cannot handle no vector type for llvm.gather");
+ [[maybe_unused]] auto *ElemTy =
+ dyn_cast<PointerType>(InpTy->getElementType());
+ assert(ElemTy && "llvm.gather needs vector of ptr argument");
+ auto *F = In.getFunction();
+ LLVMContext &Ctx = F->getContext();
+ LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
+ << *In.getParent() << "\n");
+ LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
+ << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
+ << ") type(" << *ElemTy << ") Access alignment("
+ << *In.getOperand(1) << ") AddressSpace("
+ << ElemTy->getAddressSpace() << ")\n");
+
+ // TODO: Handle masking of elements.
+ assert(dyn_cast<VectorType>(In.getOperand(2)->getType()) &&
+ "llvm.gather needs vector for mask");
+ IRBuilder Builder(In.getParent(), In.getIterator(),
+ InstSimplifyFolder(HVC.DL));
+
+ // See who is using the result. The difference between LLVM and HVX vgather
+ // Intrinsic makes it impossible to handle all cases with temp storage. Alloca
+ // in VTCM is not yet supported, so for now we just bail out for those cases.
+ HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
+ Instruction *Dst = locateDestination(&In, Qual);
+ if (!Dst) {
+ LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
+ << ")\n");
+
+ // Address of destination. Must be in VTCM.
+ auto *Ptr = getPointer(Dst);
+ if (!Ptr) {
+ LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
+ return nullptr;
+ }
+
+ // Result type. Assume it is a vector type.
+ auto *DstType = cast<VectorType>(getIndexType(Dst));
+ assert(DstType && "Cannot handle non vector dst type for llvm.gather");
+
+ // Base address for sources to be loaded
+ auto *IndexLoad = locateAddressFromIntrinsic(&In);
+ if (!IndexLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+ // Gather indexes/offsets
+ auto *Indexes = locateIndexesFromIntrinsic(&In);
+ if (!Indexes)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+
+ Instruction *Gather = nullptr;
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
+ // We fully assume the address space is in VTCM. We also assume that all
+ // pointers in Operand(0) have the same base(!).
+ // This is the most basic case of all the above.
+ unsigned OutputSize = HVC.getSizeOf(DstType);
+ auto *DstElemTy = cast<IntegerType>(DstType->getElementType());
+ unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy);
+ LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType()
+ << " Address space ("
+ << Ptr->getType()->getPointerAddressSpace() << ")\n"
+ << " Result type : " << *DstType
+ << "\n Size in bytes : " << OutputSize
+ << " element type(" << *DstElemTy
+ << ")\n ElemWidth : " << ElemWidth << " bytes\n");
+
+ auto *IndexType = cast<VectorType>(getIndexType(Indexes));
+ assert(IndexType && "Cannot handle non vector index type for llvm.gather");
+ unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
+ LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
+
+ // Intrinsic takes i32 instead of pointer so cast.
+ Value *CastedPtr = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...]
+ // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty]
+ // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty]
+ if (HVC.HST.getVectorLength() == OutputSize) {
+ if (ElemWidth == 1) {
+ // v128i8 There is no native instruction for this.
+ // Do this as two Hi/Lo gathers with masking.
+ // Unpack indexes. We assume that indexes are in 128i8 format - need to
+ // expand them to Hi/Lo 64i16
+ Value *CastIndexes =
+ Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
+ auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+ auto *UnpackedIndexes =
+ Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
+ V6_vunpack, CastIndexes, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
+ << ")\n");
+
+ auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+ auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+ [[maybe_unused]] Value *IndexHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+ [[maybe_unused]] Value *IndexLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+ LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+ // Create the mask for individual bytes
+ auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+ LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+ // We use our destination allocation as a temp storage
+ // This is unlikely to work properly for masked gather.
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
+ [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
+ // Rematerialize the result
+ [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
+ LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
+ // Same for the low part. Here we use Gather to return non-NULL result
+ // from this function and continue to iterate. We also are deleting Dst
+ // store below.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
+ Value *LoadedResultLo = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
+ LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
+ // Now we have properly sized bytes in every other position
+ // B b A a c a A b B c f F g G h H is presented as
+ // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H
+ // Use vpack to gather them
+ auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
+ [[maybe_unused]] auto Res = Builder.CreateIntrinsic(
+ NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
+ LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
+ [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
+ LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
+ } else if (ElemWidth == 2) {
+ // v32i16
+ if (IndexWidth == 2) {
+ // Reinterprete 64i16 as 32i32. Only needed for syntactic IR match.
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+ // shift all i16 left by 1 to match short addressing mode instead of
+ // byte.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
+ // The 3rd argument is the size of the region to gather from. Probably
+ // want to set it to max VTCM size.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ for (auto &U : Dst->uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+ dbgs() << " dst used by: " << *UI << "\n";
+ }
+ for (auto &U : In.uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+ dbgs() << " In used by : " << *UI << "\n";
+ }
+ // Create temp load from result in case the result is used by any
+ // other instruction.
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else {
+ dbgs() << " Unhandled index type for vgather\n";
+ return nullptr;
+ }
+ } else if (ElemWidth == 4) {
+ if (IndexWidth == 4) {
+ // v32i32
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted word index: " << *AdjustedIndex << ")\n");
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
+ return nullptr;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
+ return nullptr;
+ }
+ } else if (HVC.HST.getVectorLength() == OutputSize * 2) {
+ // This is half of the reg width, duplicate low in high
+ LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
+ return nullptr;
+ } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
+ LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");
+ return nullptr;
+ }
+ // Erase the original intrinsic and store that consumes it.
+ // HVX will create a pseudo for gather that is expanded to gather + store
+ // during packetization.
+ Dst->eraseFromParent();
+ } else if (Qual == HvxIdioms::LLVM_Scatter) {
+ // Gather feeds directly into scatter.
+ LLVM_DEBUG({
+ auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
+ assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
+ unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
+ unsigned DstElements = HVC.length(DstInpTy);
+ auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType());
+ assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
+ dbgs() << " Gather feeds into scatter\n Values to scatter : "
+ << *Dst->getOperand(0) << "\n";
+ dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
+ << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
+ << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
+ });
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ if (!isa<PointerType>(Src->getType())) {
+ LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
+ return nullptr;
+ }
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad) {
+ LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+
+ Value *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+ // Shift all i16 left by 1 to match short addressing mode instead of
+ // byte.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
+ // Gather feeds into previously inserted pseudo intrinsic.
+ // These could not be in the same packet, so we need to generate another
+ // pseudo that is expanded to .tmp + store V6_vgathermh_pseudo
+ // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt,
+ // ModRegs:$Mu, HvxVR:$Vv)
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+ // Our indexes are represented as a constant. We need THEM in a reg.
+ // This most likely will not work properly since alloca gives us DDR
+ // stack location. This will be fixed once we teach compiler about VTCM.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ } else {
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+ auto *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ Indexes},
+ nullptr);
+ }
+ return Gather;
+ } else if (Qual == HvxIdioms::HEX_Scatter) {
+ // This is the case when result of a gather is used as an argument to
+ // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it
+ // ourselves. We have to create alloca, store to it, and replace all uses
+ // with that.
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ CastIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else if (Qual == HvxIdioms::HEX_Gather) {
+ // Gather feeds to another gather but already replaced with
+ // hexagon_V6_vgathermh_128B
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+ // Our indexes are represented as a constant. We need it in a reg.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
+ << "\n AddressSpace: "
+ << ResultAlloca->getAddressSpace() << "\n";);
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ }
+ } else if (Qual == HvxIdioms::LLVM_Gather) {
+ // Gather feeds into another gather
+ errs() << " Underimplemented vgather to vgather sequence\n";
+ return nullptr;
+ } else
+ llvm_unreachable("Unhandled Qual enum");
+
+ return Gather;
+}
+
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
const FxpOp &Op) const -> Value * {
assert(Op.X.Val->getType() == Op.Y.Val->getType());
@@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool {
It = StartOver ? B.rbegin()
: cast<Instruction>(New)->getReverseIterator();
Changed = true;
+ } else if (matchGather(*It)) {
+ Value *New = processVGather(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n");
+ // We replace original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
+ } else if (matchScatter(*It)) {
+ Value *New = processVScatter(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");
+ // We replace original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
}
}
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 6455757..2f59b7c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) {
case Hexagon::ArchV79:
case Hexagon::ExtensionHVXV79:
return 79;
+ case Hexagon::ArchV81:
+ case Hexagon::ExtensionHVXV81:
+ return 81;
}
llvm_unreachable("Expected valid arch feature");
return 0;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 6b48a21..b8075bd 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"),
cl::init(false));
cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"),
cl::init(false));
+cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"),
+ cl::init(false));
} // namespace
static cl::opt<Hexagon::ArchEnum> EnableHVX(
@@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX(
clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"),
clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"),
clEnumValN(Hexagon::ArchEnum::V79, "v79", "Build for HVX v79"),
+ clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"),
// Sentinel for no value specified.
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
@@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv75";
if (MV79)
return "hexagonv79";
+ if (MV81)
+ return "hexagonv81";
return "";
}
@@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V79:
Result.push_back("+hvxv79");
break;
+ case Hexagon::ArchEnum::V81:
+ Result.push_back("+hvxv81");
+ break;
case Hexagon::ArchEnum::Generic: {
Result.push_back(StringSwitch<StringRef>(CPU)
@@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
.Case("hexagonv71t", "+hvxv71")
.Case("hexagonv73", "+hvxv73")
.Case("hexagonv75", "+hvxv75")
- .Case("hexagonv79", "+hvxv79"));
+ .Case("hexagonv79", "+hvxv79")
+ .Case("hexagonv81", "+hvxv81"));
break;
}
case Hexagon::ArchEnum::NoArch:
@@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
FeatureBitset FB = S;
unsigned CpuArch = ArchV5;
for (unsigned F :
- {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66,
- ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
+ {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67,
+ ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
for (unsigned F :
{ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
- ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) {
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV81:
+ FB.set(ExtensionHVXV81);
+ [[fallthrough]];
case ArchV79:
FB.set(ExtensionHVXV79);
[[fallthrough]];
@@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) {
std::optional<unsigned>
Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
- for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75,
- Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71,
- Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68,
- Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66,
- Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62,
- Hexagon::ExtensionHVXV60})
+ for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79,
+ Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73,
+ Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69,
+ Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67,
+ Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65,
+ Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60})
if (Features.test(Arch))
return Arch;
return {};
@@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) {
for (auto Arch :
- {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71,
- Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66,
- Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55,
- Hexagon::ArchV5})
+ {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73,
+ Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67,
+ Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60,
+ Hexagon::ArchV55, Hexagon::ArchV5})
if (Features.test(Arch))
return Arch;
- llvm_unreachable("Expected arch v5-v79");
+ llvm_unreachable("Expected arch v5-v81");
return 0;
}
@@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
.Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T)
.Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73)
.Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75)
- .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79);
+ .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79)
+ .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81);
}
llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {