Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 44
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 74
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp | 21
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h | 19
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 53
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 12
-rw-r--r--  llvm/lib/Target/DirectX/DXILPrepare.cpp | 89
-rw-r--r--  llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp | 130
-rw-r--r--  llvm/lib/Target/DirectX/DXILTranslateMetadata.h | 3
-rw-r--r--  llvm/lib/Target/Hexagon/Hexagon.td | 13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepArch.h | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepArch.td | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepIICHVX.td | 592
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepIICScalar.td | 888
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td | 37
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 11
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 162
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsV65.td | 13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSchedule.td | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonScheduleV81.td | 31
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.h | 9
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 59
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 884
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 35
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 81
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 18
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleXSf.td | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86.h | 8
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 56
45 files changed, 3184 insertions, 312 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 018ef31..d16b116 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9002,12 +9002,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
static SMECallAttrs
-getSMECallAttrs(const Function &Caller, const AArch64TargetLowering &TLI,
+getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI,
const TargetLowering::CallLoweringInfo &CLI) {
if (CLI.CB)
- return SMECallAttrs(*CLI.CB, &TLI);
+ return SMECallAttrs(*CLI.CB, &RTLCI);
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), RTLCI));
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
}
@@ -9029,7 +9029,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// SME Streaming functions are not eligible for TCO as they may require
// the streaming mode or ZA to be restored after returning from the call.
- SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState() ||
CallAttrs.caller().hasStreamingBody())
@@ -9454,7 +9455,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Determine whether we need any streaming mode changes.
- SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI);
std::optional<unsigned> ZAMarkerNode;
bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
@@ -26723,11 +26725,34 @@ static SDValue performDUPCombine(SDNode *N,
}
if (N->getOpcode() == AArch64ISD::DUP) {
+ SDValue Op = N->getOperand(0);
+
+ // Optimize DUP(extload/zextload i8/i16/i32) to avoid GPR->FPR transfer.
+ // For example:
+ // v4i32 = DUP (i32 (zextloadi8 addr))
+ // =>
+ // v4i32 = SCALAR_TO_VECTOR (i32 (zextloadi8 addr)) ; Matches to ldr b0
+ // v4i32 = DUPLANE32 (v4i32), 0
+ if (auto *LD = dyn_cast<LoadSDNode>(Op)) {
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT MemVT = LD->getMemoryVT();
+ EVT ElemVT = VT.getVectorElementType();
+ if ((ExtType == ISD::EXTLOAD || ExtType == ISD::ZEXTLOAD) &&
+ (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
+ ElemVT != MemVT && LD->hasOneUse()) {
+ EVT Vec128VT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT,
+ 128 / ElemVT.getSizeInBits());
+ SDValue ScalarToVec =
+ DCI.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, Op);
+ return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, ScalarToVec,
+ DCI.DAG.getConstant(0, DL, MVT::i64));
+ }
+ }
+
// If the instruction is known to produce a scalar in SIMD registers, we can
// duplicate it across the vector lanes using DUPLANE instead of moving it
// to a GPR first. For example, this allows us to handle:
// v4i32 = DUP (i32 (FCMGT (f32, f32)))
- SDValue Op = N->getOperand(0);
// FIXME: Ideally, we should be able to handle all instructions that
// produce a scalar value in FPRs.
if (Op.getOpcode() == AArch64ISD::FCMEQ ||
@@ -29496,11 +29521,6 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
- // Fuchsia is similar.
- // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
- if (Subtarget->isTargetFuchsia())
- return UseTlsOffset(IRB, -0x8);
-
return TargetLowering::getSafeStackPointerLocation(IRB);
}
@@ -29818,7 +29838,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
// Checks to allow the use of SME instructions
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
- auto CallAttrs = SMECallAttrs(*Base, this);
+ auto CallAttrs = SMECallAttrs(*Base, &getRuntimeLibcallsInfo());
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingZT0() ||
CallAttrs.requiresPreservingAllZAState())
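
For context, the performDUPCombine change above rewrites a broadcast of a small zero-/any-extended load so the scalar is loaded directly into a SIMD register instead of being moved from a GPR. A minimal C++ sketch of the kind of source that produces this DAG shape (NEON intrinsics; the splat_byte name is illustrative, not part of the patch):

#include <arm_neon.h>
#include <cstdint>

// The operand of the DUP is a zextloadi8; per the comment in the combine,
// it is rewritten to SCALAR_TO_VECTOR + DUPLANE32 so the byte can be
// loaded straight into a SIMD register.
uint32x4_t splat_byte(const uint8_t *p) {
  return vdupq_n_u32(static_cast<uint32_t>(*p));
}

With the combine, the expected lowering is roughly an ldr b0 followed by dup v0.4s, v0.s[0], avoiding the GPR-to-FPR transfer described in the comment.
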
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d5117da..457e540 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5151,7 +5151,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// GPR32 zeroing
if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
- if (Subtarget.hasZeroCycleZeroingGPR32()) {
+ if (Subtarget.hasZeroCycleZeroingGPR64() &&
+ !Subtarget.hasZeroCycleZeroingGPR32()) {
+ MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ assert(DestRegX.isValid() && "Destination super-reg not valid");
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a352096..b9e299e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4022,22 +4022,6 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
-// load zero-extended i32, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-// load zero-extended i16, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-// load zero-extended i8, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-// load zero-extended i16, bitcast to f32
-def : Pat<(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-// load zero-extended i8, bitcast to f32
-def : Pat<(f32 (bitconvert (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -4389,6 +4373,64 @@ def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;
+// Patterns for bitconvert or scalar_to_vector of load operations.
+// Enables direct SIMD register loads for small integer types (i8/i16) that are
+// naturally zero-extended to i32/i64.
+multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16> {
+ // 8-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend), bsub)>;
+
+ // 16-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend), hsub)>;
+}
+
+// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit.
+multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> {
+ defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>;
+
+ // 32-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend), ssub)>;
+}
+
+// Instantiate bitconvert patterns for floating-point types.
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>;
+
+// Instantiate scalar_to_vector patterns for all vector types.
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>;
+
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch timm:$Rt,
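
The ExtLoad8_16AllModes / ExtLoad8_16_32AllModes instantiations above generalize the removed ad-hoc patterns: a small zero-extended load whose result is only reinterpreted (bitconvert) or inserted into a vector (scalar_to_vector) can be loaded directly into an FPR/SIMD register. A minimal C++ sketch of one covered case (the helper name is hypothetical):

#include <cstdint>
#include <cstring>

// zextloadi16 -> i32 followed by bitconvert i32 -> f32. With the f32
// instantiation above this is intended to select a single FPR16 load
// (LDRHui into hsub) rather than a GPR load plus an fmov.
float load_u16_bits_as_f32(const uint16_t *p) {
  uint32_t bits = *p;                 // 16-bit load, zero-extended to i32
  float f;
  std::memcpy(&f, &bits, sizeof(f));  // bit-level reinterpretation
  return f;
}
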
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index bdde8e3..2387f17 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2762,11 +2762,11 @@ def : InstRW<[V2Write_11c_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
def : InstRW<[V2Write_11c_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
// Non temporal store, scalar + imm
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BHWD]_ZRI$")>;
// Non temporal store, scalar + scalar
-def : InstRW<[V2Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+def : InstRW<[V2Write_2c_1L01_1S_1V01], (instrs STNT1H_ZRR)>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BWD]_ZRR$")>;
// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[V2Write_4c_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d50af11..fede586 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -224,7 +224,8 @@ static cl::opt<bool> EnableScalableAutovecInStreamingMode(
static bool isSMEABIRoutineCall(const CallInst &CI,
const AArch64TargetLowering &TLI) {
const auto *F = CI.getCalledFunction();
- return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine();
+ return F &&
+ SMEAttrs(F->getName(), TLI.getRuntimeLibcallsInfo()).isSMEABIRoutine();
}
/// Returns true if the function has explicit operations that can only be
@@ -355,7 +356,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
// change only once and avoid inlining of G into F.
SMEAttrs FAttrs(*F);
- SMECallAttrs CallAttrs(Call, getTLI());
+ SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());
if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index d71f728..085c8588 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -75,8 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
}
void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI) {
- RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ RTLIB::LibcallImpl Impl = RTLCI.getSupportedLibcallImpl(FuncName);
if (Impl == RTLIB::Unsupported)
return;
unsigned KnownAttrs = SMEAttrs::Normal;
@@ -124,21 +124,22 @@ bool SMECallAttrs::requiresSMChange() const {
return true;
}
-SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
+SMECallAttrs::SMECallAttrs(const CallBase &CB,
+ const RTLIB::RuntimeLibcallsInfo *RTLCI)
: CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
if (auto *CalledFunction = CB.getCalledFunction())
- CalledFn = SMEAttrs(*CalledFunction, TLI);
-
- // An `invoke` of an agnostic ZA function may not return normally (it may
- // resume in an exception block). In this case, it acts like a private ZA
- // callee and may require a ZA save to be set up before it is called.
- if (isa<InvokeInst>(CB))
- CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
+ CalledFn = SMEAttrs(*CalledFunction, RTLCI);
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
assert((IsIndirect ||
((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) &&
"SME attributes at callsite do not match declaration");
+
+ // An `invoke` of an agnostic ZA function may not return normally (it may
+ // resume in an exception block). In this case, it acts like a private ZA
+ // callee and may require a ZA save to be set up before it is called.
+ if (isa<InvokeInst>(CB))
+ CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index d26e3cd..28c397e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -12,8 +12,9 @@
#include "llvm/IR/Function.h"
namespace llvm {
-
-class AArch64TargetLowering;
+namespace RTLIB {
+struct RuntimeLibcallsInfo;
+}
class Function;
class CallBase;
@@ -52,14 +53,14 @@ public:
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function &F, const AArch64TargetLowering *TLI = nullptr)
+ SMEAttrs(const Function &F, const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr)
: SMEAttrs(F.getAttributes()) {
- if (TLI)
- addKnownFunctionAttrs(F.getName(), *TLI);
+ if (RTLCI)
+ addKnownFunctionAttrs(F.getName(), *RTLCI);
}
SMEAttrs(const AttributeList &L);
- SMEAttrs(StringRef FuncName, const AArch64TargetLowering &TLI) {
- addKnownFunctionAttrs(FuncName, TLI);
+ SMEAttrs(StringRef FuncName, const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ addKnownFunctionAttrs(FuncName, RTLCI);
};
void set(unsigned M, bool Enable = true) {
@@ -157,7 +158,7 @@ public:
private:
void addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI);
void validate() const;
};
@@ -175,7 +176,7 @@ public:
SMEAttrs Callsite = SMEAttrs::Normal)
: CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {}
- SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI);
+ SMECallAttrs(const CallBase &CB, const RTLIB::RuntimeLibcallsInfo *RTLCI);
SMEAttrs &caller() { return CallerFn; }
SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index e8b211f..7f00ead 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -176,6 +176,19 @@ def binop_s64_with_s32_mask_combines : GICombineGroup<[
combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask
]>;
+// (or i64:x, (zext i32:y)) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+// (or (zext i32:y), i64:x) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+def or_s64_zext_s32_frag : GICombinePatFrag<(outs root:$dst), (ins $src_s64, $src_s32),
+ [(pattern (G_OR $dst, i64:$src_s64, i64:$zext_val), (G_ZEXT i64:$zext_val, i32:$src_s32)),
+ (pattern (G_OR $dst, i64:$zext_val, i64:$src_s64), (G_ZEXT i64:$zext_val, i32:$src_s32))]>;
+
+def combine_or_s64_s32 : GICombineRule<
+ (defs root:$dst),
+ (match (or_s64_zext_s32_frag $dst, i64:$x, i32:$y):$dst),
+ (apply (G_UNMERGE_VALUES $x_lo, $x_hi, $x),
+ (G_OR $or, $x_lo, $y),
+ (G_MERGE_VALUES $dst, $or, $x_hi))>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
// saves one instruction compared to the promotion.
@@ -206,7 +219,7 @@ def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16,
foldable_fneg, combine_shuffle_vector_to_build_vector,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
@@ -215,7 +228,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
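
The new combine_or_s64_s32 rule relies on the observation that OR-ing a zero-extended 32-bit value into a 64-bit value can only affect the low half, so the 64-bit OR can be narrowed. The equivalent scalar identity, written out as a C++ sketch (the or_zext32 name is illustrative):

#include <cstdint>

// (or i64:x, (zext i32:y)) == merge(or(lo_32(x), y), hi_32(x))
uint64_t or_zext32(uint64_t x, uint32_t y) {
  uint32_t lo = static_cast<uint32_t>(x) | y;    // G_OR on the low 32 bits only
  uint32_t hi = static_cast<uint32_t>(x >> 32);  // high 32 bits pass through
  return (static_cast<uint64_t>(hi) << 32) | lo; // G_MERGE_VALUES
}
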
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 596a895..1a13b22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
}
+ auto &MinNumMaxNumIeee =
+ getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+
+ if (ST.hasVOP3PInsts()) {
+ MinNumMaxNumIeee.legalFor(FPTypesPK16)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampMaxNumElements(0, S16, 2)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else if (ST.has16BitInsts()) {
+ MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0);
+ } else {
+ MinNumMaxNumIeee.legalFor(FPTypesBase)
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
+ }
+
auto &MinNumMaxNum = getActionDefinitionsBuilder(
- {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
- G_FMAXNUM_IEEE});
+ {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM});
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
@@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
.scalarize(0);
+ } else if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
+ .lowerFor({V2S16})
+ .clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
+ .lower();
} else {
- // TODO: Implement
- getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
+ .scalarize(0)
+ .clampScalar(0, S32, S64)
+ .lower();
}
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
@@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINIMUMNUM:
case TargetOpcode::G_FMAXIMUMNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE:
return legalizeMinNumMaxNum(Helper, MI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, B);
@@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
MachineFunction &MF = Helper.MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
- MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
-
- // With ieee_mode disabled, the instructions have the correct behavior
- // already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
- //
- // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
- // enabled.
- if (!MFI->getMode().IEEE) {
- if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
- MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
- return true;
-
- return !IsIEEEOp;
- }
-
- if (IsIEEEOp)
+ // With ieee_mode disabled, the instructions have the correct behavior.
+ if (!MFI->getMode().IEEE)
return true;
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 53be167..10d4cd5 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6546,23 +6546,25 @@ def KCFI_CHECK_ARM
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsARM]> {
- let Size = 28; // 7 instructions (bic, ldr, 4x eor, beq, udf)
+ let Size = 40; // worst-case 10 instructions @ 4 bytes each
+ // (push, bic, ldr, 4x eor, pop, beq, udf)
}
def KCFI_CHECK_Thumb2
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsThumb2]> {
- let Size =
- 32; // worst-case 9 instructions (push, bic, ldr, 4x eor, pop, beq.w, udf)
+ let Size = 34; // worst-case (push.w[2], bic[4], ldr[4], 4x eor[16], pop.w[2],
+ // beq.w[4], udf[2])
}
def KCFI_CHECK_Thumb1
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsThumb1Only]> {
- let Size = 50; // worst-case 25 instructions (pushes, bic helper, type
- // building, cmp, pops)
+ let Size = 38; // worst-case 19 instructions @ 2 bytes each
+ // (2x push, 3x bic-helper, subs+ldr, 13x type-building, cmp,
+ // 2x pop, beq, bkpt)
}
//===----------------------------------------------------------------------===//
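
The Size values in the KCFI_CHECK pseudo-instructions above are worst-case byte counts built from the per-instruction encodings listed in the comments. A quick C++ check of that arithmetic (the instruction breakdowns are taken from the comments, not re-derived from the expansion code):

#include <cassert>

int main() {
  int Arm    = 10 * 4;                        // 10 A32 instructions, 4 bytes each
  int Thumb2 = 2 + 4 + 4 + 4 * 4 + 2 + 4 + 2; // push.w, bic, ldr, 4x eor, pop.w, beq.w, udf
  int Thumb1 = 19 * 2;                        // 19 16-bit instructions, 2 bytes each
  assert(Arm == 40 && Thumb2 == 34 && Thumb1 == 38);
  return 0;
}
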
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 42e90f0..d6fa65f 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file This file contains pases and utilities to convert a modern LLVM
+/// \file This file contains passes and utilities to convert a modern LLVM
/// module into a module compatible with the LLVM 3.7-based DirectX Intermediate
/// Language (DXIL).
//===----------------------------------------------------------------------===//
@@ -16,7 +16,6 @@
#include "DirectX.h"
#include "DirectXIRPasses/PointerTypeAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/Analysis/DXILResource.h"
@@ -27,7 +26,6 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/VersionTuple.h"
#define DEBUG_TYPE "dxil-prepare"
@@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F,
F.removeRetAttrs(DeadAttrs);
}
-static void cleanModuleFlags(Module &M) {
- NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
- if (!MDFlags)
- return;
-
- SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
- M.getModuleFlagsMetadata(FlagEntries);
- bool Updated = false;
- for (auto &Flag : FlagEntries) {
- // llvm 3.7 only supports behavior up to AppendUnique.
- if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
- continue;
- Flag.Behavior = Module::ModFlagBehavior::Warning;
- Updated = true;
- }
-
- if (!Updated)
- return;
-
- MDFlags->eraseFromParent();
-
- for (auto &Flag : FlagEntries)
- M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
-}
-
class DXILPrepareModule : public ModulePass {
static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
@@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass {
Builder.getPtrTy(PtrTy->getAddressSpace())));
}
- static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
- return {M.getMDKindID("dx.nonuniform"),
- M.getMDKindID("dx.controlflow.hints"),
- M.getMDKindID("dx.precise"),
- llvm::LLVMContext::MD_range,
- llvm::LLVMContext::MD_alias_scope,
- llvm::LLVMContext::MD_noalias};
- }
-
public:
bool runOnModule(Module &M) override {
PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M);
@@ -224,10 +188,7 @@ public:
const dxil::ModuleMetadataInfo MetadataInfo =
getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
VersionTuple ValVer = MetadataInfo.ValidatorVersion;
- bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
-
- // construct allowlist of valid metadata node kinds
- std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
+ bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
for (auto &F : M.functions()) {
F.removeFnAttrs(AttrMask);
@@ -235,7 +196,7 @@ public:
// Only remove string attributes if we are not skipping validation.
// This will reserve the experimental attributes when validation version
// is 0.0 for experiment mode.
- removeStringFunctionAttributes(F, SkipValidation);
+ removeStringFunctionAttributes(F, AllowExperimental);
for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
F.removeParamAttrs(Idx, AttrMask);
@@ -243,11 +204,17 @@ public:
IRBuilder<> Builder(&BB);
for (auto &I : make_early_inc_range(BB)) {
- I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ CB->removeFnAttrs(AttrMask);
+ CB->removeRetAttrs(AttrMask);
+ for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
+ CB->removeParamAttrs(Idx, AttrMask);
+ continue;
+ }
// Emtting NoOp bitcast instructions allows the ValueEnumerator to be
// unmodified as it reserves instruction IDs during contruction.
- if (auto LI = dyn_cast<LoadInst>(&I)) {
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, LI->getPointerOperand(),
LI->getType())) {
@@ -257,7 +224,7 @@ public:
}
continue;
}
- if (auto SI = dyn_cast<StoreInst>(&I)) {
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, SI->getPointerOperand(),
SI->getValueOperand()->getType())) {
@@ -268,39 +235,16 @@ public:
}
continue;
}
- if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, GEP->getPointerOperand(),
GEP->getSourceElementType()))
GEP->setOperand(0, NoOpBitcast);
continue;
}
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- CB->removeFnAttrs(AttrMask);
- CB->removeRetAttrs(AttrMask);
- for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
- CB->removeParamAttrs(Idx, AttrMask);
- continue;
- }
}
}
}
- // Remove flags not for DXIL.
- cleanModuleFlags(M);
-
- // dx.rootsignatures will have been parsed from its metadata form as its
- // binary form as part of the RootSignatureAnalysisWrapper, so safely
- // remove it as it is not recognized in DXIL
- if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
- RootSignature->eraseFromParent();
-
- // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
- // causes all tests using the DXIL Validator to fail.
- //
- // This is a temporary fix and should be replaced with a whitelist once
- // we have determined all metadata that the DXIL Validator allows
- if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
- ErrNo->eraseFromParent();
return true;
}
@@ -308,11 +252,11 @@ public:
DXILPrepareModule() : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DXILMetadataAnalysisWrapperPass>();
- AU.addRequired<RootSignatureAnalysisWrapper>();
- AU.addPreserved<RootSignatureAnalysisWrapper>();
- AU.addPreserved<ShaderFlagsAnalysisWrapper>();
+
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addPreserved<RootSignatureAnalysisWrapper>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
static char ID; // Pass identification.
};
@@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0;
INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module",
false, false)
INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false,
false)
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index 9eebcc9..1e4797b 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===//
#include "DXILTranslateMetadata.h"
+#include "DXILRootSignature.h"
#include "DXILShaderFlags.h"
#include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
@@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
return MDNode::get(Ctx, MDVals);
}
-MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures,
- MDNode *Resources, MDTuple *Properties,
- LLVMContext &Ctx) {
+static MDTuple *constructEntryMetadata(const Function *EntryFn,
+ MDTuple *Signatures, MDNode *Resources,
+ MDTuple *Properties, LLVMContext &Ctx) {
// Each entry point metadata record specifies:
// * reference to the entry point function global symbol
// * unmangled name
@@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD,
return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx);
}
-// TODO: We might need to refactor this to be more generic,
-// in case we need more metadata to be replaced.
-static void translateBranchMetadata(Module &M) {
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- Instruction *BBTerminatorInst = BB.getTerminator();
+static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) {
+ MDNode *HlslControlFlowMD =
+ BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+
+ if (!HlslControlFlowMD)
+ return;
- MDNode *HlslControlFlowMD =
- BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+ assert(HlslControlFlowMD->getNumOperands() == 2 &&
+ "invalid operands for hlsl.controlflow.hint");
- if (!HlslControlFlowMD)
- continue;
+ MDBuilder MDHelper(M.getContext());
- assert(HlslControlFlowMD->getNumOperands() == 2 &&
- "invalid operands for hlsl.controlflow.hint");
+ llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints");
+ llvm::Metadata *HintsValue = MDHelper.createConstant(
+ mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1)));
- MDBuilder MDHelper(M.getContext());
- ConstantInt *Op1 =
- mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1));
+ MDNode *MDNode = llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue});
- SmallVector<llvm::Metadata *, 2> Vals(
- ArrayRef<Metadata *>{MDHelper.createString("dx.controlflow.hints"),
- MDHelper.createConstant(Op1)});
+ BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode);
+ BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+}
+
+static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
+ return {
+ M.getMDKindID("dx.nonuniform"), M.getMDKindID("dx.controlflow.hints"),
+ M.getMDKindID("dx.precise"), llvm::LLVMContext::MD_range,
+ llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias};
+}
- MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals);
+static void translateInstructionMetadata(Module &M) {
+ // construct allowlist of valid metadata node kinds
+ std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
- BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode);
- BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ // This needs to be done first so that "hlsl.controlflow.hints" isn't
+ // removed in the whitelist below
+ if (auto *I = BB.getTerminator())
+ translateBranchMetadata(M, I);
+
+ for (auto &I : make_early_inc_range(BB)) {
+ I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
+ }
}
}
}
-static void translateMetadata(Module &M, DXILResourceMap &DRM,
- DXILResourceTypeMap &DRTM,
- const ModuleShaderFlags &ShaderFlags,
- const ModuleMetadataInfo &MMDI) {
+static void cleanModuleFlags(Module &M) {
+ NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+ if (!MDFlags)
+ return;
+
+ SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
+ M.getModuleFlagsMetadata(FlagEntries);
+ bool Updated = false;
+ for (auto &Flag : FlagEntries) {
+ // llvm 3.7 only supports behavior up to AppendUnique.
+ if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
+ continue;
+ Flag.Behavior = Module::ModFlagBehavior::Warning;
+ Updated = true;
+ }
+
+ if (!Updated)
+ return;
+
+ MDFlags->eraseFromParent();
+
+ for (auto &Flag : FlagEntries)
+ M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
+}
+
+static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM,
+ DXILResourceTypeMap &DRTM,
+ const ModuleShaderFlags &ShaderFlags,
+ const ModuleMetadataInfo &MMDI) {
LLVMContext &Ctx = M.getContext();
IRBuilder<> IRB(Ctx);
SmallVector<MDNode *> EntryFnMDNodes;
@@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM,
M.getOrInsertNamedMetadata("dx.entryPoints");
for (auto *Entry : EntryFnMDNodes)
EntryPointsNamedMD->addOperand(Entry);
+
+ cleanModuleFlags(M);
+
+ // dx.rootsignatures will have been parsed from its metadata form as its
+ // binary form as part of the RootSignatureAnalysisWrapper, so safely
+ // remove it as it is not recognized in DXIL
+ if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
+ RootSignature->eraseFromParent();
+
+ // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+ // causes all tests using the DXIL Validator to fail.
+ //
+ // This is a temporary fix and should be replaced with a allowlist once
+ // we have determined all metadata that the DXIL Validator allows
+ if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+ ErrNo->eraseFromParent();
}
PreservedAnalyses DXILTranslateMetadata::run(Module &M,
@@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M,
const ModuleShaderFlags &ShaderFlags = MAM.getResult<ShaderFlagsAnalysis>(M);
const dxil::ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M);
- translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
- translateBranchMetadata(M);
+ translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+ translateInstructionMetadata(M);
return PreservedAnalyses::all();
}
@@ -409,10 +467,13 @@ public:
AU.addRequired<DXILResourceWrapperPass>();
AU.addRequired<ShaderFlagsAnalysisWrapper>();
AU.addRequired<DXILMetadataAnalysisWrapperPass>();
- AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addRequired<RootSignatureAnalysisWrapper>();
+
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
- AU.addPreserved<ShaderFlagsAnalysisWrapper>();
AU.addPreserved<DXILResourceBindingWrapperPass>();
+ AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addPreserved<RootSignatureAnalysisWrapper>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
bool runOnModule(Module &M) override {
@@ -425,8 +486,8 @@ public:
dxil::ModuleMetadataInfo MMDI =
getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
- translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
- translateBranchMetadata(M);
+ translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+ translateInstructionMetadata(M);
return true;
}
};
@@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
"DXIL Translate Metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper)
+INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
"DXIL Translate Metadata", false, false)
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
index f3f5eb1..4c1ffac 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
@@ -13,7 +13,8 @@
namespace llvm {
-/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes
+/// A pass that transforms LLVM Metadata in the module to it's DXIL equivalent,
+/// then emits all recognized DXIL Metadata
class DXILTranslateMetadata : public PassInfoMixin<DXILTranslateMetadata> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index fb0928b8..ede8463 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion",
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
ExtensionHVXV73, ExtensionHVXV75]>;
+def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V81", "Hexagon HVX instructions",
+ [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67,
+ ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>;
+
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
@@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV75)>;
def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV79)>;
+def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV81)>;
def UseAudio : Predicate<"HST->useAudioOps()">,
AssemblerPredicate<(all_of ExtensionAudio)>;
def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79,
ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv81", HexagonModelV81,
+ [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73,
+ ArchV75, ArchV79, ArchV81,
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
// Need to update the correct features for tiny core.
// Disable NewValueJumps since the packetizer is unable to handle a packet with
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 8984534..9bf4034 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -29,7 +29,8 @@ enum class ArchEnum {
V71,
V73,
V75,
- V79
+ V79,
+ V81
};
inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
@@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
.Case("hexagonv73", Hexagon::ArchEnum::V73)
.Case("hexagonv75", Hexagon::ArchEnum::V75)
.Case("hexagonv79", Hexagon::ArchEnum::V79)
+ .Case("hexagonv81", Hexagon::ArchEnum::V81)
.Default(std::nullopt);
}
} // namespace Hexagon
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 8ec1d93..f623fd0 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>;
def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">;
def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>;
+def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">;
+def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index 93696e0..f4e36fa7 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -7222,3 +7222,595 @@ class DepHVXItinV79 {
[Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
+
+class DepHVXItinV81 {
+ list<InstrItinData> DepHVXItinV81_list = [
+ InstrItinData <tc_0390c1ca, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_37820f4c, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_531b383c, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_72e2b393, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_dcca380f, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2fdd6e6, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 7a1ad3e..48b665c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -13740,3 +13740,891 @@ class DepScalarItinV79 {
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
+
+class DepScalarItinV81 {
+ list<InstrItinData> DepScalarItinV81_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2471c1c8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_33e7e673, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4bf903b0, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_7af3a37e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8f36a2fd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95f43c5e, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af6af259, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e60def48, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f97707c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index ae96753..f8f1c2a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -39178,6 +39178,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_hf_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_qf16 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -39269,6 +39282,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_sf_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_sf_sf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -41116,6 +41142,17 @@ let hasNewValue = 1;
let opNewValue = 0;
let isSolo = 1;
}
+def Y2_tlbpp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = tlbp($Rss32)",
+tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
def Y2_tlbr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 17cb96c..23f4b3a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>;
+
+// V81 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e285e04..7ee280d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
IntNo == Intrinsic::hexagon_V6_vgathermh ||
IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
IntNo == Intrinsic::hexagon_V6_vgathermhw ||
- IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
+ IntNo == Intrinsic::hexagon_V6_vgathermhw_128B ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) {
SelectV65Gather(N);
return;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index c7a4f68..3cc146b 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
case Intrinsic::hexagon_V6_vgathermhw_128B:
Opcode = Hexagon::V6_vgathermhw_pseudo;
break;
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B:
+ Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo;
+ break;
}
SDVTList VTs = CurDAG->getVTList(MVT::Other);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 9f7f434..526b4de 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::hexagon_V6_vgathermhq:
case Intrinsic::hexagon_V6_vgathermhq_128B:
case Intrinsic::hexagon_V6_vgathermhwq:
- case Intrinsic::hexagon_V6_vgathermhwq_128B: {
+ case Intrinsic::hexagon_V6_vgathermhwq_128B:
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B: {
const Module &M = *I.getParent()->getParent()->getParent();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Type *VecTy = I.getArgOperand(1)->getType();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 939841a..47726d6 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
MachineBasicBlock::iterator First;
switch (Opc) {
- case Hexagon::V6_vgathermh_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
+ // This is mainly a placeholder; it will be extended.
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
}
return MI.getIterator();
@@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vL32b_nt_tmp_npred_ai:
case Hexagon::V6_vS32Ub_npred_ai:
case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
case Hexagon::V6_vgathermw_pseudo:
case Hexagon::V6_vgathermhw_pseudo:
case Hexagon::V6_vgathermhq_pseudo:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
index f927f9b..42393d0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>;
defm V6_vgathermw_pseudo : vgathermw<HvxVR>;
defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>;
+
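+// Pseudo for a fused gather+scatter through VTMP; expanded (currently as a
+// placeholder sequence) in HexagonInstrInfo::expandVGatherPseudo.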
+multiclass vgather_scatter_mh<RegisterClass RC> {
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
+ def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv),
+ ".error \"should not emit\" ",
+ []>;
+}
+
+defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>;
+
multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> {
let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td
index b8a9cf3..9bcd4bf 100644
--- a/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td"
include "HexagonScheduleV73.td"
include "HexagonScheduleV75.td"
include "HexagonScheduleV79.td"
+include "HexagonScheduleV81.td" \ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
new file mode 100644
index 0000000..dd5f5a0
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
@@ -0,0 +1,31 @@
+//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def HexagonV81ItinList : DepScalarItinV81, ScalarItin,
+ DepHVXItinV81, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV81_list, ScalarItin_list,
+ DepHVXItinV81_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV81 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV81ItinList.ItinList>;
+
+def HexagonModelV81 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV81;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 7430567..995f66d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -224,6 +224,15 @@ public:
bool useHVXV79Ops() const {
return HexagonHVXVersion >= Hexagon::ArchEnum::V79;
}
+ bool hasV81Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V81;
+ }
+ bool hasV81OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V81;
+ }
+ bool useHVXV81Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V81;
+ }
bool useAudioOps() const { return UseAudioOps; }
bool useCompound() const { return UseCompound; }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 171e294..e925e04 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -31,6 +31,10 @@ using namespace llvm;
static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+cl::opt<bool> HexagonAllowScatterGatherHVX(
+ "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden,
+ cl::desc("Allow auto-generation of HVX scatter-gather"));
+
static cl::opt<bool> EnableV68FloatAutoHVX(
"force-hvx-float", cl::Hidden,
cl::desc("Enable auto-vectorization of floatint point types on v68."));
@@ -354,6 +358,61 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}
+bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const {
+ // For now, assume we cannot handle all HVX data types.
+ if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+ !HexagonAllowScatterGatherHVX)
+ return false;
+ // This must be kept in sync with the HexagonVectorCombine pass.
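+ // The element counts below correspond to full 128-byte HVX vectors
+ // (128 x i8, 64 x i16, 32 x i32); 32 x i16 is the half-vector case.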
+ switch (Ty->getScalarSizeInBits()) {
+ case 8:
+ return (getTypeNumElements(Ty) == 128);
+ case 16:
+ if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
+ return (Alignment >= 2);
+ break;
+ case 32:
+ if (getTypeNumElements(Ty) == 32)
+ return (Alignment >= 4);
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const {
+ if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+ !HexagonAllowScatterGatherHVX)
+ return false;
+ // This must be kept in sync with the HexagonVectorCombine pass.
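+ // Note that, unlike gather, 16-bit scatter is only legal for 64 elements.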
+ switch (Ty->getScalarSizeInBits()) {
+ case 8:
+ return (getTypeNumElements(Ty) == 128);
+ case 16:
+ if (getTypeNumElements(Ty) == 64)
+ return (Alignment >= 2);
+ break;
+ case 32:
+ if (getTypeNumElements(Ty) == 32)
+ return (Alignment >= 4);
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
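+// Fall back to scalarization for any gather/scatter that is not legal on HVX.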
+bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy,
+ Align Alignment) const {
+ return !isLegalMaskedGather(VTy, Alignment);
+}
+
+bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy,
+ Align Alignment) const {
+ return !isLegalMaskedScatter(VTy, Alignment);
+}
+
/// --- Vector TTI end ---
unsigned HexagonTTIImpl::getPrefetchDistance() const {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index dbf16c9..cec2bf9 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -169,6 +169,12 @@ public:
unsigned AddressSpace) const override;
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
unsigned AddressSpace) const override;
+ bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
+ bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override;
+ bool forceScalarizeMaskedGather(VectorType *VTy,
+ Align Alignment) const override;
+ bool forceScalarizeMaskedScatter(VectorType *VTy,
+ Align Alignment) const override;
/// @}
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 9ab5202..5c50ec2 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -57,6 +57,11 @@
#define DEBUG_TYPE "hexagon-vc"
+// Default HVX VTCM page size. The value is boot-time configurable, so we
+// probably want an API to read it; for now, assume 128 KB.
+#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
+
using namespace llvm;
namespace {
@@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
class HvxIdioms {
public:
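+ // Kinds of destinations (users) that a gather/scatter result can feed.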
+ enum DstQualifier {
+ Undefined = 0,
+ Arithmetic,
+ LdSt,
+ LLVM_Gather,
+ LLVM_Scatter,
+ HEX_Gather_Scatter,
+ HEX_Gather,
+ HEX_Scatter,
+ Call
+ };
+
HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
auto *Int32Ty = HVC.getIntTy(32);
HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
@@ -473,6 +490,11 @@ private:
auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
Signedness SgnX, ArrayRef<Value *> WordY,
Signedness SgnY) const -> SmallVector<Value *>;
+ // Gather/scatter idiom handling for Ripple-generated code.
+ bool matchScatter(Instruction &In) const;
+ bool matchGather(Instruction &In) const;
+ Value *processVScatter(Instruction &In) const;
+ Value *processVGather(Instruction &In) const;
VectorType *HvxI32Ty;
VectorType *HvxP32Ty;
@@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
}
auto AlignVectors::run() -> bool {
- LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
+ LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName()
<< '\n');
if (!createAddressGroups())
return false;
@@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
return Ext;
}
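+
+// Recognize the generic masked scatter/gather intrinsics (as produced by
+// Ripple) so the idiom handling below can process them.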
+inline bool HvxIdioms::matchScatter(Instruction &In) const {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In);
+ if (!II)
+ return false;
+ return (II->getIntrinsicID() == Intrinsic::masked_scatter);
+}
+
+inline bool HvxIdioms::matchGather(Instruction &In) const {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In);
+ if (!II)
+ return false;
+ return (II->getIntrinsicID() == Intrinsic::masked_gather);
+}
+
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual);
+
+// Binary instructions we want to handle as users of gather/scatter.
+inline bool isArithmetic(unsigned Opc) {
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ return true;
+ }
+ return false;
+}
+
+// TODO: Maybe use MemoryLocation for this. See getLocOrNone above.
+inline Value *getPointer(Value *Ptr) {
+ assert(Ptr && "Unable to extract pointer");
+ if (isa<AllocaInst>(Ptr) || isa<Argument>(Ptr) || isa<GlobalValue>(Ptr))
+ return Ptr;
+ if (isa<LoadInst>(Ptr) || isa<StoreInst>(Ptr))
+ return getLoadStorePointerOperand(Ptr);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(1);
+ }
+ return nullptr;
+}
+
+static Instruction *selectDestination(Instruction *In,
+ HvxIdioms::DstQualifier &Qual) {
+ Instruction *Destination = nullptr;
+ if (!In)
+ return Destination;
+ if (isa<StoreInst>(In)) {
+ Destination = In;
+ Qual = HvxIdioms::LdSt;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_gather) {
+ Destination = In;
+ Qual = HvxIdioms::LLVM_Gather;
+ } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
+ Destination = In;
+ Qual = HvxIdioms::LLVM_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
+ Destination = In;
+ Qual = HvxIdioms::LdSt;
+ } else if (II->getIntrinsicID() ==
+ Intrinsic::hexagon_V6_vgather_vscattermh) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Gather_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Gather;
+ }
+ } else if (isa<ZExtInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isa<CastInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isa<CallInst>(In)) {
+ Destination = In;
+ Qual = HvxIdioms::Call;
+ } else if (isa<GetElementPtrInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isArithmetic(In->getOpcode())) {
+ Destination = In;
+ Qual = HvxIdioms::Arithmetic;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
+ }
+ return Destination;
+}
+
+// This method attempts to find the destination (user) of a given intrinsic.
+// Given that these are produced only by Ripple, the number of options is
+// limited. The simplest case is an explicit store, which is in fact redundant
+// (since the HVX gather creates its own store during packetization).
+// Nevertheless we need to figure out the address we are storing to. Other
+// cases are more complicated, but still few.
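+// For example, a (hypothetical) trailing
+//   store <64 x i16> %gathered, ptr %dst
+// only tells us where the result must land; the HVX gather pseudo will write
+// to that address directly.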
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) {
+ Instruction *Destination = nullptr;
+ if (!In)
+ return Destination;
+ // Get all possible destinations
+ SmallVector<Instruction *> Users;
+ // Iterate over the uses of the instruction
+ for (auto &U : In->uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser())) {
+ Destination = selectDestination(UI, Qual);
+ if (Destination)
+ Users.push_back(Destination);
+ }
+ }
+ // Now see which of the users (if any) is a memory destination.
+ for (auto *I : Users)
+ if (getPointer(I))
+ return I;
+ return Destination;
+}
+
+// The two intrinsics we handle here have the GEP operand in different
+// positions.
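+// (llvm.masked.gather takes the vector of pointers as operand 0, while
+// llvm.masked.scatter takes the value to store as operand 0 and the vector of
+// pointers as operand 1.)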
+inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) {
+ assert(In && "Bad instruction");
+ IntrinsicInst *IIn = dyn_cast<IntrinsicInst>(In);
+ assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather ||
+ IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
+ "Not a gather Intrinsic");
+ GetElementPtrInst *GEPIndex = nullptr;
+ if (IIn->getIntrinsicID() == Intrinsic::masked_gather)
+ GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(0));
+ else
+ GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(1));
+ return GEPIndex;
+}
+
+// Given the intrinsic, find its GEP argument and extract the base address it
+// uses. The method relies on how Ripple typically forms the GEP for
+// scatter/gather.
+static Value *locateAddressFromIntrinsic(Instruction *In) {
+ GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In);
+ if (!GEPIndex) {
+ LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+ return nullptr;
+ }
+ Value *BaseAddress = GEPIndex->getPointerOperand();
+ auto *IndexLoad = dyn_cast<LoadInst>(BaseAddress);
+ if (IndexLoad)
+ return IndexLoad;
+
+ auto *IndexZEx = dyn_cast<ZExtInst>(BaseAddress);
+ if (IndexZEx) {
+ IndexLoad = dyn_cast<LoadInst>(IndexZEx->getOperand(0));
+ if (IndexLoad)
+ return IndexLoad;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IndexZEx->getOperand(0));
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
+ return locateAddressFromIntrinsic(II);
+ }
+ auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(BaseAddress);
+ if (BaseShuffle) {
+ IndexLoad = dyn_cast<LoadInst>(BaseShuffle->getOperand(0));
+ if (IndexLoad)
+ return IndexLoad;
+ auto *IE = dyn_cast<InsertElementInst>(BaseShuffle->getOperand(0));
+ if (IE) {
+ auto *Src = IE->getOperand(1);
+ IndexLoad = dyn_cast<LoadInst>(Src);
+ if (IndexLoad)
+ return IndexLoad;
+ auto *Alloca = dyn_cast<AllocaInst>(Src);
+ if (Alloca)
+ return Alloca;
+ if (isa<Argument>(Src)) {
+ return Src;
+ }
+ if (isa<GlobalValue>(Src)) {
+ return Src;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
+ return nullptr;
+}
+
+static Type *getIndexType(Value *In) {
+ if (!In)
+ return nullptr;
+
+ if (isa<LoadInst>(In) || isa<StoreInst>(In))
+ return getLoadStoreType(In);
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getType();
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(0)->getType();
+ }
+ return In->getType();
+}
+
+static Value *locateIndexesFromGEP(Value *In) {
+ if (!In)
+ return nullptr;
+ if (isa<LoadInst>(In))
+ return In;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return In;
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ return In;
+ }
+ if (auto *IndexZEx = dyn_cast<ZExtInst>(In))
+ return locateIndexesFromGEP(IndexZEx->getOperand(0));
+ if (auto *IndexSEx = dyn_cast<SExtInst>(In))
+ return locateIndexesFromGEP(IndexSEx->getOperand(0));
+ if (auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(In))
+ return locateIndexesFromGEP(BaseShuffle->getOperand(0));
+ if (auto *IE = dyn_cast<InsertElementInst>(In))
+ return locateIndexesFromGEP(IE->getOperand(1));
+ if (auto *cstDataVector = dyn_cast<ConstantDataVector>(In))
+ return cstDataVector;
+ if (auto *GEPIndex = dyn_cast<GetElementPtrInst>(In))
+ return GEPIndex->getOperand(0);
+ return nullptr;
+}
+
+// Given the intrinsic, find its GEP argument and extract the offsets from the
+// base address it uses.
+static Value *locateIndexesFromIntrinsic(Instruction *In) {
+ GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In);
+ if (!GEPIndex) {
+ LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+ return nullptr;
+ }
+ Value *Indexes = GEPIndex->getOperand(1);
+ if (auto *IndexLoad = locateIndexesFromGEP(Indexes))
+ return IndexLoad;
+
+ LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
+ return nullptr;
+}
+
+// Because of the awkward definition of many Hexagon intrinsics we often have
+// to reinterpret the HVX-native <64 x i16> as <32 x i32>, which in practice is
+// a no-op for all use cases; this only exists to make the IR builder happy.
+inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder,
+ LLVMContext &Ctx, Value *I) {
+ assert(I && "Unable to reinterprete cast");
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ std::vector<unsigned> shuffleMask;
+ for (unsigned i = 0; i < 64; ++i)
+ shuffleMask.push_back(i);
+ Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask);
+ Value *CastShuffle =
+ Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+ return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
+}
+
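+// The cast above produces IR along these lines (illustrative names):
+//   %s = shufflevector <64 x i16> %v, <64 x i16> %v,
+//                      <64 x i32> <i32 0, i32 1, ..., i32 63> ; identity
+//   %c = bitcast <64 x i16> %s to <32 x i32>
+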
+// Recast <128 x i8> as <32 x i32>
+inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder,
+ LLVMContext &Ctx, Value *I) {
+ assert(I && "Unable to reinterprete cast");
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ std::vector<unsigned> shuffleMask;
+ for (unsigned i = 0; i < 128; ++i)
+ shuffleMask.push_back(i);
+ Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask);
+ Value *CastShuffle =
+ Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+ return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
+}
+
+// Create <32 x i32> mask reinterpreted as <128 x i1> with a given pattern
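+// (V6_vandvrt ANDs each byte of the vector with the corresponding byte of the
+// replicated scalar and sets one predicate bit per byte, so a pattern of
+// 0x00ff00ff roughly selects the low byte of every halfword.)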
+inline Value *get_i32_Mask(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder, LLVMContext &Ctx,
+ unsigned int pattern) {
+ std::vector<unsigned int> byteMask;
+ for (unsigned i = 0; i < 32; ++i)
+ byteMask.push_back(pattern);
+
+ return Builder.CreateIntrinsic(
+ HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
+ {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
+ nullptr);
+}
+
+Value *HvxIdioms::processVScatter(Instruction &In) const {
+ auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+  assert(InpTy && "Cannot handle non-vector type for llvm.scatter/gather");
+ unsigned InpSize = HVC.getSizeOf(InpTy);
+ auto *F = In.getFunction();
+ LLVMContext &Ctx = F->getContext();
+ auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
+  assert(ElemTy && "llvm.scatter needs an integer element type");
+ unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
+ LLVM_DEBUG({
+ unsigned Elements = HVC.length(InpTy);
+ dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
+ dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
+ << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
+ << ElemWidth << ")\n";
+ });
+
+ IRBuilder Builder(In.getParent(), In.getIterator(),
+ InstSimplifyFolder(HVC.DL));
+
+ auto *ValueToScatter = In.getOperand(0);
+ LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
+
+ if (HVC.HST.getVectorLength() != InpSize) {
+ LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
+ << ") for vscatter\n");
+ return nullptr;
+ }
+
+ // Base address of indexes.
+ auto *IndexLoad = locateAddressFromIntrinsic(&In);
+ if (!IndexLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+ // Address of destination. Must be in VTCM.
+ auto *Ptr = getPointer(IndexLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+ // Indexes/offsets
+ auto *Indexes = locateIndexesFromIntrinsic(&In);
+ if (!Indexes)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+ Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx),
+ "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
+ // Adjust Indexes
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ Value *CastIndex = nullptr;
+ if (cstDataVector) {
+    // Our indexes are represented as a constant. We need them in a reg.
+ AllocaInst *IndexesAlloca =
+ Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(),
+ IndexesAlloca, "reload_index");
+ } else {
+ if (ElemWidth == 2)
+ CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ else
+ CastIndex = Indexes;
+ }
+ LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+ if (ElemWidth == 1) {
+    // v128i8: there is no native instruction for this.
+    // Do this as two Hi/Lo scatters with masking.
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+    // Extend the indexes. We assume the indexes are in 128i8 format and need
+    // to expand them to Hi/Lo 64i16.
+ Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
+ auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+ auto *UnpackedIndexes = Builder.CreateIntrinsic(
+ HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
+
+ auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+ auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+ [[maybe_unused]] Value *IndexHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+ [[maybe_unused]] Value *IndexLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+ LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+ // Now unpack values to scatter
+ Value *CastSrc =
+ getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter);
+ LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+ auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
+ HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
+ << ")\n");
+
+ [[maybe_unused]] Value *UVSHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
+ [[maybe_unused]] Value *UVSLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
+ LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
+
+ // Create the mask for individual bytes
+ auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+ LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+ [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+ {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ IndexHi, UVSHi},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+ {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ IndexLo, UVSLo},
+ nullptr);
+ } else if (ElemWidth == 2) {
+ Value *CastSrc =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter);
+ LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
+ {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+ CastSrc},
+ nullptr);
+ } else if (ElemWidth == 4) {
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
+ {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+ ValueToScatter},
+ nullptr);
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+ return nullptr;
+ }
+}
+
+Value *HvxIdioms::processVGather(Instruction &In) const {
+ [[maybe_unused]] auto *InpTy =
+ dyn_cast<VectorType>(In.getOperand(0)->getType());
+  assert(InpTy && "Cannot handle non-vector type for llvm.gather");
+ [[maybe_unused]] auto *ElemTy =
+ dyn_cast<PointerType>(InpTy->getElementType());
+ assert(ElemTy && "llvm.gather needs vector of ptr argument");
+ auto *F = In.getFunction();
+ LLVMContext &Ctx = F->getContext();
+ LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
+ << *In.getParent() << "\n");
+ LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
+ << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
+ << ") type(" << *ElemTy << ") Access alignment("
+ << *In.getOperand(1) << ") AddressSpace("
+ << ElemTy->getAddressSpace() << ")\n");
+
+ // TODO: Handle masking of elements.
+  assert(isa<VectorType>(In.getOperand(2)->getType()) &&
+         "llvm.gather needs a vector mask");
+ IRBuilder Builder(In.getParent(), In.getIterator(),
+ InstSimplifyFolder(HVC.DL));
+
+  // See who is using the result. The difference between the LLVM and HVX
+  // vgather intrinsics makes it impossible to handle all cases with temp
+  // storage. Alloca in VTCM is not yet supported, so for now we just bail out
+  // in those cases.
+ HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
+ Instruction *Dst = locateDestination(&In, Qual);
+ if (!Dst) {
+ LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
+ << ")\n");
+
+ // Address of destination. Must be in VTCM.
+ auto *Ptr = getPointer(Dst);
+ if (!Ptr) {
+ LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
+ return nullptr;
+ }
+
+ // Result type. Assume it is a vector type.
+  auto *DstType = dyn_cast<VectorType>(getIndexType(Dst));
+  assert(DstType && "Cannot handle non-vector dst type for llvm.gather");
+
+ // Base address for sources to be loaded
+ auto *IndexLoad = locateAddressFromIntrinsic(&In);
+ if (!IndexLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+ // Gather indexes/offsets
+ auto *Indexes = locateIndexesFromIntrinsic(&In);
+ if (!Indexes)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+
+ Instruction *Gather = nullptr;
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
+    // We fully assume the addresses are in VTCM. We also assume that all
+    // pointers in Operand(0) have the same base(!).
+    // This is the most basic case of all the above.
+ unsigned OutputSize = HVC.getSizeOf(DstType);
+ auto *DstElemTy = cast<IntegerType>(DstType->getElementType());
+ unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy);
+ LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType()
+ << " Address space ("
+ << Ptr->getType()->getPointerAddressSpace() << ")\n"
+ << " Result type : " << *DstType
+ << "\n Size in bytes : " << OutputSize
+ << " element type(" << *DstElemTy
+ << ")\n ElemWidth : " << ElemWidth << " bytes\n");
+
+    auto *IndexType = dyn_cast<VectorType>(getIndexType(Indexes));
+    assert(IndexType &&
+           "Cannot handle non-vector index type for llvm.gather");
+ unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
+ LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
+
+    // The intrinsic takes an i32 instead of a pointer, so cast.
+ Value *CastedPtr = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...]
+ // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty]
+ // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty]
+ if (HVC.HST.getVectorLength() == OutputSize) {
+ if (ElemWidth == 1) {
+        // v128i8: there is no native instruction for this.
+        // Do this as two Hi/Lo gathers with masking.
+        // Unpack the indexes. We assume the indexes are in 128i8 format and
+        // need to expand them to Hi/Lo 64i16.
+ Value *CastIndexes =
+ Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
+ auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+ auto *UnpackedIndexes =
+ Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
+ V6_vunpack, CastIndexes, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
+ << ")\n");
+
+ auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+ auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+ [[maybe_unused]] Value *IndexHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+ [[maybe_unused]] Value *IndexLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+ LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+ // Create the mask for individual bytes
+ auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+ LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+        // We use our destination allocation as temp storage.
+        // This is unlikely to work properly for a masked gather.
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
+ [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
+ // Rematerialize the result
+ [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
+ LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
+        // Same for the low part. Here we use Gather to return a non-null
+        // result from this function and continue to iterate. We also delete
+        // the Dst store below.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
+ Value *LoadedResultLo = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
+ LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
+ // Now we have properly sized bytes in every other position
+ // B b A a c a A b B c f F g G h H is presented as
+ // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H
+ // Use vpack to gather them
+ auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
+ [[maybe_unused]] auto Res = Builder.CreateIntrinsic(
+ NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
+ LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
+ [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
+ LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
+ } else if (ElemWidth == 2) {
+ // v32i16
+ if (IndexWidth == 2) {
+          // Reinterpret 64i16 as 32i32. Only needed for a syntactic IR match.
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+          // Shift all i16 left by 1 to use halfword addressing instead of
+          // byte addressing.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
+ // The 3rd argument is the size of the region to gather from. Probably
+ // want to set it to max VTCM size.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+          LLVM_DEBUG({
+            for (auto &U : Dst->uses()) {
+              if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+                dbgs() << " dst used by: " << *UI << "\n";
+            }
+            for (auto &U : In.uses()) {
+              if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+                dbgs() << " In used by : " << *UI << "\n";
+            }
+          });
+ // Create temp load from result in case the result is used by any
+ // other instruction.
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else {
+          LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
+ return nullptr;
+ }
+ } else if (ElemWidth == 4) {
+ if (IndexWidth == 4) {
+ // v32i32
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted word index: " << *AdjustedIndex << ")\n");
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
+ return nullptr;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
+ return nullptr;
+ }
+ } else if (HVC.HST.getVectorLength() == OutputSize * 2) {
+ // This is half of the reg width, duplicate low in high
+ LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
+ return nullptr;
+ } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
+      LLVM_DEBUG(dbgs() << " Unhandled twice the register size\n");
+ return nullptr;
+ }
+    // Erase the original intrinsic and the store that consumes it.
+ // HVX will create a pseudo for gather that is expanded to gather + store
+ // during packetization.
+ Dst->eraseFromParent();
+ } else if (Qual == HvxIdioms::LLVM_Scatter) {
+ // Gather feeds directly into scatter.
+ LLVM_DEBUG({
+ auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
+      assert(DstInpTy && "Cannot handle non-vector type for llvm.scatter");
+ unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
+ unsigned DstElements = HVC.length(DstInpTy);
+ auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType());
+ assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
+ dbgs() << " Gather feeds into scatter\n Values to scatter : "
+ << *Dst->getOperand(0) << "\n";
+ dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
+ << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
+ << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
+ });
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ if (!isa<PointerType>(Src->getType())) {
+ LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
+ return nullptr;
+ }
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad) {
+ LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+
+ Value *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+ // Shift all i16 left by 1 to match short addressing mode instead of
+ // byte.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
+    // Gather feeds into a previously inserted pseudo intrinsic.
+    // These cannot be in the same packet, so we need to generate another
+    // pseudo that is expanded to .tmp + store:
+    // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt,
+    //                      ModRegs:$Mu, HvxVR:$Vv)
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+        // Our indexes are represented as a constant. We need them in a reg.
+        // This most likely will not work properly since alloca gives us a DDR
+        // stack location. This will be fixed once we teach the compiler about
+        // VTCM.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ } else {
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+ auto *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ Indexes},
+ nullptr);
+ }
+ return Gather;
+ } else if (Qual == HvxIdioms::HEX_Scatter) {
+    // This is the case when the result of a gather is used as an argument to
+    // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it
+    // ourselves. We have to create an alloca, store to it, and replace all
+    // uses with that.
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ CastIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else if (Qual == HvxIdioms::HEX_Gather) {
+    // Gather feeds another gather that has already been replaced with
+    // hexagon_V6_vgathermh_128B.
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+        // Our indexes are represented as a constant. We need them in a reg.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
+ << "\n AddressSpace: "
+ << ResultAlloca->getAddressSpace() << "\n";);
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ }
+ } else if (Qual == HvxIdioms::LLVM_Gather) {
+    // Gather feeds into another gather.
+    LLVM_DEBUG(dbgs() << " Unimplemented vgather to vgather sequence\n");
+ return nullptr;
+ } else
+ llvm_unreachable("Unhandled Qual enum");
+
+ return Gather;
+}
+
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
const FxpOp &Op) const -> Value * {
assert(Op.X.Val->getType() == Op.Y.Val->getType());
@@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool {
It = StartOver ? B.rbegin()
: cast<Instruction>(New)->getReverseIterator();
Changed = true;
+ } else if (matchGather(*It)) {
+ Value *New = processVGather(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n");
+ // We replace original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
+ } else if (matchScatter(*It)) {
+ Value *New = processVScatter(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");
+ // We replace original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
}
}
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 6455757..2f59b7c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) {
case Hexagon::ArchV79:
case Hexagon::ExtensionHVXV79:
return 79;
+ case Hexagon::ArchV81:
+ case Hexagon::ExtensionHVXV81:
+ return 81;
}
llvm_unreachable("Expected valid arch feature");
return 0;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 6b48a21..b8075bd 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"),
cl::init(false));
cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"),
cl::init(false));
+cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"),
+ cl::init(false));
} // namespace
static cl::opt<Hexagon::ArchEnum> EnableHVX(
@@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX(
clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"),
clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"),
clEnumValN(Hexagon::ArchEnum::V79, "v79", "Build for HVX v79"),
+ clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"),
// Sentinel for no value specified.
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
@@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv75";
if (MV79)
return "hexagonv79";
+ if (MV81)
+ return "hexagonv81";
return "";
}
@@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V79:
Result.push_back("+hvxv79");
break;
+ case Hexagon::ArchEnum::V81:
+ Result.push_back("+hvxv81");
+ break;
case Hexagon::ArchEnum::Generic: {
Result.push_back(StringSwitch<StringRef>(CPU)
@@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
.Case("hexagonv71t", "+hvxv71")
.Case("hexagonv73", "+hvxv73")
.Case("hexagonv75", "+hvxv75")
- .Case("hexagonv79", "+hvxv79"));
+ .Case("hexagonv79", "+hvxv79")
+ .Case("hexagonv81", "+hvxv81"));
break;
}
case Hexagon::ArchEnum::NoArch:
@@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
FeatureBitset FB = S;
unsigned CpuArch = ArchV5;
for (unsigned F :
- {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66,
- ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
+ {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67,
+ ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
for (unsigned F :
{ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
- ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) {
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV81:
+ FB.set(ExtensionHVXV81);
+ [[fallthrough]];
case ArchV79:
FB.set(ExtensionHVXV79);
[[fallthrough]];
@@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) {
std::optional<unsigned>
Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
- for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75,
- Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71,
- Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68,
- Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66,
- Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62,
- Hexagon::ExtensionHVXV60})
+ for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79,
+ Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73,
+ Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69,
+ Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67,
+ Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65,
+ Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60})
if (Features.test(Arch))
return Arch;
return {};
@@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) {
for (auto Arch :
- {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71,
- Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66,
- Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55,
- Hexagon::ArchV5})
+ {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73,
+ Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67,
+ Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60,
+ Hexagon::ArchV55, Hexagon::ArchV5})
if (Features.test(Arch))
return Arch;
- llvm_unreachable("Expected arch v5-v79");
+ llvm_unreachable("Expected arch v5-v81");
return 0;
}
@@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
.Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T)
.Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73)
.Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75)
- .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79);
+ .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79)
+ .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81);
}
llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index aca7abd..44d1a44 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4578,6 +4578,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+def : InstAlias<"mtpidr $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsISA3_0]>;
+def : InstAlias<"mfpidr $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsISA3_0]>;
foreach SPRG = 4-7 in {
def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 9e6b7f0..2754d78 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1124,7 +1124,8 @@ def HasStdExtZbkbOrP
"'Base P' (Packed-SIMD)">;
def HasStdExtZbbOrZbkbOrP
- : Predicate<"Subtarget->HasStdExtZbbOrZbkb()|| Subtarget->hasStdExtP()">,
+ : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb() || "
+ "Subtarget->hasStdExtP()">,
AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb, FeatureStdExtP),
"'Zbb' (Basic Bit-Manipulation) or "
"'Zbkb' (Bitmanip instructions for Cryptography) or "
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 219e3f2..1c930ac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -318,8 +318,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
- !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
+ if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() &&
+ !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
+ !Subtarget.hasVendorXAndesPerf() &&
!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
@@ -392,7 +393,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
}
- if (Subtarget.hasStdExtZbb() ||
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
@@ -403,6 +404,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
+    // If we have CLZW but not CTZW, custom promote i32.
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
+ setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
}
if (!Subtarget.hasCPOPLike()) {
@@ -419,13 +423,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// We need the custom lowering to make sure that the resulting sequence
// for the 32bit case is efficient on 64bit targets.
// Use default promotion for i32 without Zbb.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
+ if (Subtarget.is64Bit() &&
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTLZ, XLenVT, Expand);
}
- if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
+ if (Subtarget.hasStdExtP() ||
+ (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction(ISD::ABS, XLenVT, Legal);
} else if (Subtarget.hasShortForwardBranchOpt()) {
// We can use PseudoCCSUB to implement ABS.
@@ -14669,6 +14675,25 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
bool IsCTZ =
N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+
+ // Without Zbb, lower as 32 - clzw(~X & (X-1))
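+  // (~X & (X-1)) keeps exactly the bits below the lowest set bit of X, so for
+  // X with N trailing zeros it equals (1 << N) - 1 and clzw of it is 32 - N.
+  // X == 0 yields all-ones, giving the expected result of 32.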
+ if (IsCTZ && !Subtarget.hasStdExtZbb()) {
+ assert(Subtarget.hasStdExtP());
+
+ NewOp0 = DAG.getFreeze(NewOp0);
+ SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64);
+ SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0,
+ DAG.getConstant(1, DL, MVT::i64));
+ SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1);
+ SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64,
+ DAG.getConstant(32, DL, MVT::i64), CLZW);
+ SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+
unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7d8a919..cc085bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1455,3 +1455,11 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">;
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
+
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtP] in
+def : PatGpr<abs, ABS>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 4c2f7f6..f7b4914 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -218,11 +218,13 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
}
let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in {
- def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">;
+ def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">,
+ SchedUnaryMC<"WriteSF_VFExp", "ReadSF_VFExp">;
}
let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in {
- def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">;
+ def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">,
+ SchedUnaryMC<"WriteSF_VFExpa", "ReadSF_VFExpa">;
}
let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector",
@@ -487,6 +489,48 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in {
defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
}
+class VFExpSchedSEWSet<string mx, bit IsBF16, bit IsApprox> {
+ defvar BaseSet = SchedSEWSet<mx, isF=1>.val;
+ list<int> val = !if(IsBF16, !listremove(BaseSet, [32, 64]),
+ !if(IsApprox, BaseSet, !listremove(BaseSet, [64])));
+}
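+// For example, with mx = "M1" the base floating-point SEW set is expected to
+// be {16, 32, 64}: the BF16 variants then get {16}, the approximate (vfexpa)
+// variants keep {16, 32, 64}, and the regular variants get {16, 32}.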
+multiclass VPseudoVFExp_V<bit IsBF16 = false, bit IsApprox = false> {
+ defvar SchedSuffix = !if(IsApprox, "VFExpa", "VFExp");
+
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ foreach e = VFExpSchedSEWSet<mx, IsBF16, IsApprox>.val in {
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E" # e
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
+ mx, e, forcePassthruRead=true>;
+ def "_V_" # mx # "_E" # e # "_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
+ mx, e, forcePassthruRead=true>;
+ }
+ }
+ }
+}
+
+let Predicates = [HasVendorXSfvfbfexp16e], hasSideEffects = 0 in {
+ let AltFmtType = IS_ALTFMT in {
+ defm PseudoSF_VFEXP_ALT : VPseudoVFExp_V<IsBF16=true>;
+ }
+}
+
+let Predicates = [HasVendorXSfvfexpAnyFloat], hasSideEffects = 0 in {
+ let AltFmtType = IS_NOT_ALTFMT in {
+ defm PseudoSF_VFEXP : VPseudoVFExp_V;
+ }
+}
+
+let Predicates = [HasVendorXSfvfexpa], AltFmtType = IS_NOT_ALTFMT in {
+ defm PseudoSF_VFEXPA : VPseudoVFExp_V<IsApprox=true>;
+}
+
// SDNode
def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
SDTCisVT<1, XLenVT>,
@@ -893,3 +937,36 @@ let Predicates = [HasVendorXSfcease] in {
let rs2 = 0b00101;
}
}
+
+let Predicates = [HasVendorXSfvfbfexp16e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP_ALT",
+ AllBF16Vectors,
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexp16e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
+ [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexp32e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
+ [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa, HasVInstructionsF16] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa64e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF64M1, VF64M2, VF64M4, VF64M8], isSEWAware=1>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 6b9a75f..5429c2a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -599,14 +599,20 @@ def : PatGpr<riscv_zip, ZIP_RV32, i32>;
def : PatGpr<riscv_unzip, UNZIP_RV32, i32>;
} // Predicates = [HasStdExtZbkb, IsRV32]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : PatGpr<ctlz, CLZ>;
+}
+
+let Predicates = [HasStdExtZbb] in {
def : PatGpr<cttz, CTZ>;
def : PatGpr<ctpop, CPOP>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbb, IsRV64] in {
+let Predicates = [HasStdExtZbbOrP, IsRV64] in {
def : PatGpr<riscv_clzw, CLZW>;
+}
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
def : PatGpr<riscv_ctzw, CTZW>;
def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
@@ -614,22 +620,22 @@ def : Pat<(i64 (riscv_negw_max GPR:$rs1)),
(MAX GPR:$rs1, (XLenVT (SUBW (XLenVT X0), GPR:$rs1)))>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : PatGprGpr<smin, MIN>;
def : PatGprGpr<smax, MAX>;
def : PatGprGpr<umin, MINU>;
def : PatGprGpr<umax, MAXU>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in
+let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in
def : PatGpr<bswap, REV8_RV32, i32>;
-let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in
+let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in
def : PatGpr<bswap, REV8_RV64, i64>;
let Predicates = [HasStdExtZbkb] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 637d61fe..36a2f46 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -1588,6 +1588,10 @@ multiclass SiFive7SchedResources<int vlen, bit dualVALU,
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedQ;
+  // TODO: Scheduling info for XSfvfexp* and XSfvfexpa* on SiFive7 will be
+  // added in follow-up patches.
+ defm : UnsupportedSchedXSfvfexp;
+ defm : UnsupportedSchedXSfvfexpa;
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 9ab9636..64ccfd8 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -523,6 +523,8 @@ include "RISCVScheduleZvk.td"
// Vendor Extensions
multiclass UnsupportedSchedXsf {
defm : UnsupportedSchedXsfvcp;
+ defm : UnsupportedSchedXSfvfexp;
+ defm : UnsupportedSchedXSfvfexpa;
defm : UnsupportedSchedXSfvfnrclipxfqf;
defm : UnsupportedSchedXSfvfwmaccqqq;
defm : UnsupportedSchedXSfvqmaccdod;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
index 99632e4..1ee6dc1 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
@@ -99,3 +99,23 @@ defm : LMULWriteRes<"WriteSF_VFWMACC_QQQ", []>;
defm : LMULReadAdvance<"ReadSF_VFWMACC_QQQ", 0>;
} // Unsupported = true
}
+
+defm "" : LMULSEWSchedWritesF<"WriteSF_VFExp">;
+defm "" : LMULSEWSchedReadsF<"ReadSF_VFExp">;
+
+multiclass UnsupportedSchedXSfvfexp {
+let Unsupported = true in {
+defm : LMULSEWWriteResF<"WriteSF_VFExp", []>;
+defm : LMULSEWReadAdvanceF<"ReadSF_VFExp", 0>;
+} // Unsupported = true
+}
+
+defm "" : LMULSEWSchedWritesF<"WriteSF_VFExpa">;
+defm "" : LMULSEWSchedReadsF<"ReadSF_VFExpa">;
+
+multiclass UnsupportedSchedXSfvfexpa {
+let Unsupported = true in {
+defm : LMULSEWWriteResF<"WriteSF_VFExpa", []>;
+defm : LMULSEWReadAdvanceF<"ReadSF_VFExpa", 0>;
+} // Unsupported = true
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 334db4b..4b4fc8f 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -187,7 +187,7 @@ public:
}
bool hasCLZLike() const {
- return HasStdExtZbb || HasVendorXTHeadBb ||
+ return HasStdExtZbb || HasStdExtP || HasVendorXTHeadBb ||
(HasVendorXCVbitmanip && !IsRV64);
}
bool hasCTZLike() const {
@@ -197,7 +197,7 @@ public:
return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64);
}
bool hasREV8Like() const {
- return HasStdExtZbb || HasStdExtZbkb || HasVendorXTHeadBb;
+ return HasStdExtZbb || HasStdExtZbkb || HasStdExtP || HasVendorXTHeadBb;
}
bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; }
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 6261fad..706ab2b 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -160,6 +160,14 @@ FunctionPass *createX86PartialReductionPass();
/// // Analyzes and emits pseudos to support Win x64 Unwind V2.
FunctionPass *createX86WinEHUnwindV2Pass();
+/// The pass transforms load/store of <256 x i32> into AMX load/store
+/// intrinsics or splits the data into two <128 x i32> halves.
+FunctionPass *createX86LowerAMXTypePass();
+
+/// The pass transforms AMX intrinsics to scalar operations if the function
+/// has the optnone attribute or is compiled at O0.
+FunctionPass *createX86LowerAMXIntrinsicsPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
const X86Subtarget &,
const X86RegisterBankInfo &);
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 62073ec..4393f6e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
return false;
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
auto getFoldableLogicOp = [](SDValue Op) {
// Peek through single use bitcast.
if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
@@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
return SDValue();
};
- SDValue A, FoldableOp;
- if ((FoldableOp = getFoldableLogicOp(N1))) {
- A = N0;
- } else if ((FoldableOp = getFoldableLogicOp(N0))) {
- A = N1;
- } else
- return false;
+ SDValue N0, N1, A, FoldableOp;
+
+ // Identify and (optionally) peel an outer NOT that wraps a pure logic tree
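+  // (VPTERNLOG evaluates an arbitrary 3-input boolean function described by
+  // an 8-bit truth-table immediate, so e.g. (xor (and X, Y), all-ones) can be
+  // matched as the inner AND with the final immediate complemented.)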
+ auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) {
+ if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() &&
+ ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) {
+ SDValue InnerOp = Op->getOperand(0);
+
+ if (!getFoldableLogicOp(InnerOp))
+ return SDValue();
+
+ N0 = InnerOp.getOperand(0);
+ N1 = InnerOp.getOperand(1);
+ if ((FoldableOp = getFoldableLogicOp(N1))) {
+ A = N0;
+ return InnerOp;
+ }
+ if ((FoldableOp = getFoldableLogicOp(N0))) {
+ A = N1;
+ return InnerOp;
+ }
+ }
+ return SDValue();
+ };
+
+ bool PeeledOuterNot = false;
+ SDNode *OriN = N;
+ if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) {
+ PeeledOuterNot = true;
+ N = InnerOp.getNode();
+ } else {
+ N0 = N->getOperand(0);
+ N1 = N->getOperand(1);
+
+ if ((FoldableOp = getFoldableLogicOp(N1)))
+ A = N0;
+ else if ((FoldableOp = getFoldableLogicOp(N0)))
+ A = N1;
+ else
+ return false;
+ }
SDValue B = FoldableOp.getOperand(0);
SDValue C = FoldableOp.getOperand(1);
@@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
case ISD::XOR: Imm ^= TernlogMagicA; break;
}
- return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
+ if (PeeledOuterNot)
+ Imm = ~Imm;
+
+ return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm);
}
/// If the high bits of an 'and' operand are known zero, try setting the