Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 51
-rw-r--r--  llvm/lib/Analysis/MemoryProfileInfo.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 58
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 48
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 100
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 95
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 5
-rw-r--r--  llvm/lib/IR/AsmWriter.cpp | 4
-rw-r--r--  llvm/lib/IR/Module.cpp | 9
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 26
-rw-r--r--  llvm/lib/MC/CMakeLists.txt | 3
-rw-r--r--  llvm/lib/MC/MCSFrame.cpp | 155
-rw-r--r--  llvm/lib/Support/Timer.cpp | 20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 13
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Combine.td | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Features.td | 48
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 111
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 117
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 637
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 83
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SystemOperands.td | 370
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 64
-rw-r--r--  llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 201
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 10
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 16
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 18
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 115
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h | 5
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td | 72
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 167
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 33
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 64
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp | 21
-rw-r--r--  llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h | 19
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 53
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 8
-rw-r--r--  llvm/lib/Target/ARM/ARMArchitectures.td | 12
-rw-r--r--  llvm/lib/Target/ARM/ARMFeatures.td | 5
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 9
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 12
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 1
-rw-r--r--  llvm/lib/Target/AVR/AVRInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/AVR/AVRRegisterInfo.td | 25
-rw-r--r--  llvm/lib/Target/DirectX/DXILPrepare.cpp | 89
-rw-r--r--  llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp | 130
-rw-r--r--  llvm/lib/Target/DirectX/DXILTranslateMetadata.h | 3
-rw-r--r--  llvm/lib/Target/Hexagon/Hexagon.td | 13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepArch.h | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepArch.td | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepIICHVX.td | 592
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepIICScalar.td | 888
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td | 37
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 11
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 162
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsV65.td | 13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSchedule.td | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonScheduleV81.td | 31
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.h | 9
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 59
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 884
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 39
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 83
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 22
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleXSf.td | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 56
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 9
-rw-r--r--  llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 2
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParser.cpp | 2
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParserCommon.cpp | 1
-rw-r--r--  llvm/lib/TargetParser/Triple.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 12
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 43
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 14
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 166
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 3
109 files changed, 5609 insertions, 1013 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66..a572eef 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1582,6 +1582,23 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
return nullptr;
}
+/// Returns the absolute value of \p A. In the context of dependence analysis,
+/// we need an absolute value in a mathematical sense. If \p A is the signed
+/// minimum value, we cannot represent it unless extending the original type.
+/// Thus if we cannot prove that \p A is not the signed minimum value, returns
+/// nullptr.
+static const SCEV *absSCEVNoSignedOverflow(const SCEV *A, ScalarEvolution &SE) {
+ IntegerType *Ty = cast<IntegerType>(A->getType());
+ if (!Ty)
+ return nullptr;
+
+ const SCEV *SMin =
+ SE.getConstant(APInt::getSignedMinValue(Ty->getBitWidth()));
+ if (!SE.isKnownPredicate(CmpInst::ICMP_NE, A, SMin))
+ return nullptr;
+ return SE.getAbsExpr(A, /*IsNSW=*/true);
+}
+
/// Returns true iff \p Test is enabled.
static bool isDependenceTestEnabled(DependenceTestType Test) {
if (EnableDependenceTest == DependenceTestType::All)
@@ -1669,21 +1686,25 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
// check that |Delta| < iteration count
- if (const SCEV *UpperBound =
- collectUpperBound(CurSrcLoop, Delta->getType())) {
+ bool IsDeltaLarge = [&] {
+ const SCEV *UpperBound = collectUpperBound(CurSrcLoop, Delta->getType());
+ if (!UpperBound)
+ return false;
+
LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
- const SCEV *AbsDelta =
- SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
- const SCEV *AbsCoeff =
- SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
+ const SCEV *AbsDelta = absSCEVNoSignedOverflow(Delta, *SE);
+ const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE);
+ if (!AbsDelta || !AbsCoeff)
+ return false;
const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
- if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
- // Distance greater than trip count - no dependence
- ++StrongSIVindependence;
- ++StrongSIVsuccesses;
- return true;
- }
+ return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product);
+ }();
+ if (IsDeltaLarge) {
+ // Distance greater than trip count - no dependence
+ ++StrongSIVindependence;
+ ++StrongSIVsuccesses;
+ return true;
}
// Can we compute distance?
@@ -2259,6 +2280,9 @@ bool DependenceInfo::weakZeroSrcSIVtest(
const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
if (!ConstCoeff)
return false;
+
+ // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
+ // TODO: Bail out if it's a signed minimum value.
const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff)
? SE->getNegativeSCEV(ConstCoeff)
: ConstCoeff;
@@ -2369,6 +2393,9 @@ bool DependenceInfo::weakZeroDstSIVtest(
const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
if (!ConstCoeff)
return false;
+
+ // Since ConstCoeff is constant, !isKnownNegative means it's non-negative.
+ // TODO: Bail out if it's a signed minimum value.
const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff)
? SE->getNegativeSCEV(ConstCoeff)
: ConstCoeff;
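
The helper added above bails out when the operand could be the signed minimum because |SIGNED_MIN| is not representable in the same bit width. A minimal standalone sketch of that corner case (plain C++ on int8_t, not the SCEV API; the function name is hypothetical) illustrates why the ICMP_NE-against-SMin check is required:

#include <cstdint>
#include <cstdio>
#include <optional>

static std::optional<int8_t> absNoSignedOverflow(int8_t A) {
  if (A == INT8_MIN)     // mirrors the isKnownPredicate(ICMP_NE, A, SMin) bail-out above
    return std::nullopt; // |A| is not representable without widening the type
  return static_cast<int8_t>(A < 0 ? -A : A);
}

int main() {
  // Two's-complement negation of the minimum wraps back to the minimum itself.
  std::printf("wrapped abs(-128) = %d\n", static_cast<int8_t>(0 - INT8_MIN));
  std::printf("guarded abs(-127) = %d\n", static_cast<int>(*absNoSignedOverflow(-127)));
  return 0;
}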
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 92a5b6f..b09f4ed 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -241,9 +241,13 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
ColdBytes += TotalSize;
// If we have the max cold context size from summary information and have
// requested identification of contexts above a percentage of the max, see
- // if this context qualifies.
- if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
- TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize)
+ // if this context qualifies. We should assume this is large if we rebuilt
+ // the trie from existing metadata (i.e. to update after inlining), in
+ // which case we don't have a MaxSize from the profile - we assume any
+ // context size info in existence on the metadata should be propagated.
+ if (BuiltFromExistingMetadata ||
+ (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
+ TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize))
LargeColdContext = true;
}
// Only add the context size info as metadata if we need it in the thin
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b425b95..1f10478 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors(
MI.eraseFromParent();
}
-bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
- "Invalid instruction");
- auto &Shuffle = cast<GShuffleVector>(MI);
-
- Register SrcVec1 = Shuffle.getSrc1Reg();
- Register SrcVec2 = Shuffle.getSrc2Reg();
-
- LLT SrcVec1Type = MRI.getType(SrcVec1);
- LLT SrcVec2Type = MRI.getType(SrcVec2);
- return SrcVec1Type.isVector() && SrcVec2Type.isVector();
-}
-
void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
auto &Shuffle = cast<GShuffleVector>(MI);
@@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector(
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
Register Src1 = MI.getOperand(1).getReg();
LLT SrcType = MRI.getType(Src1);
- // As bizarre as it may look, shuffle vector can actually produce
- // scalar! This is because at the IR level a <1 x ty> shuffle
- // vector is perfectly valid.
- unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
- unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
+
+ unsigned DstNumElts = DstType.getNumElements();
+ unsigned SrcNumElts = SrcType.getNumElements();
// If the resulting vector is smaller than the size of the source
// vectors being concatenated, we won't be able to replace the
@@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector(
//
// TODO: If the size between the source and destination don't match
// we could still emit an extract vector element in that case.
- if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
+ if (DstNumElts < 2 * SrcNumElts)
return false;
// Check that the shuffle mask can be broken evenly between the
@@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector(
MI.eraseFromParent();
}
-bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
- "Invalid instruction kind");
-
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- return Mask.size() == 1;
-}
-
-void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const {
- Register DstReg = MI.getOperand(0).getReg();
- Builder.setInsertPt(*MI.getParent(), MI);
-
- int I = MI.getOperand(3).getShuffleMask()[0];
- Register Src1 = MI.getOperand(1).getReg();
- LLT Src1Ty = MRI.getType(Src1);
- int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
- Register SrcReg;
- if (I >= Src1NumElts) {
- SrcReg = MI.getOperand(2).getReg();
- I -= Src1NumElts;
- } else if (I >= 0)
- SrcReg = Src1;
-
- if (I < 0)
- Builder.buildUndef(DstReg);
- else if (!MRI.getType(SrcReg).isVector())
- Builder.buildCopy(DstReg, SrcReg);
- else
- Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
-
- MI.eraseFromParent();
-}
-
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
@@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
return false;
ArrayRef<int> Mask = Shuffle.getMask();
- const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
+ const unsigned NumSrcElems = Src1Ty.getNumElements();
bool TouchesSrc1 = false;
bool TouchesSrc2 = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 04d9309..d6f23b6 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
Depth + 1);
+ OffsetKnown = OffsetKnown.sext(BitWidth);
+ WidthKnown = WidthKnown.sext(BitWidth);
Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
// Sign extend the extracted value using shift left and arithmetic shift
// right.
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b49040b..1fc90d0 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3359,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U,
Mask = SVI->getShuffleMask();
else
Mask = cast<ConstantExpr>(U).getShuffleMask();
+
+ // As GISel does not represent <1 x > vectors as a separate type from scalars,
+ // we transform shuffle_vector with a scalar output to an
+ // ExtractVectorElement. If the input type is also scalar it becomes a Copy.
+ unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements();
+ unsigned SrcElts =
+ cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements();
+ if (DstElts == 1) {
+ unsigned M = Mask[0];
+ if (SrcElts == 1) {
+ if (M == 0 || M == 1)
+ return translateCopy(U, *U.getOperand(M), MIRBuilder);
+ MIRBuilder.buildUndef(getOrCreateVReg(U));
+ } else {
+ Register Dst = getOrCreateVReg(U);
+ if (M < SrcElts) {
+ MIRBuilder.buildExtractVectorElementConstant(
+ Dst, getOrCreateVReg(*U.getOperand(0)), M);
+ } else if (M < SrcElts * 2) {
+ MIRBuilder.buildExtractVectorElementConstant(
+ Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts);
+ } else {
+ MIRBuilder.buildUndef(Dst);
+ }
+ }
+ return true;
+ }
+
+ // A single element src is transformed to a build_vector.
+ if (SrcElts == 1) {
+ SmallVector<Register> Ops;
+ Register Undef;
+ for (int M : Mask) {
+ LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL);
+ if (M == 0 || M == 1) {
+ Ops.push_back(getOrCreateVReg(*U.getOperand(M)));
+ } else {
+ if (!Undef.isValid()) {
+ Undef = MRI->createGenericVirtualRegister(SrcTy);
+ MIRBuilder.buildUndef(Undef);
+ }
+ Ops.push_back(Undef);
+ }
+ }
+ MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops);
+ return true;
+ }
+
ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
MIRBuilder
.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
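
The new single-destination-element path above reduces a <1 x Ty> shuffle to a copy, an extract, or an undef depending on the one mask entry. The sketch below mirrors that dispatch in plain C++ (no MIRBuilder calls; the enum and function names are hypothetical), using the LLVM convention that a negative mask entry denotes an undefined lane:

#include <cstdio>

// Lowering choices for the single-destination-element case; names hypothetical.
enum class Lowering { CopySrc0, CopySrc1, ExtractSrc0, ExtractSrc1, Undef };

static Lowering lowerSingleEltShuffle(int M, unsigned SrcElts) {
  if (M < 0)
    return Lowering::Undef;                 // undefined mask lane
  unsigned UM = static_cast<unsigned>(M);
  if (SrcElts == 1)                         // scalar sources become a plain copy
    return UM == 0 ? Lowering::CopySrc0 : Lowering::CopySrc1;
  if (UM < SrcElts)
    return Lowering::ExtractSrc0;           // extract element UM from operand 0
  if (UM < 2 * SrcElts)
    return Lowering::ExtractSrc1;           // extract element UM - SrcElts from operand 1
  return Lowering::Undef;
}

int main() {
  std::printf("%d %d %d\n",
              static_cast<int>(lowerSingleEltShuffle(0, 1)),   // CopySrc0
              static_cast<int>(lowerSingleEltShuffle(5, 4)),   // ExtractSrc1
              static_cast<int>(lowerSingleEltShuffle(-1, 4))); // Undef
}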
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 38ec83f..178529f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_FMINIMUMNUM:
case G_FMAXIMUMNUM:
return lowerFMinNumMaxNum(MI);
+ case G_FMINIMUM:
+ case G_FMAXIMUM:
+ return lowerFMinimumMaximum(MI);
case G_MERGE_VALUES:
return lowerMergeValues(MI);
case G_UNMERGE_VALUES:
@@ -5819,6 +5822,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+ } else if (NewElts == 1) {
+ Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
} else {
Register Op0 = Inputs[InputUsed[0]];
// If only one input was used, use an undefined vector for the other.
@@ -8775,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
+ LLT Ty = MRI.getType(Dst);
+ LLT CmpTy = Ty.changeElementSize(1);
+
+ bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
+ unsigned OpcIeee =
+ IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
+ unsigned OpcNonIeee =
+ IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
+ bool MinMaxMustRespectOrderedZero = false;
+ Register Res;
+
+ // IEEE variants don't need canonicalization
+ if (LI.isLegalOrCustom({OpcIeee, Ty})) {
+ Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
+ MinMaxMustRespectOrderedZero = true;
+ } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
+ Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
+ } else {
+ auto Compare = MIRBuilder.buildFCmp(
+ IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
+ Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
+ }
+
+ // Propagate any NaN of both operands
+ if (!MI.getFlag(MachineInstr::FmNoNans) &&
+      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
+ auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
+
+ LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
+ APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
+ Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
+ if (Ty.isVector())
+ NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
+
+ Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
+ }
+
+ // fminimum/fmaximum requires -0.0 less than +0.0
+ if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
+ GISelValueTracking VT(MIRBuilder.getMF());
+ KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
+ KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
+
+ if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
+ const unsigned Flags = MI.getFlags();
+ Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
+ auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
+
+ unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
+
+ auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
+ auto LHSSelect =
+ MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
+
+ auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
+ auto RHSSelect =
+ MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
+
+ Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
+ }
+ }
+
+ MIRBuilder.buildCopy(Dst, Res);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
// Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
Register DstReg = MI.getOperand(0).getReg();
@@ -9016,22 +9092,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
continue;
}
- if (Src0Ty.isScalar()) {
- BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
- } else {
- int NumElts = Src0Ty.getNumElements();
- Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
- int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
- auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
- auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
- BuildVec.push_back(Extract.getReg(0));
- }
+ assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
+
+ int NumElts = Src0Ty.getNumElements();
+ Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+ int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+ auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+ auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+ BuildVec.push_back(Extract.getReg(0));
}
- if (DstTy.isVector())
- MIRBuilder.buildBuildVector(DstReg, BuildVec);
- else
- MIRBuilder.buildCopy(DstReg, BuildVec[0]);
+ assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
MI.eraseFromParent();
return Legalized;
}
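
For reference, the G_FMINIMUM/G_FMAXIMUM lowering added earlier in this file targets IEEE-754 minimum/maximum semantics: any NaN operand propagates, and -0.0 orders below +0.0. A scalar C++ sketch of those target semantics (assuming float; this is a reference for the behavior, not the MIR expansion itself):

#include <cmath>
#include <cstdio>
#include <limits>

static float fmaximumRef(float A, float B) {
  if (std::isnan(A) || std::isnan(B))               // propagate any NaN operand
    return std::numeric_limits<float>::quiet_NaN();
  if (A == 0.0f && B == 0.0f)                       // ordered zeros: +0.0 beats -0.0
    return std::signbit(A) ? B : A;
  return A > B ? A : B;
}

int main() {
  float Z = fmaximumRef(-0.0f, +0.0f);
  std::printf("value=%g signbit=%d\n", Z, std::signbit(Z)); // value=0 signbit=0
  std::printf("nan in -> %g out\n", fmaximumRef(1.0f, std::nanf("")));
  return 0;
}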
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 27df7e3..4b4df98 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
LLT Src1Ty = Src1.getLLTTy(*getMRI());
LLT Src2Ty = Src2.getLLTTy(*getMRI());
- const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
- const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty;
- const LLT ElemTy2 = Src2Ty.isVector() ? Src2Ty.getElementType() : Src2Ty;
+ const LLT DstElemTy = DstTy.getScalarType();
+ const LLT ElemTy1 = Src1Ty.getScalarType();
+ const LLT ElemTy2 = Src2Ty.getScalarType();
assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2);
+ assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported");
(void)DstElemTy;
(void)ElemTy1;
(void)ElemTy2;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6a464d9..4795d81 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
if (expectAndConsume(MIToken::rparen))
return error("shufflemask should be terminated by ')'.");
+ if (ShufMask.size() < 2)
+ return error("shufflemask should have > 1 element");
+
ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);
Dest = MachineOperand::CreateShuffleMask(MaskAlloc);
return false;
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 1154855..c0710c4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (Src0Ty != Src1Ty)
report("Source operands must be the same type", MI);
- if (Src0Ty.getScalarType() != DstTy.getScalarType())
+ if (Src0Ty.getScalarType() != DstTy.getScalarType()) {
report("G_SHUFFLE_VECTOR cannot change element type", MI);
+ break;
+ }
+ if (!Src0Ty.isVector()) {
+ report("G_SHUFFLE_VECTOR must have vector src", MI);
+ break;
+ }
+ if (!DstTy.isVector()) {
+ report("G_SHUFFLE_VECTOR must have vector dst", MI);
+ break;
+ }
// Don't check that all operands are vector because scalars are used in
// place of 1 element vectors.
- int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
- int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+ int SrcNumElts = Src0Ty.getNumElements();
+ int DstNumElts = DstTy.getNumElements();
ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask();
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 72b364c..697b779 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -211,7 +211,7 @@ private:
unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }
};
- using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>;
+ using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>;
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d2ea652..8676060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
- for (SDNode *Op : Ptr->users()) {
+ for (SDUse &U : Ptr->uses()) {
+ if (U.getResNo() != Ptr.getResNo())
+ continue;
+
// Check for #1.
+ SDNode *Op = U.getUser();
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index bfa566a..dee0909 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}
+SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) {
+ // If the new exception behavior differs from that of the pending
+ // ones, chain up them and update the root.
+ switch (EB) {
+ case fp::ExceptionBehavior::ebMayTrap:
+ case fp::ExceptionBehavior::ebIgnore:
+ // Floating-point exceptions produced by such operations are not intended
+ // to be observed, so the sequence of these operations does not need to be
+ // preserved.
+ //
+ // They however must not be mixed with the instructions that have strict
+ // exception behavior. Placing an operation with 'ebIgnore' behavior between
+ // 'ebStrict' operations could distort the observed exception behavior.
+ if (!PendingConstrainedFPStrict.empty()) {
+ assert(PendingConstrainedFP.empty());
+ updateRoot(PendingConstrainedFPStrict);
+ }
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+    // Floating-point exceptions produced by these operations may be observed, so
+ // they must be correctly chained. If trapping on FP exceptions is
+ // disabled, the exceptions can be observed only by functions that read
+ // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that
+ // the order of operations is not significant between barriers.
+ //
+ // If trapping is enabled, each operation becomes an implicit observation
+    // point, so the operations must be sequenced according to their original
+ // source order.
+ if (!PendingConstrainedFP.empty()) {
+ assert(PendingConstrainedFPStrict.empty());
+ updateRoot(PendingConstrainedFP);
+ }
+ // TODO: Add support for trapping-enabled scenarios.
+ }
+ return DAG.getRoot();
+}
+
SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
@@ -8298,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
}
+void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result,
+ fp::ExceptionBehavior EB) {
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ assert(OutChain.getValueType() == MVT::Other);
+
+ // Instead of updating the root immediately, push the produced chain to the
+ // appropriate list, deferring the update until the root is requested. In this
+ // case, the nodes from the lists are chained using TokenFactor, indicating
+ // that the operations are independent.
+ //
+ // In particular, the root is updated before any call that might access the
+ // floating-point environment, except for constrained intrinsics.
+ switch (EB) {
+ case fp::ExceptionBehavior::ebMayTrap:
+ case fp::ExceptionBehavior::ebIgnore:
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+}
+
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
@@ -8305,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
- SDValue Chain = DAG.getRoot();
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
+ SDValue Chain = getFPOperationRoot(EB);
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I)
Opers.push_back(getValue(FPI.getArgOperand(I)));
- auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
- assert(Result.getNode()->getNumValues() == 2);
-
- // Push node to the appropriate list so that future instructions can be
- // chained up correctly.
- SDValue OutChain = Result.getValue(1);
- switch (EB) {
- case fp::ExceptionBehavior::ebIgnore:
- // The only reason why ebIgnore nodes still need to be chained is that
- // they might depend on the current rounding mode, and therefore must
- // not be moved across instruction that may change that mode.
- [[fallthrough]];
- case fp::ExceptionBehavior::ebMayTrap:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks.
- PendingConstrainedFP.push_back(OutChain);
- break;
- case fp::ExceptionBehavior::ebStrict:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks or read floating-point exception flags.
- // In addition, they cannot be optimized out even if unused.
- PendingConstrainedFPStrict.push_back(OutChain);
- break;
- }
- };
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
SDVTList VTs = DAG.getVTList(VT, MVT::Other);
- fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
@@ -8364,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
- pushOutChain(Mul, EB);
+ pushFPOpOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
@@ -8395,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
- pushOutChain(Result, EB);
+ pushFPOpOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
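
The distinction drawn above between ebIgnore/ebMayTrap and ebStrict chains exists because strict operations raise exception flags that later code may read. A small self-contained example with <cfenv> (illustrative only, not SelectionDAG code) shows why an ebStrict-like operation must not be reordered past a flag read:

#include <cfenv>
#include <cstdio>

// Most compilers need FENV_ACCESS (or equivalent flags) for strict FP behavior;
// the pragma may be ignored with a warning, which is fine for this illustration.
#pragma STDC FENV_ACCESS ON

int main() {
  std::feclearexcept(FE_ALL_EXCEPT);
  volatile double X = 1.0, Y = 0.0;
  volatile double Z = X / Y;                    // raises FE_DIVBYZERO (ebStrict-like)
  int Raised = std::fetestexcept(FE_DIVBYZERO); // must observe the flag set above
  std::printf("Z=%g divbyzero=%d\n", Z, Raised != 0);
  return 0;
}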
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c7577fa..47e19f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -195,6 +195,11 @@ private:
/// Update root to include all chains from the Pending list.
SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);
+ /// Given a node representing a floating-point operation and its specified
+ /// exception behavior, this either updates the root or stores the node in
+  /// a list to be added to chains later.
+ void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB);
+
/// A unique monotonically increasing number used to order the SDNodes we
/// create.
unsigned SDNodeOrder;
@@ -300,6 +305,13 @@ public:
/// memory node that may need to be ordered after any prior load instructions.
SDValue getMemoryRoot();
+ /// Return the current virtual root of the Selection DAG, flushing
+ /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new
+ /// exception behavior (specified by \p EB) differs from that of the pending
+  /// instructions. This must be done before emitting a constrained FP
+  /// operation call.
+ SDValue getFPOperationRoot(fp::ExceptionBehavior EB);
+
/// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
  /// items. This must be done before emitting any call or any other node
/// that may need to be ordered after FP instructions due to other side
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 060b1dd..59798b3 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
}
Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+ // MSVC CRT has a function to validate security cookie.
+ RTLIB::LibcallImpl SecurityCheckCookieLibcall =
+ getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
+ if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
+ return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
return nullptr;
}
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 488b078..1096e57 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -4082,10 +4082,10 @@ void AssemblyWriter::printTypeIdentities() {
/// printFunction - Print all aspects of a function.
void AssemblyWriter::printFunction(const Function *F) {
- if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
-
if (F->isMaterializable())
Out << "; Materializable\n";
+ else if (AnnotationWriter)
+ AnnotationWriter->emitFunctionAnnot(F, Out);
const AttributeList &Attrs = F->getAttributes();
if (Attrs.hasFnAttrs()) {
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 30b5e48..e19336e 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -403,9 +403,14 @@ void Module::setModuleFlag(ModFlagBehavior Behavior, StringRef Key,
Metadata *Val) {
NamedMDNode *ModFlags = getOrInsertModuleFlagsMetadata();
// Replace the flag if it already exists.
- for (MDNode *Flag : ModFlags->operands()) {
+ for (unsigned i = 0; i < ModFlags->getNumOperands(); ++i) {
+ MDNode *Flag = ModFlags->getOperand(i);
if (cast<MDString>(Flag->getOperand(1))->getString() == Key) {
- Flag->replaceOperandWith(2, Val);
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Metadata *Ops[3] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Behavior)),
+ MDString::get(Context, Key), Val};
+ ModFlags->setOperand(i, MDNode::get(Context, Ops));
return;
}
}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 9d0fa11..4bc2a18 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -471,16 +471,14 @@ static void thinLTOInternalizeAndPromoteGUID(
ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) {
- auto ExternallyVisibleCopies =
- llvm::count_if(VI.getSummaryList(),
- [](const std::unique_ptr<GlobalValueSummary> &Summary) {
- return !GlobalValue::isLocalLinkage(Summary->linkage());
- });
-
// Before performing index-based internalization and promotion for this GUID,
// the local flag should be consistent with the summary list linkage types.
VI.verifyLocal();
+ const bool SingleExternallyVisibleCopy =
+ VI.getSummaryList().size() == 1 &&
+ !GlobalValue::isLocalLinkage(VI.getSummaryList().front()->linkage());
+
for (auto &S : VI.getSummaryList()) {
// First see if we need to promote an internal value because it is not
// exported.
@@ -543,7 +541,9 @@ static void thinLTOInternalizeAndPromoteGUID(
GlobalValue::isExternalWeakLinkage(S->linkage()))
continue;
- if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
+ // We may have a single summary copy that is externally visible but not
+ // prevailing if the prevailing copy is in a native object.
+ if (SingleExternallyVisibleCopy && isPrevailing(VI.getGUID(), S.get()))
S->setLinkage(GlobalValue::InternalLinkage);
}
}
@@ -1086,15 +1086,15 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
if (R.Prevailing)
- ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+ ThinLTO.setPrevailingModuleForGUID(GUID, BM.getModuleIdentifier());
}
}
if (Error Err =
BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
[&](GlobalValue::GUID GUID) {
- return ThinLTO.PrevailingModuleForGUID[GUID] ==
- BM.getModuleIdentifier();
+ return ThinLTO.isPrevailingModuleForGUID(
+ GUID, BM.getModuleIdentifier());
}))
return Err;
LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
@@ -1108,8 +1108,8 @@ LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GlobalValue::getGlobalIdentifier(Sym.getIRName(),
GlobalValue::ExternalLinkage, ""));
if (R.Prevailing) {
- assert(ThinLTO.PrevailingModuleForGUID[GUID] ==
- BM.getModuleIdentifier());
+ assert(
+ ThinLTO.isPrevailingModuleForGUID(GUID, BM.getModuleIdentifier()));
// For linker redefined symbols (via --wrap or --defsym) we want to
// switch the linkage to `weak` to prevent IPOs from happening.
@@ -1988,7 +1988,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
LocalWPDTargetsMap);
auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
- return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+ return ThinLTO.isPrevailingModuleForGUID(GUID, S->modulePath());
};
if (EnableMemProfContextDisambiguation) {
MemProfContextDisambiguation ContextDisambiguation;
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index 1e1d0a6..70c4577 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -73,9 +73,10 @@ add_llvm_component_library(LLVMMC
${LLVM_MAIN_INCLUDE_DIR}/llvm/MC
LINK_COMPONENTS
+ BinaryFormat
+ DebugInfoDWARFLowLevel
Support
TargetParser
- BinaryFormat
DEPENDS
intrinsics_gen
diff --git a/llvm/lib/MC/MCSFrame.cpp b/llvm/lib/MC/MCSFrame.cpp
index d6fa54c..e0a90df 100644
--- a/llvm/lib/MC/MCSFrame.cpp
+++ b/llvm/lib/MC/MCSFrame.cpp
@@ -8,6 +8,8 @@
#include "llvm/MC/MCSFrame.h"
#include "llvm/BinaryFormat/SFrame.h"
+#include "llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h"
+#include "llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
@@ -211,8 +213,152 @@ class SFrameEmitterImpl {
return true;
}
+ // Technically, the escape data could be anything, but it is commonly a dwarf
+ // CFI program. Even then, it could contain an arbitrarily complicated Dwarf
+ // expression. Following gnu-gas, look for certain common cases that could
+ // invalidate an FDE, emit a warning for those sequences, and don't generate
+ // an FDE in those cases. Allow any that are known safe. It is likely that
+ // more thorough test cases could refine this code, but it handles the most
+ // important ones compatibly with gas.
+ // Returns true if the CFI escape sequence is safe for sframes.
+ bool isCFIEscapeSafe(SFrameFDE &FDE, const SFrameFRE &FRE,
+ const MCCFIInstruction &CFI) {
+ const MCAsmInfo *AI = Streamer.getContext().getAsmInfo();
+ DWARFDataExtractorSimple data(CFI.getValues(), AI->isLittleEndian(),
+ AI->getCodePointerSize());
+
+ // Normally, both alignment factors are extracted from the enclosing Dwarf
+ // FDE or CIE. We don't have one here. Alignments are used for scaling
+ // factors for ops like CFA_def_cfa_offset_sf. But this particular function
+ // is only interested in registers.
+ dwarf::CFIProgram P(/*CodeAlignmentFactor=*/1,
+ /*DataAlignmentFactor=*/1,
+ Streamer.getContext().getTargetTriple().getArch());
+ uint64_t Offset = 0;
+ if (P.parse(data, &Offset, CFI.getValues().size())) {
+ // Not a parsable dwarf expression. Assume the worst.
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ "skipping SFrame FDE; .cfi_escape with unknown effects");
+ return false;
+ }
+
+ // This loop deals with dwarf::CFIProgram::Instructions. Everywhere else
+ // this file deals with MCCFIInstructions.
+ for (const dwarf::CFIProgram::Instruction &I : P) {
+ switch (I.Opcode) {
+ case dwarf::DW_CFA_nop:
+ break;
+ case dwarf::DW_CFA_val_offset: {
+ // First argument is a register. Anything that touches CFA, FP, or RA is
+ // a problem, but allow others through. As an even more special case,
+ // allow SP + 0.
+ auto Reg = I.getOperandAsUnsigned(P, 0);
+ // The parser should have failed in this case.
+ assert(Reg && "DW_CFA_val_offset with no register.");
+ bool SPOk = true;
+ if (*Reg == SPReg) {
+ auto Opnd = I.getOperandAsSigned(P, 1);
+ if (!Opnd || *Opnd != 0)
+ SPOk = false;
+ }
+ if (!SPOk || *Reg == RAReg || *Reg == FPReg) {
+ StringRef RN = *Reg == SPReg
+ ? "SP reg "
+ : (*Reg == FPReg ? "FP reg " : "RA reg ");
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ Twine(
+ "skipping SFrame FDE; .cfi_escape DW_CFA_val_offset with ") +
+ RN + Twine(*Reg));
+ return false;
+ }
+ } break;
+ case dwarf::DW_CFA_expression: {
+ // First argument is a register. Anything that touches CFA, FP, or RA is
+ // a problem, but allow others through.
+ auto Reg = I.getOperandAsUnsigned(P, 0);
+ if (!Reg) {
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ "skipping SFrame FDE; .cfi_escape with unknown effects");
+ return false;
+ }
+ if (*Reg == SPReg || *Reg == RAReg || *Reg == FPReg) {
+ StringRef RN = *Reg == SPReg
+ ? "SP reg "
+ : (*Reg == FPReg ? "FP reg " : "RA reg ");
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ Twine(
+ "skipping SFrame FDE; .cfi_escape DW_CFA_expression with ") +
+ RN + Twine(*Reg));
+ return false;
+ }
+ } break;
+ case dwarf::DW_CFA_GNU_args_size: {
+ auto Size = I.getOperandAsSigned(P, 0);
+ // Zero size doesn't affect the cfa.
+ if (Size && *Size == 0)
+ break;
+ if (FRE.Info.getBaseRegister() != BaseReg::FP) {
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ Twine("skipping SFrame FDE; .cfi_escape DW_CFA_GNU_args_size "
+ "with non frame-pointer CFA"));
+ return false;
+ }
+ } break;
+ // Cases that gas doesn't specially handle. TODO: Some of these could be
+ // analyzed and handled instead of just punting. But these are uncommon,
+ // or should be written as normal cfi directives. Some will need fixes to
+ // the scaling factor.
+ case dwarf::DW_CFA_advance_loc:
+ case dwarf::DW_CFA_offset:
+ case dwarf::DW_CFA_restore:
+ case dwarf::DW_CFA_set_loc:
+ case dwarf::DW_CFA_advance_loc1:
+ case dwarf::DW_CFA_advance_loc2:
+ case dwarf::DW_CFA_advance_loc4:
+ case dwarf::DW_CFA_offset_extended:
+ case dwarf::DW_CFA_restore_extended:
+ case dwarf::DW_CFA_undefined:
+ case dwarf::DW_CFA_same_value:
+ case dwarf::DW_CFA_register:
+ case dwarf::DW_CFA_remember_state:
+ case dwarf::DW_CFA_restore_state:
+ case dwarf::DW_CFA_def_cfa:
+ case dwarf::DW_CFA_def_cfa_register:
+ case dwarf::DW_CFA_def_cfa_offset:
+ case dwarf::DW_CFA_def_cfa_expression:
+ case dwarf::DW_CFA_offset_extended_sf:
+ case dwarf::DW_CFA_def_cfa_sf:
+ case dwarf::DW_CFA_def_cfa_offset_sf:
+ case dwarf::DW_CFA_val_offset_sf:
+ case dwarf::DW_CFA_val_expression:
+ case dwarf::DW_CFA_MIPS_advance_loc8:
+ case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc:
+ case dwarf::DW_CFA_AARCH64_negate_ra_state:
+ case dwarf::DW_CFA_LLVM_def_aspace_cfa:
+ case dwarf::DW_CFA_LLVM_def_aspace_cfa_sf:
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(), "skipping SFrame FDE; .cfi_escape "
+ "CFA expression with unknown side effects");
+ return false;
+ default:
+ // Dwarf expression was only partially valid, and user could have
+ // written anything.
+ Streamer.getContext().reportWarning(
+ CFI.getLoc(),
+ "skipping SFrame FDE; .cfi_escape with unknown effects");
+ return false;
+ }
+ }
+ return true;
+ }
+
// Add the effects of CFI to the current FDE, creating a new FRE when
- // necessary.
+ // necessary. Return true if the CFI is representable in the sframe format.
bool handleCFI(SFrameFDE &FDE, SFrameFRE &FRE, const MCCFIInstruction &CFI) {
switch (CFI.getOperation()) {
case MCCFIInstruction::OpDefCfaRegister:
@@ -265,10 +411,11 @@ class SFrameEmitterImpl {
FRE = FDE.SaveState.pop_back_val();
return true;
case MCCFIInstruction::OpEscape:
- // TODO: Implement. Will use FDE.
- return true;
+ // This is a string of bytes that contains an arbitrary dwarf-expression
+ // that may or may not affect unwind info.
+ return isCFIEscapeSafe(FDE, FRE, CFI);
default:
- // Instructions that don't affect the CFA, RA, and SP can be safely
+ // Instructions that don't affect the CFA, RA, and FP can be safely
// ignored.
return true;
}
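
The escape screening above rejects any DW_CFA_val_offset or DW_CFA_expression that names the FP or RA register, and tolerates SP only in the val_offset "SP + 0" form. A standalone sketch of that decision rule (plain C++; the register numbers are placeholders, not a real target's DWARF numbering):

#include <cstdio>

// Hypothetical DWARF register numbers for illustration only.
constexpr unsigned SPReg = 7, FPReg = 6, RAReg = 16;

static bool escapeRegisterIsSafe(unsigned Reg, long Offset, bool IsValOffset) {
  if (Reg == FPReg || Reg == RAReg)
    return false;                      // FP/RA always invalidate the SFrame FDE
  if (Reg == SPReg)
    return IsValOffset && Offset == 0; // only the DW_CFA_val_offset SP + 0 form is tolerated
  return true;                         // other registers do not affect CFA, FP, or RA tracking
}

int main() {
  std::printf("%d %d %d\n",
              escapeRegisterIsSafe(SPReg, 0, /*IsValOffset=*/true),   // 1
              escapeRegisterIsSafe(SPReg, 8, /*IsValOffset=*/true),   // 0
              escapeRegisterIsSafe(FPReg, 0, /*IsValOffset=*/false)); // 0
}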
diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index 67483ba..9d45096 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -240,7 +240,8 @@ private:
getGroupEntry(StringRef GroupName, StringRef GroupDescription) {
std::pair<TimerGroup *, Name2TimerMap> &GroupEntry = Map[GroupName];
if (!GroupEntry.first)
- GroupEntry.first = new TimerGroup(GroupName, GroupDescription);
+ GroupEntry.first =
+ new TimerGroup(GroupName, GroupDescription, /*PrintOnExit=*/true);
return GroupEntry;
}
@@ -270,9 +271,10 @@ TimerGroup &NamedRegionTimer::getNamedTimerGroup(StringRef GroupName,
static TimerGroup *TimerGroupList = nullptr;
TimerGroup::TimerGroup(StringRef Name, StringRef Description,
- sys::SmartMutex<true> &lock)
+ sys::SmartMutex<true> &lock, bool PrintOnExit)
: Name(Name.begin(), Name.end()),
- Description(Description.begin(), Description.end()) {
+ Description(Description.begin(), Description.end()),
+ PrintOnExit(PrintOnExit) {
// Add the group to TimerGroupList.
sys::SmartScopedLock<true> L(lock);
if (TimerGroupList)
@@ -282,12 +284,12 @@ TimerGroup::TimerGroup(StringRef Name, StringRef Description,
TimerGroupList = this;
}
-TimerGroup::TimerGroup(StringRef Name, StringRef Description)
- : TimerGroup(Name, Description, timerLock()) {}
+TimerGroup::TimerGroup(StringRef Name, StringRef Description, bool PrintOnExit)
+ : TimerGroup(Name, Description, timerLock(), PrintOnExit) {}
TimerGroup::TimerGroup(StringRef Name, StringRef Description,
- const StringMap<TimeRecord> &Records)
- : TimerGroup(Name, Description) {
+ const StringMap<TimeRecord> &Records, bool PrintOnExit)
+ : TimerGroup(Name, Description, PrintOnExit) {
TimersToPrint.reserve(Records.size());
for (const auto &P : Records)
TimersToPrint.emplace_back(P.getValue(), std::string(P.getKey()),
@@ -301,7 +303,7 @@ TimerGroup::~TimerGroup() {
while (FirstTimer)
removeTimer(*FirstTimer);
- if (!TimersToPrint.empty()) {
+ if (!TimersToPrint.empty() && PrintOnExit) {
std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
PrintQueuedTimers(*OutStream);
}
@@ -530,7 +532,7 @@ public:
sys::SmartMutex<true> TimerLock;
TimerGroup DefaultTimerGroup{"misc", "Miscellaneous Ungrouped Timers",
- TimerLock};
+ TimerLock, /*PrintOnExit=*/true};
SignpostEmitter Signposts;
// Order of these members and initialization below is important. For example
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 86f9548..a4529a5 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -73,9 +73,16 @@ def SVEUnsupported : AArch64Unsupported {
SVE2Unsupported.F);
}
-let F = [HasSME2p2, HasSVE2p2_or_SME2p2, HasNonStreamingSVE_or_SME2p2,
- HasNonStreamingSVE2p2_or_SME2p2] in
-def SME2p2Unsupported : AArch64Unsupported;
+def SME2p3Unsupported : AArch64Unsupported {
+ let F = [HasSVE2p3_or_SME2p3, HasSVE_B16MM];
+}
+
+def SME2p2Unsupported : AArch64Unsupported {
+ let F = !listconcat([HasSME2p2, HasSVE2p2_or_SME2p2,
+ HasNonStreamingSVE_or_SME2p2,
+ HasNonStreamingSVE2p2_or_SME2p2],
+ SME2p3Unsupported.F);
+}
def SME2p1Unsupported : AArch64Unsupported {
let F = !listconcat([HasSME2p1, HasSVE2p1_or_SME2p1,
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ecaeff7..b3ec65c 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -71,7 +71,6 @@ def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
icmp_redundant_trunc,
fold_global_offset,
- shuffle_to_extract,
ext_addv_to_udot_addv,
ext_uaddv_to_uaddlv,
push_sub_through_zext,
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 46f5f0c..0e94b78 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -585,6 +585,47 @@ def FeatureSME_TMOP: ExtensionWithMArch<"sme-tmop", "SME_TMOP", "FEAT_SME_TMOP",
def FeatureSSVE_FEXPA : ExtensionWithMArch<"ssve-fexpa", "SSVE_FEXPA", "FEAT_SSVE_FEXPA",
"Enable SVE FEXPA instruction in Streaming SVE mode", [FeatureSME2]>;
+//===----------------------------------------------------------------------===//
+// Armv9.7 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureCMH : ExtensionWithMArch<"cmh", "CMH", "FEAT_CMH",
+ "Enable Armv9.7-A Contention Management Hints">;
+
+def FeatureLSCP : ExtensionWithMArch<"lscp", "LSCP", "FEAT_LSCP",
+ "Enable Armv9.7-A Load-acquire and store-release pair extension">;
+
+def FeatureTLBID: ExtensionWithMArch<"tlbid", "TLBID", "FEAT_TLBID",
+ "Enable Armv9.7-A TLBI Domains extension">;
+
+def FeatureMPAMv2: ExtensionWithMArch<"mpamv2", "MPAMv2", "FEAT_MPAMv2",
+ "Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions">;
+
+def FeatureMTETC: ExtensionWithMArch<"mtetc", "MTETC", "FEAT_MTETC",
+ "Enable Virtual Memory Tagging Extension">;
+
+def FeatureGCIE: ExtensionWithMArch<"gcie", "GCIE", "FEAT_GCIE",
+ "Enable GICv5 (Generic Interrupt Controller) CPU Interface Extension">;
+
+def FeatureSVE2p3 : ExtensionWithMArch<"sve2p3", "SVE2p3", "FEAT_SVE2p3",
+ "Enable Armv9.7-A Scalable Vector Extension 2.3 instructions", [FeatureSVE2p2]>;
+
+def FeatureSME2p3 : ExtensionWithMArch<"sme2p3", "SME2p3", "FEAT_SME2p3",
+ "Enable Armv9.7-A Scalable Matrix Extension 2.3 instructions", [FeatureSME2p2]>;
+
+def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B16MM",
+ "Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate", [FeatureSVE]>;
+
+def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
+ "Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
@@ -939,9 +980,12 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
[HasV9_4aOps, FeatureCPA],
!listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>;
def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
- [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2, FeatureLSUI, FeatureOCCMO],
- !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2,
+ [HasV9_5aOps, FeatureCMPBR, FeatureLSUI, FeatureOCCMO],
+ !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR,
FeatureLSUI, FeatureOCCMO])>;
+def HasV9_7aOps : Architecture64<9, 7, "a", "v9.7a",
+ [HasV9_6aOps, FeatureSVE2p3, FeatureFPRCVT],
+ !listconcat(HasV9_6aOps.DefaultExts, [FeatureSVE2p3, FeatureFPRCVT])>;
def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
[ //v8.1
FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a81de5c..d16b116 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9002,12 +9002,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
static SMECallAttrs
-getSMECallAttrs(const Function &Caller, const AArch64TargetLowering &TLI,
+getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI,
const TargetLowering::CallLoweringInfo &CLI) {
if (CLI.CB)
- return SMECallAttrs(*CLI.CB, &TLI);
+ return SMECallAttrs(*CLI.CB, &RTLCI);
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), RTLCI));
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
}
@@ -9029,7 +9029,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// SME Streaming functions are not eligible for TCO as they may require
// the streaming mode or ZA to be restored after returning from the call.
- SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState() ||
CallAttrs.caller().hasStreamingBody())
@@ -9454,7 +9455,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Determine whether we need any streaming mode changes.
- SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI);
std::optional<unsigned> ZAMarkerNode;
bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
@@ -19476,6 +19478,61 @@ static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
Op1 ? Op1 : Mul->getOperand(1));
}
+// Multiplying an RDSVL value by a constant can sometimes be done cheaper by
+// folding a power-of-two factor of the constant into the RDSVL immediate and
+// compensating with an extra shift.
+//
+// We rewrite:
+// (mul (srl (rdsvl 1), w), x)
+// to one of:
+// (shl (rdsvl y), z) if z > 0
+// (srl (rdsvl y), abs(z)) if z < 0
+// where integers y, z satisfy x = y * 2^(w + z) and y ∈ [-32, 31].
+static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG) {
+ SDLoc DL(Mul);
+ EVT VT = Mul->getValueType(0);
+ SDValue MulOp0 = Mul->getOperand(0);
+ int ConstMultiplier =
+ cast<ConstantSDNode>(Mul->getOperand(1))->getSExtValue();
+ if ((MulOp0->getOpcode() != ISD::SRL) ||
+ (MulOp0->getOperand(0).getOpcode() != AArch64ISD::RDSVL))
+ return SDValue();
+
+ unsigned AbsConstValue = abs(ConstMultiplier);
+ unsigned OperandShift =
+ cast<ConstantSDNode>(MulOp0->getOperand(1))->getZExtValue();
+
+ // z ≤ ctz(|x|) - w (largest extra shift we can take while keeping y
+ // integral)
+ int UpperBound = llvm::countr_zero(AbsConstValue) - OperandShift;
+
+ // To keep y in range, with B = 31 for x > 0 and B = 32 for x < 0, we need:
+ // 2^(w + z) ≥ ceil(x / B) ⇒ z ≥ ceil_log2(ceil(x / B)) - w (LowerBound).
+ unsigned B = ConstMultiplier < 0 ? 32 : 31;
+ unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B; // ceil(|x|/B)
+ int LowerBound = llvm::Log2_32_Ceil(CeilAxOverB) - OperandShift;
+
+ // No valid solution found.
+ if (LowerBound > UpperBound)
+ return SDValue();
+
+ // Any value of z in [LowerBound, UpperBound] is valid. Prefer no extra
+ // shift if possible.
+ int Shift = std::min(std::max(/*prefer*/ 0, LowerBound), UpperBound);
+
+ // y = x / 2^(w + z)
+ int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
+ (ConstMultiplier < 0 ? -1 : 1);
+ auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getSignedConstant(RdsvlMul, DL, MVT::i32));
+
+ if (Shift == 0)
+ return Rdsvl;
+ return DAG.getNode(Shift < 0 ? ISD::SRL : ISD::SHL, DL, VT, Rdsvl,
+ DAG.getConstant(abs(Shift), DL, MVT::i32),
+ SDNodeFlags::Exact);
+}
+
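To make the bound arithmetic in the comment above concrete, here is a minimal standalone sketch of the (y, z) selection in plain C++20. The helper name pickRdsvlMulShift and the worked example are illustrative, not part of the patch.

    // Given the constant multiplier X and the existing right-shift amount W,
    // pick (Y, Z) with X = Y * 2^(W + Z) and Y in the RDSVL immediate range,
    // mirroring the bounds computed by the combine above.
    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <cstdlib>
    #include <optional>
    #include <utility>

    std::optional<std::pair<int32_t, int>> pickRdsvlMulShift(int X, unsigned W) {
      unsigned AbsX = std::abs(X);
      if (AbsX == 0)
        return std::nullopt;                          // mul-by-zero handled elsewhere
      int Upper = std::countr_zero(AbsX) - (int)W;    // keep Y integral
      unsigned B = X < 0 ? 32 : 31;                   // Y must fit in [-32, 31]
      unsigned CeilAbsXOverB = (AbsX + B - 1) / B;
      int Lower = (int)std::bit_width(CeilAbsXOverB - 1) - (int)W; // ceil(log2)
      if (Lower > Upper)
        return std::nullopt;
      int Z = std::min(std::max(0, Lower), Upper);    // prefer no extra shift
      unsigned TotalShift = (unsigned)((int)W + Z);   // always >= 0 (W + Lower >= 0)
      int32_t Y = (int32_t)(AbsX >> TotalShift) * (X < 0 ? -1 : 1);
      return std::make_pair(Y, Z);
    }
    // Example: X = 96, W = 3 gives Upper = 5 - 3 = 2 and Lower = 2 - 3 = -1,
    // so Z = 0 and Y = 96 >> 3 = 12, i.e.
    //   (mul (srl (rdsvl 1), 3), 96)  ->  (rdsvl 12)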
// Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
// Same for other types with equivalent constants.
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) {
@@ -19604,6 +19661,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(N1))
return SDValue();
+ if (SDValue Ext = performMulRdsvlCombine(N, DAG))
+ return Ext;
+
ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
@@ -26665,11 +26725,34 @@ static SDValue performDUPCombine(SDNode *N,
}
if (N->getOpcode() == AArch64ISD::DUP) {
+ SDValue Op = N->getOperand(0);
+
+ // Optimize DUP(extload/zextload i8/i16/i32) to avoid GPR->FPR transfer.
+ // For example:
+ // v4i32 = DUP (i32 (zextloadi8 addr))
+ // =>
+ // v4i32 = SCALAR_TO_VECTOR (i32 (zextloadi8 addr)) ; Matches to ldr b0
+ // v4i32 = DUPLANE32 (v4i32), 0
+ if (auto *LD = dyn_cast<LoadSDNode>(Op)) {
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT MemVT = LD->getMemoryVT();
+ EVT ElemVT = VT.getVectorElementType();
+ if ((ExtType == ISD::EXTLOAD || ExtType == ISD::ZEXTLOAD) &&
+ (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
+ ElemVT != MemVT && LD->hasOneUse()) {
+ EVT Vec128VT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT,
+ 128 / ElemVT.getSizeInBits());
+ SDValue ScalarToVec =
+ DCI.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, Op);
+ return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, ScalarToVec,
+ DCI.DAG.getConstant(0, DL, MVT::i64));
+ }
+ }
+
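For context, a hedged source-level example of the shape this combine targets; the function name splat_loaded_byte and the intrinsic choice are illustrative only.

    // Splatting a zero-extended byte load. Without the combine the byte goes
    // through a GPR and is then transferred to a SIMD register; the rewrite
    // above lets it be selected as an FPR load (e.g. ldr b) plus a lane dup.
    #include <arm_neon.h>
    #include <stdint.h>

    uint32x4_t splat_loaded_byte(const uint8_t *p) {
      return vdupq_n_u32(*p); // v4i32 = DUP (i32 (zextloadi8 p))
    }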
// If the instruction is known to produce a scalar in SIMD registers, we can
// duplicate it across the vector lanes using DUPLANE instead of moving it
// to a GPR first. For example, this allows us to handle:
// v4i32 = DUP (i32 (FCMGT (f32, f32)))
- SDValue Op = N->getOperand(0);
// FIXME: Ideally, we should be able to handle all instructions that
// produce a scalar value in FPRs.
if (Op.getOpcode() == AArch64ISD::FCMEQ ||
@@ -29430,15 +29513,6 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
TargetLowering::insertSSPDeclarations(M);
}
-Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
- // MSVC CRT has a function to validate security cookie.
- RTLIB::LibcallImpl SecurityCheckCookieLibcall =
- getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
- if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
- return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
- return TargetLowering::getSSPStackGuardCheck(M);
-}
-
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
@@ -29447,11 +29521,6 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
- // Fuchsia is similar.
- // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
- if (Subtarget->isTargetFuchsia())
- return UseTlsOffset(IRB, -0x8);
-
return TargetLowering::getSafeStackPointerLocation(IRB);
}
@@ -29769,7 +29838,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
// Checks to allow the use of SME instructions
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
- auto CallAttrs = SMECallAttrs(*Base, this);
+ auto CallAttrs = SMECallAttrs(*Base, &getRuntimeLibcallsInfo());
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingZT0() ||
CallAttrs.requiresPreservingAllZAState())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9495c9f..2cb8ed2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -366,7 +366,6 @@ public:
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
void insertSSPDeclarations(Module &M) const override;
- Function *getSSPStackGuardCheck(const Module &M) const override;
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 09ce713..58a53af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1894,6 +1894,21 @@ def btihint_op : Operand<i32> {
}];
}
+def CMHPriorityHintOperand : AsmOperandClass {
+ let Name = "CMHPriorityHint";
+ let ParserMethod = "tryParseCMHPriorityHint";
+}
+
+def CMHPriorityHint_op : Operand<i32> {
+ let ParserMatchClass = CMHPriorityHintOperand;
+ let PrintMethod = "printCMHPriorityHintOp";
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ return AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(MCOp.getImm()) != nullptr;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -4636,6 +4651,48 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterOperand regtype,
GPR64sp:$Rn, 0)>;
}
+class BaseLoadStoreAcquirePairOffset<bits<4> opc, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, #0]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{31-23} = 0b110110010;
+ let Inst{22} = L;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rt2;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass LoadAcquirePairOffset<bits<4> opc, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStoreAcquirePairOffset<opc, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64sp:$Rn), asm>,
+ Sched<[WriteAtomic, WriteLDHi]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn)>;
+}
+
+multiclass StoreAcquirePairOffset<bits<4> opc, string asm> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+ def i : BaseLoadStoreAcquirePairOffset<opc, 0b0, (outs),
+ (ins GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn)>;
+}
+
// (pre-indexed)
class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
@@ -5241,7 +5298,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
- SDPatternOperator OpN = null_frag> {
+ SDPatternOperator OpN> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
@@ -6481,8 +6538,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16i8_v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6491,6 +6547,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v4f16_v2f32 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9112,6 +9185,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16_v2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
@@ -13227,3 +13307,34 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
let Predicates = [HasNEON, HasF8F32MM];
}
}
+
+//----------------------------------------------------------------------------
+// Contention Management Hints - FEAT_CMH
+//----------------------------------------------------------------------------
+
+class SHUHInst<string asm> : I<
+ (outs),
+ (ins CMHPriorityHint_op:$priority),
+ asm, "\t$priority", "", []>, Sched<[]> {
+ bits<1> priority;
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = 0b0110;
+ let Inst{7-6} = 0b01;
+ let Inst{5} = priority;
+ let Inst{4-0} = 0b11111;
+}
+
+multiclass SHUH<string asm> {
+ def NAME : SHUHInst<asm>;
+ def : InstAlias<asm, (!cast<Instruction>(NAME) 0), 1>;
+}
+
+class STCPHInst<string asm> : I<
+ (outs),
+ (ins),
+ asm, "", "", []>, Sched<[]> {
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = 0b0110;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = 0b11111;
+}
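As a cross-check of the field layout above, a small constexpr sketch that assembles the SHUH word from exactly the bit ranges declared in SHUHInst; encodeSHUH and the asserted constants are derived only from those fields and are illustrative.

    #include <cstdint>

    constexpr uint32_t encodeSHUH(unsigned Priority) {
      uint32_t Inst = 0;
      Inst |= 0b11010101000000110010u << 12; // Inst{31-12}
      Inst |= 0b0110u << 8;                  // Inst{11-8}
      Inst |= 0b01u << 6;                    // Inst{7-6}
      Inst |= (Priority & 1u) << 5;          // Inst{5} = priority
      Inst |= 0b11111u;                      // Inst{4-0}
      return Inst;
    }

    static_assert(encodeSHUH(0) == 0xD503265F, "priority 0");
    static_assert(encodeSHUH(1) == 0xD503267F, "priority 1");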
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 92f260f..b9e299e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -50,63 +50,44 @@ def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
-
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
- AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
-
+ AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
- AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
-
+ AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
- AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
-
+ AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
- AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
-
+ AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
- AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
-
+ AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
- AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
-
+ AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
def HasJS : Predicate<"Subtarget->hasJS()">,
- AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
-
+ AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
- AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
-
-def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
- AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
-
+ AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
+def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
+ AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
- AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
-
+ AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
- AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
-
+ AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
- AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
-
-def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
- AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
-
+ AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
+def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
+ AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
- AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
-
+ AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
- AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
-
-def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
- AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
-
+ AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
+ AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
- AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
-
-def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
- AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
-
+ AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
+ AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
+ AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
@@ -149,13 +130,13 @@ def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasS
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
-def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">,
+def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">,
AssemblerPredicateWithAll<(all_of FeatureSVEAES), "sve-aes">;
-def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">,
+def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">,
AssemblerPredicateWithAll<(all_of FeatureSVESM4), "sve-sm4">;
-def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">,
+def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">,
AssemblerPredicateWithAll<(all_of FeatureSVESHA3), "sve-sha3">;
-def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">,
+def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">,
AssemblerPredicateWithAll<(all_of FeatureSVEBitPerm), "sve-bitperm">;
def HasSMEandIsNonStreamingSafe
: Predicate<"Subtarget->hasSME()">,
@@ -196,7 +177,7 @@ def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
"(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
(all_of FeatureSVE2, FeatureFP8DOT2)),
- "ssve-fp8dot2 or (sve2 and fp8dot2)">;
+ "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
@@ -204,43 +185,60 @@ def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
(all_of FeatureSVE2, FeatureFP8DOT4)),
"ssve-fp8dot4 or (sve2 and fp8dot4)">;
-def HasLUT : Predicate<"Subtarget->hasLUT()">,
+def HasLUT : Predicate<"Subtarget->hasLUT()">,
AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
-def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
+def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
-def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
+def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
-def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
+def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
-def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">,
+def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">,
AssemblerPredicateWithAll<(all_of FeatureSME_MOP4), "sme-mop4">;
-def HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">,
+def HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">,
AssemblerPredicateWithAll<(all_of FeatureSME_TMOP), "sme-tmop">;
-
-def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
+def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
-def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
+def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
-def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
+def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
-def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
+def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
-def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
+def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
-def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
+def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
-def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
+def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
-def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
+def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
-def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
+def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
def HasPCDPHINT : Predicate<"Subtarget->hasPCDPHINT()">,
- AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
+ AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
def HasLSUI : Predicate<"Subtarget->hasLSUI()">,
- AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
+ AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
def HasOCCMO : Predicate<"Subtarget->hasOCCMO()">,
- AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+ AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+def HasCMH : Predicate<"Subtarget->hasCMH()">,
+ AssemblerPredicateWithAll<(all_of FeatureCMH), "cmh">;
+def HasLSCP : Predicate<"Subtarget->hasLSCP()">,
+ AssemblerPredicateWithAll<(all_of FeatureLSCP), "lscp">;
+def HasSVE2p2 : Predicate<"Subtarget->hasSVE2p2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE2p2), "sve2p2">;
+def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_B16MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">;
+def HasF16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF16MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">;
+def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
+def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
+ AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -310,6 +308,10 @@ def HasSVE2p2_or_SME2p2
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2),
"sme2p2 or sve2p2">;
+def HasSVE2p3_or_SME2p3
+ : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p3() || Subtarget->hasSME2p3())">,
+ AssemblerPredicateWithAll<(any_of FeatureSME2p3, FeatureSVE2p3),
+ "sme2p3 or sve2p3">;
def HasNonStreamingSVE2p2_or_SME2p2
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
@@ -328,100 +330,110 @@ def HasNEONandIsStreamingSafe
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
def HasNEONandIsSME2p2StreamingSafe
- : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
- AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+ : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+ AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
- AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
+ AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
- AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
+ AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
-def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
- AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
+ AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
+def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
+ AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
- AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
+ AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
- AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
+ AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
- AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
+ AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
- AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
+ AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
- AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
+ AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
- AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
+ AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
- AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
+ AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
- AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
+ AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
- AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
+ AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
- AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
+ AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
- AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
+ AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
- AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
+ AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
- AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
+ AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
- AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
+ AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
- AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
+ AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
- AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
+ AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
- AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
+ AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
- AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
+ AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
- AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
+ AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
- AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
+ AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
- AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
+ AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
- AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
+ AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
- AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
+ AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA : Predicate<"Subtarget->hasCPA()">,
- AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
+ AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
+def HasTLBID : Predicate<"Subtarget->hasTLBID()">,
+ AssemblerPredicateWithAll<(all_of FeatureTLBID), "tlbid">;
+def HasMPAMv2 : Predicate<"Subtarget->hasMPAMv2()">,
+ AssemblerPredicateWithAll<(all_of FeatureMPAMv2), "mpamv2">;
+def HasMTETC : Predicate<"Subtarget->hasMTETC()">,
+ AssemblerPredicateWithAll<(all_of FeatureMTETC), "mtetc">;
+def HasGCIE : Predicate<"Subtarget->hasGCIE()">,
+ AssemblerPredicateWithAll<(all_of FeatureGCIE), "gcie">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
- : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
+ : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
- : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+ : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
def UseNegativeImmediates
- : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
- "NegativeImmediates">;
+ : Predicate<"false">,
+ AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
+ "NegativeImmediates">;
-def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
-def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">;
+def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">;
-def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
+def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;
+def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
-def AllowMisalignedMemAccesses : Predicate<"!Subtarget->requiresStrictAlign()">;
+def AllowMisalignedMemAccesses
+ : Predicate<"!Subtarget->requiresStrictAlign()">;
def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
@@ -3692,6 +3704,12 @@ def UDF : UDFType<0, "udf">;
// Load instructions.
//===----------------------------------------------------------------------===//
+let Predicates = [HasLSCP] in {
+defm LDAP : LoadAcquirePairOffset<0b0101, "ldap">;
+defm LDAPP : LoadAcquirePairOffset<0b0111, "ldapp">;
+defm STLP : StoreAcquirePairOffset<0b0101, "stlp">;
+}
+
// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
@@ -4004,22 +4022,6 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
-// load zero-extended i32, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-// load zero-extended i16, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-// load zero-extended i8, bitcast to f64
-def : Pat<(f64 (bitconvert (i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-// load zero-extended i16, bitcast to f32
-def : Pat<(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-// load zero-extended i8, bitcast to f32
-def : Pat<(f32 (bitconvert (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -4371,6 +4373,64 @@ def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;
+// Patterns for bitconvert or scalar_to_vector of load operations.
+// Enables direct SIMD register loads for small integer types (i8/i16) that are
+// naturally zero-extended to i32/i64.
+multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16> {
+ // 8-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend), bsub)>;
+
+ // 16-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend), hsub)>;
+}
+
+// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit.
+multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> {
+ defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>;
+
+ // 32-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend), ssub)>;
+}
+
+// Instantiate bitconvert patterns for floating-point types.
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>;
+
+// Instantiate scalar_to_vector patterns for the 128-bit integer vector types.
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>;
+
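A hedged illustration of the kind of source these ExtLoad* patterns are aimed at; load_u16_as_f32_bits is an invented example, and the only claim is the one the patterns themselves express: a zero-extended narrow load feeding a bitconvert (or scalar_to_vector) can be selected as a direct FPR load.

    #include <cstdint>
    #include <cstring>

    // f32 (bitconvert (i32 (zextloadi16 p))): with the patterns above this can
    // become a single half-word load into an FPR (LDRHui + SUBREG_TO_REG hsub)
    // rather than a GPR load followed by a GPR->FPR move.
    float load_u16_as_f32_bits(const uint16_t *p) {
      uint32_t Bits = *p;               // zextloadi16 -> i32
      float F;
      std::memcpy(&F, &Bits, sizeof F); // bitconvert i32 -> f32
      return F;
    }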
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch timm:$Rt,
@@ -5235,113 +5295,10 @@ let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
- defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
- defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
-}
-
-
-// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
- def : Pat<(i32 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- // These instructions saturate like fp_to_[su]int_sat.
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+ defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+ defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
}
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
let Predicates = [HasFullFP16] in {
@@ -6549,8 +6506,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6570,6 +6527,7 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
// Floating-point conversion patterns.
multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+ let Predicates = [HasFPRCVT] in {
def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
@@ -6578,6 +6536,7 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ }
def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
@@ -6592,6 +6551,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
@@ -6648,6 +6609,196 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+
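For reference, the saturating behaviour these FCVTZS/FCVTZU patterns rely on, sketched for the signed 32-bit case in portable C++; this is an emulation of the fp_to_sint_sat semantics assumed here (clamp to the integer range, NaN to zero), not code from the patch.

    #include <cmath>
    #include <cstdint>
    #include <limits>

    int32_t fptosi_sat_i32(double X) {
      if (std::isnan(X))
        return 0;                                          // NaN -> 0
      if (X <= (double)std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();        // saturate low
      if (X >= (double)std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();        // saturate high
      return (int32_t)X;                                   // truncate toward zero
    }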
+multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_gi, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ // These instructions saturate like fp_to_[su]int_sat.
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
@@ -11244,8 +11395,28 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT     : SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane : SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+
+//===----------------------------------------------------------------------===//
+// Contention Management Hints (FEAT_CMH)
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasCMH] in {
+ defm SHUH : SHUH<"shuh">; // Shared Update Hint instruction
+ def STCPH : STCPHInst<"stcph">; // Store Concurrent Priority Hint instruction
+}
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 47144c7..cd94a25 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1341,6 +1341,10 @@ def Z_q : RegisterOperand<ZPR, "printTypedVectorList<0,'q'>"> {
let ParserMatchClass = ZPRVectorList<128, 1>;
}
+def ZZ_Any : RegisterOperand<ZPR2, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorList<0, 2>;
+}
+
def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorList<8, 2>;
}
@@ -1361,6 +1365,10 @@ def ZZ_q : RegisterOperand<ZPR2, "printTypedVectorList<0,'q'>"> {
let ParserMatchClass = ZPRVectorList<128, 2>;
}
+def ZZZ_Any : RegisterOperand<ZPR3, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorList<0, 3>;
+}
+
def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorList<8, 3>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index e552afe..752b185 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1173,3 +1173,14 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">;
defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">;
}
+
+//===----------------------------------------------------------------------===//
+// SME2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSME2p3] in {
+ def LUTI6_ZTZ : sme2_lut_single<"luti6">;
+ def LUTI6_4ZT3Z : sme2_luti6_zt_consecutive<"luti6">;
+ def LUTI6_S_4ZT3Z : sme2_luti6_zt_strided<"luti6">;
+ def LUTI6_4Z2Z2ZI : sme2_luti6_vector_vg4_consecutive<"luti6">;
+ def LUTI6_S_4Z2Z2ZI : sme2_luti6_vector_vg4_strided<"luti6">;
+} // [HasSME2p3]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 98a128e..3b268dc 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2569,7 +2569,7 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
} // End HasBF16, HasSVE_or_SME
let Predicates = [HasBF16, HasSVE] in {
- defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
+ defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b011, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
} // End HasBF16, HasSVE
let Predicates = [HasBF16, HasSVE_or_SME] in {
@@ -3680,15 +3680,15 @@ let Predicates = [HasSVE_or_SME, HasMatMulInt8] in {
} // End HasSVE_or_SME, HasMatMulInt8
let Predicates = [HasSVE, HasMatMulFP32] in {
- defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
+ defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b101, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
} // End HasSVE, HasMatMulFP32
let Predicates = [HasSVE_F16F32MM] in {
- def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
+ def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16>;
} // End HasSVE_F16F32MM
let Predicates = [HasSVE, HasMatMulFP64] in {
- defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
+ defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b111, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
@@ -4272,9 +4272,9 @@ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$
defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>;
-defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
-defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
-defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
+defm SQRSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
+defm UQRSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
+defm SQRSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
defm WHILEGE_2PXX : sve2p1_int_while_rr_pair<"whilege", 0b000>;
defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>;
@@ -4615,6 +4615,75 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>;
} // End HasSME2p2orSVE2p2
+
+//===----------------------------------------------------------------------===//
+// SME2.3 or SVE2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p3_or_SME2p3] in {
+ // SVE2 Add pairwise within quadword vector segments (unpredicated)
+ defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", null_frag>;
+
+ // SVE2 Add subtract/subtract pairwise
+ defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", null_frag>;
+ defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>;
+
+ // SVE2 integer absolute difference and accumulate long
+ defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
+ defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
+
+ // SVE2 integer dot product
+ def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;
+ def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>;
+
+ // SVE2 integer indexed dot product
+ def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">;
+ def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
+
+ // SVE2 fp convert, narrow and interleave to integer, rounding toward zero
+ defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
+ defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>;
+
+ // SVE2 signed/unsigned integer convert to floating-point
+ defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>;
+ defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>;
+ defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>;
+ defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>;
+
+ // SVE2 saturating shift right narrow by immediate and interleave
+ defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>;
+ defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001>;
+ defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000>;
+ defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100>;
+ defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111>;
+ defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010>;
+ defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>;
+ defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>;
+ defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>;
+
+ defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
+} // End HasSME2p3orSVE2p3
+
+//===----------------------------------------------------------------------===//
+// SVE2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p3] in {
+ def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE_B16MM Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE_B16MM] in {
+ def BFMMLA_ZZZ_H : sve_fp_matrix_mla<0b110, "bfmmla", ZPR16, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// F16MM Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p2, HasF16MM] in {
+ def FMMLA_ZZZ_H : sve_fp_matrix_mla<0b100, "fmmla", ZPR16, ZPR16>;
+}
+
//===----------------------------------------------------------------------===//
// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index bdde8e3..2387f17 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2762,11 +2762,11 @@ def : InstRW<[V2Write_11c_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
def : InstRW<[V2Write_11c_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
// Non temporal store, scalar + imm
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BHWD]_ZRI$")>;
// Non temporal store, scalar + scalar
-def : InstRW<[V2Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+def : InstRW<[V2Write_2c_1L01_1S_1V01], (instrs STNT1H_ZRR)>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BWD]_ZRR$")>;
// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[V2Write_4c_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 9438917..ae46d71 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -205,6 +205,7 @@ def lookupDCByName : SearchIndex {
let Key = ["Name"];
}
+// Op1 CRn CRm Op2
def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
@@ -241,6 +242,11 @@ def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
}
+let Requires = [{ {AArch64::FeatureMTETC} }] in {
+def : DC<"ZGBVA", 0b011, 0b0111, 0b0100, 0b101>;
+def : DC<"GBVA", 0b011, 0b0111, 0b0100, 0b111>;
+}
+
let Requires = [{ {AArch64::FeatureMEC} }] in {
def : DC<"CIPAE", 0b100, 0b0111, 0b1110, 0b000>;
def : DC<"CIGDPAE", 0b100, 0b0111, 0b1110, 0b111>;
@@ -813,11 +819,26 @@ def : BTI<"j", 0b100>;
def : BTI<"jc", 0b110>;
//===----------------------------------------------------------------------===//
+// CMHPriority instruction options.
+//===----------------------------------------------------------------------===//
+
+class CMHPriorityHint<string name, bits<1> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<1> Encoding;
+ let Encoding = encoding;
+}
+
+def : CMHPriorityHint<"ph", 0b1>;
+
+//===----------------------------------------------------------------------===//
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg> {
+ bits<3> op2, bit needsreg, bit optionalreg> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -825,24 +846,25 @@ class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm,
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
bit NeedsReg = needsreg;
+ bit OptionalReg = optionalreg;
list<string> Requires = [];
list<string> ExtraRequires = [];
code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
}
class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg>
- : TLBICommon<name, op1, crn, crm, op2, needsreg>;
+ bits<3> op2, bit needsreg, bit optionalreg>
+ : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>;
class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg>
- : TLBICommon<name, op1, crn, crm, op2, needsreg>;
+ bits<3> op2, bit needsreg, bit optionalreg>
+ : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>;
multiclass TLBITableBase {
def NAME # Table : GenericTable {
let FilterClass = NAME # "Entry";
let CppTypeName = NAME;
- let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+ let Fields = ["Name", "Encoding", "NeedsReg", "OptionalReg", "RequiresStr"];
let PrimaryKey = ["Encoding"];
let PrimaryKeyName = "lookup" # NAME # "ByEncoding";
}
@@ -856,60 +878,60 @@ defm TLBI : TLBITableBase;
defm TLBIP : TLBITableBase;
multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg = 1> {
- def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
- def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ bits<3> op2, bit needsreg = 1, bit optionalreg = 0> {
+ def : TLBIEntry<name, op1, crn, crm, op2, needsreg, optionalreg>;
+ def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> {
let Encoding{7} = 1;
let ExtraRequires = ["AArch64::FeatureXS"];
}
if !eq(hasTLBIP, true) then {
- def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>;
- def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ def : TLBIPEntry<name, op1, crn, crm, op2, needsreg, optionalreg>;
+ def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> {
let Encoding{7} = 1;
let ExtraRequires = ["AArch64::FeatureXS"];
}
}
}
-// hasTLBIP op1 CRn CRm op2 needsreg
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>;
defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>;
-defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0, 1>;
+defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0, 1>;
+defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0, 1>;
defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>;
defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>;
defm : TLBI<"VAE3IS", 1, 0b110, 0b1000, 0b0011, 0b001>;
defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>;
defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>;
-defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>;
+defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0, 1>;
defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>;
defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>;
defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>;
-defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>;
+defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0, 1>;
defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>;
defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>;
defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>;
-defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0, 0>;
+defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0, 0>;
+defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0, 0>;
defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>;
defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>;
defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>;
defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>;
defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>;
-defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0, 0>;
defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>;
defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>;
defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>;
-defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>;
+defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0, 0>;
defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>;
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
let Requires = ["AArch64::FeatureTLB_RMI"] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
-// hasTLBIP op1 CRn CRm op2 needsreg
-defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0>;
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
+defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0, 1>;
defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>;
defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>;
defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>;
@@ -919,15 +941,15 @@ defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>;
defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>;
defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 0b0001, 0b001>;
defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>;
+defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0, 1>;
defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>;
defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>;
-defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>;
-defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0, 1>;
+defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0, 1>;
+defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0, 1>;
// Armv8.4-A TLB Range Maintenance instructions:
-// hasTLBIP op1 CRn CRm op2 needsreg
+// hasTLBIP op1 CRn CRm op2
defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>;
defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>;
defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>;
@@ -962,18 +984,19 @@ defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>;
// Armv9-A Realm Management Extension TLBI Instructions
let Requires = ["AArch64::FeatureRME"] in {
+// hasTLBIP op1 CRn CRm op2 needsreg
defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>;
defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>;
-defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>;
-defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0, 0>;
+defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0, 0>;
}
// Armv9.5-A TLBI VMALL for Dirty State
let Requires = ["AArch64::FeatureTLBIW"] in {
-// op1, CRn, CRm, op2, needsreg
-defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>;
-defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>;
-defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>;
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
+defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0, 0>;
+defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0, 1>;
+defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0, 1>;
}
//===----------------------------------------------------------------------===//
@@ -1862,13 +1885,6 @@ def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeatureMPAM} }] in {
-def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
-def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>;
def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>;
def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>;
def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>;
@@ -1878,8 +1894,6 @@ def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>;
def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
-def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
-} //FeatureMPAM
// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
@@ -2319,6 +2333,26 @@ def : RWSysReg<"MPAMBW0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b101>;
def : RWSysReg<"MPAMBWCAP_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b110>;
def : RWSysReg<"MPAMBWSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b111>;
+// v9.7a Memory partitioning and monitoring version 2
+// (FEAT_MPAMv2) registers
+// Op0 Op1 CRn CRm Op2
+// MPAM system registers that are also available for MPAMv2
+def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
+def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>;
+def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+// Registers only present in MPAMv2
+def : RWSysReg<"MPAMCTL_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMVIDCR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b000>;
+def : RWSysReg<"MPAMVIDSR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b001>;
+def : RWSysReg<"MPAMVIDSR_EL3", 0b11, 0b110, 0b1010, 0b0111, 0b001>;
+
//===----------------------------------------------------------------------===//
// FEAT_SRMASK v9.6a registers
//===----------------------------------------------------------------------===//
@@ -2412,3 +2446,251 @@ def : DC<"CIVAPS", 0b000, 0b0111, 0b1111, 0b001>;
let Requires = [{ {AArch64::FeaturePoPS, AArch64::FeatureMTE} }] in {
def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>;
}
+
+// v9.7a TLBI domains system registers (MemSys)
+foreach n = 0-3 in {
+ defvar nb = !cast<bits<3>>(n);
+ def : RWSysReg<"VTLBID"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1000, nb>;
+}
+
+foreach n = 0-3 in {
+ defvar nb = !cast<bits<3>>(n);
+ def : RWSysReg<"VTLBIDOS"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1001, nb>;
+}
+
+def : ROSysReg<"TLBIDIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b110>;
+
+// MPAM Lookaside Buffer Invalidate (MLBI) instructions
+class MLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = needsreg;
+ string RequiresStr = [{ {AArch64::FeatureMPAMv2} }];
+}
+
+def MLBITable : GenericTable {
+ let FilterClass = "MLBI";
+ let CppTypeName = "MLBI";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupMLBIByEncoding";
+}
+
+def lookupMLBIByName : SearchIndex {
+ let Table = MLBITable;
+ let Key = ["Name"];
+}
+
+// Op1 CRn CRm Op2 needsReg
+def : MLBI<"ALLE1", 0b100, 0b0111, 0b0000, 0b100, 0>;
+def : MLBI<"VMALLE1", 0b100, 0b0111, 0b0000, 0b101, 0>;
+def : MLBI<"VPIDE1", 0b100, 0b0111, 0b0000, 0b110, 1>;
+def : MLBI<"VPMGE1", 0b100, 0b0111, 0b0000, 0b111, 1>;
+
+
+// v9.7-A GICv5 (FEAT_GCIE)
+// CPU Interface Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICC_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"ICC_APR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b000>;
+def : RWSysReg<"ICC_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>;
+def : RWSysReg<"ICC_CR0_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b000>;
+def : ROSysReg<"ICC_DOMHPPIR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b010>;
+def : ROSysReg<"ICC_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>;
+def : ROSysReg<"ICC_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>;
+def : ROSysReg<"ICC_HPPIR_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b001>;
+def : ROSysReg<"ICC_IAFFIDR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b101>;
+def : RWSysReg<"ICC_ICSR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b100>;
+def : ROSysReg<"ICC_IDR0_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b010>;
+def : RWSysReg<"ICC_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>;
+def : RWSysReg<"ICC_PCR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b001>;
+
+// Virtual CPU Interface Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICV_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"ICV_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>;
+def : RWSysReg<"ICV_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>;
+def : RWSysReg<"ICV_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>;
+def : RWSysReg<"ICV_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>;
+
+foreach n=0-3 in {
+ defvar nb = !cast<bits<2>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICC_PPI_DOMAINR"#n#"_EL3", 0b11, 0b110, 0b1100, 0b1000, {0b1,nb{1-0}}>;
+
+}
+
+foreach n=0-15 in {
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICC_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+// PPI and Virtual PPI Registers
+multiclass PPIRegisters<string prefix> {
+ foreach n=0-1 in {
+ defvar nb = !cast<bit>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<prefix#"_PPI_CACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b00,nb}>;
+ def : RWSysReg<prefix#"_PPI_CPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b10,nb}>;
+ def : RWSysReg<prefix#"_PPI_ENABLER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b11,nb}>;
+ def : RWSysReg<prefix#"_PPI_SACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b01,nb}>;
+ def : RWSysReg<prefix#"_PPI_SPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b11,nb}>;
+ def : RWSysReg<prefix#"_PPI_HMR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b00,nb}>;
+ }
+}
+
+defm : PPIRegisters<"ICC">; // PPI Registers
+defm : PPIRegisters<"ICV">; // Virtual PPI Registers
+
+foreach n=0-15 in {
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICV_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+// Hypervisor Control Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICH_APR_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b100>;
+def : RWSysReg<"ICH_CONTEXTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b110>;
+def : RWSysReg<"ICH_HFGITR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b111>;
+def : RWSysReg<"ICH_HFGRTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
+def : RWSysReg<"ICH_HFGWTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b110>;
+def : ROSysReg<"ICH_HPPIR_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b101>;
+def : RWSysReg<"ICH_VCTLR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b100>;
+
+foreach n=0-1 in {
+ defvar nb = !cast<bit>(n);
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICH_PPI_ACTIVER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b11,nb}>;
+def : RWSysReg<"ICH_PPI_DVIR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b00,nb}>;
+def : RWSysReg<"ICH_PPI_ENABLER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b01,nb}>;
+def : RWSysReg<"ICH_PPI_PENDR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b10,nb}>;
+}
+
+foreach n=0-15 in {
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICH_PPI_PRIORITYR"#n#"_EL2", 0b11, 0b100, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+//===----------------------------------------------------------------------===//
+// GICv5 instruction options.
+//===----------------------------------------------------------------------===//
+
+// GIC
+class GIC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = 1;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+// GSB
+class GSB<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+// GICR
+class GICR<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = 1;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+def GICTable : GenericTable {
+ let FilterClass = "GIC";
+ let CppTypeName = "GIC";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGICByEncoding";
+}
+
+def GSBTable : GenericTable {
+ let FilterClass = "GSB";
+ let CppTypeName = "GSB";
+ let Fields = ["Name", "Encoding", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGSBByEncoding";
+}
+
+def GICRTable : GenericTable {
+ let FilterClass = "GICR";
+ let CppTypeName = "GICR";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGICRByEncoding";
+}
+
+def lookupGICByName : SearchIndex {
+ let Table = GICTable;
+ let Key = ["Name"];
+}
+
+def lookupGSBByName : SearchIndex {
+ let Table = GSBTable;
+ let Key = ["Name"];
+}
+
+def lookupGICRByName : SearchIndex {
+ let Table = GICRTable;
+ let Key = ["Name"];
+}
+
+// Op1 CRn CRm Op2
+def : GSB<"sys", 0b000, 0b1100, 0b0000, 0b000>;
+def : GSB<"ack", 0b000, 0b1100, 0b0000, 0b001>;
+
+// Op1 CRn CRm Op2
+def : GICR<"cdia", 0b000, 0b1100, 0b0011, 0b000>;
+def : GICR<"cdnmia", 0b000, 0b1100, 0b0011, 0b001>;
+
+// Op1 CRn CRm Op2
+def : GIC<"cdaff", 0b000, 0b1100, 0b0001, 0b011>;
+def : GIC<"cddi", 0b000, 0b1100, 0b0010, 0b000>;
+def : GIC<"cddis", 0b000, 0b1100, 0b0001, 0b000>;
+def : GIC<"cden", 0b000, 0b1100, 0b0001, 0b001>;
+def : GIC<"cdeoi", 0b000, 0b1100, 0b0001, 0b111>;
+def : GIC<"cdhm", 0b000, 0b1100, 0b0010, 0b001>;
+def : GIC<"cdpend", 0b000, 0b1100, 0b0001, 0b100>;
+def : GIC<"cdpri", 0b000, 0b1100, 0b0001, 0b010>;
+def : GIC<"cdrcfg", 0b000, 0b1100, 0b0001, 0b101>;
+def : GIC<"vdaff", 0b100, 0b1100, 0b0001, 0b011>;
+def : GIC<"vddi", 0b100, 0b1100, 0b0010, 0b000>;
+def : GIC<"vddis", 0b100, 0b1100, 0b0001, 0b000>;
+def : GIC<"vden", 0b100, 0b1100, 0b0001, 0b001>;
+def : GIC<"vdhm", 0b100, 0b1100, 0b0010, 0b001>;
+def : GIC<"vdpend", 0b100, 0b1100, 0b0001, 0b100>;
+def : GIC<"vdpri", 0b100, 0b1100, 0b0001, 0b010>;
+def : GIC<"vdrcfg", 0b100, 0b1100, 0b0001, 0b101>;
+def : GIC<"ldaff", 0b110, 0b1100, 0b0001, 0b011>;
+def : GIC<"lddi", 0b110, 0b1100, 0b0010, 0b000>;
+def : GIC<"lddis", 0b110, 0b1100, 0b0001, 0b000>;
+def : GIC<"lden", 0b110, 0b1100, 0b0001, 0b001>;
+def : GIC<"ldhm", 0b110, 0b1100, 0b0010, 0b001>;
+def : GIC<"ldpend", 0b110, 0b1100, 0b0001, 0b100>;
+def : GIC<"ldpri", 0b110, 0b1100, 0b0001, 0b010>;
+def : GIC<"ldrcfg", 0b110, 0b1100, 0b0001, 0b101>;
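
The MLBI, GIC, GSB and GICR tables introduced above all reuse the TLBI-style 14-bit SYS-alias encoding: op1 in bits 13-11, CRn in bits 10-7, CRm in bits 6-3 and op2 in bits 2-0. The sketch below is not part of the patch (packSysAlias is an invented helper name, not an LLVM API); it only illustrates that packing for one entry from the MLBI table as a sanity check of the layout.

// Sketch only: mirrors the Encoding layout of the TLBI/MLBI/GIC/GSB/GICR
// classes above; packSysAlias is a hypothetical helper, not an LLVM API.
#include <cstdint>
#include <cstdio>

static uint16_t packSysAlias(unsigned Op1, unsigned CRn, unsigned CRm,
                             unsigned Op2) {
  return static_cast<uint16_t>((Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2);
}

int main() {
  // MLBI VPIDE1: op1=0b100, CRn=0b0111, CRm=0b0000, op2=0b110 (table above).
  std::printf("0x%04x\n", packSysAlias(0b100, 0b0111, 0b0000, 0b110)); // 0x2386
  return 0;
}
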
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 2053fc4..fede586 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -224,7 +224,8 @@ static cl::opt<bool> EnableScalableAutovecInStreamingMode(
static bool isSMEABIRoutineCall(const CallInst &CI,
const AArch64TargetLowering &TLI) {
const auto *F = CI.getCalledFunction();
- return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine();
+ return F &&
+ SMEAttrs(F->getName(), TLI.getRuntimeLibcallsInfo()).isSMEABIRoutine();
}
/// Returns true if the function has explicit operations that can only be
@@ -355,7 +356,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
// change only once and avoid inlining of G into F.
SMEAttrs FAttrs(*F);
- SMECallAttrs CallAttrs(Call, getTLI());
+ SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());
if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
@@ -957,23 +958,50 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return TyL.first + ExtraCost;
}
case Intrinsic::get_active_lane_mask: {
- auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType());
- if (RetTy) {
- EVT RetVT = getTLI()->getValueType(DL, RetTy);
- EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
- if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
- !getTLI()->isTypeLegal(RetVT)) {
- // We don't have enough context at this point to determine if the mask
- // is going to be kept live after the block, which will force the vXi1
- // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
- // For now, we just assume the vectorizer created this intrinsic and
- // the result will be the input for a PHI. In this case the cost will
- // be extremely high for fixed-width vectors.
- // NOTE: getScalarizationOverhead returns a cost that's far too
- // pessimistic for the actual generated codegen. In reality there are
- // two instructions generated per lane.
- return RetTy->getNumElements() * 2;
+ auto RetTy = cast<VectorType>(ICA.getReturnType());
+ EVT RetVT = getTLI()->getValueType(DL, RetTy);
+ EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
+ if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
+ break;
+
+ if (RetTy->isScalableTy()) {
+ if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
+ TargetLowering::TypeSplitVector)
+ break;
+
+ auto LT = getTypeLegalizationCost(RetTy);
+ InstructionCost Cost = LT.first;
+      // When SVE2p1 or SME2 is available, we can halve the type legalization
+      // cost, as get_active_lane_mask may lower to the sve_whilelo_x2 intrinsic, e.g.
+      // nxv32i1 = get_active_lane_mask(base, idx) ->
+      // {nxv16i1, nxv16i1} = sve_whilelo_x2(base, idx)
+ if (ST->hasSVE2p1() || ST->hasSME2()) {
+ Cost /= 2;
+ if (Cost == 1)
+ return Cost;
}
+
+      // If more than one whilelo intrinsic is required, include the extra cost
+      // of the saturating add & select needed to increment the start value
+      // after the first intrinsic call.
+ Type *OpTy = ICA.getArgTypes()[0];
+ IntrinsicCostAttributes AddAttrs(Intrinsic::uadd_sat, OpTy, {OpTy, OpTy});
+ InstructionCost SplitCost = getIntrinsicInstrCost(AddAttrs, CostKind);
+ Type *CondTy = OpTy->getWithNewBitWidth(1);
+ SplitCost += getCmpSelInstrCost(Instruction::Select, OpTy, CondTy,
+ CmpInst::ICMP_UGT, CostKind);
+ return Cost + (SplitCost * (Cost - 1));
+ } else if (!getTLI()->isTypeLegal(RetVT)) {
+ // We don't have enough context at this point to determine if the mask
+ // is going to be kept live after the block, which will force the vXi1
+ // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
+ // For now, we just assume the vectorizer created this intrinsic and
+ // the result will be the input for a PHI. In this case the cost will
+ // be extremely high for fixed-width vectors.
+ // NOTE: getScalarizationOverhead returns a cost that's far too
+ // pessimistic for the actual generated codegen. In reality there are
+ // two instructions generated per lane.
+ return cast<FixedVectorType>(RetTy)->getNumElements() * 2;
}
break;
}
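
A rough worked example of the scalable-path cost above, under the assumption (mine, not stated in the patch) that an nxv32i1 mask legalizes by splitting into two nxv16i1 parts, so LT.first == 2: with SVE2p1 or SME2 the cost is halved to 1 and returned, matching a single whilelo_x2-style lowering; without those features the return value is Cost + SplitCost * (Cost - 1) = 2 + SplitCost, i.e. two while-style mask computations plus one uadd_sat/select pair to advance the start index between them.
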
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 636d4f8a..6273cfc 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -159,6 +159,7 @@ private:
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ bool parseSyslAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
bool parseSyspAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond,
@@ -266,6 +267,7 @@ private:
ParseStatus tryParseRPRFMOperand(OperandVector &Operands);
ParseStatus tryParsePSBHint(OperandVector &Operands);
ParseStatus tryParseBTIHint(OperandVector &Operands);
+ ParseStatus tryParseCMHPriorityHint(OperandVector &Operands);
ParseStatus tryParseAdrpLabel(OperandVector &Operands);
ParseStatus tryParseAdrLabel(OperandVector &Operands);
template <bool AddFPZeroAsLiteral>
@@ -370,6 +372,7 @@ private:
k_PSBHint,
k_PHint,
k_BTIHint,
+ k_CMHPriorityHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -499,6 +502,11 @@ private:
unsigned Length;
unsigned Val;
};
+ struct CMHPriorityHintOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Val;
+ };
struct SVCROp {
const char *Data;
@@ -525,6 +533,7 @@ private:
struct PSBHintOp PSBHint;
struct PHintOp PHint;
struct BTIHintOp BTIHint;
+ struct CMHPriorityHintOp CMHPriorityHint;
struct ShiftExtendOp ShiftExtend;
struct SVCROp SVCR;
};
@@ -595,6 +604,9 @@ public:
case k_BTIHint:
BTIHint = o.BTIHint;
break;
+ case k_CMHPriorityHint:
+ CMHPriorityHint = o.CMHPriorityHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -769,6 +781,16 @@ public:
return StringRef(BTIHint.Data, BTIHint.Length);
}
+ unsigned getCMHPriorityHint() const {
+ assert(Kind == k_CMHPriorityHint && "Invalid access!");
+ return CMHPriorityHint.Val;
+ }
+
+ StringRef getCMHPriorityHintName() const {
+ assert(Kind == k_CMHPriorityHint && "Invalid access!");
+ return StringRef(CMHPriorityHint.Data, CMHPriorityHint.Length);
+ }
+
StringRef getSVCR() const {
assert(Kind == k_SVCR && "Invalid access!");
return StringRef(SVCR.Data, SVCR.Length);
@@ -1511,6 +1533,7 @@ public:
bool isPSBHint() const { return Kind == k_PSBHint; }
bool isPHint() const { return Kind == k_PHint; }
bool isBTIHint() const { return Kind == k_BTIHint; }
+ bool isCMHPriorityHint() const { return Kind == k_CMHPriorityHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -2196,6 +2219,11 @@ public:
Inst.addOperand(MCOperand::createImm(getBTIHint()));
}
+ void addCMHPriorityHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getCMHPriorityHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -2547,6 +2575,17 @@ public:
}
static std::unique_ptr<AArch64Operand>
+ CreateCMHPriorityHint(unsigned Val, StringRef Str, SMLoc S, MCContext &Ctx) {
+ auto Op = std::make_unique<AArch64Operand>(k_CMHPriorityHint, Ctx);
+ Op->CMHPriorityHint.Val = Val;
+ Op->CMHPriorityHint.Data = Str.data();
+ Op->CMHPriorityHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<AArch64Operand>
CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
@@ -2656,6 +2695,9 @@ void AArch64Operand::print(raw_ostream &OS, const MCAsmInfo &MAI) const {
case k_BTIHint:
OS << getBTIHintName();
break;
+ case k_CMHPriorityHint:
+ OS << getCMHPriorityHintName();
+ break;
case k_MatrixRegister:
OS << "<matrix " << getMatrixReg() << ">";
break;
@@ -3279,6 +3321,24 @@ ParseStatus AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
return ParseStatus::Success;
}
+/// tryParseCMHPriorityHint - Try to parse a CMHPriority operand
+ParseStatus AArch64AsmParser::tryParseCMHPriorityHint(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ const AsmToken &Tok = getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
+
+ auto CMHPriority =
+ AArch64CMHPriorityHint::lookupCMHPriorityHintByName(Tok.getString());
+ if (!CMHPriority)
+ return TokError("invalid operand for instruction");
+
+ Operands.push_back(AArch64Operand::CreateCMHPriorityHint(
+ CMHPriority->Encoding, Tok.getString(), S, getContext()));
+ Lex(); // Eat identifier token.
+ return ParseStatus::Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
ParseStatus AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
@@ -3824,6 +3884,18 @@ static const struct Extension {
{"ssve-bitperm", {AArch64::FeatureSSVE_BitPerm}},
{"sme-mop4", {AArch64::FeatureSME_MOP4}},
{"sme-tmop", {AArch64::FeatureSME_TMOP}},
+ {"cmh", {AArch64::FeatureCMH}},
+ {"lscp", {AArch64::FeatureLSCP}},
+ {"tlbid", {AArch64::FeatureTLBID}},
+ {"mpamv2", {AArch64::FeatureMPAMv2}},
+ {"mtetc", {AArch64::FeatureMTETC}},
+ {"gcie", {AArch64::FeatureGCIE}},
+ {"sme2p3", {AArch64::FeatureSME2p3}},
+ {"sve2p3", {AArch64::FeatureSVE2p3}},
+ {"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
+ {"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -3861,6 +3933,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv9.5a";
else if (FBS[AArch64::HasV9_6aOps])
Str += "ARMv9.6a";
+ else if (FBS[AArch64::HasV9_7aOps])
+ Str += "ARMv9.7a";
else if (FBS[AArch64::HasV8_0rOps])
Str += "ARMv8r";
else {
@@ -3894,8 +3968,9 @@ void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
}
-/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
-/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
+/// parseSysAlias - The IC, DC, AT, TLBI, MLBI, GIC and GSB instructions are
+/// simple aliases for the SYS instruction. Parse them specially so that we
+/// create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
if (Name.contains('.'))
@@ -3908,6 +3983,8 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
bool ExpectRegister = true;
+ bool OptionalRegister = false;
+ bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
if (Mnemonic == "ic") {
const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3950,13 +4027,50 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError(Str);
}
ExpectRegister = TLBI->NeedsReg;
+ bool hasTLBID = getSTI().hasFeature(AArch64::FeatureTLBID);
+ if (hasAll || hasTLBID) {
+ OptionalRegister = TLBI->OptionalReg;
+ }
createSysAlias(TLBI->Encoding, Operands, S);
- } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" || Mnemonic == "cosp") {
+ } else if (Mnemonic == "mlbi") {
+ const AArch64MLBI::MLBI *MLBI = AArch64MLBI::lookupMLBIByName(Op);
+ if (!MLBI)
+ return TokError("invalid operand for MLBI instruction");
+ else if (!MLBI->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("MLBI " + std::string(MLBI->Name) + " requires: ");
+ setRequiredFeatureString(MLBI->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = MLBI->NeedsReg;
+ createSysAlias(MLBI->Encoding, Operands, S);
+ } else if (Mnemonic == "gic") {
+ const AArch64GIC::GIC *GIC = AArch64GIC::lookupGICByName(Op);
+ if (!GIC)
+ return TokError("invalid operand for GIC instruction");
+ else if (!GIC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GIC " + std::string(GIC->Name) + " requires: ");
+ setRequiredFeatureString(GIC->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = true;
+ createSysAlias(GIC->Encoding, Operands, S);
+ } else if (Mnemonic == "gsb") {
+ const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByName(Op);
+ if (!GSB)
+ return TokError("invalid operand for GSB instruction");
+ else if (!GSB->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GSB " + std::string(GSB->Name) + " requires: ");
+ setRequiredFeatureString(GSB->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = false;
+ createSysAlias(GSB->Encoding, Operands, S);
+ } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" ||
+ Mnemonic == "cosp") {
if (Op.lower() != "rctx")
return TokError("invalid operand for prediction restriction instruction");
- bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
@@ -3989,10 +4103,61 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister)
- return TokError("specified " + Mnemonic + " op requires a register");
- else if (!ExpectRegister && HasRegister)
- return TokError("specified " + Mnemonic + " op does not use a register");
+ if (!OptionalRegister) {
+ if (ExpectRegister && !HasRegister)
+ return TokError("specified " + Mnemonic + " op requires a register");
+ else if (!ExpectRegister && HasRegister)
+ return TokError("specified " + Mnemonic + " op does not use a register");
+ }
+
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
+
+ return false;
+}
+
+/// parseSyslAlias - The GICR instructions are simple aliases for the SYSL
+/// instruction. Parse them specially so that we create a SYSL MCInst.
+bool AArch64AsmParser::parseSyslAlias(StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+
+ Mnemonic = Name;
+ Operands.push_back(
+ AArch64Operand::CreateToken("sysl", NameLoc, getContext()));
+
+  // Expect two operands: a destination register followed by the operand name.
+ SMLoc startLoc = getLoc();
+ const AsmToken &regTok = getTok();
+ StringRef reg = regTok.getString();
+ unsigned RegNum = matchRegisterNameAlias(reg.lower(), RegKind::Scalar);
+ if (!RegNum)
+ return TokError("expected register operand");
+
+ Operands.push_back(AArch64Operand::CreateReg(
+ RegNum, RegKind::Scalar, startLoc, getLoc(), getContext(), EqualsReg));
+
+ Lex(); // Eat token
+ if (parseToken(AsmToken::Comma))
+ return true;
+
+ // Check for identifier
+ const AsmToken &operandTok = getTok();
+ StringRef Op = operandTok.getString();
+ SMLoc S2 = operandTok.getLoc();
+ Lex(); // Eat token
+
+ if (Mnemonic == "gicr") {
+ const AArch64GICR::GICR *GICR = AArch64GICR::lookupGICRByName(Op);
+ if (!GICR)
+ return Error(S2, "invalid operand for GICR instruction");
+ else if (!GICR->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GICR " + std::string(GICR->Name) + " requires: ");
+ setRequiredFeatureString(GICR->getRequiredFeatures(), Str);
+ return Error(S2, Str);
+ }
+ createSysAlias(GICR->Encoding, Operands, S2);
+ }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
@@ -4025,7 +4190,7 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc,
return TokError("invalid operand for TLBIP instruction");
const AArch64TLBIP::TLBIP TLBIP(
TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0),
- TLBIPorig->NeedsReg,
+ TLBIPorig->NeedsReg, TLBIPorig->OptionalReg,
HasnXSQualifier
? TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS})
: TLBIPorig->FeaturesRequired);
@@ -4719,6 +4884,13 @@ ParseStatus AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S,
getLoc(), getContext()));
+ if (getTok().is(AsmToken::LBrac)) {
+ ParseStatus Res = tryParseVectorIndex(Operands);
+ if (Res.isFailure())
+ return ParseStatus::Failure;
+ return ParseStatus::Success;
+ }
+
return ParseStatus::Success;
}
@@ -5267,12 +5439,17 @@ bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info,
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for
- // the SYS instruction.
+  // IC, DC, AT, TLBI, MLBI, GIC, GSB and Prediction invalidation
+  // instructions are aliases for the SYS instruction.
if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" ||
- Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp")
+ Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp" ||
+ Head == "mlbi" || Head == "gic" || Head == "gsb")
return parseSysAlias(Head, NameLoc, Operands);
+ // GICR instructions are aliases for the SYSL instruction.
+ if (Head == "gicr")
+ return parseSyslAlias(Head, NameLoc, Operands);
+
// TLBIP instructions are aliases for the SYSP instruction.
if (Head == "tlbip")
return parseSyspAlias(Head, NameLoc, Operands);
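
In terms of accepted assembly, the intended effect of these parser changes appears to be: when FeatureTLBID (or FeatureAll) is enabled, TLBI operations marked OptionalReg accept both the bare form and a form with a trailing register, e.g. "tlbi vmalle1is" as well as "tlbi vmalle1is, x0"; the new GICR operations are written SYSL-style with the destination register first, e.g. "gicr x0, cdia". The operand names come from the tables earlier in this patch; the x0 register is only an illustrative choice.
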
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3e55b76..14b0f9a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5126,23 +5126,13 @@ bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
- const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
- const LLT Src2Ty = MRI.getType(Src2Reg);
ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
- // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
- // it's originated from a <1 x T> type. Those should have been lowered into
- // G_BUILD_VECTOR earlier.
- if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
- LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
- return false;
- }
-
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a4313..5f93847 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1201,25 +1201,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return llvm::is_contained(
{v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
})
- // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
- // destinations, we just want those lowered into G_BUILD_VECTOR or
- // G_EXTRACT_ELEMENT.
- .lowerIf([=](const LegalityQuery &Query) {
- return !Query.Types[0].isVector() || !Query.Types[1].isVector();
- })
.moreElementsIf(
[](const LegalityQuery &Query) {
- return Query.Types[0].isVector() && Query.Types[1].isVector() &&
- Query.Types[0].getNumElements() >
- Query.Types[1].getNumElements();
+ return Query.Types[0].getNumElements() >
+ Query.Types[1].getNumElements();
},
changeTo(1, 0))
.moreElementsToNextPow2(0)
.moreElementsIf(
[](const LegalityQuery &Query) {
- return Query.Types[0].isVector() && Query.Types[1].isVector() &&
- Query.Types[0].getNumElements() <
- Query.Types[1].getNumElements();
+ return Query.Types[0].getNumElements() <
+ Query.Types[1].getNumElements();
},
changeTo(0, 1))
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 830a35bb..6d2d705 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -856,7 +856,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_FPTOSI_SAT:
- case TargetOpcode::G_FPTOUI_SAT: {
+ case TargetOpcode::G_FPTOUI_SAT:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
if (DstType.isVector())
break;
@@ -864,11 +866,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
}
- OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+ if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+ all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ prefersFPUse(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 35bd244..5c3e26e 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -84,6 +84,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
+ if (Opcode == AArch64::SYSLxt)
+ if (printSyslAlias(MI, STI, O)) {
+ printAnnotation(O, Annot);
+ return;
+ }
+
if (Opcode == AArch64::SYSPxt || Opcode == AArch64::SYSPxt_XZR)
if (printSyspAlias(MI, STI, O)) {
printAnnotation(O, Annot);
@@ -909,13 +915,25 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
Encoding |= CnVal << 7;
Encoding |= Op1Val << 11;
- bool NeedsReg;
+ bool NeedsReg = false;
+ bool OptionalReg = false;
std::string Ins;
std::string Name;
if (CnVal == 7) {
switch (CmVal) {
default: return false;
+ // MLBI aliases
+ case 0: {
+ const AArch64MLBI::MLBI *MLBI =
+ AArch64MLBI::lookupMLBIByEncoding(Encoding);
+ if (!MLBI || !MLBI->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = MLBI->NeedsReg;
+ Ins = "mlbi\t";
+ Name = std::string(MLBI->Name);
+ } break;
// Maybe IC, maybe Prediction Restriction
case 1:
switch (Op1Val) {
@@ -1004,19 +1022,41 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
return false;
NeedsReg = TLBI->NeedsReg;
+ if (STI.hasFeature(AArch64::FeatureAll) ||
+ STI.hasFeature(AArch64::FeatureTLBID))
+ OptionalReg = TLBI->OptionalReg;
Ins = "tlbi\t";
Name = std::string(TLBI->Name);
- }
- else
+ } else if (CnVal == 12) {
+ if (CmVal != 0) {
+ // GIC aliases
+ const AArch64GIC::GIC *GIC = AArch64GIC::lookupGICByEncoding(Encoding);
+ if (!GIC || !GIC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "gic\t";
+ Name = std::string(GIC->Name);
+ } else {
+ // GSB aliases
+ const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByEncoding(Encoding);
+ if (!GSB || !GSB->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = false;
+ Ins = "gsb\t";
+ Name = std::string(GSB->Name);
+ }
+ } else
return false;
StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
bool NotXZR = Reg != "xzr";
- // If a mandatory is not specified in the TableGen
+ // If a mandatory or optional register is not specified in the TableGen
// (i.e. no register operand should be present), and the register value
// is not xzr/x31, then disassemble to a SYS alias instead.
- if (NotXZR && !NeedsReg)
+ if (NotXZR && !NeedsReg && !OptionalReg)
return false;
std::string Str = Ins + Name;
@@ -1024,12 +1064,64 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
O << '\t' << Str;
- if (NeedsReg)
+  // For optional registers, don't print the value if it is xzr/x31, since
+  // the operand defaults to xzr/x31 when no register is specified.
+ if (NeedsReg || (OptionalReg && NotXZR))
O << ", " << Reg;
return true;
}
+bool AArch64InstPrinter::printSyslAlias(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+#ifndef NDEBUG
+ unsigned Opcode = MI->getOpcode();
+ assert(Opcode == AArch64::SYSLxt && "Invalid opcode for SYSL alias!");
+#endif
+
+ StringRef Reg = getRegisterName(MI->getOperand(0).getReg());
+ const MCOperand &Op1 = MI->getOperand(1);
+ const MCOperand &Cn = MI->getOperand(2);
+ const MCOperand &Cm = MI->getOperand(3);
+ const MCOperand &Op2 = MI->getOperand(4);
+
+ unsigned Op1Val = Op1.getImm();
+ unsigned CnVal = Cn.getImm();
+ unsigned CmVal = Cm.getImm();
+ unsigned Op2Val = Op2.getImm();
+
+ uint16_t Encoding = Op2Val;
+ Encoding |= CmVal << 3;
+ Encoding |= CnVal << 7;
+ Encoding |= Op1Val << 11;
+
+ std::string Ins;
+ std::string Name;
+
+ if (CnVal == 12) {
+ if (CmVal == 3) {
+ // GICR aliases
+ const AArch64GICR::GICR *GICR =
+ AArch64GICR::lookupGICRByEncoding(Encoding);
+ if (!GICR || !GICR->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ Ins = "gicr";
+ Name = std::string(GICR->Name);
+ } else
+ return false;
+ } else
+ return false;
+
+ std::string Str;
+ llvm::transform(Name, Name.begin(), ::tolower);
+
+ O << '\t' << Ins << '\t' << Reg.str() << ", " << Name;
+
+ return true;
+}
+
bool AArch64InstPrinter::printSyspAlias(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1508,6 +1600,17 @@ void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
markup(O, Markup::Immediate) << '#' << formatImm(btihintop);
}
+void AArch64InstPrinter::printCMHPriorityHintOp(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned priorityhint_op = MI->getOperand(OpNum).getImm();
+ auto PHint =
+ AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(priorityhint_op);
+ if (PHint)
+ O << PHint->Name;
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
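
For the printing direction, a quick check against the same bit layout: GICR cdia is defined above with op1=0b000, CRn=0b1100, CRm=0b0011, op2=0b000, which packs to (0 << 11) | (12 << 7) | (3 << 3) | 0 = 0x618. printSyslAlias rebuilds exactly that value from the SYSLxt operands, looks it up in the GICR table, and prints, e.g., "gicr x0, cdia" with the destination register first (x0 again being only an illustrative choice).
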
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 15ef2dd..307402d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -52,6 +52,8 @@ public:
protected:
bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
+ bool printSyslAlias(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
bool printSyspAlias(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
bool printRangePrefetchAlias(const MCInst *MI, const MCSubtargetInfo &STI,
@@ -151,6 +153,9 @@ protected:
void printBTIHintOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCMHPriorityHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 33f35ad..99836ae 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3920,6 +3920,78 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}
+// 8-bit Look up table
+class sme2_lut_single<string asm>
+ : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
+ asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+ bits<0> ZTt;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-10} = 0b1100000011001000010000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// Lookup table read with 6-bit indices (8-bit)
+class sme2_luti6_zt_base<RegisterOperand zd_ty, string asm>
+ : I<(outs zd_ty:$Zd), (ins ZTR:$ZTt, ZZZ_Any:$Zn),
+ asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+ bits<0> ZTt;
+ bits<3> Zd;
+ bits<3> Zn;
+ let Inst{31-21} = 0b11000000100;
+ let Inst{19-10} = 0b1010000000;
+ let Inst{9-7} = Zn;
+ let Inst{6-5} = 0b00;
+}
+
+class sme2_luti6_zt_consecutive<string asm>
+ : sme2_luti6_zt_base<ZZZZ_b_mul_r, asm> {
+ let Inst{20} = 0;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+class sme2_luti6_zt_strided<string asm>
+ : sme2_luti6_zt_base<ZZZZ_b_strided, asm> {
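+  // Strided form: bit 20 is set and Zd is split across Inst{4} and Inst{1-0}.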
+ let Inst{20} = 1;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Lookup table read with 6-bit indices (8-bit)
+class sme2_luti6_vector_vg4_base<RegisterOperand zd_ty, string asm>
+ : I<(outs zd_ty:$Zd), (ins ZZ_h:$Zn, ZZ_Any:$Zm, VectorIndexD:$i1),
+ asm, "\t$Zd, $Zn, $Zm$i1", "", []>, Sched<[]> {
+ bits<3> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ bits<1> i1;
+ let Inst{31-23} = 0b110000010;
+ let Inst{22} = i1;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{9-5} = Zn;
+}
+
+class sme2_luti6_vector_vg4_consecutive<string asm>
+ : sme2_luti6_vector_vg4_base<ZZZZ_h_mul_r, asm> {
+ let Inst{15-10} = 0b111101;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+class sme2_luti6_vector_vg4_strided<string asm>
+ : sme2_luti6_vector_vg4_base<ZZZZ_h_strided, asm> {
+ let Inst{15-10} = 0b111111;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
+
//===----------------------------------------------------------------------===//
// SME2 MOV
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 3cdd505..1664f4a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3787,7 +3787,7 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri
// SVE Integer Dot Product Group - Indexed Group
//===----------------------------------------------------------------------===//
-class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
+class sve_intx_dot_by_indexed_elem<bit U, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
ZPRRegOp zprty3, Operand itype>
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
@@ -3795,8 +3795,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
"", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
- let Inst{31-23} = 0b010001001;
- let Inst{22} = sz;
+ let Inst{31-24} = 0b01000100;
let Inst{21} = 0b1;
let Inst{15-11} = 0;
let Inst{10} = U;
@@ -3810,16 +3809,18 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
SDPatternOperator op> {
- def _BtoS : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
+ def _BtoS : sve_intx_dot_by_indexed_elem<opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
bits<2> iop;
bits<3> Zm;
+ let Inst{23-22} = 0b10;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _HtoD : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
+ def _HtoD : sve_intx_dot_by_indexed_elem<opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
bits<1> iop;
bits<4> Zm;
- let Inst{20} = iop;
+ let Inst{23-22} = 0b11;
+ let Inst{20} = iop;
let Inst{19-16} = Zm;
}
@@ -3827,6 +3828,16 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _HtoD)>;
}
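+// Indexed dot product with 8-bit sources and a 16-bit accumulator; the 3-bit
+// index is split across Inst{22} and Inst{20-19}.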
+class sve_intx_dot_by_indexed_elem_x<bit opc, string asm>
+: sve_intx_dot_by_indexed_elem<opc, asm, ZPR16, ZPR8, ZPR3b8, VectorIndexH32b_timm> {
+ bits<3> iop;
+ bits<3> Zm;
+ let Inst{23} = 0b0;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Complex Integer Dot Product Group
//===----------------------------------------------------------------------===//
@@ -4085,7 +4096,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
bits<5> Zdn;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
- let Inst{21-20} = 0b01;
+ let Inst{21} = 0b0;
let Inst{20-16} = opc{5-1};
let Inst{15-14} = 0b10;
let Inst{13} = opc{0};
@@ -4590,15 +4601,15 @@ multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateopodd, !cast<Instruction>(NAME # _D)>;
}
-class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
+class sve2_int_absdiff_accum<bits<3> sz, bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
- let Inst{31-24} = 0b01000101;
- let Inst{23-22} = sz;
+ let Inst{31-25} = 0b0100010;
+ let Inst{24-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-14} = 0b11;
@@ -4613,10 +4624,10 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
}
multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
- def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
- def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
- def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
- def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+ def _B : sve2_int_absdiff_accum<0b100, { 0b111, opc }, asm, ZPR8, ZPR8>;
+ def _H : sve2_int_absdiff_accum<0b101, { 0b111, opc }, asm, ZPR16, ZPR16>;
+ def _S : sve2_int_absdiff_accum<0b110, { 0b111, opc }, asm, ZPR32, ZPR32>;
+ def _D : sve2_int_absdiff_accum<0b111, { 0b111, opc }, asm, ZPR64, ZPR64>;
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -4626,20 +4637,26 @@ multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
SDPatternOperator op> {
- def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
- def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
- def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+ def _H : sve2_int_absdiff_accum<0b101, { 0b00, opc }, asm, ZPR16, ZPR8>;
+ def _S : sve2_int_absdiff_accum<0b110, { 0b00, opc }, asm, ZPR32, ZPR16>;
+ def _D : sve2_int_absdiff_accum<0b111, { 0b00, opc }, asm, ZPR64, ZPR32>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve2_int_two_way_absdiff_accum_long<bit U, string asm> {
+ def _BtoH : sve2_int_absdiff_accum<0b001, { 0b01, U, 0b1 }, asm, ZPR16, ZPR8>;
+ def _HtoS : sve2_int_absdiff_accum<0b010, { 0b01, U, 0b1 }, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_int_absdiff_accum<0b011, { 0b01, U, 0b1 }, asm, ZPR64, ZPR32>;
+}
+
multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
SDPatternOperator op> {
- def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
+ def _S : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
- def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
+ def _D : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
ZPR64, ZPR64>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
@@ -9610,17 +9627,18 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
// SVE Floating Point Matrix Multiply Accumulate Group
//===----------------------------------------------------------------------===//
-class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
+class sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
let Inst{31-24} = 0b01100100;
- let Inst{23-22} = opc;
+ let Inst{23-22} = opc{2-1};
let Inst{21} = 1;
let Inst{20-16} = Zm;
- let Inst{15-10} = 0b111001;
+ let Inst{15-11} = 0b11100;
+ let Inst{10} = opc{0};
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
@@ -9630,10 +9648,12 @@ class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_t
let mayRaiseFPException = 1;
}
-multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
+multiclass sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty,
+ ZPRRegOp reg_ty, SDPatternOperator op,
+ ValueType zda_vt, ValueType reg_vt> {
def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
- def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<zda_vt, op, zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
@@ -10030,18 +10050,19 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
}
// SVE2 multi-vec shift narrow
-class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
- mnemonic, "\t$Zd, $Zn, $imm4",
+class sve2p1_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOperand ZSrcOp,
+ Operand immtype, bits<3> opc, bits<2> tsz>
+ : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn, immtype:$imm),
+ mnemonic, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
bits<4> Zn;
- bits<4> imm4;
+ bits<4> imm;
let Inst{31-23} = 0b010001011;
let Inst{22} = tsz{1};
let Inst{21} = 0b1;
let Inst{20} = tsz{0};
- let Inst{19-16} = imm4;
+ let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
let Inst{13-11} = opc;
let Inst{10} = 0b0;
@@ -10052,12 +10073,19 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
let hasSideEffects = 0;
}
-multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
- def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
+multiclass sve_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
+ def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR16, ZZ_s_mul_r, vecshiftR16, opc, 0b01> {
+ let Inst{19} = imm{3}; // imm4
+ }
def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
+multiclass sve_multi_vec_round_shift_narrow<string mnemonic, bits<3> opc> {
+ def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR8, ZZ_h_mul_r, vecshiftR8, opc, 0b00> {
+ let Inst{19} = 0b1; // always 1 for imm3 version
+ }
+}
// SME2 multi-vec contiguous load (scalar plus scalar, two registers)
class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
@@ -11164,7 +11192,7 @@ multiclass sve2_fp8_dot_indexed_s<string asm, SDPatternOperator op> {
def : SVE_4_Op_Pat<nxv4f32, op, nxv4f32, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME)>;
}
-// FP8 Look up table
+// Look up table
class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
Operand idx_ty, bits<4>opc, string mnemonic>
: I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
@@ -11183,7 +11211,7 @@ class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
let Inst{4-0} = Zd;
}
-// FP8 Look up table read with 2-bit indices
+// Look up table read with 2-bit indices
multiclass sve2_luti2_vector_index<string mnemonic> {
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
bits<2> idx;
@@ -11205,7 +11233,7 @@ multiclass sve2_luti2_vector_index<string mnemonic> {
i32, timm32_0_7, !cast<Instruction>(NAME # _H)>;
}
-// FP8 Look up table read with 4-bit indices
+// Look up table read with 4-bit indices
multiclass sve2_luti4_vector_index<string mnemonic> {
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
bit idx;
@@ -11226,7 +11254,7 @@ multiclass sve2_luti4_vector_index<string mnemonic> {
i32, timm32_0_3, !cast<Instruction>(NAME # _H)>;
}
-// FP8 Look up table read with 4-bit indices (two contiguous registers)
+// Look up table read with 4-bit indices (two contiguous registers)
multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
def NAME : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
bits<2> idx;
@@ -11250,6 +11278,29 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
nxv16i8:$Op3, timm32_0_3:$Op4))>;
}
+// Look up table read with 6-bit indices
+multiclass sve2_luti6_vector_index<string mnemonic> {
+ def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, mnemonic> {
+ bit idx;
+ let Inst{23} = idx;
+ }
+}
+
+// Look up table
+class sve2_luti6_vector<string mnemonic>
+ : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm),
+ mnemonic, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01000101001;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b101011;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
@@ -11280,3 +11331,49 @@ class sve_int_mla_cpa<string asm>
let ElementSize = ZPR64.ElementSize;
}
+
+//===----------------------------------------------------------------------===//
+// FP to Int down-converts
+//===----------------------------------------------------------------------===//
+class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, bits<2> size, bit U>
+ : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<4> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = size;
+ let Inst{21-11} = 0b00110100110;
+ let Inst{10} = U;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp_to_int_downcvt<string asm, bit U> {
+ def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>;
+ def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>;
+ def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>;
+}
+
+//===----------------------------------------------------------------------===//
+// Int to FP up-converts
+//===----------------------------------------------------------------------===//
+class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC,
+ bits<2> size, bits<2> U>
+ : I<(outs ZdRC:$Zd), (ins ZnRC:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = size;
+ let Inst{21-12} = 0b0011000011;
+ let Inst{11-10} = U;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> {
+ def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>;
+ def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>;
+ def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>;
+}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index d6cb0e8..268a229 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -139,6 +139,13 @@ namespace llvm {
}
namespace llvm {
+namespace AArch64CMHPriorityHint {
+#define GET_CMHPRIORITYHINT_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64CMHPriorityHint
+} // namespace llvm
+
+namespace llvm {
namespace AArch64SysReg {
#define GET_SysRegsList_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -190,6 +197,32 @@ namespace AArch64TLBIP {
#define GET_TLBIPTable_IMPL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64TLBIP
+
+namespace AArch64MLBI {
+#define GET_MLBITable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64MLBI
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GIC {
+#define GET_GICTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GIC
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GICR {
+#define GET_GICRTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GICR
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GSB {
+#define GET_GSBTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GSB
} // namespace llvm
namespace llvm {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index fea33ef..27812e9 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,6 +409,16 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
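+// System alias whose register operand may be optional in the assembly syntax
+// (for example, TLBI forms where the Xt operand can be omitted).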
+struct SysAliasOptionalReg : SysAlias {
+ bool NeedsReg;
+ bool OptionalReg;
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
+ : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
+ FeatureBitset F)
+ : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
+};
+
struct SysAliasImm : SysAlias {
uint16_t ImmValue;
constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
@@ -677,6 +687,14 @@ namespace AArch64BTIHint {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64CMHPriorityHint {
+struct CMHPriorityHint : SysAlias {
+ using SysAlias::SysAlias;
+};
+#define GET_CMHPRIORITYHINT_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64CMHPriorityHint
+
namespace AArch64SME {
enum ToggleCondition : unsigned {
Always,
@@ -788,21 +806,53 @@ namespace AArch64SysReg {
}
namespace AArch64TLBI {
- struct TLBI : SysAliasReg {
- using SysAliasReg::SysAliasReg;
- };
- #define GET_TLBITable_DECL
- #include "AArch64GenSystemOperands.inc"
+struct TLBI : SysAliasOptionalReg {
+ using SysAliasOptionalReg::SysAliasOptionalReg;
+};
+#define GET_TLBITable_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64TLBIP {
-struct TLBIP : SysAliasReg {
- using SysAliasReg::SysAliasReg;
+struct TLBIP : SysAliasOptionalReg {
+ using SysAliasOptionalReg::SysAliasOptionalReg;
};
#define GET_TLBIPTable_DECL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64TLBIP
+namespace AArch64MLBI {
+struct MLBI : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_MLBITable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64MLBI
+
+namespace AArch64GIC {
+struct GIC : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_GICTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GIC
+
+namespace AArch64GICR {
+struct GICR : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_GICRTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GICR
+
+namespace AArch64GSB {
+struct GSB : SysAlias {
+ using SysAlias::SysAlias;
+};
+#define GET_GSBTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GSB
+
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index d71f728..085c8588 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -75,8 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
}
void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI) {
- RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ RTLIB::LibcallImpl Impl = RTLCI.getSupportedLibcallImpl(FuncName);
if (Impl == RTLIB::Unsupported)
return;
unsigned KnownAttrs = SMEAttrs::Normal;
@@ -124,21 +124,22 @@ bool SMECallAttrs::requiresSMChange() const {
return true;
}
-SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
+SMECallAttrs::SMECallAttrs(const CallBase &CB,
+ const RTLIB::RuntimeLibcallsInfo *RTLCI)
: CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
if (auto *CalledFunction = CB.getCalledFunction())
- CalledFn = SMEAttrs(*CalledFunction, TLI);
-
- // An `invoke` of an agnostic ZA function may not return normally (it may
- // resume in an exception block). In this case, it acts like a private ZA
- // callee and may require a ZA save to be set up before it is called.
- if (isa<InvokeInst>(CB))
- CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
+ CalledFn = SMEAttrs(*CalledFunction, RTLCI);
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
assert((IsIndirect ||
((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) &&
"SME attributes at callsite do not match declaration");
+
+ // An `invoke` of an agnostic ZA function may not return normally (it may
+ // resume in an exception block). In this case, it acts like a private ZA
+ // callee and may require a ZA save to be set up before it is called.
+ if (isa<InvokeInst>(CB))
+ CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index d26e3cd..28c397e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -12,8 +12,9 @@
#include "llvm/IR/Function.h"
namespace llvm {
-
-class AArch64TargetLowering;
+namespace RTLIB {
+struct RuntimeLibcallsInfo;
+}
class Function;
class CallBase;
@@ -52,14 +53,14 @@ public:
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function &F, const AArch64TargetLowering *TLI = nullptr)
+ SMEAttrs(const Function &F, const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr)
: SMEAttrs(F.getAttributes()) {
- if (TLI)
- addKnownFunctionAttrs(F.getName(), *TLI);
+ if (RTLCI)
+ addKnownFunctionAttrs(F.getName(), *RTLCI);
}
SMEAttrs(const AttributeList &L);
- SMEAttrs(StringRef FuncName, const AArch64TargetLowering &TLI) {
- addKnownFunctionAttrs(FuncName, TLI);
+ SMEAttrs(StringRef FuncName, const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ addKnownFunctionAttrs(FuncName, RTLCI);
};
void set(unsigned M, bool Enable = true) {
@@ -157,7 +158,7 @@ public:
private:
void addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI);
void validate() const;
};
@@ -175,7 +176,7 @@ public:
SMEAttrs Callsite = SMEAttrs::Normal)
: CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {}
- SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI);
+ SMECallAttrs(const CallBase &CB, const RTLIB::RuntimeLibcallsInfo *RTLCI);
SMEAttrs &caller() { return CallerFn; }
SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index e8b211f..7f00ead 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -176,6 +176,19 @@ def binop_s64_with_s32_mask_combines : GICombineGroup<[
combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask
]>;
+// (or i64:x, (zext i32:y)) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+// (or (zext i32:y), i64:x) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+def or_s64_zext_s32_frag : GICombinePatFrag<(outs root:$dst), (ins $src_s64, $src_s32),
+ [(pattern (G_OR $dst, i64:$src_s64, i64:$zext_val), (G_ZEXT i64:$zext_val, i32:$src_s32)),
+ (pattern (G_OR $dst, i64:$zext_val, i64:$src_s64), (G_ZEXT i64:$zext_val, i32:$src_s32))]>;
+
+def combine_or_s64_s32 : GICombineRule<
+ (defs root:$dst),
+ (match (or_s64_zext_s32_frag $dst, i64:$x, i32:$y):$dst),
+ (apply (G_UNMERGE_VALUES $x_lo, $x_hi, $x),
+ (G_OR $or, $x_lo, $y),
+ (G_MERGE_VALUES $dst, $or, $x_hi))>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
// saves one instruction compared to the promotion.
@@ -206,7 +219,7 @@ def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16,
foldable_fneg, combine_shuffle_vector_to_build_vector,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
@@ -215,7 +228,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8ed4062..1b559a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -514,8 +514,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction({ISD::ABS, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
- MVT::i32, Legal);
+ setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
+ Legal);
setOperationAction(
{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 596a895..1a13b22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
}
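+  // The IEEE min/max opcodes get ordinary legalization rules here; the
+  // non-IEEE variants below still go through custom lowering.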
+ auto &MinNumMaxNumIeee =
+ getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+
+ if (ST.hasVOP3PInsts()) {
+ MinNumMaxNumIeee.legalFor(FPTypesPK16)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampMaxNumElements(0, S16, 2)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else if (ST.has16BitInsts()) {
+ MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0);
+ } else {
+ MinNumMaxNumIeee.legalFor(FPTypesBase)
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
+ }
+
auto &MinNumMaxNum = getActionDefinitionsBuilder(
- {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
- G_FMAXNUM_IEEE});
+ {G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM});
if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
@@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
.scalarize(0);
+ } else if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
+ .lowerFor({V2S16})
+ .clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
+ .lower();
} else {
- // TODO: Implement
- getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
+ .scalarize(0)
+ .clampScalar(0, S32, S64)
+ .lower();
}
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
@@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINIMUMNUM:
case TargetOpcode::G_FMAXIMUMNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE:
return legalizeMinNumMaxNum(Helper, MI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, B);
@@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
MachineFunction &MF = Helper.MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
- MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
-
- // With ieee_mode disabled, the instructions have the correct behavior
- // already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
- //
- // FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
- // enabled.
- if (!MFI->getMode().IEEE) {
- if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
- MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
- return true;
-
- return !IsIEEEOp;
- }
-
- if (IsIEEEOp)
+ // With ieee_mode disabled, the instructions have the correct behavior.
+ if (!MFI->getMode().IEEE)
return true;
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 99ba043..5580e4c 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1860,7 +1860,6 @@ private:
bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
const unsigned CPol);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
- bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
unsigned getConstantBusLimit(unsigned Opcode) const;
@@ -5506,22 +5505,6 @@ bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
return true;
}
-bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
- const OperandVector &Operands) {
- if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
- return true;
-
- int Simm16Pos =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
- if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
- SMLoc Loc = Operands[1]->getStartLoc();
- Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
- return false;
- }
-
- return true;
-}
-
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
const OperandVector &Operands) {
unsigned Opc = Inst.getOpcode();
@@ -5681,9 +5664,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
if (!validateTFE(Inst, Operands)) {
return false;
}
- if (!validateSetVgprMSB(Inst, Operands)) {
- return false;
- }
if (!validateWMMA(Inst, Operands)) {
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 09ef6ac..2aa54c9 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -45,9 +45,6 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
- // 32-bit ABS is legal for AMDGPU except for R600
- setOperationAction(ISD::ABS, MVT::i32, Expand);
-
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a757421..be42291 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -298,7 +298,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC,
{MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand);
- setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
+ setOperationAction({ISD::ABS, ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY}, MVT::i32, Legal);
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ee10190..05ba76a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -976,10 +976,10 @@ def : GCNPat <
} // End SubtargetPredicate = HasLshlAddU64Inst
let SubtargetPredicate = HasAddMinMaxInsts in {
-def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
-def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
-def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
-def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
+def : ThreeOp_i32_Pats<saddsat, smax, V_ADD_MAX_I32_e64>;
+def : ThreeOp_i32_Pats<uaddsat, umax, V_ADD_MAX_U32_e64>;
+def : ThreeOp_i32_Pats<saddsat, smin, V_ADD_MIN_I32_e64>;
+def : ThreeOp_i32_Pats<uaddsat, umin, V_ADD_MIN_U32_e64>;
}
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index c4692b7..4ae2c1e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -464,10 +464,10 @@ class ThreeOp_OpSelClampPats <SDPatternOperator op1, SDPatternOperator op2,
>;
let SubtargetPredicate = HasPkAddMinMaxInsts in {
-def : ThreeOp_OpSelClampPats<add, smax, V_PK_ADD_MAX_I16>;
-def : ThreeOp_OpSelClampPats<add, umax, V_PK_ADD_MAX_U16>;
-def : ThreeOp_OpSelClampPats<add, smin, V_PK_ADD_MIN_I16>;
-def : ThreeOp_OpSelClampPats<add, umin, V_PK_ADD_MIN_U16>;
+def : ThreeOp_OpSelClampPats<saddsat, smax, V_PK_ADD_MAX_I16>;
+def : ThreeOp_OpSelClampPats<uaddsat, umax, V_PK_ADD_MAX_U16>;
+def : ThreeOp_OpSelClampPats<saddsat, smin, V_PK_ADD_MIN_I16>;
+def : ThreeOp_OpSelClampPats<uaddsat, umin, V_PK_ADD_MIN_U16>;
}
let SubtargetPredicate = HasPkMinMax3Insts in {
diff --git a/llvm/lib/Target/ARM/ARMArchitectures.td b/llvm/lib/Target/ARM/ARMArchitectures.td
index 301ed5b..bfcecfe 100644
--- a/llvm/lib/Target/ARM/ARMArchitectures.td
+++ b/llvm/lib/Target/ARM/ARMArchitectures.td
@@ -297,6 +297,18 @@ def ARMv96a : Architecture<"armv9.6-a", "ARMv96a", [HasV9_6aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv97a : Architecture<"armv9.7-a", "ARMv97a", [HasV9_7aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td
index 9b1fa5d..e562b21 100644
--- a/llvm/lib/Target/ARM/ARMFeatures.td
+++ b/llvm/lib/Target/ARM/ARMFeatures.td
@@ -712,6 +712,11 @@ def HasV9_6aOps : SubtargetFeature<"v9.6a", "HasV9_6aOps", "true",
"Support ARM v9.6a instructions",
[HasV9_5aOps]>;
+// Armv9.7-A is a v9-only architecture.
+def HasV9_7aOps : SubtargetFeature<"v9.7a", "HasV9_7aOps", "true",
+ "Support ARM v9.7a instructions",
+ [HasV9_6aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8122db2..313ae3d 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21381,15 +21381,6 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
TargetLowering::insertSSPDeclarations(M);
}
-Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
- // MSVC CRT has a function to validate security cookie.
- RTLIB::LibcallImpl SecurityCheckCookie =
- getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
- if (SecurityCheckCookie != RTLIB::Unsupported)
- return M.getFunction(getLibcallImplName(SecurityCheckCookie));
- return TargetLowering::getSSPStackGuardCheck(M);
-}
-
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const {
// If we do not have NEON, vector types are not natively supported.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8c5e0cf..357d2c5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -708,7 +708,6 @@ class VectorType;
bool useLoadStackGuardNode(const Module &M) const override;
void insertSSPDeclarations(Module &M) const override;
- Function *getSSPStackGuardCheck(const Module &M) const override;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 53be167..10d4cd5 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6546,23 +6546,25 @@ def KCFI_CHECK_ARM
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsARM]> {
- let Size = 28; // 7 instructions (bic, ldr, 4x eor, beq, udf)
+ let Size = 40; // worst-case 10 instructions @ 4 bytes each
+ // (push, bic, ldr, 4x eor, pop, beq, udf)
}
def KCFI_CHECK_Thumb2
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsThumb2]> {
- let Size =
- 32; // worst-case 9 instructions (push, bic, ldr, 4x eor, pop, beq.w, udf)
+ let Size = 34; // worst-case (push.w[2], bic[4], ldr[4], 4x eor[16], pop.w[2],
+ // beq.w[4], udf[2])
}
def KCFI_CHECK_Thumb1
: PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
Sched<[]>,
Requires<[IsThumb1Only]> {
- let Size = 50; // worst-case 25 instructions (pushes, bic helper, type
- // building, cmp, pops)
+ let Size = 38; // worst-case 19 instructions @ 2 bytes each
+ // (2x push, 3x bic-helper, subs+ldr, 13x type-building, cmp,
+ // 2x pop, beq, bkpt)
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 0796746..94b511a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -895,6 +895,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV9_4A:
case ARM::ArchKind::ARMV9_5A:
case ARM::ArchKind::ARMV9_6A:
+ case ARM::ArchKind::ARMV9_7A:
S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
S.setAttributeItem(ARM_ISA_use, Allowed, false);
S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 02fb905..4a2f714 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1504,14 +1504,26 @@ let Defs = [SREG], hasSideEffects = 0 in
def FRMIDX : Pseudo<(outs DLDREGS:$dst), (ins DLDREGS:$src, i16imm:$src2),
"frmidx\t$dst, $src, $src2", []>;
+// The STDSPQRr and STDWSPQRr pseudos store to the stack frame. The most
+// accurate implementation would load SP into a temporary pointer register
+// and then use STDPtrQRr, but for efficiency we assume that R29R28 holds
+// the current frame pointer. However, in the PEI pass we sometimes rewrite
+// an ADJCALLSTACKDOWN pseudo plus one or more STDSPQRr/STDWSPQRr pseudos to
+// use Z first for a stack adjustment and then as a base pointer. To avoid
+// corrupting Z, these pseudos take their source operand from special register
+// classes (GPR8NOZ/DREGSNOZ) that match GPR8/DREGS with the Z register
+// removed.
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8:$src),
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in
+def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8NOZ:$src),
"stdstk\t$dst, $src", [(store i8:$src, addr:$dst)]>;
+// See the comment on STDSPQRr.
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGS:$src),
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in
+def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGSNOZ:$src),
"stdwstk\t$dt, $src", [(store i16:$src, addr:$dt)]>;
// SP read/write pseudos.
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 182f92c..9b935b1 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -211,6 +211,31 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, (add R31R30, R29R28), ptr>;
// model this using a register class containing only the Z register.
def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
+// General-purpose 8-bit registers excluding the Z register halves; these are
+// the only registers that are always safe as the source of STDSPQRr.
+def GPR8NOZ : RegisterClass<"AVR", [i8], 8,
+ (// Return value and argument registers.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
+ R9, R8, R7, R6, R5, R4, R3, R2, R0, R1)>;
+
+// 16-bit register pairs excluding the Z register; these are the only
+// registers that are always safe as the source of STDWSPQRr.
+def DREGSNOZ : RegisterClass<"AVR", [i16], 8,
+ (// Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
+ R7R6, R5R4, R3R2, R1R0,
+ // Pseudo regs for unaligned 16-bits
+ R26R25, R24R23, R22R21, R20R19, R18R17, R16R15,
+ R14R13, R12R11, R10R9)>;
+
// Register class used for the stack read pseudo instruction.
def GPRSP : RegisterClass<"AVR", [i16], 8, (add SP)>;
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 42e90f0..d6fa65f 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file This file contains pases and utilities to convert a modern LLVM
+/// \file This file contains passes and utilities to convert a modern LLVM
/// module into a module compatible with the LLVM 3.7-based DirectX Intermediate
/// Language (DXIL).
//===----------------------------------------------------------------------===//
@@ -16,7 +16,6 @@
#include "DirectX.h"
#include "DirectXIRPasses/PointerTypeAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/Analysis/DXILResource.h"
@@ -27,7 +26,6 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/VersionTuple.h"
#define DEBUG_TYPE "dxil-prepare"
@@ -116,31 +114,6 @@ static void removeStringFunctionAttributes(Function &F,
F.removeRetAttrs(DeadAttrs);
}
-static void cleanModuleFlags(Module &M) {
- NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
- if (!MDFlags)
- return;
-
- SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
- M.getModuleFlagsMetadata(FlagEntries);
- bool Updated = false;
- for (auto &Flag : FlagEntries) {
- // llvm 3.7 only supports behavior up to AppendUnique.
- if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
- continue;
- Flag.Behavior = Module::ModFlagBehavior::Warning;
- Updated = true;
- }
-
- if (!Updated)
- return;
-
- MDFlags->eraseFromParent();
-
- for (auto &Flag : FlagEntries)
- M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
-}
-
class DXILPrepareModule : public ModulePass {
static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
@@ -202,15 +175,6 @@ class DXILPrepareModule : public ModulePass {
Builder.getPtrTy(PtrTy->getAddressSpace())));
}
- static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
- return {M.getMDKindID("dx.nonuniform"),
- M.getMDKindID("dx.controlflow.hints"),
- M.getMDKindID("dx.precise"),
- llvm::LLVMContext::MD_range,
- llvm::LLVMContext::MD_alias_scope,
- llvm::LLVMContext::MD_noalias};
- }
-
public:
bool runOnModule(Module &M) override {
PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M);
@@ -224,10 +188,7 @@ public:
const dxil::ModuleMetadataInfo MetadataInfo =
getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
VersionTuple ValVer = MetadataInfo.ValidatorVersion;
- bool SkipValidation = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
-
- // construct allowlist of valid metadata node kinds
- std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
+ bool AllowExperimental = ValVer.getMajor() == 0 && ValVer.getMinor() == 0;
for (auto &F : M.functions()) {
F.removeFnAttrs(AttrMask);
@@ -235,7 +196,7 @@ public:
// Only remove string attributes if we are not skipping validation.
    // This preserves the experimental attributes when the validator version
    // is 0.0 (experimental mode).
- removeStringFunctionAttributes(F, SkipValidation);
+ removeStringFunctionAttributes(F, AllowExperimental);
for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
F.removeParamAttrs(Idx, AttrMask);
@@ -243,11 +204,17 @@ public:
IRBuilder<> Builder(&BB);
for (auto &I : make_early_inc_range(BB)) {
- I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
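+          // Call sites carry the same DXIL-incompatible attributes as function
+          // declarations, so strip them here as well.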
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ CB->removeFnAttrs(AttrMask);
+ CB->removeRetAttrs(AttrMask);
+ for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
+ CB->removeParamAttrs(Idx, AttrMask);
+ continue;
+ }
        // Emitting NoOp bitcast instructions allows the ValueEnumerator to be
        // unmodified as it reserves instruction IDs during construction.
- if (auto LI = dyn_cast<LoadInst>(&I)) {
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, LI->getPointerOperand(),
LI->getType())) {
@@ -257,7 +224,7 @@ public:
}
continue;
}
- if (auto SI = dyn_cast<StoreInst>(&I)) {
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, SI->getPointerOperand(),
SI->getValueOperand()->getType())) {
@@ -268,39 +235,16 @@ public:
}
continue;
}
- if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (Value *NoOpBitcast = maybeGenerateBitcast(
Builder, PointerTypes, I, GEP->getPointerOperand(),
GEP->getSourceElementType()))
GEP->setOperand(0, NoOpBitcast);
continue;
}
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- CB->removeFnAttrs(AttrMask);
- CB->removeRetAttrs(AttrMask);
- for (size_t Idx = 0, End = CB->arg_size(); Idx < End; ++Idx)
- CB->removeParamAttrs(Idx, AttrMask);
- continue;
- }
}
}
}
- // Remove flags not for DXIL.
- cleanModuleFlags(M);
-
- // dx.rootsignatures will have been parsed from its metadata form as its
- // binary form as part of the RootSignatureAnalysisWrapper, so safely
- // remove it as it is not recognized in DXIL
- if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
- RootSignature->eraseFromParent();
-
- // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
- // causes all tests using the DXIL Validator to fail.
- //
- // This is a temporary fix and should be replaced with a whitelist once
- // we have determined all metadata that the DXIL Validator allows
- if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
- ErrNo->eraseFromParent();
return true;
}
@@ -308,11 +252,11 @@ public:
DXILPrepareModule() : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DXILMetadataAnalysisWrapperPass>();
- AU.addRequired<RootSignatureAnalysisWrapper>();
- AU.addPreserved<RootSignatureAnalysisWrapper>();
- AU.addPreserved<ShaderFlagsAnalysisWrapper>();
+
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addPreserved<RootSignatureAnalysisWrapper>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
static char ID; // Pass identification.
};
@@ -323,7 +267,6 @@ char DXILPrepareModule::ID = 0;
INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module",
false, false)
INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false,
false)
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index 9eebcc9..1e4797b 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===//
#include "DXILTranslateMetadata.h"
+#include "DXILRootSignature.h"
#include "DXILShaderFlags.h"
#include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DXILMetadataAnalysis.h"
@@ -204,9 +206,9 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
return MDNode::get(Ctx, MDVals);
}
-MDTuple *constructEntryMetadata(const Function *EntryFn, MDTuple *Signatures,
- MDNode *Resources, MDTuple *Properties,
- LLVMContext &Ctx) {
+static MDTuple *constructEntryMetadata(const Function *EntryFn,
+ MDTuple *Signatures, MDNode *Resources,
+ MDTuple *Properties, LLVMContext &Ctx) {
// Each entry point metadata record specifies:
// * reference to the entry point function global symbol
// * unmangled name
@@ -290,42 +292,82 @@ static MDTuple *emitTopLevelLibraryNode(Module &M, MDNode *RMD,
return constructEntryMetadata(nullptr, nullptr, RMD, Properties, Ctx);
}
-// TODO: We might need to refactor this to be more generic,
-// in case we need more metadata to be replaced.
-static void translateBranchMetadata(Module &M) {
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- Instruction *BBTerminatorInst = BB.getTerminator();
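+// Rewrite the frontend's "hlsl.controlflow.hint" metadata on a terminator into
+// the "dx.controlflow.hints" form recognized by DXIL.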
+static void translateBranchMetadata(Module &M, Instruction *BBTerminatorInst) {
+ MDNode *HlslControlFlowMD =
+ BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+
+ if (!HlslControlFlowMD)
+ return;
- MDNode *HlslControlFlowMD =
- BBTerminatorInst->getMetadata("hlsl.controlflow.hint");
+ assert(HlslControlFlowMD->getNumOperands() == 2 &&
+ "invalid operands for hlsl.controlflow.hint");
- if (!HlslControlFlowMD)
- continue;
+ MDBuilder MDHelper(M.getContext());
- assert(HlslControlFlowMD->getNumOperands() == 2 &&
- "invalid operands for hlsl.controlflow.hint");
+ llvm::Metadata *HintsStr = MDHelper.createString("dx.controlflow.hints");
+ llvm::Metadata *HintsValue = MDHelper.createConstant(
+ mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1)));
- MDBuilder MDHelper(M.getContext());
- ConstantInt *Op1 =
- mdconst::extract<ConstantInt>(HlslControlFlowMD->getOperand(1));
+  MDNode *HintsNode =
+      llvm::MDNode::get(M.getContext(), {HintsStr, HintsValue});
- SmallVector<llvm::Metadata *, 2> Vals(
- ArrayRef<Metadata *>{MDHelper.createString("dx.controlflow.hints"),
- MDHelper.createConstant(Op1)});
+  BBTerminatorInst->setMetadata("dx.controlflow.hints", HintsNode);
+ BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+}
+
+static std::array<unsigned, 6> getCompatibleInstructionMDs(llvm::Module &M) {
+ return {
+ M.getMDKindID("dx.nonuniform"), M.getMDKindID("dx.controlflow.hints"),
+ M.getMDKindID("dx.precise"), llvm::LLVMContext::MD_range,
+ llvm::LLVMContext::MD_alias_scope, llvm::LLVMContext::MD_noalias};
+}
- MDNode *MDNode = llvm::MDNode::get(M.getContext(), Vals);
+static void translateInstructionMetadata(Module &M) {
+ // construct allowlist of valid metadata node kinds
+ std::array<unsigned, 6> DXILCompatibleMDs = getCompatibleInstructionMDs(M);
- BBTerminatorInst->setMetadata("dx.controlflow.hints", MDNode);
- BBTerminatorInst->setMetadata("hlsl.controlflow.hint", nullptr);
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+      // This needs to be done first so that "hlsl.controlflow.hint" isn't
+      // dropped by the allowlist filtering below.
+ if (auto *I = BB.getTerminator())
+ translateBranchMetadata(M, I);
+
+ for (auto &I : make_early_inc_range(BB)) {
+ I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
+ }
}
}
}
-static void translateMetadata(Module &M, DXILResourceMap &DRM,
- DXILResourceTypeMap &DRTM,
- const ModuleShaderFlags &ShaderFlags,
- const ModuleMetadataInfo &MMDI) {
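+// DXIL is based on LLVM 3.7, which only understands module-flag behaviors up
+// to AppendUnique; downgrade anything newer to Warning.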
+static void cleanModuleFlags(Module &M) {
+ NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+ if (!MDFlags)
+ return;
+
+ SmallVector<llvm::Module::ModuleFlagEntry> FlagEntries;
+ M.getModuleFlagsMetadata(FlagEntries);
+ bool Updated = false;
+ for (auto &Flag : FlagEntries) {
+ // llvm 3.7 only supports behavior up to AppendUnique.
+ if (Flag.Behavior <= Module::ModFlagBehavior::AppendUnique)
+ continue;
+ Flag.Behavior = Module::ModFlagBehavior::Warning;
+ Updated = true;
+ }
+
+ if (!Updated)
+ return;
+
+ MDFlags->eraseFromParent();
+
+ for (auto &Flag : FlagEntries)
+ M.addModuleFlag(Flag.Behavior, Flag.Key->getString(), Flag.Val);
+}
+
+static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM,
+ DXILResourceTypeMap &DRTM,
+ const ModuleShaderFlags &ShaderFlags,
+ const ModuleMetadataInfo &MMDI) {
LLVMContext &Ctx = M.getContext();
IRBuilder<> IRB(Ctx);
SmallVector<MDNode *> EntryFnMDNodes;
@@ -381,6 +423,22 @@ static void translateMetadata(Module &M, DXILResourceMap &DRM,
M.getOrInsertNamedMetadata("dx.entryPoints");
for (auto *Entry : EntryFnMDNodes)
EntryPointsNamedMD->addOperand(Entry);
+
+ cleanModuleFlags(M);
+
+  // dx.rootsignatures will already have been parsed from its metadata form
+  // into its binary form by the RootSignatureAnalysisWrapper, so it is safe
+  // to remove it here; the metadata form is not recognized in DXIL.
+ if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures"))
+ RootSignature->eraseFromParent();
+
+ // llvm.errno.tbaa was recently added but is not supported in LLVM 3.7 and
+ // causes all tests using the DXIL Validator to fail.
+ //
+  // This is a temporary fix and should be replaced with an allowlist once
+  // we have determined all metadata that the DXIL Validator allows.
+ if (NamedMDNode *ErrNo = M.getNamedMetadata("llvm.errno.tbaa"))
+ ErrNo->eraseFromParent();
}
PreservedAnalyses DXILTranslateMetadata::run(Module &M,
@@ -390,8 +448,8 @@ PreservedAnalyses DXILTranslateMetadata::run(Module &M,
const ModuleShaderFlags &ShaderFlags = MAM.getResult<ShaderFlagsAnalysis>(M);
const dxil::ModuleMetadataInfo MMDI = MAM.getResult<DXILMetadataAnalysis>(M);
- translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
- translateBranchMetadata(M);
+ translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+ translateInstructionMetadata(M);
return PreservedAnalyses::all();
}
@@ -409,10 +467,13 @@ public:
AU.addRequired<DXILResourceWrapperPass>();
AU.addRequired<ShaderFlagsAnalysisWrapper>();
AU.addRequired<DXILMetadataAnalysisWrapperPass>();
- AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addRequired<RootSignatureAnalysisWrapper>();
+
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
- AU.addPreserved<ShaderFlagsAnalysisWrapper>();
AU.addPreserved<DXILResourceBindingWrapperPass>();
+ AU.addPreserved<DXILResourceWrapperPass>();
+ AU.addPreserved<RootSignatureAnalysisWrapper>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
bool runOnModule(Module &M) override {
@@ -425,8 +486,8 @@ public:
dxil::ModuleMetadataInfo MMDI =
getAnalysis<DXILMetadataAnalysisWrapperPass>().getModuleMetadata();
- translateMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
- translateBranchMetadata(M);
+ translateGlobalMetadata(M, DRM, DRTM, ShaderFlags, MMDI);
+ translateInstructionMetadata(M);
return true;
}
};
@@ -443,6 +504,7 @@ INITIALIZE_PASS_BEGIN(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
"DXIL Translate Metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper)
+INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper)
INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass)
INITIALIZE_PASS_END(DXILTranslateMetadataLegacy, "dxil-translate-metadata",
"DXIL Translate Metadata", false, false)
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
index f3f5eb1..4c1ffac 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.h
@@ -13,7 +13,8 @@
namespace llvm {
-/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes
+/// A pass that transforms LLVM metadata in the module into its DXIL
+/// equivalent, then emits all recognized DXIL metadata.
class DXILTranslateMetadata : public PassInfoMixin<DXILTranslateMetadata> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index fb0928b8..ede8463 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -79,6 +79,12 @@ def ExtensionHVXV79: SubtargetFeature<"hvxv79", "HexagonHVXVersion",
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
ExtensionHVXV73, ExtensionHVXV75]>;
+def ExtensionHVXV81: SubtargetFeature<"hvxv81", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V81", "Hexagon HVX instructions",
+ [ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67,
+ ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79]>;
+
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
@@ -151,6 +157,8 @@ def UseHVXV75 : Predicate<"HST->useHVXV75Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV75)>;
def UseHVXV79 : Predicate<"HST->useHVXV79Ops()">,
AssemblerPredicate<(all_of ExtensionHVXV79)>;
+def UseHVXV81 : Predicate<"HST->useHVXV81Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV81)>;
def UseAudio : Predicate<"HST->useAudioOps()">,
AssemblerPredicate<(all_of ExtensionAudio)>;
def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -488,6 +496,11 @@ def : Proc<"hexagonv79", HexagonModelV79,
ArchV68, ArchV69, ArchV71, ArchV73, ArchV75, ArchV79,
FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv81", HexagonModelV81,
+ [ArchV65, ArchV66, ArchV67, ArchV68, ArchV69, ArchV71, ArchV73,
+ ArchV75, ArchV79, ArchV81,
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
// Need to update the correct features for tiny core.
// Disable NewValueJumps since the packetizer is unable to handle a packet with
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 8984534..9bf4034 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -29,7 +29,8 @@ enum class ArchEnum {
V71,
V73,
V75,
- V79
+ V79,
+ V81
};
inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
@@ -50,6 +51,7 @@ inline std::optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
.Case("hexagonv73", Hexagon::ArchEnum::V73)
.Case("hexagonv75", Hexagon::ArchEnum::V75)
.Case("hexagonv79", Hexagon::ArchEnum::V79)
+ .Case("hexagonv81", Hexagon::ArchEnum::V81)
.Default(std::nullopt);
}
} // namespace Hexagon
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 8ec1d93..f623fd0 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -34,3 +34,5 @@ def ArchV75: SubtargetFeature<"v75", "HexagonArchVersion", "Hexagon::ArchEnum::V
def HasV75 : Predicate<"HST->hasV75Ops()">, AssemblerPredicate<(all_of ArchV75)>;
def ArchV79: SubtargetFeature<"v79", "HexagonArchVersion", "Hexagon::ArchEnum::V79", "Enable Hexagon V79 architecture">;
def HasV79 : Predicate<"HST->hasV79Ops()">, AssemblerPredicate<(all_of ArchV79)>;
+def ArchV81: SubtargetFeature<"v81", "HexagonArchVersion", "Hexagon::ArchEnum::V81", "Enable Hexagon V81 architecture">;
+def HasV81 : Predicate<"HST->hasV81Ops()">, AssemblerPredicate<(all_of ArchV81)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index 93696e0..f4e36fa7 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -7222,3 +7222,595 @@ class DepHVXItinV79 {
[Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
+
+class DepHVXItinV81 {
+ list<InstrItinData> DepHVXItinV81_list = [
+ InstrItinData <tc_0390c1ca, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0afc8be9, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_227864f7, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2b4c548e, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_37820f4c, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c8c15d0, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_4942646a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_531b383c, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5cdf8c84, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7095ecba, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_72e2b393, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_73efe966, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d68d5c2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9a1cab75, /*SLOT01,LOAD,VA,VX_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9aff7a2a, /*SLOT0,STORE,VA,VX_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a19b9305, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a28f32b5, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a69eeee1, /*SLOT01,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb599486, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cda936da, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_dcca380f, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2fdd6e6, /*SLOT0123*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f175e046, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 7a1ad3e..48b665c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -13740,3 +13740,891 @@ class DepScalarItinV79 {
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
+
+class DepScalarItinV81 {
+ list<InstrItinData> DepScalarItinV81_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a43be35, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_139ef484, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_2471c1c8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_33e7e673, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_46c18ecf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4bf903b0, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_512b1653, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54f0cee2, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a222e89, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6aa823ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb52018, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_759e57be, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7d6a2568, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_78f87ed3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_7af3a37e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c28bd7e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f58404a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8f36a2fd, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95f43c5e, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b20a062, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a724463d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af6af259, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b2196a3f, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b3d46584, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_b9bec29e, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb78483e, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d234b61a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d71ea8fa, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e60def48, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed3f8d2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f97707c1, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index ae96753..f8f1c2a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -39178,6 +39178,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_hf_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.qf16)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_qf16 : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -39269,6 +39282,19 @@ let opNewValue = 0;
let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
+def V6_vsub_sf_mix : HInst<
+(outs HvxVR:$Vd32),
+(ins HvxVR:$Vu32, HvxVR:$Vv32),
+"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.qf32)",
+tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV81,UseHVXQFloat]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVI = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_vsub_sf_sf : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -41116,6 +41142,17 @@ let hasNewValue = 1;
let opNewValue = 0;
let isSolo = 1;
}
+def Y2_tlbpp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = tlbp($Rss32)",
+tc_6aa823ab, TypeCR>, Enc_90cd8b, Requires<[HasV81]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
def Y2_tlbr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 17cb96c..23f4b3a 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -3827,3 +3827,14 @@ def: Pat<(int_hexagon_V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsub_hf_f8_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsub_hf_f8 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV79, UseHVX128B]>;
+
+// V81 HVX Instructions.
+
+def: Pat<(int_hexagon_V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_hf_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_hf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX64B, UseHVXQFloat]>;
+def: Pat<(int_hexagon_V6_vsub_sf_mix_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsub_sf_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV81, UseHVX128B, UseHVXQFloat]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e285e04..7ee280d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -654,7 +654,9 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
IntNo == Intrinsic::hexagon_V6_vgathermh ||
IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
IntNo == Intrinsic::hexagon_V6_vgathermhw ||
- IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
+ IntNo == Intrinsic::hexagon_V6_vgathermhw_128B ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh ||
+ IntNo == Intrinsic::hexagon_V6_vgather_vscattermh_128B) {
SelectV65Gather(N);
return;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index c7a4f68..3cc146b 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2953,6 +2953,10 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
case Intrinsic::hexagon_V6_vgathermhw_128B:
Opcode = Hexagon::V6_vgathermhw_pseudo;
break;
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B:
+ Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo;
+ break;
}
SDVTList VTs = CurDAG->getVTList(MVT::Other);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 9f7f434..526b4de 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2145,7 +2145,9 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::hexagon_V6_vgathermhq:
case Intrinsic::hexagon_V6_vgathermhq_128B:
case Intrinsic::hexagon_V6_vgathermhwq:
- case Intrinsic::hexagon_V6_vgathermhwq_128B: {
+ case Intrinsic::hexagon_V6_vgathermhwq_128B:
+ case Intrinsic::hexagon_V6_vgather_vscattermh:
+ case Intrinsic::hexagon_V6_vgather_vscattermh_128B: {
const Module &M = *I.getParent()->getParent()->getParent();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Type *VecTy = I.getArgOperand(1)->getType();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 939841a..47726d6 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1554,80 +1554,93 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
MachineBasicBlock::iterator First;
switch (Opc) {
- case Hexagon::V6_vgathermh_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhw_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
-
- case Hexagon::V6_vgathermhwq_pseudo:
- First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(5));
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
- .add(MI.getOperand(0))
- .addImm(MI.getOperand(1).getImm())
- .addReg(Hexagon::VTMP);
- MBB.erase(MI);
- return First.getInstrIterator();
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
+ // This is mainly a placeholder. It will be extended.
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vscattermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+ case Hexagon::V6_vgathermh_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhw_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
+
+ case Hexagon::V6_vgathermhwq_pseudo:
+ First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
+ .add(MI.getOperand(0))
+ .addImm(MI.getOperand(1).getImm())
+ .addReg(Hexagon::VTMP);
+ MBB.erase(MI);
+ return First.getInstrIterator();
}
return MI.getIterator();
@@ -2806,6 +2819,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vL32b_nt_tmp_npred_ai:
case Hexagon::V6_vS32Ub_npred_ai:
case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgather_vscatter_mh_pseudo:
case Hexagon::V6_vgathermw_pseudo:
case Hexagon::V6_vgathermhw_pseudo:
case Hexagon::V6_vgathermhq_pseudo:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
index f927f9b..42393d0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -40,6 +40,19 @@ defm V6_vgathermh_pseudo : vgathermh<HvxVR>;
defm V6_vgathermw_pseudo : vgathermw<HvxVR>;
defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>;
+
+multiclass vgather_scatter_mh<RegisterClass RC> {
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
+ def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv),
+ ".error \"should not emit\" ",
+ []>;
+}
+
+defm V6_vgather_vscatter_mh_pseudo : vgather_scatter_mh<HvxVR>;
+
multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> {
let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td
index b8a9cf3..9bcd4bf 100644
--- a/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -75,3 +75,4 @@ include "HexagonScheduleV71T.td"
include "HexagonScheduleV73.td"
include "HexagonScheduleV75.td"
include "HexagonScheduleV79.td"
+include "HexagonScheduleV81.td" \ No newline at end of file
diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV81.td b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
new file mode 100644
index 0000000..dd5f5a0
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV81.td
@@ -0,0 +1,31 @@
+//=-HexagonScheduleV81.td - HexagonV81 Scheduling Definitions *- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def HexagonV81ItinList : DepScalarItinV81, ScalarItin,
+ DepHVXItinV81, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV81_list, ScalarItin_list,
+ DepHVXItinV81_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV81 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV81ItinList.ItinList>;
+
+def HexagonModelV81 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV81;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 7430567..995f66d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -224,6 +224,15 @@ public:
bool useHVXV79Ops() const {
return HexagonHVXVersion >= Hexagon::ArchEnum::V79;
}
+ bool hasV81Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V81;
+ }
+ bool hasV81OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V81;
+ }
+ bool useHVXV81Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V81;
+ }
bool useAudioOps() const { return UseAudioOps; }
bool useCompound() const { return UseCompound; }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 171e294..e925e04 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -31,6 +31,10 @@ using namespace llvm;
static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+cl::opt<bool> HexagonAllowScatterGatherHVX(
+ "hexagon-allow-scatter-gather-hvx", cl::init(false), cl::Hidden,
+ cl::desc("Allow auto-generation of HVX scatter-gather"));
+
static cl::opt<bool> EnableV68FloatAutoHVX(
"force-hvx-float", cl::Hidden,
cl::desc("Enable auto-vectorization of floatint point types on v68."));
@@ -354,6 +358,61 @@ bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}
+bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const {
+ // For now, assume we cannot deal with all HVX data types.
+ if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+ !HexagonAllowScatterGatherHVX)
+ return false;
+ // This must be in sync with the HexagonVectorCombine pass.
+ switch (Ty->getScalarSizeInBits()) {
+ case 8:
+ return (getTypeNumElements(Ty) == 128);
+ case 16:
+ if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
+ return (Alignment >= 2);
+ break;
+ case 32:
+ if (getTypeNumElements(Ty) == 32)
+ return (Alignment >= 4);
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const {
+ if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
+ !HexagonAllowScatterGatherHVX)
+ return false;
+ // This must be in sync with the HexagonVectorCombine pass.
+ switch (Ty->getScalarSizeInBits()) {
+ case 8:
+ return (getTypeNumElements(Ty) == 128);
+ case 16:
+ if (getTypeNumElements(Ty) == 64)
+ return (Alignment >= 2);
+ break;
+ case 32:
+ if (getTypeNumElements(Ty) == 32)
+ return (Alignment >= 4);
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy,
+ Align Alignment) const {
+ return !isLegalMaskedGather(VTy, Alignment);
+}
+
+bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy,
+ Align Alignment) const {
+ return !isLegalMaskedScatter(VTy, Alignment);
+}
+
/// --- Vector TTI end ---
unsigned HexagonTTIImpl::getPrefetchDistance() const {
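// A minimal sketch (hypothetical caller, not part of this patch) of how the new
// hooks are meant to be queried through TargetTransformInfo; `TTI` and `Ctx` are
// assumed to come from a function on a 128-byte HVX subtarget with
// -hexagon-allow-scatter-gather-hvx enabled. A <64 x i16> gather at 2-byte
// alignment is kept vectorized, while shapes outside the switches above are
// scalarized via the forceScalarizeMasked* overrides.
//
//   #include "llvm/Analysis/TargetTransformInfo.h"
//   #include "llvm/IR/DerivedTypes.h"
//
//   static bool keepsHvxGather(const llvm::TargetTransformInfo &TTI,
//                              llvm::LLVMContext &Ctx) {
//     auto *V64I16 =
//         llvm::FixedVectorType::get(llvm::Type::getInt16Ty(Ctx), 64);
//     return TTI.isLegalMaskedGather(V64I16, llvm::Align(2)) &&
//            !TTI.forceScalarizeMaskedGather(V64I16, llvm::Align(2));
//   }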
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index dbf16c9..cec2bf9 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -169,6 +169,12 @@ public:
unsigned AddressSpace) const override;
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
unsigned AddressSpace) const override;
+ bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
+ bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override;
+ bool forceScalarizeMaskedGather(VectorType *VTy,
+ Align Alignment) const override;
+ bool forceScalarizeMaskedScatter(VectorType *VTy,
+ Align Alignment) const override;
/// @}
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 9ab5202..5c50ec2 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -57,6 +57,11 @@
#define DEBUG_TYPE "hexagon-vc"
+// This is a const that represents the default HVX VTCM page size.
+// It is boot-time configurable, so we probably want an API to
+// read it, but for now assume 128 KB.
+#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
+
using namespace llvm;
namespace {
@@ -418,6 +423,18 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
class HvxIdioms {
public:
+ enum DstQualifier {
+ Undefined = 0,
+ Arithmetic,
+ LdSt,
+ LLVM_Gather,
+ LLVM_Scatter,
+ HEX_Gather_Scatter,
+ HEX_Gather,
+ HEX_Scatter,
+ Call
+ };
+
HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
auto *Int32Ty = HVC.getIntTy(32);
HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
@@ -473,6 +490,11 @@ private:
auto createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
Signedness SgnX, ArrayRef<Value *> WordY,
Signedness SgnY) const -> SmallVector<Value *>;
+ // Vector manipulations for Ripple
+ bool matchScatter(Instruction &In) const;
+ bool matchGather(Instruction &In) const;
+ Value *processVScatter(Instruction &In) const;
+ Value *processVGather(Instruction &In) const;
VectorType *HvxI32Ty;
VectorType *HvxP32Ty;
@@ -1545,7 +1567,7 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
}
auto AlignVectors::run() -> bool {
- LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
+ LLVM_DEBUG(dbgs() << "\nRunning HVC::AlignVectors on " << HVC.F.getName()
<< '\n');
if (!createAddressGroups())
return false;
@@ -1797,6 +1819,846 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
return Ext;
}
+inline bool HvxIdioms::matchScatter(Instruction &In) const {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In);
+ if (!II)
+ return false;
+ return (II->getIntrinsicID() == Intrinsic::masked_scatter);
+}
+
+inline bool HvxIdioms::matchGather(Instruction &In) const {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&In);
+ if (!II)
+ return false;
+ return (II->getIntrinsicID() == Intrinsic::masked_gather);
+}
+
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual);
+
+// Binary instructions we want to handle as users of gather/scatter.
+inline bool isArithmetic(unsigned Opc) {
+ switch (Opc) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::UDiv:
+ return true;
+ }
+ return false;
+}
+
+// TODO: Maybe use MemoryLocation for this. See getLocOrNone above.
+inline Value *getPointer(Value *Ptr) {
+ assert(Ptr && "Unable to extract pointer");
+ if (isa<AllocaInst>(Ptr) || isa<Argument>(Ptr) || isa<GlobalValue>(Ptr))
+ return Ptr;
+ if (isa<LoadInst>(Ptr) || isa<StoreInst>(Ptr))
+ return getLoadStorePointerOperand(Ptr);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(1);
+ }
+ return nullptr;
+}
+
+static Instruction *selectDestination(Instruction *In,
+ HvxIdioms::DstQualifier &Qual) {
+ Instruction *Destination = nullptr;
+ if (!In)
+ return Destination;
+ if (isa<StoreInst>(In)) {
+ Destination = In;
+ Qual = HvxIdioms::LdSt;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_gather) {
+ Destination = In;
+ Qual = HvxIdioms::LLVM_Gather;
+ } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
+ Destination = In;
+ Qual = HvxIdioms::LLVM_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
+ Destination = In;
+ Qual = HvxIdioms::LdSt;
+ } else if (II->getIntrinsicID() ==
+ Intrinsic::hexagon_V6_vgather_vscattermh) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Gather_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Scatter;
+ } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
+ Destination = In;
+ Qual = HvxIdioms::HEX_Gather;
+ }
+ } else if (isa<ZExtInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isa<CastInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isa<CallInst>(In)) {
+ Destination = In;
+ Qual = HvxIdioms::Call;
+ } else if (isa<GetElementPtrInst>(In)) {
+ return locateDestination(In, Qual);
+ } else if (isArithmetic(In->getOpcode())) {
+ Destination = In;
+ Qual = HvxIdioms::Arithmetic;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
+ }
+ return Destination;
+}
+
+// This method attempts to find the destination (user) of a given intrinsic.
+// Given that these are produced only by Ripple, the number of options is
+// limited. The simplest case is an explicit store, which is in fact redundant
+// (since the HVX gather creates its own store during packetization).
+// Nevertheless, we need to figure out the address we are storing to. Other
+// cases are more complicated, but still few. See the IR sketch that follows
+// this function for the simplest shape.
+Instruction *locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual) {
+ Instruction *Destination = nullptr;
+ if (!In)
+ return Destination;
+ // Get all possible destinations
+ SmallVector<Instruction *> Users;
+ // Iterate over the uses of the instruction
+ for (auto &U : In->uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser())) {
+ Destination = selectDestination(UI, Qual);
+ if (Destination)
+ Users.push_back(Destination);
+ }
+ }
+ // Now see which of the users (if any) is a memory destination.
+ for (auto *I : Users)
+ if (getPointer(I))
+ return I;
+ return Destination;
+}
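// For illustration only (IR value names assumed, not taken from this patch):
// the simplest shape locateDestination() is expected to resolve is a masked
// gather whose sole interesting user is a plain store, e.g.
//
//   %g = call <64 x i16> @llvm.masked.gather.v64i16.v64p0(<64 x ptr> %ptrs,
//                              i32 2, <64 x i1> %mask, <64 x i16> poison)
//   store <64 x i16> %g, ptr %dst   ; selected as the destination, Qual == LdSt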
+
+// The two intrinsics we handle here have the GEP in different positions.
+inline GetElementPtrInst *locateGepFromIntrinsic(Instruction *In) {
+ assert(In && "Bad instruction");
+ IntrinsicInst *IIn = dyn_cast<IntrinsicInst>(In);
+ assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather ||
+ IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
+ "Not a gather Intrinsic");
+ GetElementPtrInst *GEPIndex = nullptr;
+ if (IIn->getIntrinsicID() == Intrinsic::masked_gather)
+ GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(0));
+ else
+ GEPIndex = dyn_cast<GetElementPtrInst>(IIn->getOperand(1));
+ return GEPIndex;
+}
+
+// Given the intrinsic, find its GEP argument and extract the base address it
+// uses. The method relies on the way Ripple typically forms the GEP for
+// scatter/gather.
+static Value *locateAddressFromIntrinsic(Instruction *In) {
+ GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In);
+ if (!GEPIndex) {
+ LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+ return nullptr;
+ }
+ Value *BaseAddress = GEPIndex->getPointerOperand();
+ auto *IndexLoad = dyn_cast<LoadInst>(BaseAddress);
+ if (IndexLoad)
+ return IndexLoad;
+
+ auto *IndexZEx = dyn_cast<ZExtInst>(BaseAddress);
+ if (IndexZEx) {
+ IndexLoad = dyn_cast<LoadInst>(IndexZEx->getOperand(0));
+ if (IndexLoad)
+ return IndexLoad;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IndexZEx->getOperand(0));
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
+ return locateAddressFromIntrinsic(II);
+ }
+ auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(BaseAddress);
+ if (BaseShuffle) {
+ IndexLoad = dyn_cast<LoadInst>(BaseShuffle->getOperand(0));
+ if (IndexLoad)
+ return IndexLoad;
+ auto *IE = dyn_cast<InsertElementInst>(BaseShuffle->getOperand(0));
+ if (IE) {
+ auto *Src = IE->getOperand(1);
+ IndexLoad = dyn_cast<LoadInst>(Src);
+ if (IndexLoad)
+ return IndexLoad;
+ auto *Alloca = dyn_cast<AllocaInst>(Src);
+ if (Alloca)
+ return Alloca;
+ if (isa<Argument>(Src)) {
+ return Src;
+ }
+ if (isa<GlobalValue>(Src)) {
+ return Src;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
+ return nullptr;
+}
+
+static Type *getIndexType(Value *In) {
+ if (!In)
+ return nullptr;
+
+ if (isa<LoadInst>(In) || isa<StoreInst>(In))
+ return getLoadStoreType(In);
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return II->getType();
+ if (II->getIntrinsicID() == Intrinsic::masked_store)
+ return II->getOperand(0)->getType();
+ }
+ return In->getType();
+}
+
+static Value *locateIndexesFromGEP(Value *In) {
+ if (!In)
+ return nullptr;
+ if (isa<LoadInst>(In))
+ return In;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(In)) {
+ if (II->getIntrinsicID() == Intrinsic::masked_load)
+ return In;
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ return In;
+ }
+ if (auto *IndexZEx = dyn_cast<ZExtInst>(In))
+ return locateIndexesFromGEP(IndexZEx->getOperand(0));
+ if (auto *IndexSEx = dyn_cast<SExtInst>(In))
+ return locateIndexesFromGEP(IndexSEx->getOperand(0));
+ if (auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(In))
+ return locateIndexesFromGEP(BaseShuffle->getOperand(0));
+ if (auto *IE = dyn_cast<InsertElementInst>(In))
+ return locateIndexesFromGEP(IE->getOperand(1));
+ if (auto *cstDataVector = dyn_cast<ConstantDataVector>(In))
+ return cstDataVector;
+ if (auto *GEPIndex = dyn_cast<GetElementPtrInst>(In))
+ return GEPIndex->getOperand(0);
+ return nullptr;
+}
+
+// Given the intrinsic, find its GEP argument and extract the offsets from the
+// base address it uses.
+static Value *locateIndexesFromIntrinsic(Instruction *In) {
+ GetElementPtrInst *GEPIndex = locateGepFromIntrinsic(In);
+ if (!GEPIndex) {
+ LLVM_DEBUG(dbgs() << " No GEP in intrinsic\n");
+ return nullptr;
+ }
+ Value *Indexes = GEPIndex->getOperand(1);
+ if (auto *IndexLoad = locateIndexesFromGEP(Indexes))
+ return IndexLoad;
+
+ LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
+ return nullptr;
+}
+
+// Because of the awkward definition of many Hexagon intrinsics, we often have
+// to reinterpret the HVX-native <64 x i16> as <32 x i32>, which in practice is
+// a no-op for all use cases; this only exists to make the IR builder happy.
+// See the IR sketch after these helpers.
+inline Value *getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder,
+ LLVMContext &Ctx, Value *I) {
+ assert(I && "Unable to reinterprete cast");
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ std::vector<unsigned> shuffleMask;
+ for (unsigned i = 0; i < 64; ++i)
+ shuffleMask.push_back(i);
+ Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask);
+ Value *CastShuffle =
+ Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+ return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
+}
+
+// Recast <128 x i8> as <32 x i32>
+inline Value *getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder,
+ LLVMContext &Ctx, Value *I) {
+ assert(I && "Unable to reinterprete cast");
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ std::vector<unsigned> shuffleMask;
+ for (unsigned i = 0; i < 128; ++i)
+ shuffleMask.push_back(i);
+ Constant *Mask = llvm::ConstantDataVector::get(Ctx, shuffleMask);
+ Value *CastShuffle =
+ Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
+ return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
+}
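// For reference, a sketch (value names assumed) of the IR the i16-to-i32 helper
// above is expected to emit; the shuffle is an identity permutation and the
// bitcast only reinterprets the bits, so no data movement is implied. The
// i8-to-i32 variant is analogous with 128 lanes.
//
//   %identity_shuffle = shufflevector <64 x i16> %v, <64 x i16> %v,
//                                     <64 x i32> <i32 0, i32 1, ..., i32 63>
//   %cst64_i16_to_32_i32 = bitcast <64 x i16> %identity_shuffle to <32 x i32>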
+
+// Create a <32 x i32> mask reinterpreted as <128 x i1> with a given per-word pattern.
+inline Value *get_i32_Mask(const HexagonVectorCombine &HVC,
+ IRBuilderBase &Builder, LLVMContext &Ctx,
+ unsigned int pattern) {
+ std::vector<unsigned int> byteMask;
+ for (unsigned i = 0; i < 32; ++i)
+ byteMask.push_back(pattern);
+
+ return Builder.CreateIntrinsic(
+ HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
+ {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
+ nullptr);
+}
+
+Value *HvxIdioms::processVScatter(Instruction &In) const {
+ auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
+ assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
+ unsigned InpSize = HVC.getSizeOf(InpTy);
+ auto *F = In.getFunction();
+ LLVMContext &Ctx = F->getContext();
+ auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
+ assert(ElemTy && "llvm.scatter needs integer type argument");
+ unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
+ LLVM_DEBUG({
+ unsigned Elements = HVC.length(InpTy);
+ dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
+ dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
+ << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
+ << ElemWidth << ")\n";
+ });
+
+ IRBuilder Builder(In.getParent(), In.getIterator(),
+ InstSimplifyFolder(HVC.DL));
+
+ auto *ValueToScatter = In.getOperand(0);
+ LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
+
+ if (HVC.HST.getVectorLength() != InpSize) {
+ LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
+ << ") for vscatter\n");
+ return nullptr;
+ }
+
+ // Base address of indexes.
+ auto *IndexLoad = locateAddressFromIntrinsic(&In);
+ if (!IndexLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+ // Address of destination. Must be in VTCM.
+ auto *Ptr = getPointer(IndexLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+ // Indexes/offsets
+ auto *Indexes = locateIndexesFromIntrinsic(&In);
+ if (!Indexes)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+ Value *CastedDst = Builder.CreateBitOrPointerCast(Ptr, Type::getInt32Ty(Ctx),
+ "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
+ // Adjust Indexes
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ Value *CastIndex = nullptr;
+ if (cstDataVector) {
+ // Our indexes are represented as a constant. We need it in a reg.
+ AllocaInst *IndexesAlloca =
+ Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ CastIndex = Builder.CreateLoad(IndexesAlloca->getAllocatedType(),
+ IndexesAlloca, "reload_index");
+ } else {
+ if (ElemWidth == 2)
+ CastIndex = getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ else
+ CastIndex = Indexes;
+ }
+ LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+ if (ElemWidth == 1) {
+ // v128i8: there is no native instruction for this.
+ // Do this as two Hi/Lo scatters with masking.
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ // Extend indexes. We assume that indexes are in 128i8 format - need to
+ // expand them to Hi/Lo 64i16
+ Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
+ auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+ auto *UnpackedIndexes = Builder.CreateIntrinsic(
+ HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
+
+ auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+ auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+ [[maybe_unused]] Value *IndexHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+ [[maybe_unused]] Value *IndexLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+ LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+ // Now unpack values to scatter
+ Value *CastSrc =
+ getReinterpretiveCast_i8_to_i32(HVC, Builder, Ctx, ValueToScatter);
+ LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+ auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
+ HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
+ << ")\n");
+
+ [[maybe_unused]] Value *UVSHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
+ [[maybe_unused]] Value *UVSLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
+ LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
+
+ // Create the mask for individual bytes
+ auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+ LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+ [[maybe_unused]] auto *ResHi = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+ {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ IndexHi, UVSHi},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
+ {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ IndexLo, UVSLo},
+ nullptr);
+ } else if (ElemWidth == 2) {
+ Value *CastSrc =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, ValueToScatter);
+ LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
+ {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+ CastSrc},
+ nullptr);
+ } else if (ElemWidth == 4) {
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
+ {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
+ ValueToScatter},
+ nullptr);
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
+ return nullptr;
+ }
+}
+
+Value *HvxIdioms::processVGather(Instruction &In) const {
+ [[maybe_unused]] auto *InpTy =
+ dyn_cast<VectorType>(In.getOperand(0)->getType());
+ assert(InpTy && "Cannot handle no vector type for llvm.gather");
+ [[maybe_unused]] auto *ElemTy =
+ dyn_cast<PointerType>(InpTy->getElementType());
+ assert(ElemTy && "llvm.gather needs vector of ptr argument");
+ auto *F = In.getFunction();
+ LLVMContext &Ctx = F->getContext();
+ LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
+ << *In.getParent() << "\n");
+ LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
+ << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
+ << ") type(" << *ElemTy << ") Access alignment("
+ << *In.getOperand(1) << ") AddressSpace("
+ << ElemTy->getAddressSpace() << ")\n");
+
+ // TODO: Handle masking of elements.
+ assert(dyn_cast<VectorType>(In.getOperand(2)->getType()) &&
+ "llvm.gather needs vector for mask");
+ IRBuilder Builder(In.getParent(), In.getIterator(),
+ InstSimplifyFolder(HVC.DL));
+
+ // See who is using the result. The difference between the LLVM and HVX
+ // vgather intrinsics makes it impossible to handle all cases with temporary
+ // storage. Alloca in VTCM is not yet supported, so for now we just bail out
+ // in those cases.
+ HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
+ Instruction *Dst = locateDestination(&In, Qual);
+ if (!Dst) {
+ LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
+ << ")\n");
+
+ // Address of destination. Must be in VTCM.
+ auto *Ptr = getPointer(Dst);
+ if (!Ptr) {
+ LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
+ return nullptr;
+ }
+
+ // Result type. Assume it is a vector type.
+ auto *DstType = cast<VectorType>(getIndexType(Dst));
+ assert(DstType && "Cannot handle non vector dst type for llvm.gather");
+
+ // Base address for sources to be loaded
+ auto *IndexLoad = locateAddressFromIntrinsic(&In);
+ if (!IndexLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
+
+ // Gather indexes/offsets
+ auto *Indexes = locateIndexesFromIntrinsic(&In);
+ if (!Indexes)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
+
+ Instruction *Gather = nullptr;
+ Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
+ if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
+ // We fully assume the address space is in VTCM. We also assume that all
+ // pointers in Operand(0) have the same base(!).
+ // This is the most basic case of all the above.
+ unsigned OutputSize = HVC.getSizeOf(DstType);
+ auto *DstElemTy = cast<IntegerType>(DstType->getElementType());
+ unsigned ElemWidth = HVC.DL.getTypeAllocSize(DstElemTy);
+ LLVM_DEBUG(dbgs() << " Buffer type : " << *Ptr->getType()
+ << " Address space ("
+ << Ptr->getType()->getPointerAddressSpace() << ")\n"
+ << " Result type : " << *DstType
+ << "\n Size in bytes : " << OutputSize
+ << " element type(" << *DstElemTy
+ << ")\n ElemWidth : " << ElemWidth << " bytes\n");
+
+ auto *IndexType = cast<VectorType>(getIndexType(Indexes));
+ assert(IndexType && "Cannot handle non vector index type for llvm.gather");
+ unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
+ LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
+
+ // Intrinsic takes i32 instead of pointer so cast.
+ Value *CastedPtr = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ // [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ...]
+ // int_hexagon_V6_vgathermh [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermh_128B [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw [... , llvm_v32i32_ty]
+ // int_hexagon_V6_vgathermhw_128B [... , llvm_v64i32_ty]
+ // int_hexagon_V6_vgathermw [... , llvm_v16i32_ty]
+ // int_hexagon_V6_vgathermw_128B [... , llvm_v32i32_ty]
+ if (HVC.HST.getVectorLength() == OutputSize) {
+ if (ElemWidth == 1) {
+ // v128i8: there is no native instruction for this.
+ // Do this as two Hi/Lo gathers with masking.
+ // Unpack indexes. We assume that indexes are in 128i8 format - need to
+ // expand them to Hi/Lo 64i16
+ Value *CastIndexes =
+ Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
+ auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
+ auto *UnpackedIndexes =
+ Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
+ V6_vunpack, CastIndexes, nullptr);
+ LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
+ << ")\n");
+
+ auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
+ auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
+ [[maybe_unused]] Value *IndexHi =
+ HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
+ [[maybe_unused]] Value *IndexLo =
+ HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
+ LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
+ LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
+ // Create the mask for individual bytes
+ auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
+ LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
+ // We use our destination allocation as temporary storage.
+ // This is unlikely to work properly for masked gather.
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
+ [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexHi},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
+ // Rematerialize the result
+ [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
+ LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
+ // Same for the low part. Here we use Gather to return a non-null result
+ // from this function and continue to iterate. We also delete the Dst
+ // store below.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, QByteMask, CastedPtr,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), IndexLo},
+ nullptr);
+ LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
+ Value *LoadedResultLo = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
+ LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
+ // Now we have properly sized bytes in every other position
+ // B b A a c a A b B c f F g G h H is presented as
+ // B . b . A . a . c . a . A . b . B . c . f . F . g . G . h . H
+ // Use vpack to gather them
+ auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
+ [[maybe_unused]] auto Res = Builder.CreateIntrinsic(
+ NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
+ LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
+ [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
+ LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
+ } else if (ElemWidth == 2) {
+ // v32i16
+ if (IndexWidth == 2) {
+ // Reinterpret 64i16 as 32i32. Only needed for a syntactic IR match.
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+ // shift all i16 left by 1 to match short addressing mode instead of
+ // byte.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
+ // The 3rd argument is the size of the region to gather from. We probably
+ // want to set it to the max VTCM size.
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), V6_vgather,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ for (auto &U : Dst->uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+ dbgs() << " dst used by: " << *UI << "\n";
+ }
+ for (auto &U : In.uses()) {
+ if (auto *UI = dyn_cast<Instruction>(U.getUser()))
+ dbgs() << " In used by : " << *UI << "\n";
+ }
+ // Create temp load from result in case the result is used by any
+ // other instruction.
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else {
+ dbgs() << " Unhandled index type for vgather\n";
+ return nullptr;
+ }
+ } else if (ElemWidth == 4) {
+ if (IndexWidth == 4) {
+ // v32i32
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
+ LLVM_DEBUG(dbgs()
+ << " Shifted word index: " << *AdjustedIndex << ")\n");
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
+ {Ptr, CastedPtr, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
+ return nullptr;
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
+ return nullptr;
+ }
+ } else if (HVC.HST.getVectorLength() == OutputSize * 2) {
+ // This is half of the reg width, duplicate low in high
+ LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
+ return nullptr;
+ } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
+ LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");
+ return nullptr;
+ }
+ // Erase the original intrinsic and the store that consumes it.
+ // HVX will create a pseudo for the gather that is expanded to gather + store
+ // during packetization.
+ Dst->eraseFromParent();
+ } else if (Qual == HvxIdioms::LLVM_Scatter) {
+ // Gather feeds directly into scatter.
+ LLVM_DEBUG({
+ auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
+ assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
+ unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
+ unsigned DstElements = HVC.length(DstInpTy);
+ auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType());
+ assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
+ dbgs() << " Gather feeds into scatter\n Values to scatter : "
+ << *Dst->getOperand(0) << "\n";
+ dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
+ << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
+ << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
+ });
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ if (!isa<PointerType>(Src->getType())) {
+ LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
+ return nullptr;
+ }
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad) {
+ LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+
+ Value *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, IndexLoad);
+ LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
+ // Shift all i16 left by 1 to match short addressing mode instead of
+ // byte.
+ auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
+ Value *AdjustedIndex = HVC.createHvxIntrinsic(
+ Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
+ LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
+
+ return Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ AdjustedIndex},
+ nullptr);
+ } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
+ // Gather feeds into a previously inserted pseudo intrinsic.
+ // These cannot be in the same packet, so we need to generate another
+ // pseudo that is expanded to a .tmp gather + store:
+ // V6_vgathermh_pseudo (ins IntRegs:$_dst_, s4_0Imm:$Ii, IntRegs:$Rt,
+ // ModRegs:$Mu, HvxVR:$Vv)
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+ // Our indexes are represented as a constant. We need them in a reg.
+ // This most likely will not work properly since alloca gives us a DDR
+ // stack location. This will be fixed once we teach the compiler about VTCM.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ } else {
+ // Address of source
+ auto *Src = getPointer(IndexLoad);
+ if (!Src)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Src : " << *Src << "\n");
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
+
+ auto *DstLoad = locateAddressFromIntrinsic(Dst);
+ if (!DstLoad)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
+ auto *Ptr = getPointer(DstLoad);
+ if (!Ptr)
+ return nullptr;
+ LLVM_DEBUG(dbgs() << " Ptr : " << *Ptr << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
+ {Ptr, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ Indexes},
+ nullptr);
+ }
+ return Gather;
+ } else if (Qual == HvxIdioms::HEX_Scatter) {
+ // This is the case when the result of a gather is used as an argument to
+ // Intrinsic::hexagon_V6_vscattermh_128B. Most likely we just inserted it
+ // ourselves. We have to create an alloca, store to it, and replace all uses
+ // with that.
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+ Value *CastIndex =
+ getReinterpretiveCast_i16_to_i32(HVC, Builder, Ctx, Indexes);
+ LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
+ CastIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ } else if (Qual == HvxIdioms::HEX_Gather) {
+ // Gather feeds another gather that has already been replaced with
+ // hexagon_V6_vgathermh_128B.
+ if (isa<AllocaInst>(IndexLoad)) {
+ auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
+ if (cstDataVector) {
+ // Our indexes are represented as a constant. We need it in a reg.
+ AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
+
+ [[maybe_unused]] auto *StoreIndexes =
+ Builder.CreateStore(cstDataVector, IndexesAlloca);
+ LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
+ Value *LoadedIndex = Builder.CreateLoad(
+ IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
+ AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
+ LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
+ << "\n AddressSpace: "
+ << ResultAlloca->getAddressSpace() << "\n";);
+
+ Value *CastedSrc = Builder.CreateBitOrPointerCast(
+ IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
+ LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
+
+ Gather = Builder.CreateIntrinsic(
+ Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
+ {ResultAlloca, CastedSrc,
+ HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), LoadedIndex},
+ nullptr);
+ Value *LoadedResult = Builder.CreateLoad(
+ HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
+ LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
+ LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
+ In.replaceAllUsesWith(LoadedResult);
+ }
+ }
+ } else if (Qual == HvxIdioms::LLVM_Gather) {
+ // Gather feeds into another gather.
+ errs() << " Unimplemented vgather-to-vgather sequence\n";
+ return nullptr;
+ } else
+ llvm_unreachable("Unhandled Qual enum");
+
+ return Gather;
+}
+
auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
const FxpOp &Op) const -> Value * {
assert(Op.X.Val->getType() == Op.Y.Val->getType());
@@ -2138,6 +3000,26 @@ auto HvxIdioms::run() -> bool {
It = StartOver ? B.rbegin()
: cast<Instruction>(New)->getReverseIterator();
Changed = true;
+ } else if (matchGather(*It)) {
+ Value *New = processVGather(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Gather : " << *New << "\n");
+ // We replace the original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
+ } else if (matchScatter(*It)) {
+ Value *New = processVScatter(*It);
+ if (!New)
+ continue;
+ LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");
+ // We replace the original intrinsic with a new pseudo call.
+ It->eraseFromParent();
+ It = cast<Instruction>(New)->getReverseIterator();
+ RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
+ Changed = true;
}
}
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 6455757..2f59b7c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -186,6 +186,9 @@ static unsigned featureToArchVersion(unsigned Feature) {
case Hexagon::ArchV79:
case Hexagon::ExtensionHVXV79:
return 79;
+ case Hexagon::ArchV81:
+ case Hexagon::ExtensionHVXV81:
+ return 81;
}
llvm_unreachable("Expected valid arch feature");
return 0;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 6b48a21..b8075bd 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -96,6 +96,8 @@ cl::opt<bool> MV75("mv75", cl::Hidden, cl::desc("Build for Hexagon V75"),
cl::init(false));
cl::opt<bool> MV79("mv79", cl::Hidden, cl::desc("Build for Hexagon V79"),
cl::init(false));
+cl::opt<bool> MV81("mv81", cl::Hidden, cl::desc("Build for Hexagon V81"),
+ cl::init(false));
} // namespace
static cl::opt<Hexagon::ArchEnum> EnableHVX(
@@ -111,6 +113,7 @@ static cl::opt<Hexagon::ArchEnum> EnableHVX(
clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"),
clEnumValN(Hexagon::ArchEnum::V75, "v75", "Build for HVX v75"),
clEnumValN(Hexagon::ArchEnum::V79, "v79", "Build for HVX v79"),
+ clEnumValN(Hexagon::ArchEnum::V81, "v81", "Build for HVX v81"),
// Sentinel for no value specified.
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
@@ -159,6 +162,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv75";
if (MV79)
return "hexagonv79";
+ if (MV81)
+ return "hexagonv81";
return "";
}
@@ -474,6 +479,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V79:
Result.push_back("+hvxv79");
break;
+ case Hexagon::ArchEnum::V81:
+ Result.push_back("+hvxv81");
+ break;
case Hexagon::ArchEnum::Generic: {
Result.push_back(StringSwitch<StringRef>(CPU)
@@ -489,7 +497,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
.Case("hexagonv71t", "+hvxv71")
.Case("hexagonv73", "+hvxv73")
.Case("hexagonv75", "+hvxv75")
- .Case("hexagonv79", "+hvxv79"));
+ .Case("hexagonv79", "+hvxv79")
+ .Case("hexagonv81", "+hvxv81"));
break;
}
case Hexagon::ArchEnum::NoArch:
@@ -538,8 +547,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
FeatureBitset FB = S;
unsigned CpuArch = ArchV5;
for (unsigned F :
- {ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66,
- ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
+ {ArchV81, ArchV79, ArchV75, ArchV73, ArchV71, ArchV69, ArchV68, ArchV67,
+ ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -556,7 +565,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
for (unsigned F :
{ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71,
- ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79}) {
+ ExtensionHVXV73, ExtensionHVXV75, ExtensionHVXV79, ExtensionHVXV81}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -569,6 +578,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV81:
+ FB.set(ExtensionHVXV81);
+ [[fallthrough]];
case ArchV79:
FB.set(ExtensionHVXV79);
[[fallthrough]];
@@ -668,12 +680,12 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) {
std::optional<unsigned>
Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
- for (auto Arch : {Hexagon::ExtensionHVXV79, Hexagon::ExtensionHVXV75,
- Hexagon::ExtensionHVXV73, Hexagon::ExtensionHVXV71,
- Hexagon::ExtensionHVXV69, Hexagon::ExtensionHVXV68,
- Hexagon::ExtensionHVXV67, Hexagon::ExtensionHVXV66,
- Hexagon::ExtensionHVXV65, Hexagon::ExtensionHVXV62,
- Hexagon::ExtensionHVXV60})
+ for (auto Arch : {Hexagon::ExtensionHVXV81, Hexagon::ExtensionHVXV79,
+ Hexagon::ExtensionHVXV75, Hexagon::ExtensionHVXV73,
+ Hexagon::ExtensionHVXV71, Hexagon::ExtensionHVXV69,
+ Hexagon::ExtensionHVXV68, Hexagon::ExtensionHVXV67,
+ Hexagon::ExtensionHVXV66, Hexagon::ExtensionHVXV65,
+ Hexagon::ExtensionHVXV62, Hexagon::ExtensionHVXV60})
if (Features.test(Arch))
return Arch;
return {};
@@ -681,13 +693,13 @@ Hexagon_MC::getHVXVersion(const FeatureBitset &Features) {
unsigned Hexagon_MC::getArchVersion(const FeatureBitset &Features) {
for (auto Arch :
- {Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73, Hexagon::ArchV71,
- Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67, Hexagon::ArchV66,
- Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60, Hexagon::ArchV55,
- Hexagon::ArchV5})
+ {Hexagon::ArchV81, Hexagon::ArchV79, Hexagon::ArchV75, Hexagon::ArchV73,
+ Hexagon::ArchV71, Hexagon::ArchV69, Hexagon::ArchV68, Hexagon::ArchV67,
+ Hexagon::ArchV66, Hexagon::ArchV65, Hexagon::ArchV62, Hexagon::ArchV60,
+ Hexagon::ArchV55, Hexagon::ArchV5})
if (Features.test(Arch))
return Arch;
- llvm_unreachable("Expected arch v5-v79");
+ llvm_unreachable("Expected arch v5-v81");
return 0;
}
@@ -708,7 +720,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
.Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T)
.Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73)
.Case("hexagonv75", llvm::ELF::EF_HEXAGON_MACH_V75)
- .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79);
+ .Case("hexagonv79", llvm::ELF::EF_HEXAGON_MACH_V79)
+ .Case("hexagonv81", llvm::ELF::EF_HEXAGON_MACH_V81);
}
llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index aca7abd..44d1a44 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4578,6 +4578,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+def : InstAlias<"mtpidr $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsISA3_0]>;
+def : InstAlias<"mfpidr $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsISA3_0]>;
foreach SPRG = 4-7 in {
def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 9e6b7f0..2754d78 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1124,7 +1124,8 @@ def HasStdExtZbkbOrP
"'Base P' (Packed-SIMD)">;
def HasStdExtZbbOrZbkbOrP
- : Predicate<"Subtarget->HasStdExtZbbOrZbkb()|| Subtarget->hasStdExtP()">,
+ : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb() || "
+ "Subtarget->hasStdExtP()">,
AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb, FeatureStdExtP),
"'Zbb' (Basic Bit-Manipulation) or "
"'Zbkb' (Bitmanip instructions for Cryptography) or "
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 26fe9ed..1c930ac 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -318,8 +318,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
- !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
+ if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() &&
+ !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
+ !Subtarget.hasVendorXAndesPerf() &&
!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
@@ -392,7 +393,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
}
- if (Subtarget.hasStdExtZbb() ||
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
Legal);
@@ -403,6 +404,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
+ // If we have CLZW but not CTZW, custom promote i32.
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
+ setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
}
if (!Subtarget.hasCPOPLike()) {
@@ -419,13 +423,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// We need the custom lowering to make sure that the resulting sequence
// for the 32bit case is efficient on 64bit targets.
// Use default promotion for i32 without Zbb.
- if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
+ if (Subtarget.is64Bit() &&
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTLZ, XLenVT, Expand);
}
- if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
+ if (Subtarget.hasStdExtP() ||
+ (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction(ISD::ABS, XLenVT, Legal);
} else if (Subtarget.hasShortForwardBranchOpt()) {
// We can use PseudoCCSUB to implement ABS.
@@ -14669,6 +14675,25 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
bool IsCTZ =
N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+
+ // Without Zbb, lower as 32 - clzw(~X & (X-1))
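+    // ~X & (X-1) has ones exactly in the trailing-zero positions of X, so
+    // 32 - clzw of it yields cttz(X) (including 32 when X is zero).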
+ if (IsCTZ && !Subtarget.hasStdExtZbb()) {
+ assert(Subtarget.hasStdExtP());
+
+ NewOp0 = DAG.getFreeze(NewOp0);
+ SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64);
+ SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0,
+ DAG.getConstant(1, DL, MVT::i64));
+ SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1);
+ SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64,
+ DAG.getConstant(32, DL, MVT::i64), CLZW);
+ SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+
unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
@@ -14797,7 +14822,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
N->getOperand(0));
- SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
+ SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
return;
}
@@ -21813,7 +21838,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
// Output is either all zero or operand 0. We can propagate sign bit count
// from operand 0.
return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- case RISCVISD::ABSW: {
+ case RISCVISD::NEGW_MAX: {
// We expand this at isel to negw+max. The result will have 33 sign bits
// if the input has at least 33 sign bits.
unsigned Tmp =
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7d8a919..cc085bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1455,3 +1455,11 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">;
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
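+// With the P extension, scalar integer abs selects directly to the ABS
+// instruction.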
+let Predicates = [HasStdExtP] in
+def : PatGpr<abs, ABS>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 4104abd..f7b4914 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -218,11 +218,13 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
}
let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in {
- def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">;
+ def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">,
+ SchedUnaryMC<"WriteSF_VFExp", "ReadSF_VFExp">;
}
let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in {
- def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">;
+ def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">,
+ SchedUnaryMC<"WriteSF_VFExpa", "ReadSF_VFExpa">;
}
let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector",
@@ -482,11 +484,53 @@ let Predicates = [HasVendorXSfvfwmaccqqq] in {
defm SF_VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC;
}
-let Predicates = [HasVendorXSfvfnrclipxfqf] in {
+let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in {
defm SF_VFNRCLIP_XU_F_QF : VPseudoSiFiveVFNRCLIP;
defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
}
+class VFExpSchedSEWSet<string mx, bit IsBF16, bit IsApprox> {
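+  // The BF16 variant only supports SEW=16, the approximate variant supports
+  // the full FP SEW set, and the exact FP variant excludes SEW=64.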
+ defvar BaseSet = SchedSEWSet<mx, isF=1>.val;
+ list<int> val = !if(IsBF16, !listremove(BaseSet, [32, 64]),
+ !if(IsApprox, BaseSet, !listremove(BaseSet, [64])));
+}
+multiclass VPseudoVFExp_V<bit IsBF16 = false, bit IsApprox = false> {
+ defvar SchedSuffix = !if(IsApprox, "VFExpa", "VFExp");
+
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ foreach e = VFExpSchedSEWSet<mx, IsBF16, IsApprox>.val in {
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E" # e
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
+ mx, e, forcePassthruRead=true>;
+ def "_V_" # mx # "_E" # e # "_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
+ mx, e, forcePassthruRead=true>;
+ }
+ }
+ }
+}
+
+let Predicates = [HasVendorXSfvfbfexp16e], hasSideEffects = 0 in {
+ let AltFmtType = IS_ALTFMT in {
+ defm PseudoSF_VFEXP_ALT : VPseudoVFExp_V<IsBF16=true>;
+ }
+}
+
+let Predicates = [HasVendorXSfvfexpAnyFloat], hasSideEffects = 0 in {
+ let AltFmtType = IS_NOT_ALTFMT in {
+ defm PseudoSF_VFEXP : VPseudoVFExp_V;
+ }
+}
+
+let Predicates = [HasVendorXSfvfexpa], AltFmtType = IS_NOT_ALTFMT in {
+ defm PseudoSF_VFEXPA : VPseudoVFExp_V<IsApprox=true>;
+}
+
// SDNode
def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
SDTCisVT<1, XLenVT>,
@@ -893,3 +937,36 @@ let Predicates = [HasVendorXSfcease] in {
let rs2 = 0b00101;
}
}
+
+let Predicates = [HasVendorXSfvfbfexp16e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP_ALT",
+ AllBF16Vectors,
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexp16e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
+ [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexp32e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
+ [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa, HasVInstructionsF16] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
+ isSEWAware=1>;
+}
+
+let Predicates = [HasVendorXSfvfexpa64e] in {
+ defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
+ [VF64M1, VF64M2, VF64M4, VF64M8], isSEWAware=1>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 62b7bcd..5429c2a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -51,7 +51,7 @@ def riscv_zip : RVSDNode<"ZIP", SDTIntUnaryOp>;
def riscv_unzip : RVSDNode<"UNZIP", SDTIntUnaryOp>;
// RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
-def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
+def riscv_negw_max : RVSDNode<"NEGW_MAX", SDTIntUnaryOp>;
// Scalar cryptography
def riscv_clmul : RVSDNode<"CLMUL", SDTIntBinOp>;
@@ -599,37 +599,43 @@ def : PatGpr<riscv_zip, ZIP_RV32, i32>;
def : PatGpr<riscv_unzip, UNZIP_RV32, i32>;
} // Predicates = [HasStdExtZbkb, IsRV32]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : PatGpr<ctlz, CLZ>;
+}
+
+let Predicates = [HasStdExtZbb] in {
def : PatGpr<cttz, CTZ>;
def : PatGpr<ctpop, CPOP>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbb, IsRV64] in {
+let Predicates = [HasStdExtZbbOrP, IsRV64] in {
def : PatGpr<riscv_clzw, CLZW>;
+}
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
def : PatGpr<riscv_ctzw, CTZW>;
def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
-def : Pat<(i64 (riscv_absw GPR:$rs1)),
+def : Pat<(i64 (riscv_negw_max GPR:$rs1)),
(MAX GPR:$rs1, (XLenVT (SUBW (XLenVT X0), GPR:$rs1)))>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb]
+} // Predicates = [HasStdExtZbbOrP]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrP] in {
def : PatGprGpr<smin, MIN>;
def : PatGprGpr<smax, MAX>;
def : PatGprGpr<umin, MINU>;
def : PatGprGpr<umax, MAXU>;
-} // Predicates = [HasStdExtZbb]
+} // Predicates = [HasStdExtZbbOrP]
-let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in
+let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV32] in
def : PatGpr<bswap, REV8_RV32, i32>;
-let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in
+let Predicates = [HasStdExtZbbOrZbkbOrP, IsRV64] in
def : PatGpr<bswap, REV8_RV64, i64>;
let Predicates = [HasStdExtZbkb] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 637d61fe..36a2f46 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -1588,6 +1588,10 @@ multiclass SiFive7SchedResources<int vlen, bit dualVALU,
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedQ;
+  // TODO: Scheduling info for XSfvfexp* and XSfvfexpa* on SiFive7 will be
+  // added in follow-up patches.
+ defm : UnsupportedSchedXSfvfexp;
+ defm : UnsupportedSchedXSfvfexpa;
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 9ab9636..64ccfd8 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -523,6 +523,8 @@ include "RISCVScheduleZvk.td"
// Vendor Extensions
multiclass UnsupportedSchedXsf {
defm : UnsupportedSchedXsfvcp;
+ defm : UnsupportedSchedXSfvfexp;
+ defm : UnsupportedSchedXSfvfexpa;
defm : UnsupportedSchedXSfvfnrclipxfqf;
defm : UnsupportedSchedXSfvfwmaccqqq;
defm : UnsupportedSchedXSfvqmaccdod;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
index 99632e4..1ee6dc1 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleXSf.td
@@ -99,3 +99,23 @@ defm : LMULWriteRes<"WriteSF_VFWMACC_QQQ", []>;
defm : LMULReadAdvance<"ReadSF_VFWMACC_QQQ", 0>;
} // Unsupported = true
}
+
+defm "" : LMULSEWSchedWritesF<"WriteSF_VFExp">;
+defm "" : LMULSEWSchedReadsF<"ReadSF_VFExp">;
+
+multiclass UnsupportedSchedXSfvfexp {
+let Unsupported = true in {
+defm : LMULSEWWriteResF<"WriteSF_VFExp", []>;
+defm : LMULSEWReadAdvanceF<"ReadSF_VFExp", 0>;
+} // Unsupported = true
+}
+
+defm "" : LMULSEWSchedWritesF<"WriteSF_VFExpa">;
+defm "" : LMULSEWSchedReadsF<"ReadSF_VFExpa">;
+
+multiclass UnsupportedSchedXSfvfexpa {
+let Unsupported = true in {
+defm : LMULSEWWriteResF<"WriteSF_VFExpa", []>;
+defm : LMULSEWReadAdvanceF<"ReadSF_VFExpa", 0>;
+} // Unsupported = true
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 334db4b..4b4fc8f 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -187,7 +187,7 @@ public:
}
bool hasCLZLike() const {
- return HasStdExtZbb || HasVendorXTHeadBb ||
+ return HasStdExtZbb || HasStdExtP || HasVendorXTHeadBb ||
(HasVendorXCVbitmanip && !IsRV64);
}
bool hasCTZLike() const {
@@ -197,7 +197,7 @@ public:
return HasStdExtZbb || (HasVendorXCVbitmanip && !IsRV64);
}
bool hasREV8Like() const {
- return HasStdExtZbb || HasStdExtZbkb || HasVendorXTHeadBb;
+ return HasStdExtZbb || HasStdExtZbkb || HasStdExtP || HasVendorXTHeadBb;
}
bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; }
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 62073ec..4393f6e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
return false;
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
auto getFoldableLogicOp = [](SDValue Op) {
// Peek through single use bitcast.
if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
@@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
return SDValue();
};
- SDValue A, FoldableOp;
- if ((FoldableOp = getFoldableLogicOp(N1))) {
- A = N0;
- } else if ((FoldableOp = getFoldableLogicOp(N0))) {
- A = N1;
- } else
- return false;
+ SDValue N0, N1, A, FoldableOp;
+
+  // Identify and (optionally) peel an outer NOT that wraps a pure logic tree.
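+  // not(logic(a, b)) still fits a single VPTERNLOG: match the inner operands
+  // here and invert the truth-table immediate at the end.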
+ auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) {
+ if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() &&
+ ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) {
+ SDValue InnerOp = Op->getOperand(0);
+
+ if (!getFoldableLogicOp(InnerOp))
+ return SDValue();
+
+ N0 = InnerOp.getOperand(0);
+ N1 = InnerOp.getOperand(1);
+ if ((FoldableOp = getFoldableLogicOp(N1))) {
+ A = N0;
+ return InnerOp;
+ }
+ if ((FoldableOp = getFoldableLogicOp(N0))) {
+ A = N1;
+ return InnerOp;
+ }
+ }
+ return SDValue();
+ };
+
+ bool PeeledOuterNot = false;
+ SDNode *OriN = N;
+ if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) {
+ PeeledOuterNot = true;
+ N = InnerOp.getNode();
+ } else {
+ N0 = N->getOperand(0);
+ N1 = N->getOperand(1);
+
+ if ((FoldableOp = getFoldableLogicOp(N1)))
+ A = N0;
+ else if ((FoldableOp = getFoldableLogicOp(N0)))
+ A = N1;
+ else
+ return false;
+ }
SDValue B = FoldableOp.getOperand(0);
SDValue C = FoldableOp.getOperand(1);
@@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
case ISD::XOR: Imm ^= TernlogMagicA; break;
}
- return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
+ if (PeeledOuterNot)
+ Imm = ~Imm;
+
+ return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm);
}
/// If the high bits of an 'and' operand are known zero, try setting the
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4dfc400..410f20e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57617,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
}
// Fold any similar generic ADD/SUB opcodes to reuse this node.
- auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+ auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
SDValue Ops[] = {N0, N1};
SDVTList VTs = DAG.getVTList(N->getValueType(0));
- if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+ if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
SDValue Op(N, 0);
if (Negate) {
// Bail if this is only used by a user of the x86 add/sub.
@@ -57632,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
DCI.CombineTo(GenericAddSub, Op);
}
};
- MatchGeneric(LHS, RHS, false);
- MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+ MatchGeneric(GenericOpc, LHS, RHS, false);
+ MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());
+
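+  // The same x86 node may also be mirrored by a generic node that uses the
+  // negated constant with the opposite opcode; match those forms as well.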
+ if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) {
+ SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+ if (X86ISD::SUB == N->getOpcode()) {
+ // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
+ MatchGeneric(ISD::ADD, LHS, NegC, false);
+ } else {
+ // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
+ MatchGeneric(ISD::SUB, NegC, LHS, true);
+ }
+ } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) {
+ if (X86ISD::SUB == N->getOpcode()) {
+ SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+ // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
+ MatchGeneric(ISD::ADD, RHS, NegC, true);
+ }
+ }
// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
// EFLAGS result doesn't change.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e28b9c1..b7151f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1592,7 +1592,6 @@ namespace llvm {
bool useLoadStackGuardNode(const Module &M) const override;
bool useStackGuardXorFP() const override;
void insertSSPDeclarations(Module &M) const override;
- Function *getSSPStackGuardCheck(const Module &M) const override;
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 37d7772..a61bbe5 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -640,15 +640,6 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
TargetLowering::insertSSPDeclarations(M);
}
-Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
- // MSVC CRT has a function to validate security cookie.
- if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
- Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
- return M.getFunction("__security_check_cookie");
- }
- return TargetLowering::getSSPStackGuardCheck(M);
-}
-
Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index edcf247..632c6a2 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -1407,7 +1407,7 @@ let isBarrier = 1, isTerminator = 1 in {
let r = 0x04;
}
- def BREAK_N : RRRN_Inst<0x0C, (outs), (ins uimm4:$imm),
+ def BREAK_N : RRRN_Inst<0x0D, (outs), (ins uimm4:$imm),
"break.n\t$imm", []>, Requires<[HasDensity, HasDebug]> {
bits<4> imm;
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 0fce5b9..709e5f0 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -88,6 +88,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV9_4A:
case ArchKind::ARMV9_5A:
case ArchKind::ARMV9_6A:
+ case ArchKind::ARMV9_7A:
return 9;
case ArchKind::INVALID:
return 0;
@@ -127,6 +128,7 @@ static ARM::ProfileKind getProfileKind(ARM::ArchKind AK) {
case ARM::ArchKind::ARMV9_4A:
case ARM::ArchKind::ARMV9_5A:
case ARM::ArchKind::ARMV9_6A:
+ case ARM::ArchKind::ARMV9_7A:
return ARM::ProfileKind::A;
case ARM::ArchKind::ARMV4:
case ARM::ArchKind::ARMV4T:
diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index f6cea85..15ba1eb 100644
--- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -46,6 +46,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v9.4a", "v9.4-a")
.Case("v9.5a", "v9.5-a")
.Case("v9.6a", "v9.6-a")
+ .Case("v9.7a", "v9.7-a")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
.Case("v8.1m.main", "v8.1-m.main")
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index 1068ce4..11ba9ee 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -937,6 +937,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v9_5a;
case ARM::ArchKind::ARMV9_6A:
return Triple::ARMSubArch_v9_6a;
+ case ARM::ArchKind::ARMV9_7A:
+ return Triple::ARMSubArch_v9_7a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 669d4f0..8d9933b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -582,6 +582,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
return BinaryOperator::CreateSub(ConstCtlz, X);
}
+
+ // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
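+    // ~x & (x - 1) is a mask of the trailing zero bits of x; cttz is created
+    // with is_zero_poison=false so both sides evaluate to 0 when x == 0.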
+ if (Op0->hasOneUse() &&
+ match(Op0,
+ m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
+ Type *Ty = II.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+ {X, IC.Builder.getFalse()});
+ auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
+ return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
+ }
}
// cttz(Pow2) -> Log2(Pow2)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5aa8de3..f5130da 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -4697,5 +4697,31 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(),
CondVal, FalseVal));
+ unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+ CmpPredicate Pred;
+ Value *CmpLHS, *CmpRHS;
+
+ // Canonicalize sign function ashr patterns:
+ // select (icmp slt X, 1), ashr X, bitwidth-1, 1 -> scmp(X, 0)
+ // select (icmp sgt X, 0), 1, ashr X, bitwidth-1 -> scmp(X, 0)
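+  // ashr X, bitwidth-1 is -1 for negative X and 0 otherwise, so both selects
+  // produce -1/0/1, i.e. the sign of X.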
+ if (match(&SI, m_Select(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
+ m_Value(TrueVal), m_Value(FalseVal))) &&
+ ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_One()) &&
+ match(TrueVal,
+ m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1))) &&
+ match(FalseVal, m_One())) ||
+ (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_Zero()) &&
+ match(TrueVal, m_One()) &&
+ match(FalseVal,
+ m_AShr(m_Specific(CmpLHS), m_SpecificInt(BitWidth - 1)))))) {
+
+ Function *Scmp = Intrinsic::getOrInsertDeclaration(
+ SI.getModule(), Intrinsic::scmp, {SI.getType(), SI.getType()});
+ return CallInst::Create(Scmp, {CmpLHS, ConstantInt::get(SI.getType(), 0)});
+ }
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67e2aae..9c8de45 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2327,6 +2327,18 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
return ConstantVector::get(NewVecC);
}
+// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+ Constant *Splat, bool SplatLHS,
+ const DataLayout &DL) {
+ ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+ Constant *LHS = ConstantVector::getSplat(EC, Splat);
+ Constant *RHS = Vector;
+ if (!SplatLHS)
+ std::swap(LHS, RHS);
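+  // ConstantFoldBinaryOpOperands returns nullptr if the operands do not fold.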
+ return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
if (!isa<VectorType>(Inst.getType()))
return nullptr;
@@ -2338,6 +2350,37 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
assert(cast<VectorType>(RHS->getType())->getElementCount() ==
cast<VectorType>(Inst.getType())->getElementCount());
+ auto foldConstantsThroughSubVectorInsertSplat =
+ [&](Value *MaybeSubVector, Value *MaybeSplat,
+ bool SplatLHS) -> Instruction * {
+ Value *Idx;
+ Constant *Splat, *SubVector, *Dest;
+ if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
+ !match(MaybeSubVector,
+ m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
+ m_Value(Idx))))
+ return nullptr;
+ SubVector =
+ constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+ Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+ if (!SubVector || !Dest)
+ return nullptr;
+ auto *InsertVector =
+ Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
+ return replaceInstUsesWith(Inst, InsertVector);
+ };
+
+ // If one operand is a constant splat and the other operand is a
+ // `vector.insert` where both the destination and subvector are constant,
+ // apply the operation to both the destination and subvector, returning a new
+ // constant `vector.insert`. This helps constant folding for scalable vectors.
+ if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+ /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
+ return Folded;
+ if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+ /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
+ return Folded;
+
// If both operands of the binop are vector concatenations, then perform the
// narrow binop on each pair of the source operands followed by concatenation
// of the results.
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb..10b03bb 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
static const Align kShadowTLSAlignment = Align(8);
// These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;
@@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ static bool isAArch64SVCount(Type *Ty) {
+ if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty))
+ return TTy->getName() == "aarch64.svcount";
+ return false;
+ }
+
+  // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+  // 'target("aarch64.svcount")'), but not e.g., <vscale x 4 x i32>.
+ static bool isScalableNonVectorType(Type *Ty) {
+ if (!isAArch64SVCount(Ty))
+ LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
+ << "\n");
+
+ return Ty->isScalableTy() && !isa<VectorType>(Ty);
+ }
+
void materializeChecks() {
#ifndef NDEBUG
// For assert below.
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
+ if (isScalableNonVectorType(OrigTy)) {
+ LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
+ << "\n");
+ return OrigTy;
+ }
+
uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
<< *OrigIns << "\n");
return;
}
-#ifndef NDEBUG
+
Type *ShadowTy = Shadow->getType();
+ if (isScalableNonVectorType(ShadowTy)) {
+ LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+ << " before " << *OrigIns << "\n");
+ return;
+ }
+#ifndef NDEBUG
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
"Can only insert checks for integer, vector, and aggregate shadow "
@@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// an extra "select". This results in much more compact IR.
// Sa = select Sb, poisoned, (select b, Sc, Sd)
Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+ } else if (isScalableNonVectorType(I.getType())) {
+ // This is intended to handle target("aarch64.svcount"), which can't be
+ // handled in the else branch because of incompatibility with CreateXor
+ // ("The supported LLVM operations on this type are limited to load,
+ // store, phi, select and alloca instructions").
+
+ // TODO: this currently underapproximates. Use Arm SVE EOR in the else
+ // branch as needed instead.
+ Sa1 = getCleanShadow(getShadowTy(I.getType()));
} else {
// Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
// If Sb (condition is poisoned), look for bits in c and d that are equal
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a1ad2db..2591df8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4172,11 +4172,6 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveIns;
- /// Mapping from SCEVs to the VPValues representing their expansions.
- /// NOTE: This mapping is temporary and will be removed once all users have
- /// been modeled in VPlan directly.
- DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
-
/// Blocks allocated and owned by the VPlan. They will be deleted once the
/// VPlan is destroyed.
SmallVector<VPBlockBase *> CreatedBlocks;
@@ -4424,15 +4419,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- VPValue *getSCEVExpansion(const SCEV *S) const {
- return SCEVToExpansion.lookup(S);
- }
-
- void addSCEVExpansion(const SCEV *S, VPValue *V) {
- assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
- SCEVToExpansion[S] = V;
- }
-
/// Clone the current VPlan, update all VPValues of the new VPlan and cloned
/// recipes to refer to the clones, and return it.
VPlan *duplicate();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3e85e6f..84817d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -943,12 +943,40 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
}
+/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+/// Returns an optional pair, where the first element indicates whether it is
+/// an intrinsic ID.
+static std::optional<std::pair<bool, unsigned>>
+getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
+ return TypeSwitch<const VPSingleDefRecipe *,
+ std::optional<std::pair<bool, unsigned>>>(R)
+ .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+ VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
+ [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+ .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+ return std::make_pair(true, I->getVectorIntrinsicID());
+ })
+ .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
+ // For recipes that do not directly map to LLVM IR instructions,
+ // assign opcodes after the last VPInstruction opcode (which is also
+ // after the last IR Instruction opcode), based on the VPDefID.
+ return std::make_pair(false,
+ VPInstruction::OpsEnd + 1 + I->getVPDefID());
+ })
+ .Default([](auto *) { return std::nullopt; });
+}
+
/// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
-/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are
-/// foldable live-ins.
-static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
- ArrayRef<VPValue *> Operands,
- const DataLayout &DL, VPTypeAnalysis &TypeInfo) {
+/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
+/// Operands are foldable live-ins.
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
+ ArrayRef<VPValue *> Operands,
+ const DataLayout &DL,
+ VPTypeAnalysis &TypeInfo) {
+ auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R);
+ if (!OpcodeOrIID)
+ return nullptr;
+
SmallVector<Value *, 4> Ops;
for (VPValue *Op : Operands) {
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
@@ -956,43 +984,57 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
Ops.push_back(Op->getLiveInIRValue());
}
- InstSimplifyFolder Folder(DL);
- if (Instruction::isBinaryOp(Opcode))
- return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+ auto FoldToIRValue = [&]() -> Value * {
+ InstSimplifyFolder Folder(DL);
+ if (OpcodeOrIID->first) {
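+      // FoldBinaryIntrinsic only handles two-operand intrinsics.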
+ if (R.getNumOperands() != 2)
+ return nullptr;
+ unsigned ID = OpcodeOrIID->second;
+ return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
+ TypeInfo.inferScalarType(&R));
+ }
+ unsigned Opcode = OpcodeOrIID->second;
+ if (Instruction::isBinaryOp(Opcode))
+ return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+ Ops[0], Ops[1]);
+ if (Instruction::isCast(Opcode))
+ return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+ TypeInfo.inferScalarType(R.getVPSingleValue()));
+ switch (Opcode) {
+ case VPInstruction::LogicalAnd:
+ return Folder.FoldSelect(Ops[0], Ops[1],
+ ConstantInt::getNullValue(Ops[1]->getType()));
+ case VPInstruction::Not:
+ return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+ Constant::getAllOnesValue(Ops[0]->getType()));
+ case Instruction::Select:
+ return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
Ops[1]);
- if (Instruction::isCast(Opcode))
- return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
- TypeInfo.inferScalarType(R.getVPSingleValue()));
- switch (Opcode) {
- case VPInstruction::LogicalAnd:
- return Folder.FoldSelect(Ops[0], Ops[1],
- ConstantInt::getNullValue(Ops[1]->getType()));
- case VPInstruction::Not:
- return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
- Constant::getAllOnesValue(Ops[0]->getType()));
- case Instruction::Select:
- return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
- case Instruction::ICmp:
- case Instruction::FCmp:
- return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
- Ops[1]);
- case Instruction::GetElementPtr: {
- auto &RFlags = cast<VPRecipeWithIRFlags>(R);
- auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
- return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
- RFlags.getGEPNoWrapFlags());
- }
- case VPInstruction::PtrAdd:
- case VPInstruction::WidePtrAdd:
- return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
- Ops[1],
- cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
- // An extract of a live-in is an extract of a broadcast, so return the
- // broadcasted element.
- case Instruction::ExtractElement:
- assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
- return Ops[0];
- }
+ case Instruction::GetElementPtr: {
+ auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+ auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+ return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+ drop_begin(Ops), RFlags.getGEPNoWrapFlags());
+ }
+ case VPInstruction::PtrAdd:
+ case VPInstruction::WidePtrAdd:
+ return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()),
+ Ops[0], Ops[1],
+ cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+ // An extract of a live-in is an extract of a broadcast, so return the
+ // broadcasted element.
+ case Instruction::ExtractElement:
+ assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+ return Ops[0];
+ }
+ return nullptr;
+ };
+
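+  // Wrap a successful fold as a live-in VPValue of the enclosing plan.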
+ if (Value *V = FoldToIRValue())
+ return R.getParent()->getPlan()->getOrAddLiveIn(V);
return nullptr;
}
@@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
// Simplification of live-in IR values for SingleDef recipes using
// InstSimplifyFolder.
- if (TypeSwitch<VPRecipeBase *, bool>(&R)
- .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
- VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
- const DataLayout &DL =
- Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
- Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
- TypeInfo);
- if (V)
- I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
- return V;
- })
- .Default([](auto *) { return false; }))
- return;
+ const DataLayout &DL =
+ Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
+ if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo))
+ return Def->replaceAllUsesWith(V);
// Fold PredPHI LiveIn -> LiveIn.
if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
@@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
}
- /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
- /// Returns an optional pair, where the first element indicates whether it is
- /// an intrinsic ID.
- static std::optional<std::pair<bool, unsigned>>
- getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
- return TypeSwitch<const VPSingleDefRecipe *,
- std::optional<std::pair<bool, unsigned>>>(R)
- .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
- VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
- [](auto *I) { return std::make_pair(false, I->getOpcode()); })
- .Case<VPWidenIntrinsicRecipe>([](auto *I) {
- return std::make_pair(true, I->getVectorIntrinsicID());
- })
- .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
- // For recipes that do not directly map to LLVM IR instructions,
- // assign opcodes after the last VPInstruction opcode (which is also
- // after the last IR Instruction opcode), based on the VPDefID.
- return std::make_pair(false,
- VPInstruction::OpsEnd + 1 + I->getVPDefID());
- })
- .Default([](auto *) { return std::nullopt; });
- }
-
/// If recipe \p R will lower to a GEP with a non-i8 source element type,
/// return that source element type.
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
@@ -4119,7 +4129,7 @@ static bool isAlreadyNarrow(VPValue *VPV) {
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
- if (!VectorLoop)
+ if (!VectorLoop || VectorLoop->getEntry()->getNumSuccessors() != 0)
return;
VPTypeAnalysis TypeInfo(Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 06c3d75..fe66f13 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,8 +32,6 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
}
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
- if (auto *Expanded = Plan.getSCEVExpansion(Expr))
- return Expanded;
VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
Expanded = Plan.getOrAddLiveIn(E->getValue());
@@ -50,7 +48,6 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
}
}
- Plan.addSCEVExpansion(Expr, Expanded);
return Expanded;
}