aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td13
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td1
-rw-r--r--llvm/lib/Target/AArch64/AArch64Features.td48
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp111
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h1
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td117
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td641
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td11
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td83
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td6
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td370
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp90
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp201
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp10
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp16
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp18
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp115
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h5
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td72
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td167
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp33
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h64
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp21
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h19
28 files changed, 1798 insertions, 457 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 86f9548..a4529a5 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -73,9 +73,16 @@ def SVEUnsupported : AArch64Unsupported {
SVE2Unsupported.F);
}
-let F = [HasSME2p2, HasSVE2p2_or_SME2p2, HasNonStreamingSVE_or_SME2p2,
- HasNonStreamingSVE2p2_or_SME2p2] in
-def SME2p2Unsupported : AArch64Unsupported;
+def SME2p3Unsupported : AArch64Unsupported {
+ let F = [HasSVE2p3_or_SME2p3, HasSVE_B16MM];
+}
+
+def SME2p2Unsupported : AArch64Unsupported {
+ let F = !listconcat([HasSME2p2, HasSVE2p2_or_SME2p2,
+ HasNonStreamingSVE_or_SME2p2,
+ HasNonStreamingSVE2p2_or_SME2p2],
+ SME2p3Unsupported.F);
+}
def SME2p1Unsupported : AArch64Unsupported {
let F = !listconcat([HasSME2p1, HasSVE2p1_or_SME2p1,
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ecaeff7..b3ec65c 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -71,7 +71,6 @@ def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
icmp_redundant_trunc,
fold_global_offset,
- shuffle_to_extract,
ext_addv_to_udot_addv,
ext_uaddv_to_uaddlv,
push_sub_through_zext,
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 46f5f0c..0e94b78 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -585,6 +585,47 @@ def FeatureSME_TMOP: ExtensionWithMArch<"sme-tmop", "SME_TMOP", "FEAT_SME_TMOP",
def FeatureSSVE_FEXPA : ExtensionWithMArch<"ssve-fexpa", "SSVE_FEXPA", "FEAT_SSVE_FEXPA",
"Enable SVE FEXPA instruction in Streaming SVE mode", [FeatureSME2]>;
+//===----------------------------------------------------------------------===//
+// Armv9.7 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureCMH : ExtensionWithMArch<"cmh", "CMH", "FEAT_CMH",
+ "Enable Armv9.7-A Contention Management Hints">;
+
+def FeatureLSCP : ExtensionWithMArch<"lscp", "LSCP", "FEAT_LSCP",
+ "Enable Armv9.7-A Load-acquire and store-release pair extension">;
+
+def FeatureTLBID: ExtensionWithMArch<"tlbid", "TLBID", "FEAT_TLBID",
+ "Enable Armv9.7-A TLBI Domains extension">;
+
+def FeatureMPAMv2: ExtensionWithMArch<"mpamv2", "MPAMv2", "FEAT_MPAMv2",
+ "Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions">;
+
+def FeatureMTETC: ExtensionWithMArch<"mtetc", "MTETC", "FEAT_MTETC",
+ "Enable Virtual Memory Tagging Extension">;
+
+def FeatureGCIE: ExtensionWithMArch<"gcie", "GCIE", "FEAT_GCIE",
+ "Enable GICv5 (Generic Interrupt Controller) CPU Interface Extension">;
+
+def FeatureSVE2p3 : ExtensionWithMArch<"sve2p3", "SVE2p3", "FEAT_SVE2p3",
+ "Enable Armv9.7-A Scalable Vector Extension 2.3 instructions", [FeatureSVE2p2]>;
+
+def FeatureSME2p3 : ExtensionWithMArch<"sme2p3", "SME2p3", "FEAT_SME2p3",
+ "Enable Armv9.7-A Scalable Matrix Extension 2.3 instructions", [FeatureSME2p2]>;
+
+def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B16MM",
+ "Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate", [FeatureSVE]>;
+
+def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
+ "Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
@@ -939,9 +980,12 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
[HasV9_4aOps, FeatureCPA],
!listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>;
def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
- [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2, FeatureLSUI, FeatureOCCMO],
- !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2,
+ [HasV9_5aOps, FeatureCMPBR, FeatureLSUI, FeatureOCCMO],
+ !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR,
FeatureLSUI, FeatureOCCMO])>;
+def HasV9_7aOps : Architecture64<9, 7, "a", "v9.7a",
+ [HasV9_6aOps, FeatureSVE2p3, FeatureFPRCVT],
+ !listconcat(HasV9_6aOps.DefaultExts, [FeatureSVE2p3, FeatureFPRCVT])>;
def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
[ //v8.1
FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a81de5c..d16b116 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9002,12 +9002,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
static SMECallAttrs
-getSMECallAttrs(const Function &Caller, const AArch64TargetLowering &TLI,
+getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI,
const TargetLowering::CallLoweringInfo &CLI) {
if (CLI.CB)
- return SMECallAttrs(*CLI.CB, &TLI);
+ return SMECallAttrs(*CLI.CB, &RTLCI);
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), RTLCI));
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
}
@@ -9029,7 +9029,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// SME Streaming functions are not eligible for TCO as they may require
// the streaming mode or ZA to be restored after returning from the call.
- SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState() ||
CallAttrs.caller().hasStreamingBody())
@@ -9454,7 +9455,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Determine whether we need any streaming mode changes.
- SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI);
+ SMECallAttrs CallAttrs =
+ getSMECallAttrs(MF.getFunction(), getRuntimeLibcallsInfo(), CLI);
std::optional<unsigned> ZAMarkerNode;
bool UseNewSMEABILowering = getTM().useNewSMEABILowering();
@@ -19476,6 +19478,61 @@ static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
Op1 ? Op1 : Mul->getOperand(1));
}
+// Multiplying an RDSVL value by a constant can sometimes be done cheaper by
+// folding a power-of-two factor of the constant into the RDSVL immediate and
+// compensating with an extra shift.
+//
+// We rewrite:
+// (mul (srl (rdsvl 1), w), x)
+// to one of:
+// (shl (rdsvl y), z) if z > 0
+// (srl (rdsvl y), abs(z)) if z < 0
+// where integers y, z satisfy x = y * 2^(w + z) and y ∈ [-32, 31].
+static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG) {
+ SDLoc DL(Mul);
+ EVT VT = Mul->getValueType(0);
+ SDValue MulOp0 = Mul->getOperand(0);
+ int ConstMultiplier =
+ cast<ConstantSDNode>(Mul->getOperand(1))->getSExtValue();
+ if ((MulOp0->getOpcode() != ISD::SRL) ||
+ (MulOp0->getOperand(0).getOpcode() != AArch64ISD::RDSVL))
+ return SDValue();
+
+ unsigned AbsConstValue = abs(ConstMultiplier);
+ unsigned OperandShift =
+ cast<ConstantSDNode>(MulOp0->getOperand(1))->getZExtValue();
+
+ // z ≤ ctz(|x|) - w (largest extra shift we can take while keeping y
+ // integral)
+ int UpperBound = llvm::countr_zero(AbsConstValue) - OperandShift;
+
+ // To keep y in range, with B = 31 for x > 0 and B = 32 for x < 0, we need:
+ // 2^(w + z) ≥ ceil(x / B) ⇒ z ≥ ceil_log2(ceil(x / B)) - w (LowerBound).
+ unsigned B = ConstMultiplier < 0 ? 32 : 31;
+ unsigned CeilAxOverB = (AbsConstValue + (B - 1)) / B; // ceil(|x|/B)
+ int LowerBound = llvm::Log2_32_Ceil(CeilAxOverB) - OperandShift;
+
+ // No valid solution found.
+ if (LowerBound > UpperBound)
+ return SDValue();
+
+ // Any value of z in [LowerBound, UpperBound] is valid. Prefer no extra
+ // shift if possible.
+ int Shift = std::min(std::max(/*prefer*/ 0, LowerBound), UpperBound);
+
+ // y = x / 2^(w + z)
+ int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
+ (ConstMultiplier < 0 ? -1 : 1);
+ auto Rdsvl = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getSignedConstant(RdsvlMul, DL, MVT::i32));
+
+ if (Shift == 0)
+ return Rdsvl;
+ return DAG.getNode(Shift < 0 ? ISD::SRL : ISD::SHL, DL, VT, Rdsvl,
+ DAG.getConstant(abs(Shift), DL, MVT::i32),
+ SDNodeFlags::Exact);
+}
+
// Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
// Same for other types with equivalent constants.
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) {
@@ -19604,6 +19661,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(N1))
return SDValue();
+ if (SDValue Ext = performMulRdsvlCombine(N, DAG))
+ return Ext;
+
ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
@@ -26665,11 +26725,34 @@ static SDValue performDUPCombine(SDNode *N,
}
if (N->getOpcode() == AArch64ISD::DUP) {
+ SDValue Op = N->getOperand(0);
+
+ // Optimize DUP(extload/zextload i8/i16/i32) to avoid GPR->FPR transfer.
+ // For example:
+ // v4i32 = DUP (i32 (zextloadi8 addr))
+ // =>
+ // v4i32 = SCALAR_TO_VECTOR (i32 (zextloadi8 addr)) ; Matches to ldr b0
+ // v4i32 = DUPLANE32 (v4i32), 0
+ if (auto *LD = dyn_cast<LoadSDNode>(Op)) {
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ EVT MemVT = LD->getMemoryVT();
+ EVT ElemVT = VT.getVectorElementType();
+ if ((ExtType == ISD::EXTLOAD || ExtType == ISD::ZEXTLOAD) &&
+ (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
+ ElemVT != MemVT && LD->hasOneUse()) {
+ EVT Vec128VT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT,
+ 128 / ElemVT.getSizeInBits());
+ SDValue ScalarToVec =
+ DCI.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, Op);
+ return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, ScalarToVec,
+ DCI.DAG.getConstant(0, DL, MVT::i64));
+ }
+ }
+
// If the instruction is known to produce a scalar in SIMD registers, we can
// duplicate it across the vector lanes using DUPLANE instead of moving it
// to a GPR first. For example, this allows us to handle:
// v4i32 = DUP (i32 (FCMGT (f32, f32)))
- SDValue Op = N->getOperand(0);
// FIXME: Ideally, we should be able to handle all instructions that
// produce a scalar value in FPRs.
if (Op.getOpcode() == AArch64ISD::FCMEQ ||
@@ -29430,15 +29513,6 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
TargetLowering::insertSSPDeclarations(M);
}
-Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
- // MSVC CRT has a function to validate security cookie.
- RTLIB::LibcallImpl SecurityCheckCookieLibcall =
- getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
- if (SecurityCheckCookieLibcall != RTLIB::Unsupported)
- return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));
- return TargetLowering::getSSPStackGuardCheck(M);
-}
-
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
@@ -29447,11 +29521,6 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
- // Fuchsia is similar.
- // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
- if (Subtarget->isTargetFuchsia())
- return UseTlsOffset(IRB, -0x8);
-
return TargetLowering::getSafeStackPointerLocation(IRB);
}
@@ -29769,7 +29838,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
// Checks to allow the use of SME instructions
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
- auto CallAttrs = SMECallAttrs(*Base, this);
+ auto CallAttrs = SMECallAttrs(*Base, &getRuntimeLibcallsInfo());
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingZT0() ||
CallAttrs.requiresPreservingAllZAState())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9495c9f..2cb8ed2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -366,7 +366,6 @@ public:
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
void insertSSPDeclarations(Module &M) const override;
- Function *getSSPStackGuardCheck(const Module &M) const override;
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 09ce713..58a53af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1894,6 +1894,21 @@ def btihint_op : Operand<i32> {
}];
}
+def CMHPriorityHintOperand : AsmOperandClass {
+ let Name = "CMHPriorityHint";
+ let ParserMethod = "tryParseCMHPriorityHint";
+}
+
+def CMHPriorityHint_op : Operand<i32> {
+ let ParserMatchClass = CMHPriorityHintOperand;
+ let PrintMethod = "printCMHPriorityHintOp";
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ return AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(MCOp.getImm()) != nullptr;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -4636,6 +4651,48 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterOperand regtype,
GPR64sp:$Rn, 0)>;
}
+class BaseLoadStoreAcquirePairOffset<bits<4> opc, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, #0]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{31-23} = 0b110110010;
+ let Inst{22} = L;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rt2;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass LoadAcquirePairOffset<bits<4> opc, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStoreAcquirePairOffset<opc, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64sp:$Rn), asm>,
+ Sched<[WriteAtomic, WriteLDHi]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn)>;
+}
+
+
+multiclass StoreAcquirePairOffset<bits<4> opc, string asm> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+ def i : BaseLoadStoreAcquirePairOffset<opc, 0b0, (outs),
+ (ins GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") GPR64:$Rt, GPR64:$Rt2,
+ GPR64sp:$Rn)>;
+}
+
// (pre-indexed)
class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
@@ -5241,7 +5298,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
- SDPatternOperator OpN = null_frag> {
+ SDPatternOperator OpN> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
@@ -6481,8 +6538,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16i8_v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6491,6 +6547,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16_v8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v4f16_v2f32 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9112,6 +9185,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16_v2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16_v4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
@@ -13227,3 +13307,34 @@ multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
let Predicates = [HasNEON, HasF8F32MM];
}
}
+
+//----------------------------------------------------------------------------
+// Contention Management Hints - FEAT_CMH
+//----------------------------------------------------------------------------
+
+class SHUHInst<string asm> : I<
+ (outs),
+ (ins CMHPriorityHint_op:$priority),
+ asm, "\t$priority", "", []>, Sched<[]> {
+ bits<1> priority;
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = 0b0110;
+ let Inst{7-6} = 0b01;
+ let Inst{5} = priority;
+ let Inst{4-0} = 0b11111;
+}
+
+multiclass SHUH<string asm> {
+ def NAME : SHUHInst<asm>;
+ def : InstAlias<asm, (!cast<Instruction>(NAME) 0), 1>;
+}
+
+class STCPHInst<string asm> : I<
+ (outs),
+ (ins),
+ asm, "", "", []>, Sched<[]> {
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = 0b0110;
+ let Inst{7-5} = 0b100;
+ let Inst{4-0} = 0b11111;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d5117da..457e540 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5151,7 +5151,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// GPR32 zeroing
if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
- if (Subtarget.hasZeroCycleZeroingGPR32()) {
+ if (Subtarget.hasZeroCycleZeroingGPR64() &&
+ !Subtarget.hasZeroCycleZeroingGPR32()) {
+ MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ assert(DestRegX.isValid() && "Destination super-reg not valid");
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f788c75..b9e299e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -50,63 +50,44 @@ def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
-
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
- AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
-
+ AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
- AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
-
+ AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
- AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
-
+ AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
- AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
-
+ AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
- AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
-
+ AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
def HasPAuthLR : Predicate<"Subtarget->hasPAuthLR()">,
- AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
-
+ AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;
def HasJS : Predicate<"Subtarget->hasJS()">,
- AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
-
+ AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
- AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
-
-def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
- AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
-
+ AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
+def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
+ AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
- AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
-
+ AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
- AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
-
+ AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
- AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
-
-def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
- AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
-
+ AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
+def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
+ AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
- AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
-
+ AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
- AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
-
-def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
- AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
-
+ AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
+def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
+ AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
- AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
-
-def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
- AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
-
+ AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
+def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
+ AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
+ AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
@@ -149,13 +130,13 @@ def HasSVE2 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasS
AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
-def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">,
+def HasSVEAES : Predicate<"Subtarget->hasSVEAES()">,
AssemblerPredicateWithAll<(all_of FeatureSVEAES), "sve-aes">;
-def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">,
+def HasSVESM4 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVESM4()">,
AssemblerPredicateWithAll<(all_of FeatureSVESM4), "sve-sm4">;
-def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">,
+def HasSVESHA3 : Predicate<"Subtarget->hasSVESHA3()">,
AssemblerPredicateWithAll<(all_of FeatureSVESHA3), "sve-sha3">;
-def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">,
+def HasSVEBitPerm : Predicate<"Subtarget->hasSVEBitPerm()">,
AssemblerPredicateWithAll<(all_of FeatureSVEBitPerm), "sve-bitperm">;
def HasSMEandIsNonStreamingSafe
: Predicate<"Subtarget->hasSME()">,
@@ -196,7 +177,7 @@ def HasSSVE_FP8DOT2 : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
"(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
(all_of FeatureSVE2, FeatureFP8DOT2)),
- "ssve-fp8dot2 or (sve2 and fp8dot2)">;
+ "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4 : Predicate<"Subtarget->hasFP8DOT4()">,
AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
@@ -204,43 +185,60 @@ def HasSSVE_FP8DOT4 : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
(all_of FeatureSVE2, FeatureFP8DOT4)),
"ssve-fp8dot4 or (sve2 and fp8dot4)">;
-def HasLUT : Predicate<"Subtarget->hasLUT()">,
+def HasLUT : Predicate<"Subtarget->hasLUT()">,
AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
-def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
+def HasSME_LUTv2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
-def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
+def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
-def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
+def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
-def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">,
+def HasSME_MOP4 : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_MOP4())">,
AssemblerPredicateWithAll<(all_of FeatureSME_MOP4), "sme-mop4">;
-def HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">,
+def HasSME_TMOP : Predicate<"(Subtarget->isStreaming() && Subtarget->hasSME_TMOP())">,
AssemblerPredicateWithAll<(all_of FeatureSME_TMOP), "sme-tmop">;
-
-def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
+def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
-def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
+def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
-def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
+def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
-def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
+def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
-def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
+def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
-def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
+def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
-def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
+def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
-def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
+def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
-def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
+def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
def HasPCDPHINT : Predicate<"Subtarget->hasPCDPHINT()">,
- AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
+ AssemblerPredicateWithAll<(all_of FeaturePCDPHINT), "pcdphint">;
def HasLSUI : Predicate<"Subtarget->hasLSUI()">,
- AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
+ AssemblerPredicateWithAll<(all_of FeatureLSUI), "lsui">;
def HasOCCMO : Predicate<"Subtarget->hasOCCMO()">,
- AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+ AssemblerPredicateWithAll<(all_of FeatureOCCMO), "occmo">;
+def HasCMH : Predicate<"Subtarget->hasCMH()">,
+ AssemblerPredicateWithAll<(all_of FeatureCMH), "cmh">;
+def HasLSCP : Predicate<"Subtarget->hasLSCP()">,
+ AssemblerPredicateWithAll<(all_of FeatureLSCP), "lscp">;
+def HasSVE2p2 : Predicate<"Subtarget->hasSVE2p2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE2p2), "sve2p2">;
+def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_B16MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">;
+def HasF16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF16MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">;
+def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
+def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
+ AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -310,6 +308,10 @@ def HasSVE2p2_or_SME2p2
: Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2),
"sme2p2 or sve2p2">;
+def HasSVE2p3_or_SME2p3
+ : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p3() || Subtarget->hasSME2p3())">,
+ AssemblerPredicateWithAll<(any_of FeatureSME2p3, FeatureSVE2p3),
+ "sme2p3 or sve2p3">;
def HasNonStreamingSVE2p2_or_SME2p2
: Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||"
"(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
@@ -328,100 +330,110 @@ def HasNEONandIsStreamingSafe
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
// A subset of NEON instructions are legal in Streaming SVE mode only with +sme2p2.
def HasNEONandIsSME2p2StreamingSafe
- : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
- AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
+ : Predicate<"Subtarget->isNeonAvailable() || (Subtarget->hasNEON() && Subtarget->hasSME2p2())">,
+ AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
- AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
+ AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
- AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
+ AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
-def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
- AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
+ AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
+def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
+ AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
- AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
+ AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
- AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
+ AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
- AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
+ AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
- AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
+ AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
- AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
+ AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
- AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
+ AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
- AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
+ AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasNoBF16 : Predicate<"!Subtarget->hasBF16()">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
- AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
- AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
+ AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
- AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
+ AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
- AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
+ AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
- AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
+ AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
- AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
+ AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
- AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
+ AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
- AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
+ AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
- AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
+ AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
- AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
+ AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
- AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
+ AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
- AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
+ AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
- AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
+ AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
- AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
+ AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
- AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
+ AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK : Predicate<"Subtarget->hasCHK()">,
- AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
+ AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS : Predicate<"Subtarget->hasGCS()">,
- AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
+ AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA : Predicate<"Subtarget->hasCPA()">,
- AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
+ AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
+def HasTLBID : Predicate<"Subtarget->hasTLBID()">,
+ AssemblerPredicateWithAll<(all_of FeatureTLBID), "tlbid">;
+def HasMPAMv2 : Predicate<"Subtarget->hasMPAMv2()">,
+ AssemblerPredicateWithAll<(all_of FeatureMPAMv2), "mpamv2">;
+def HasMTETC : Predicate<"Subtarget->hasMTETC()">,
+ AssemblerPredicateWithAll<(all_of FeatureMTETC), "mtetc">;
+def HasGCIE : Predicate<"Subtarget->hasGCIE()">,
+ AssemblerPredicateWithAll<(all_of FeatureGCIE), "gcie">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
- : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
+ : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
- : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+ : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
def UseNegativeImmediates
- : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
- "NegativeImmediates">;
+ : Predicate<"false">,
+ AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
+ "NegativeImmediates">;
-def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
-def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">;
+def HasFastIncVL : Predicate<"!Subtarget->hasDisableFastIncVL()">;
-def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
+def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;
+def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
-def AllowMisalignedMemAccesses : Predicate<"!Subtarget->requiresStrictAlign()">;
+def AllowMisalignedMemAccesses
+ : Predicate<"!Subtarget->requiresStrictAlign()">;
def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
@@ -3692,6 +3704,12 @@ def UDF : UDFType<0, "udf">;
// Load instructions.
//===----------------------------------------------------------------------===//
+let Predicates = [HasLSCP] in {
+defm LDAP : LoadAcquirePairOffset<0b0101, "ldap">;
+defm LDAPP : LoadAcquirePairOffset<0b0111, "ldapp">;
+defm STLP : StoreAcquirePairOffset<0b0101, "stlp">;
+}
+
// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
@@ -4004,26 +4022,6 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
-// load zero-extended i32, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
-
-// load zero-extended i16, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-
-// load zero-extended i8, bitcast to f64
-def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
-// load zero-extended i16, bitcast to f32
-def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
-
-// load zero-extended i8, bitcast to f32
-def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
- (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
-
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -4375,6 +4373,64 @@ def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro64.Xpat GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro64.Xext:$extend)>;
+// Patterns for bitconvert or scalar_to_vector of load operations.
+// Enables direct SIMD register loads for small integer types (i8/i16) that are
+// naturally zero-extended to i32/i64.
+multiclass ExtLoad8_16AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16> {
+ // 8-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURBi GPR64sp:$Rn, simm9:$offset), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend), bsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp8 (ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$extend), bsub)>;
+
+ // 16-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURHi GPR64sp:$Rn, simm9:$offset), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$extend), hsub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp16 (ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$extend), hsub)>;
+}
+
+// Extended multiclass that includes 32-bit loads in addition to 8-bit and 16-bit.
+multiclass ExtLoad8_16_32AllModes<ValueType OutTy, ValueType InnerTy,
+ SDPatternOperator OuterOp,
+ PatFrags LoadOp8, PatFrags LoadOp16, PatFrags LoadOp32> {
+ defm : ExtLoad8_16AllModes<OutTy, InnerTy, OuterOp, LoadOp8, LoadOp16>;
+
+ // 32-bit loads.
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDURSi GPR64sp:$Rn, simm9:$offset), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$extend), ssub)>;
+ def : Pat<(OutTy (OuterOp (InnerTy (LoadOp32 (ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend))))),
+ (SUBREG_TO_REG (i64 0), (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$extend), ssub)>;
+}
+
+// Instantiate bitconvert patterns for floating-point types.
+defm : ExtLoad8_16AllModes<f32, i32, bitconvert, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16_32AllModes<f64, i64, bitconvert, zextloadi8, zextloadi16, zextloadi32>;
+
+// Instantiate scalar_to_vector patterns for all vector types.
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v16i8, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v8i16, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, zextloadi8, zextloadi16>;
+defm : ExtLoad8_16AllModes<v4i32, i32, scalar_to_vector, extloadi8, extloadi16>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, zextloadi8, zextloadi16, zextloadi32>;
+defm : ExtLoad8_16_32AllModes<v2i64, i64, scalar_to_vector, extloadi8, extloadi16, extloadi32>;
+
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch timm:$Rt,
@@ -5239,113 +5295,10 @@ let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
- defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
- defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
-}
-
-
-// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+ defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+ defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
}
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
- def : Pat<(i32 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- // These instructions saturate like fp_to_[su]int_sat.
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
let Predicates = [HasFullFP16] in {
@@ -6553,8 +6506,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6574,6 +6527,7 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
// Floating-point conversion patterns.
multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+ let Predicates = [HasFPRCVT] in {
def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
@@ -6582,6 +6536,7 @@ multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ }
def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
@@ -6596,6 +6551,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
@@ -6652,6 +6609,196 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
+defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode to_int_sat_gi, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ // These instructions saturate like fp_to_[su]int_sat.
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f16:$Rn))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f32:$Rn))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi (round f64:$Rn))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi (round f32:$Rn))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi (round f64:$Rn))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
+ (!cast<Instruction>(INST # v1i64) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fp_to_sint_sat_gi, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fp_to_uint_sat_gi, fround, "FCVTAU">;
+
// f16 -> s16 conversions
let Predicates = [HasFullFP16] in {
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
@@ -11248,8 +11395,28 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+
+//===----------------------------------------------------------------------===//
+// Contention Management Hints (FEAT_CMH)
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasCMH] in {
+ defm SHUH : SHUH<"shuh">; // Shared Update Hint instruction
+ def STCPH : STCPHInst<"stcph">; // Store Concurrent Priority Hint instruction
+}
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index d1832f4..f680a5e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -651,7 +651,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
void mappingImpl(yaml::IO &YamlIO) override;
- ~AArch64FunctionInfo() = default;
+ ~AArch64FunctionInfo() override = default;
};
template <> struct MappingTraits<AArch64FunctionInfo> {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 72a7676..47d76f3 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -154,7 +154,7 @@ public:
bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const override;
- virtual bool isIgnoredCVReg(MCRegister LLVMReg) const override;
+ bool isIgnoredCVReg(MCRegister LLVMReg) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 47144c7..cd94a25 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1341,6 +1341,10 @@ def Z_q : RegisterOperand<ZPR, "printTypedVectorList<0,'q'>"> {
let ParserMatchClass = ZPRVectorList<128, 1>;
}
+def ZZ_Any : RegisterOperand<ZPR2, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorList<0, 2>;
+}
+
def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorList<8, 2>;
}
@@ -1361,6 +1365,10 @@ def ZZ_q : RegisterOperand<ZPR2, "printTypedVectorList<0,'q'>"> {
let ParserMatchClass = ZPRVectorList<128, 2>;
}
+def ZZZ_Any : RegisterOperand<ZPR3, "printTypedVectorList<0,0>"> {
+ let ParserMatchClass = ZPRVectorList<0, 3>;
+}
+
def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> {
let ParserMatchClass = ZPRVectorList<8, 3>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index e552afe..752b185 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1173,3 +1173,14 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a", "int_aarch64_sme_mop4a">;
defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s", "int_aarch64_sme_mop4s">;
}
+
+//===----------------------------------------------------------------------===//
+// SME2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSME2p3] in {
+ def LUTI6_ZTZ : sme2_lut_single<"luti6">;
+ def LUTI6_4ZT3Z : sme2_luti6_zt_consecutive<"luti6">;
+ def LUTI6_S_4ZT3Z : sme2_luti6_zt_strided<"luti6">;
+ def LUTI6_4Z2Z2ZI : sme2_luti6_vector_vg4_consecutive<"luti6">;
+ def LUTI6_S_4Z2Z2ZI : sme2_luti6_vector_vg4_strided<"luti6">;
+} // [HasSME2p3]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 98a128e..3b268dc 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2569,7 +2569,7 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
} // End HasBF16, HasSVE_or_SME
let Predicates = [HasBF16, HasSVE] in {
- defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
+ defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b011, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
} // End HasBF16, HasSVE
let Predicates = [HasBF16, HasSVE_or_SME] in {
@@ -3680,15 +3680,15 @@ let Predicates = [HasSVE_or_SME, HasMatMulInt8] in {
} // End HasSVE_or_SME, HasMatMulInt8
let Predicates = [HasSVE, HasMatMulFP32] in {
- defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
+ defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b101, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
} // End HasSVE, HasMatMulFP32
let Predicates = [HasSVE_F16F32MM] in {
- def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
+ def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16>;
} // End HasSVE_F16F32MM
let Predicates = [HasSVE, HasMatMulFP64] in {
- defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
+ defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b111, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
@@ -4272,9 +4272,9 @@ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$
defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>;
-defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
-defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
-defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
+defm SQRSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
+defm UQRSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
+defm SQRSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
defm WHILEGE_2PXX : sve2p1_int_while_rr_pair<"whilege", 0b000>;
defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>;
@@ -4615,6 +4615,75 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
defm REVD_ZPzZ : sve_int_perm_rev_revd_z<"revd", AArch64revd_mt>;
} // End HasSME2p2orSVE2p2
+
+//===----------------------------------------------------------------------===//
+// SME2.3 or SVE2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p3_or_SME2p3] in {
+ // SVE2 Add pairwise within quadword vector segments (unpredicated)
+ defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", null_frag>;
+
+ // SVE2 Add subtract/subtract pairwise
+ defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", null_frag>;
+ defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>;
+
+ // SVE2 integer absolute difference and accumulate long
+ defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
+ defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
+
+ // SVE2 integer dot product
+ def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;
+ def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>;
+
+ // SVE2 integer indexed dot product
+ def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">;
+ def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
+
+ // SVE2 fp convert, narrow and interleave to integer, rounding toward zero
+ defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
+ defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>;
+
+ // SVE2 signed/unsigned integer convert to floating-point
+ defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>;
+ defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>;
+ defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>;
+ defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>;
+
+ // SVE2 saturating shift right narrow by immediate and interleave
+ defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>;
+ defm SQRSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrun", 0b001>;
+ defm SQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrn", 0b000>;
+ defm SQSHRUN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqshrun", 0b100>;
+ defm UQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqrshrn", 0b111>;
+ defm UQSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"uqshrn", 0b010>;
+ defm SQSHRUN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrun", 0b100, null_frag>;
+ defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, null_frag>;
+ defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, null_frag>;
+
+ defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
+} // End HasSME2p3orSVE2p3
+
+//===----------------------------------------------------------------------===//
+// SVE2.3 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p3] in {
+ def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE_B16MM Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE_B16MM] in {
+ def BFMMLA_ZZZ_H : sve_fp_matrix_mla<0b110, "bfmmla", ZPR16, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// F16MM Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2p2, HasF16MM] in {
+ def FMMLA_ZZZ_H : sve_fp_matrix_mla<0b100, "fmmla", ZPR16, ZPR16>;
+}
+
//===----------------------------------------------------------------------===//
// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index bdde8e3..2387f17 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2762,11 +2762,11 @@ def : InstRW<[V2Write_11c_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
def : InstRW<[V2Write_11c_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
// Non temporal store, scalar + imm
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BHWD]_ZRI$")>;
// Non temporal store, scalar + scalar
-def : InstRW<[V2Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
-def : InstRW<[V2Write_2c_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+def : InstRW<[V2Write_2c_1L01_1S_1V01], (instrs STNT1H_ZRR)>;
+def : InstRW<[V2Write_2c_1L01_1V01], (instregex "^STNT1[BWD]_ZRR$")>;
// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[V2Write_4c_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 9438917..ae46d71 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -205,6 +205,7 @@ def lookupDCByName : SearchIndex {
let Key = ["Name"];
}
+// Op1 CRn CRm Op2
def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
@@ -241,6 +242,11 @@ def : DC<"CIGDVAC", 0b011, 0b0111, 0b1110, 0b101>;
def : DC<"GZVA", 0b011, 0b0111, 0b0100, 0b100>;
}
+let Requires = [{ {AArch64::FeatureMTETC} }] in {
+def : DC<"ZGBVA", 0b011, 0b0111, 0b0100, 0b101>;
+def : DC<"GBVA", 0b011, 0b0111, 0b0100, 0b111>;
+}
+
let Requires = [{ {AArch64::FeatureMEC} }] in {
def : DC<"CIPAE", 0b100, 0b0111, 0b1110, 0b000>;
def : DC<"CIGDPAE", 0b100, 0b0111, 0b1110, 0b111>;
@@ -813,11 +819,26 @@ def : BTI<"j", 0b100>;
def : BTI<"jc", 0b110>;
//===----------------------------------------------------------------------===//
+// CMHPriority instruction options.
+//===----------------------------------------------------------------------===//
+
+class CMHPriorityHint<string name, bits<1> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<1> Encoding;
+ let Encoding = encoding;
+}
+
+def : CMHPriorityHint<"ph", 0b1>;
+
+//===----------------------------------------------------------------------===//
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg> {
+ bits<3> op2, bit needsreg, bit optionalreg> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -825,24 +846,25 @@ class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm,
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
bit NeedsReg = needsreg;
+ bit OptionalReg = optionalreg;
list<string> Requires = [];
list<string> ExtraRequires = [];
code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
}
class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg>
- : TLBICommon<name, op1, crn, crm, op2, needsreg>;
+ bits<3> op2, bit needsreg, bit optionalreg>
+ : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>;
class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg>
- : TLBICommon<name, op1, crn, crm, op2, needsreg>;
+ bits<3> op2, bit needsreg, bit optionalreg>
+ : TLBICommon<name, op1, crn, crm, op2, needsreg, optionalreg>;
multiclass TLBITableBase {
def NAME # Table : GenericTable {
let FilterClass = NAME # "Entry";
let CppTypeName = NAME;
- let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+ let Fields = ["Name", "Encoding", "NeedsReg", "OptionalReg", "RequiresStr"];
let PrimaryKey = ["Encoding"];
let PrimaryKeyName = "lookup" # NAME # "ByEncoding";
}
@@ -856,60 +878,60 @@ defm TLBI : TLBITableBase;
defm TLBIP : TLBITableBase;
multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg = 1> {
- def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
- def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ bits<3> op2, bit needsreg = 1, bit optionalreg = 0> {
+ def : TLBIEntry<name, op1, crn, crm, op2, needsreg, optionalreg>;
+ def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> {
let Encoding{7} = 1;
let ExtraRequires = ["AArch64::FeatureXS"];
}
if !eq(hasTLBIP, true) then {
- def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>;
- def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ def : TLBIPEntry<name, op1, crn, crm, op2, needsreg, optionalreg>;
+ def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg, optionalreg> {
let Encoding{7} = 1;
let ExtraRequires = ["AArch64::FeatureXS"];
}
}
}
-// hasTLBIP op1 CRn CRm op2 needsreg
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>;
defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>;
-defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0, 1>;
+defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0, 1>;
+defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0, 1>;
defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>;
defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>;
defm : TLBI<"VAE3IS", 1, 0b110, 0b1000, 0b0011, 0b001>;
defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>;
defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>;
-defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>;
+defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0, 1>;
defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>;
defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>;
defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>;
-defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>;
+defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0, 1>;
defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>;
defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>;
defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>;
-defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0, 0>;
+defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0, 0>;
+defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0, 0>;
defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>;
defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>;
defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>;
defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>;
defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>;
-defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0, 0>;
defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>;
defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>;
defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>;
-defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>;
+defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0, 0>;
defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>;
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
let Requires = ["AArch64::FeatureTLB_RMI"] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
-// hasTLBIP op1 CRn CRm op2 needsreg
-defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0>;
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
+defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0, 1>;
defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>;
defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>;
defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>;
@@ -919,15 +941,15 @@ defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>;
defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>;
defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 0b0001, 0b001>;
defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>;
+defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0, 1>;
defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>;
defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>;
-defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>;
-defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0, 1>;
+defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0, 1>;
+defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0, 1>;
// Armv8.4-A TLB Range Maintenance instructions:
-// hasTLBIP op1 CRn CRm op2 needsreg
+// hasTLBIP op1 CRn CRm op2
defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>;
defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>;
defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>;
@@ -962,18 +984,19 @@ defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>;
// Armv9-A Realm Management Extension TLBI Instructions
let Requires = ["AArch64::FeatureRME"] in {
+// hasTLBIP op1 CRn CRm op2 needsreg
defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>;
defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>;
-defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>;
-defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0, 0>;
+defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0, 0>;
}
// Armv9.5-A TLBI VMALL for Dirty State
let Requires = ["AArch64::FeatureTLBIW"] in {
-// op1, CRn, CRm, op2, needsreg
-defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>;
-defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>;
-defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>;
+// hasTLBIP op1 CRn CRm op2 needsreg, optreg
+defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0, 0>;
+defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0, 1>;
+defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0, 1>;
}
//===----------------------------------------------------------------------===//
@@ -1862,13 +1885,6 @@ def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
// v8.4a MPAM registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeatureMPAM} }] in {
-def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
-def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>;
-def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>;
def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>;
def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>;
def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>;
@@ -1878,8 +1894,6 @@ def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>;
def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
-def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
-} //FeatureMPAM
// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
@@ -2319,6 +2333,26 @@ def : RWSysReg<"MPAMBW0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b101>;
def : RWSysReg<"MPAMBWCAP_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b110>;
def : RWSysReg<"MPAMBWSM_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b111>;
+// v9.7a Memory partitioning and monitoring version 2
+// (FEAT_MPAMv2) registers
+// Op0 Op1 CRn CRm Op2
+// MPAM system registers that are also available for MPAMv2
+def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
+def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>;
+def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+// Only MPAMv2 registers
+def : RWSysReg<"MPAMCTL_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMCTL_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b010>;
+def : RWSysReg<"MPAMVIDCR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b000>;
+def : RWSysReg<"MPAMVIDSR_EL2", 0b11, 0b100, 0b1010, 0b0111, 0b001>;
+def : RWSysReg<"MPAMVIDSR_EL3", 0b11, 0b110, 0b1010, 0b0111, 0b001>;
+
//===----------------------------------------------------------------------===//
// FEAT_SRMASK v9.6a registers
//===----------------------------------------------------------------------===//
@@ -2412,3 +2446,251 @@ def : DC<"CIVAPS", 0b000, 0b0111, 0b1111, 0b001>;
let Requires = [{ {AArch64::FeaturePoPS, AArch64::FeatureMTE} }] in {
def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>;
}
+
+// v9.7a TLBI domains system registers (MemSys)
+foreach n = 0-3 in {
+ defvar nb = !cast<bits<3>>(n);
+ def : RWSysReg<"VTLBID"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1000, nb>;
+}
+
+foreach n = 0-3 in {
+ defvar nb = !cast<bits<3>>(n);
+ def : RWSysReg<"VTLBIDOS"#n#"_EL2", 0b11, 0b100, 0b0010, 0b1001, nb>;
+}
+
+def : ROSysReg<"TLBIDIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b110>;
+
+// MPAM Lookaside Buffer Invalidate (MLBI) instructions
+class MLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = needsreg;
+ string RequiresStr = [{ {AArch64::FeatureMPAMv2} }];
+}
+
+def MLBITable : GenericTable {
+ let FilterClass = "MLBI";
+ let CppTypeName = "MLBI";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupMLBIByEncoding";
+}
+
+def lookupMLBIByName : SearchIndex {
+ let Table = MLBITable;
+ let Key = ["Name"];
+}
+
+// Op1 CRn CRm Op2 needsReg
+def : MLBI<"ALLE1", 0b100, 0b0111, 0b0000, 0b100, 0>;
+def : MLBI<"VMALLE1", 0b100, 0b0111, 0b0000, 0b101, 0>;
+def : MLBI<"VPIDE1", 0b100, 0b0111, 0b0000, 0b110, 1>;
+def : MLBI<"VPMGE1", 0b100, 0b0111, 0b0000, 0b111, 1>;
+
+
+// v9.7-A GICv5 (FEAT_GCIE)
+// CPU Interface Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICC_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"ICC_APR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b000>;
+def : RWSysReg<"ICC_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>;
+def : RWSysReg<"ICC_CR0_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b000>;
+def : ROSysReg<"ICC_DOMHPPIR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b010>;
+def : ROSysReg<"ICC_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>;
+def : ROSysReg<"ICC_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>;
+def : ROSysReg<"ICC_HPPIR_EL3", 0b11, 0b110, 0b1100, 0b1001, 0b001>;
+def : ROSysReg<"ICC_IAFFIDR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b101>;
+def : RWSysReg<"ICC_ICSR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b100>;
+def : ROSysReg<"ICC_IDR0_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b010>;
+def : RWSysReg<"ICC_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>;
+def : RWSysReg<"ICC_PCR_EL3", 0b11, 0b110, 0b1100, 0b1000, 0b001>;
+
+// Virtual CPU Interface Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICV_APR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"ICV_CR0_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b001>;
+def : RWSysReg<"ICV_HAPR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b011>;
+def : RWSysReg<"ICV_HPPIR_EL1", 0b11, 0b000, 0b1100, 0b1010, 0b011>;
+def : RWSysReg<"ICV_PCR_EL1", 0b11, 0b001, 0b1100, 0b0000, 0b010>;
+
+foreach n=0-3 in {
+ defvar nb = !cast<bits<2>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICC_PPI_DOMAINR"#n#"_EL3", 0b11, 0b110, 0b1100, 0b1000, {0b1,nb{1-0}}>;
+
+}
+
+foreach n=0-15 in{
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICC_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+// PPI and Virtual PPI Registers
+multiclass PPIRegisters<string prefix> {
+ foreach n=0-1 in {
+ defvar nb = !cast<bit>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<prefix#"_PPI_CACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b00,nb}>;
+ def : RWSysReg<prefix#"_PPI_CPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b10,nb}>;
+ def : RWSysReg<prefix#"_PPI_ENABLER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b11,nb}>;
+ def : RWSysReg<prefix#"_PPI_SACTIVER"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b01,nb}>;
+ def : RWSysReg<prefix#"_PPI_SPENDR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1101, {0b11,nb}>;
+ def : RWSysReg<prefix#"_PPI_HMR"#n#"_EL1", 0b11, 0b000, 0b1100, 0b1010, {0b00,nb}>;
+ }
+}
+
+defm : PPIRegisters<"ICC">; // PPI Registers
+defm : PPIRegisters<"ICV">; // Virtual PPI Registers
+
+foreach n=0-15 in {
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICV_PPI_PRIORITYR"#n#"_EL1", 0b11, 0b000, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+// Hypervisor Control Registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICH_APR_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b100>;
+def : RWSysReg<"ICH_CONTEXTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b110>;
+def : RWSysReg<"ICH_HFGITR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b111>;
+def : RWSysReg<"ICH_HFGRTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
+def : RWSysReg<"ICH_HFGWTR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b110>;
+def : ROSysReg<"ICH_HPPIR_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b101>;
+def : RWSysReg<"ICH_VCTLR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b100>;
+
+foreach n=0-1 in {
+ defvar nb = !cast<bit>(n);
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICH_PPI_ACTIVER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b11,nb}>;
+def : RWSysReg<"ICH_PPI_DVIR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b00,nb}>;
+def : RWSysReg<"ICH_PPI_ENABLER"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b01,nb}>;
+def : RWSysReg<"ICH_PPI_PENDR"#n#"_EL2", 0b11, 0b100, 0b1100, 0b1010, {0b10,nb}>;
+}
+
+foreach n=0-15 in {
+ defvar nb = !cast<bits<4>>(n);
+// Op0 Op1 CRn CRm Op2
+ def : RWSysReg<"ICH_PPI_PRIORITYR"#n#"_EL2", 0b11, 0b100, 0b1100, {0b111,nb{3}}, nb{2-0}>;
+}
+
+//===----------------------------------------------------------------------===//
+// GICv5 instruction options.
+//===----------------------------------------------------------------------===//
+
+// GIC
+class GIC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = 1;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+// GSB
+class GSB<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+// GICR
+class GICR<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2> {
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = 1;
+ string RequiresStr = [{ {AArch64::FeatureGCIE} }];
+}
+
+def GICTable : GenericTable {
+ let FilterClass = "GIC";
+ let CppTypeName = "GIC";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGICByEncoding";
+}
+
+def GSBTable : GenericTable {
+ let FilterClass = "GSB";
+ let CppTypeName = "GSB";
+ let Fields = ["Name", "Encoding", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGSBByEncoding";
+}
+
+def GICRTable : GenericTable {
+ let FilterClass = "GICR";
+ let CppTypeName = "GICR";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+
+ let PrimaryKey = ["Encoding"];
+ let PrimaryKeyName = "lookupGICRByEncoding";
+}
+
+def lookupGICByName : SearchIndex {
+ let Table = GICTable;
+ let Key = ["Name"];
+}
+
+def lookupGSBByName : SearchIndex {
+ let Table = GSBTable;
+ let Key = ["Name"];
+}
+
+def lookupGICRByName : SearchIndex {
+ let Table = GICRTable;
+ let Key = ["Name"];
+}
+
+// Op1 CRn CRm Op2
+def : GSB<"sys", 0b000, 0b1100, 0b0000, 0b000>;
+def : GSB<"ack", 0b000, 0b1100, 0b0000, 0b001>;
+
+// Op1 CRn CRm Op2
+def : GICR<"cdia", 0b000, 0b1100, 0b0011, 0b000>;
+def : GICR<"cdnmia", 0b000, 0b1100, 0b0011, 0b001>;
+
+// Op1 CRn CRm Op2
+def : GIC<"cdaff", 0b000, 0b1100, 0b0001, 0b011>;
+def : GIC<"cddi", 0b000, 0b1100, 0b0010, 0b000>;
+def : GIC<"cddis", 0b000, 0b1100, 0b0001, 0b000>;
+def : GIC<"cden", 0b000, 0b1100, 0b0001, 0b001>;
+def : GIC<"cdeoi", 0b000, 0b1100, 0b0001, 0b111>;
+def : GIC<"cdhm", 0b000, 0b1100, 0b0010, 0b001>;
+def : GIC<"cdpend", 0b000, 0b1100, 0b0001, 0b100>;
+def : GIC<"cdpri", 0b000, 0b1100, 0b0001, 0b010>;
+def : GIC<"cdrcfg", 0b000, 0b1100, 0b0001, 0b101>;
+def : GIC<"vdaff", 0b100, 0b1100, 0b0001, 0b011>;
+def : GIC<"vddi", 0b100, 0b1100, 0b0010, 0b000>;
+def : GIC<"vddis", 0b100, 0b1100, 0b0001, 0b000>;
+def : GIC<"vden", 0b100, 0b1100, 0b0001, 0b001>;
+def : GIC<"vdhm", 0b100, 0b1100, 0b0010, 0b001>;
+def : GIC<"vdpend", 0b100, 0b1100, 0b0001, 0b100>;
+def : GIC<"vdpri", 0b100, 0b1100, 0b0001, 0b010>;
+def : GIC<"vdrcfg", 0b100, 0b1100, 0b0001, 0b101>;
+def : GIC<"ldaff", 0b110, 0b1100, 0b0001, 0b011>;
+def : GIC<"lddi", 0b110, 0b1100, 0b0010, 0b000>;
+def : GIC<"lddis", 0b110, 0b1100, 0b0001, 0b000>;
+def : GIC<"lden", 0b110, 0b1100, 0b0001, 0b001>;
+def : GIC<"ldhm", 0b110, 0b1100, 0b0010, 0b001>;
+def : GIC<"ldpend", 0b110, 0b1100, 0b0001, 0b100>;
+def : GIC<"ldpri", 0b110, 0b1100, 0b0001, 0b010>;
+def : GIC<"ldrcfg", 0b110, 0b1100, 0b0001, 0b101>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e3370d3..fede586 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -224,7 +224,8 @@ static cl::opt<bool> EnableScalableAutovecInStreamingMode(
static bool isSMEABIRoutineCall(const CallInst &CI,
const AArch64TargetLowering &TLI) {
const auto *F = CI.getCalledFunction();
- return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine();
+ return F &&
+ SMEAttrs(F->getName(), TLI.getRuntimeLibcallsInfo()).isSMEABIRoutine();
}
/// Returns true if the function has explicit operations that can only be
@@ -355,7 +356,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
// change only once and avoid inlining of G into F.
SMEAttrs FAttrs(*F);
- SMECallAttrs CallAttrs(Call, getTLI());
+ SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());
if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
@@ -957,23 +958,50 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return TyL.first + ExtraCost;
}
case Intrinsic::get_active_lane_mask: {
- auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType());
- if (RetTy) {
- EVT RetVT = getTLI()->getValueType(DL, RetTy);
- EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
- if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
- !getTLI()->isTypeLegal(RetVT)) {
- // We don't have enough context at this point to determine if the mask
- // is going to be kept live after the block, which will force the vXi1
- // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
- // For now, we just assume the vectorizer created this intrinsic and
- // the result will be the input for a PHI. In this case the cost will
- // be extremely high for fixed-width vectors.
- // NOTE: getScalarizationOverhead returns a cost that's far too
- // pessimistic for the actual generated codegen. In reality there are
- // two instructions generated per lane.
- return RetTy->getNumElements() * 2;
+ auto RetTy = cast<VectorType>(ICA.getReturnType());
+ EVT RetVT = getTLI()->getValueType(DL, RetTy);
+ EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
+ if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
+ break;
+
+ if (RetTy->isScalableTy()) {
+ if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
+ TargetLowering::TypeSplitVector)
+ break;
+
+ auto LT = getTypeLegalizationCost(RetTy);
+ InstructionCost Cost = LT.first;
+ // When SVE2p1 or SME2 is available, we can halve getTypeLegalizationCost
+ // as get_active_lane_mask may lower to the sve_whilelo_x2 intrinsic, e.g.
+ // nxv32i1 = get_active_lane_mask(base, idx) ->
+ // {nxv16i1, nxv16i1} = sve_whilelo_x2(base, idx)
+ if (ST->hasSVE2p1() || ST->hasSME2()) {
+ Cost /= 2;
+ if (Cost == 1)
+ return Cost;
}
+
+ // If more than one whilelo intrinsic is required, include the extra cost
+ // required by the saturating add & select required to increment the
+ // start value after the first intrinsic call.
+ Type *OpTy = ICA.getArgTypes()[0];
+ IntrinsicCostAttributes AddAttrs(Intrinsic::uadd_sat, OpTy, {OpTy, OpTy});
+ InstructionCost SplitCost = getIntrinsicInstrCost(AddAttrs, CostKind);
+ Type *CondTy = OpTy->getWithNewBitWidth(1);
+ SplitCost += getCmpSelInstrCost(Instruction::Select, OpTy, CondTy,
+ CmpInst::ICMP_UGT, CostKind);
+ return Cost + (SplitCost * (Cost - 1));
+ } else if (!getTLI()->isTypeLegal(RetVT)) {
+ // We don't have enough context at this point to determine if the mask
+ // is going to be kept live after the block, which will force the vXi1
+ // type to be expanded to legal vectors of integers, e.g. v4i1->v4i32.
+ // For now, we just assume the vectorizer created this intrinsic and
+ // the result will be the input for a PHI. In this case the cost will
+ // be extremely high for fixed-width vectors.
+ // NOTE: getScalarizationOverhead returns a cost that's far too
+ // pessimistic for the actual generated codegen. In reality there are
+ // two instructions generated per lane.
+ return cast<FixedVectorType>(RetTy)->getNumElements() * 2;
}
break;
}
@@ -1577,18 +1605,26 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
}
static bool isAllActivePredicate(Value *Pred) {
- // Look through convert.from.svbool(convert.to.svbool(...) chain.
Value *UncastedPred;
+
+ // Look through predicate casts that only remove lanes.
if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
- m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
- m_Value(UncastedPred)))))
- // If the predicate has the same or less lanes than the uncasted
- // predicate then we know the casting has no effect.
- if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
- cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
- Pred = UncastedPred;
+ m_Value(UncastedPred)))) {
+ auto *OrigPredTy = cast<ScalableVectorType>(Pred->getType());
+ Pred = UncastedPred;
+
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(UncastedPred))))
+ // If the predicate has the same or less lanes than the uncasted predicate
+ // then we know the casting has no effect.
+ if (OrigPredTy->getMinNumElements() <=
+ cast<ScalableVectorType>(UncastedPred->getType())
+ ->getMinNumElements())
+ Pred = UncastedPred;
+ }
+
auto *C = dyn_cast<Constant>(Pred);
- return (C && C->isAllOnesValue());
+ return C && C->isAllOnesValue();
}
// Simplify `V` by only considering the operations that affect active lanes.
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 636d4f8a..6273cfc 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -159,6 +159,7 @@ private:
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ bool parseSyslAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
bool parseSyspAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond,
@@ -266,6 +267,7 @@ private:
ParseStatus tryParseRPRFMOperand(OperandVector &Operands);
ParseStatus tryParsePSBHint(OperandVector &Operands);
ParseStatus tryParseBTIHint(OperandVector &Operands);
+ ParseStatus tryParseCMHPriorityHint(OperandVector &Operands);
ParseStatus tryParseAdrpLabel(OperandVector &Operands);
ParseStatus tryParseAdrLabel(OperandVector &Operands);
template <bool AddFPZeroAsLiteral>
@@ -370,6 +372,7 @@ private:
k_PSBHint,
k_PHint,
k_BTIHint,
+ k_CMHPriorityHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -499,6 +502,11 @@ private:
unsigned Length;
unsigned Val;
};
+ struct CMHPriorityHintOp {
+ const char *Data;
+ unsigned Length;
+ unsigned Val;
+ };
struct SVCROp {
const char *Data;
@@ -525,6 +533,7 @@ private:
struct PSBHintOp PSBHint;
struct PHintOp PHint;
struct BTIHintOp BTIHint;
+ struct CMHPriorityHintOp CMHPriorityHint;
struct ShiftExtendOp ShiftExtend;
struct SVCROp SVCR;
};
@@ -595,6 +604,9 @@ public:
case k_BTIHint:
BTIHint = o.BTIHint;
break;
+ case k_CMHPriorityHint:
+ CMHPriorityHint = o.CMHPriorityHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -769,6 +781,16 @@ public:
return StringRef(BTIHint.Data, BTIHint.Length);
}
+ unsigned getCMHPriorityHint() const {
+ assert(Kind == k_CMHPriorityHint && "Invalid access!");
+ return CMHPriorityHint.Val;
+ }
+
+ StringRef getCMHPriorityHintName() const {
+ assert(Kind == k_CMHPriorityHint && "Invalid access!");
+ return StringRef(CMHPriorityHint.Data, CMHPriorityHint.Length);
+ }
+
StringRef getSVCR() const {
assert(Kind == k_SVCR && "Invalid access!");
return StringRef(SVCR.Data, SVCR.Length);
@@ -1511,6 +1533,7 @@ public:
bool isPSBHint() const { return Kind == k_PSBHint; }
bool isPHint() const { return Kind == k_PHint; }
bool isBTIHint() const { return Kind == k_BTIHint; }
+ bool isCMHPriorityHint() const { return Kind == k_CMHPriorityHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -2196,6 +2219,11 @@ public:
Inst.addOperand(MCOperand::createImm(getBTIHint()));
}
+ void addCMHPriorityHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getCMHPriorityHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -2547,6 +2575,17 @@ public:
}
static std::unique_ptr<AArch64Operand>
+ CreateCMHPriorityHint(unsigned Val, StringRef Str, SMLoc S, MCContext &Ctx) {
+ auto Op = std::make_unique<AArch64Operand>(k_CMHPriorityHint, Ctx);
+ Op->CMHPriorityHint.Val = Val;
+ Op->CMHPriorityHint.Data = Str.data();
+ Op->CMHPriorityHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<AArch64Operand>
CreateMatrixRegister(unsigned RegNum, unsigned ElementWidth, MatrixKind Kind,
SMLoc S, SMLoc E, MCContext &Ctx) {
auto Op = std::make_unique<AArch64Operand>(k_MatrixRegister, Ctx);
@@ -2656,6 +2695,9 @@ void AArch64Operand::print(raw_ostream &OS, const MCAsmInfo &MAI) const {
case k_BTIHint:
OS << getBTIHintName();
break;
+ case k_CMHPriorityHint:
+ OS << getCMHPriorityHintName();
+ break;
case k_MatrixRegister:
OS << "<matrix " << getMatrixReg() << ">";
break;
@@ -3279,6 +3321,24 @@ ParseStatus AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
return ParseStatus::Success;
}
+/// tryParseCMHPriorityHint - Try to parse a CMHPriority operand
+ParseStatus AArch64AsmParser::tryParseCMHPriorityHint(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ const AsmToken &Tok = getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
+
+ auto CMHPriority =
+ AArch64CMHPriorityHint::lookupCMHPriorityHintByName(Tok.getString());
+ if (!CMHPriority)
+ return TokError("invalid operand for instruction");
+
+ Operands.push_back(AArch64Operand::CreateCMHPriorityHint(
+ CMHPriority->Encoding, Tok.getString(), S, getContext()));
+ Lex(); // Eat identifier token.
+ return ParseStatus::Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
ParseStatus AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
@@ -3824,6 +3884,18 @@ static const struct Extension {
{"ssve-bitperm", {AArch64::FeatureSSVE_BitPerm}},
{"sme-mop4", {AArch64::FeatureSME_MOP4}},
{"sme-tmop", {AArch64::FeatureSME_TMOP}},
+ {"cmh", {AArch64::FeatureCMH}},
+ {"lscp", {AArch64::FeatureLSCP}},
+ {"tlbid", {AArch64::FeatureTLBID}},
+ {"mpamv2", {AArch64::FeatureMPAMv2}},
+ {"mtetc", {AArch64::FeatureMTETC}},
+ {"gcie", {AArch64::FeatureGCIE}},
+ {"sme2p3", {AArch64::FeatureSME2p3}},
+ {"sve2p3", {AArch64::FeatureSVE2p3}},
+ {"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
+ {"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -3861,6 +3933,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv9.5a";
else if (FBS[AArch64::HasV9_6aOps])
Str += "ARMv9.6a";
+ else if (FBS[AArch64::HasV9_7aOps])
+ Str += "ARMv9.7a";
else if (FBS[AArch64::HasV8_0rOps])
Str += "ARMv8r";
else {
@@ -3894,8 +3968,9 @@ void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
}
-/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
-/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
+/// parseSysAlias - The IC, DC, AT, TLBI, MLBI and GIC{R} and GSB instructions
+/// are simple aliases for the SYS instruction. Parse them specially so that
+/// we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
if (Name.contains('.'))
@@ -3908,6 +3983,8 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
bool ExpectRegister = true;
+ bool OptionalRegister = false;
+ bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
if (Mnemonic == "ic") {
const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3950,13 +4027,50 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError(Str);
}
ExpectRegister = TLBI->NeedsReg;
+ bool hasTLBID = getSTI().hasFeature(AArch64::FeatureTLBID);
+ if (hasAll || hasTLBID) {
+ OptionalRegister = TLBI->OptionalReg;
+ }
createSysAlias(TLBI->Encoding, Operands, S);
- } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" || Mnemonic == "cosp") {
+ } else if (Mnemonic == "mlbi") {
+ const AArch64MLBI::MLBI *MLBI = AArch64MLBI::lookupMLBIByName(Op);
+ if (!MLBI)
+ return TokError("invalid operand for MLBI instruction");
+ else if (!MLBI->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("MLBI " + std::string(MLBI->Name) + " requires: ");
+ setRequiredFeatureString(MLBI->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = MLBI->NeedsReg;
+ createSysAlias(MLBI->Encoding, Operands, S);
+ } else if (Mnemonic == "gic") {
+ const AArch64GIC::GIC *GIC = AArch64GIC::lookupGICByName(Op);
+ if (!GIC)
+ return TokError("invalid operand for GIC instruction");
+ else if (!GIC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GIC " + std::string(GIC->Name) + " requires: ");
+ setRequiredFeatureString(GIC->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = true;
+ createSysAlias(GIC->Encoding, Operands, S);
+ } else if (Mnemonic == "gsb") {
+ const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByName(Op);
+ if (!GSB)
+ return TokError("invalid operand for GSB instruction");
+ else if (!GSB->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GSB " + std::string(GSB->Name) + " requires: ");
+ setRequiredFeatureString(GSB->getRequiredFeatures(), Str);
+ return TokError(Str);
+ }
+ ExpectRegister = false;
+ createSysAlias(GSB->Encoding, Operands, S);
+ } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" ||
+ Mnemonic == "cosp") {
if (Op.lower() != "rctx")
return TokError("invalid operand for prediction restriction instruction");
- bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
@@ -3989,10 +4103,61 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister)
- return TokError("specified " + Mnemonic + " op requires a register");
- else if (!ExpectRegister && HasRegister)
- return TokError("specified " + Mnemonic + " op does not use a register");
+ if (!OptionalRegister) {
+ if (ExpectRegister && !HasRegister)
+ return TokError("specified " + Mnemonic + " op requires a register");
+ else if (!ExpectRegister && HasRegister)
+ return TokError("specified " + Mnemonic + " op does not use a register");
+ }
+
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
+
+ return false;
+}
+
+/// parseSyslAlias - The GICR instructions are simple aliases for
+/// the SYSL instruction. Parse them specially so that we create a
+/// SYS MCInst.
+bool AArch64AsmParser::parseSyslAlias(StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+
+ Mnemonic = Name;
+ Operands.push_back(
+ AArch64Operand::CreateToken("sysl", NameLoc, getContext()));
+
+ // Now expect two operands (identifier + register)
+ SMLoc startLoc = getLoc();
+ const AsmToken &regTok = getTok();
+ StringRef reg = regTok.getString();
+ unsigned RegNum = matchRegisterNameAlias(reg.lower(), RegKind::Scalar);
+ if (!RegNum)
+ return TokError("expected register operand");
+
+ Operands.push_back(AArch64Operand::CreateReg(
+ RegNum, RegKind::Scalar, startLoc, getLoc(), getContext(), EqualsReg));
+
+ Lex(); // Eat token
+ if (parseToken(AsmToken::Comma))
+ return true;
+
+ // Check for identifier
+ const AsmToken &operandTok = getTok();
+ StringRef Op = operandTok.getString();
+ SMLoc S2 = operandTok.getLoc();
+ Lex(); // Eat token
+
+ if (Mnemonic == "gicr") {
+ const AArch64GICR::GICR *GICR = AArch64GICR::lookupGICRByName(Op);
+ if (!GICR)
+ return Error(S2, "invalid operand for GICR instruction");
+ else if (!GICR->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("GICR " + std::string(GICR->Name) + " requires: ");
+ setRequiredFeatureString(GICR->getRequiredFeatures(), Str);
+ return Error(S2, Str);
+ }
+ createSysAlias(GICR->Encoding, Operands, S2);
+ }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
@@ -4025,7 +4190,7 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc,
return TokError("invalid operand for TLBIP instruction");
const AArch64TLBIP::TLBIP TLBIP(
TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0),
- TLBIPorig->NeedsReg,
+ TLBIPorig->NeedsReg, TLBIPorig->OptionalReg,
HasnXSQualifier
? TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS})
: TLBIPorig->FeaturesRequired);
@@ -4719,6 +4884,13 @@ ParseStatus AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S,
getLoc(), getContext()));
+ if (getTok().is(AsmToken::LBrac)) {
+ ParseStatus Res = tryParseVectorIndex(Operands);
+ if (Res.isFailure())
+ return ParseStatus::Failure;
+ return ParseStatus::Success;
+ }
+
return ParseStatus::Success;
}
@@ -5267,12 +5439,17 @@ bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info,
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- // IC, DC, AT, TLBI and Prediction invalidation instructions are aliases for
- // the SYS instruction.
+ // IC, DC, AT, TLBI, MLBI, GIC{R}, GSB and Prediction invalidation
+ // instructions are aliases for the SYS instruction.
if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" ||
- Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp")
+ Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp" ||
+ Head == "mlbi" || Head == "gic" || Head == "gsb")
return parseSysAlias(Head, NameLoc, Operands);
+ // GICR instructions are aliases for the SYSL instruction.
+ if (Head == "gicr")
+ return parseSyslAlias(Head, NameLoc, Operands);
+
// TLBIP instructions are aliases for the SYSP instruction.
if (Head == "tlbip")
return parseSyspAlias(Head, NameLoc, Operands);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3e55b76..14b0f9a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5126,23 +5126,13 @@ bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
- const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
- const LLT Src2Ty = MRI.getType(Src2Reg);
ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
- // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
- // it's originated from a <1 x T> type. Those should have been lowered into
- // G_BUILD_VECTOR earlier.
- if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
- LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
- return false;
- }
-
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a4313..5f93847 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1201,25 +1201,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return llvm::is_contained(
{v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
})
- // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
- // destinations, we just want those lowered into G_BUILD_VECTOR or
- // G_EXTRACT_ELEMENT.
- .lowerIf([=](const LegalityQuery &Query) {
- return !Query.Types[0].isVector() || !Query.Types[1].isVector();
- })
.moreElementsIf(
[](const LegalityQuery &Query) {
- return Query.Types[0].isVector() && Query.Types[1].isVector() &&
- Query.Types[0].getNumElements() >
- Query.Types[1].getNumElements();
+ return Query.Types[0].getNumElements() >
+ Query.Types[1].getNumElements();
},
changeTo(1, 0))
.moreElementsToNextPow2(0)
.moreElementsIf(
[](const LegalityQuery &Query) {
- return Query.Types[0].isVector() && Query.Types[1].isVector() &&
- Query.Types[0].getNumElements() <
- Query.Types[1].getNumElements();
+ return Query.Types[0].getNumElements() <
+ Query.Types[1].getNumElements();
},
changeTo(0, 1))
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 830a35bb..6d2d705 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -856,7 +856,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_FPTOSI_SAT:
- case TargetOpcode::G_FPTOUI_SAT: {
+ case TargetOpcode::G_FPTOUI_SAT:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
if (DstType.isVector())
break;
@@ -864,11 +866,19 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
break;
}
- OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
+ if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
+ all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ prefersFPUse(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
}
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 35bd244..5c3e26e 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -84,6 +84,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
+ if (Opcode == AArch64::SYSLxt)
+ if (printSyslAlias(MI, STI, O)) {
+ printAnnotation(O, Annot);
+ return;
+ }
+
if (Opcode == AArch64::SYSPxt || Opcode == AArch64::SYSPxt_XZR)
if (printSyspAlias(MI, STI, O)) {
printAnnotation(O, Annot);
@@ -909,13 +915,25 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
Encoding |= CnVal << 7;
Encoding |= Op1Val << 11;
- bool NeedsReg;
+ bool NeedsReg = false;
+ bool OptionalReg = false;
std::string Ins;
std::string Name;
if (CnVal == 7) {
switch (CmVal) {
default: return false;
+ // MLBI aliases
+ case 0: {
+ const AArch64MLBI::MLBI *MLBI =
+ AArch64MLBI::lookupMLBIByEncoding(Encoding);
+ if (!MLBI || !MLBI->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = MLBI->NeedsReg;
+ Ins = "mlbi\t";
+ Name = std::string(MLBI->Name);
+ } break;
// Maybe IC, maybe Prediction Restriction
case 1:
switch (Op1Val) {
@@ -1004,19 +1022,41 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
return false;
NeedsReg = TLBI->NeedsReg;
+ if (STI.hasFeature(AArch64::FeatureAll) ||
+ STI.hasFeature(AArch64::FeatureTLBID))
+ OptionalReg = TLBI->OptionalReg;
Ins = "tlbi\t";
Name = std::string(TLBI->Name);
- }
- else
+ } else if (CnVal == 12) {
+ if (CmVal != 0) {
+ // GIC aliases
+ const AArch64GIC::GIC *GIC = AArch64GIC::lookupGICByEncoding(Encoding);
+ if (!GIC || !GIC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "gic\t";
+ Name = std::string(GIC->Name);
+ } else {
+ // GSB aliases
+ const AArch64GSB::GSB *GSB = AArch64GSB::lookupGSBByEncoding(Encoding);
+ if (!GSB || !GSB->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = false;
+ Ins = "gsb\t";
+ Name = std::string(GSB->Name);
+ }
+ } else
return false;
StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
bool NotXZR = Reg != "xzr";
- // If a mandatory is not specified in the TableGen
+ // If a mandatory or optional register is not specified in the TableGen
// (i.e. no register operand should be present), and the register value
// is not xzr/x31, then disassemble to a SYS alias instead.
- if (NotXZR && !NeedsReg)
+ if (NotXZR && !NeedsReg && !OptionalReg)
return false;
std::string Str = Ins + Name;
@@ -1024,12 +1064,64 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
O << '\t' << Str;
- if (NeedsReg)
+ // For optional registers, don't print the value if it's xzr/x31
+ // since this defaults to xzr/x31 if register is not specified.
+ if (NeedsReg || (OptionalReg && NotXZR))
O << ", " << Reg;
return true;
}
+bool AArch64InstPrinter::printSyslAlias(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+#ifndef NDEBUG
+ unsigned Opcode = MI->getOpcode();
+ assert(Opcode == AArch64::SYSLxt && "Invalid opcode for SYSL alias!");
+#endif
+
+ StringRef Reg = getRegisterName(MI->getOperand(0).getReg());
+ const MCOperand &Op1 = MI->getOperand(1);
+ const MCOperand &Cn = MI->getOperand(2);
+ const MCOperand &Cm = MI->getOperand(3);
+ const MCOperand &Op2 = MI->getOperand(4);
+
+ unsigned Op1Val = Op1.getImm();
+ unsigned CnVal = Cn.getImm();
+ unsigned CmVal = Cm.getImm();
+ unsigned Op2Val = Op2.getImm();
+
+ uint16_t Encoding = Op2Val;
+ Encoding |= CmVal << 3;
+ Encoding |= CnVal << 7;
+ Encoding |= Op1Val << 11;
+
+ std::string Ins;
+ std::string Name;
+
+ if (CnVal == 12) {
+ if (CmVal == 3) {
+ // GICR aliases
+ const AArch64GICR::GICR *GICR =
+ AArch64GICR::lookupGICRByEncoding(Encoding);
+ if (!GICR || !GICR->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ Ins = "gicr";
+ Name = std::string(GICR->Name);
+ } else
+ return false;
+ } else
+ return false;
+
+ std::string Str;
+ llvm::transform(Name, Name.begin(), ::tolower);
+
+ O << '\t' << Ins << '\t' << Reg.str() << ", " << Name;
+
+ return true;
+}
+
bool AArch64InstPrinter::printSyspAlias(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1508,6 +1600,17 @@ void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
markup(O, Markup::Immediate) << '#' << formatImm(btihintop);
}
+void AArch64InstPrinter::printCMHPriorityHintOp(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned priorityhint_op = MI->getOperand(OpNum).getImm();
+ auto PHint =
+ AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(priorityhint_op);
+ if (PHint)
+ O << PHint->Name;
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 15ef2dd..307402d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -52,6 +52,8 @@ public:
protected:
bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
+ bool printSyslAlias(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
bool printSyspAlias(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
bool printRangePrefetchAlias(const MCInst *MI, const MCSubtargetInfo &STI,
@@ -151,6 +153,9 @@ protected:
void printBTIHintOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCMHPriorityHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 33f35ad..99836ae 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3920,6 +3920,78 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}
+// 8-bit Look up table
+class sme2_lut_single<string asm>
+ : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
+ asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+ bits<0> ZTt;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-10} = 0b1100000011001000010000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// Lookup table read with 6-bit indices (8-bit)
+class sme2_luti6_zt_base<RegisterOperand zd_ty, string asm>
+ : I<(outs zd_ty:$Zd), (ins ZTR:$ZTt, ZZZ_Any:$Zn),
+ asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+ bits<0> ZTt;
+ bits<3> Zd;
+ bits<3> Zn;
+ let Inst{31-21} = 0b11000000100;
+ let Inst{19-10} = 0b1010000000;
+ let Inst{9-7} = Zn;
+ let Inst{6-5} = 0b00;
+}
+
+class sme2_luti6_zt_consecutive<string asm>
+ : sme2_luti6_zt_base<ZZZZ_b_mul_r, asm> {
+ let Inst{20} = 0;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+class sme2_luti6_zt_strided<string asm>
+ : sme2_luti6_zt_base<ZZZZ_b_strided, asm> {
+ let Inst{20} = 1;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
+
+//===----------------------------------------------------------------------===//
+// Lookup table read with 6-bit indices (8-bit)
+class sme2_luti6_vector_vg4_base<RegisterOperand zd_ty, string asm>
+ : I<(outs zd_ty:$Zd), (ins ZZ_h:$Zn, ZZ_Any:$Zm, VectorIndexD:$i1),
+ asm, "\t$Zd, $Zn, $Zm$i1", "", []>, Sched<[]> {
+ bits<3> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ bits<1> i1;
+ let Inst{31-23} = 0b110000010;
+ let Inst{22} = i1;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{9-5} = Zn;
+}
+
+class sme2_luti6_vector_vg4_consecutive<string asm>
+ : sme2_luti6_vector_vg4_base<ZZZZ_h_mul_r, asm> {
+ let Inst{15-10} = 0b111101;
+ let Inst{4-2} = Zd;
+ let Inst{1-0} = 0b00;
+}
+
+class sme2_luti6_vector_vg4_strided<string asm>
+ : sme2_luti6_vector_vg4_base<ZZZZ_h_strided, asm> {
+ let Inst{15-10} = 0b111111;
+ let Inst{4} = Zd{2};
+ let Inst{3-2} = 0b00;
+ let Inst{1-0} = Zd{1-0};
+}
+
//===----------------------------------------------------------------------===//
// SME2 MOV
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 3cdd505..1664f4a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3787,7 +3787,7 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri
// SVE Integer Dot Product Group - Indexed Group
//===----------------------------------------------------------------------===//
-class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
+class sve_intx_dot_by_indexed_elem<bit U, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
ZPRRegOp zprty3, Operand itype>
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
@@ -3795,8 +3795,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
"", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
- let Inst{31-23} = 0b010001001;
- let Inst{22} = sz;
+ let Inst{31-24} = 0b01000100;
let Inst{21} = 0b1;
let Inst{15-11} = 0;
let Inst{10} = U;
@@ -3810,16 +3809,18 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
SDPatternOperator op> {
- def _BtoS : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
+ def _BtoS : sve_intx_dot_by_indexed_elem<opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
bits<2> iop;
bits<3> Zm;
+ let Inst{23-22} = 0b10;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _HtoD : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
+ def _HtoD : sve_intx_dot_by_indexed_elem<opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
bits<1> iop;
bits<4> Zm;
- let Inst{20} = iop;
+ let Inst{23-22} = 0b11;
+ let Inst{20} = iop;
let Inst{19-16} = Zm;
}
@@ -3827,6 +3828,16 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _HtoD)>;
}
+class sve_intx_dot_by_indexed_elem_x<bit opc, string asm>
+: sve_intx_dot_by_indexed_elem<opc, asm, ZPR16, ZPR8, ZPR3b8, VectorIndexH32b_timm> {
+ bits<3> iop;
+ bits<3> Zm;
+ let Inst{23} = 0b0;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Complex Integer Dot Product Group
//===----------------------------------------------------------------------===//
@@ -4085,7 +4096,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
bits<5> Zdn;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
- let Inst{21-20} = 0b01;
+ let Inst{21} = 0b0;
let Inst{20-16} = opc{5-1};
let Inst{15-14} = 0b10;
let Inst{13} = opc{0};
@@ -4590,15 +4601,15 @@ multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateopodd, !cast<Instruction>(NAME # _D)>;
}
-class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
+class sve2_int_absdiff_accum<bits<3> sz, bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
- let Inst{31-24} = 0b01000101;
- let Inst{23-22} = sz;
+ let Inst{31-25} = 0b0100010;
+ let Inst{24-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-14} = 0b11;
@@ -4613,10 +4624,10 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
}
multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
- def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
- def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
- def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
- def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+ def _B : sve2_int_absdiff_accum<0b100, { 0b111, opc }, asm, ZPR8, ZPR8>;
+ def _H : sve2_int_absdiff_accum<0b101, { 0b111, opc }, asm, ZPR16, ZPR16>;
+ def _S : sve2_int_absdiff_accum<0b110, { 0b111, opc }, asm, ZPR32, ZPR32>;
+ def _D : sve2_int_absdiff_accum<0b111, { 0b111, opc }, asm, ZPR64, ZPR64>;
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -4626,20 +4637,26 @@ multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
SDPatternOperator op> {
- def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
- def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
- def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+ def _H : sve2_int_absdiff_accum<0b101, { 0b00, opc }, asm, ZPR16, ZPR8>;
+ def _S : sve2_int_absdiff_accum<0b110, { 0b00, opc }, asm, ZPR32, ZPR16>;
+ def _D : sve2_int_absdiff_accum<0b111, { 0b00, opc }, asm, ZPR64, ZPR32>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve2_int_two_way_absdiff_accum_long<bit U, string asm> {
+ def _BtoH : sve2_int_absdiff_accum<0b001, { 0b01, U, 0b1 }, asm, ZPR16, ZPR8>;
+ def _HtoS : sve2_int_absdiff_accum<0b010, { 0b01, U, 0b1 }, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_int_absdiff_accum<0b011, { 0b01, U, 0b1 }, asm, ZPR64, ZPR32>;
+}
+
multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
SDPatternOperator op> {
- def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
+ def _S : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
- def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
+ def _D : sve2_int_absdiff_accum<{ 0b1, opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
ZPR64, ZPR64>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
@@ -9610,17 +9627,18 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
// SVE Floating Point Matrix Multiply Accumulate Group
//===----------------------------------------------------------------------===//
-class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
+class sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
bits<5> Zm;
let Inst{31-24} = 0b01100100;
- let Inst{23-22} = opc;
+ let Inst{23-22} = opc{2-1};
let Inst{21} = 1;
let Inst{20-16} = Zm;
- let Inst{15-10} = 0b111001;
+ let Inst{15-11} = 0b11100;
+ let Inst{10} = opc{0};
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
@@ -9630,10 +9648,12 @@ class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_t
let mayRaiseFPException = 1;
}
-multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
+multiclass sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty,
+ ZPRRegOp reg_ty, SDPatternOperator op,
+ ValueType zda_vt, ValueType reg_vt> {
def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
- def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<zda_vt, op, zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
@@ -10030,18 +10050,19 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
}
// SVE2 multi-vec shift narrow
-class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
- mnemonic, "\t$Zd, $Zn, $imm4",
+class sve2p1_multi_vec_shift_narrow<string mnemonic, ZPRRegOp ZdRC, RegisterOperand ZSrcOp,
+ Operand immtype, bits<3> opc, bits<2> tsz>
+ : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn, immtype:$imm),
+ mnemonic, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
bits<4> Zn;
- bits<4> imm4;
+ bits<4> imm;
let Inst{31-23} = 0b010001011;
let Inst{22} = tsz{1};
let Inst{21} = 0b1;
let Inst{20} = tsz{0};
- let Inst{19-16} = imm4;
+ let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
let Inst{13-11} = opc;
let Inst{10} = 0b0;
@@ -10052,12 +10073,19 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
let hasSideEffects = 0;
}
-multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
- def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
+multiclass sve_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
+ def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR16, ZZ_s_mul_r, vecshiftR16, opc, 0b01> {
+ let Inst{19} = imm{3}; // imm4
+ }
def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
+multiclass sve_multi_vec_round_shift_narrow<string mnemonic, bits<3> opc> {
+ def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, ZPR8, ZZ_h_mul_r, vecshiftR8, opc, 0b00> {
+ let Inst{19} = 0b1; // always 1 for imm3 version
+ }
+}
// SME2 multi-vec contiguous load (scalar plus scalar, two registers)
class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
@@ -11164,7 +11192,7 @@ multiclass sve2_fp8_dot_indexed_s<string asm, SDPatternOperator op> {
def : SVE_4_Op_Pat<nxv4f32, op, nxv4f32, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME)>;
}
-// FP8 Look up table
+// Look up table
class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
Operand idx_ty, bits<4>opc, string mnemonic>
: I<(outs zd_ty:$Zd), (ins zn_ty:$Zn, ZPRAny:$Zm, idx_ty:$idx),
@@ -11183,7 +11211,7 @@ class sve2_lut_vector_index<ZPRRegOp zd_ty, RegisterOperand zn_ty,
let Inst{4-0} = Zd;
}
-// FP8 Look up table read with 2-bit indices
+// Look up table read with 2-bit indices
multiclass sve2_luti2_vector_index<string mnemonic> {
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexS32b, {?, 0b100}, mnemonic> {
bits<2> idx;
@@ -11205,7 +11233,7 @@ multiclass sve2_luti2_vector_index<string mnemonic> {
i32, timm32_0_7, !cast<Instruction>(NAME # _H)>;
}
-// FP8 Look up table read with 4-bit indices
+// Look up table read with 4-bit indices
multiclass sve2_luti4_vector_index<string mnemonic> {
def _B : sve2_lut_vector_index<ZPR8, Z_b, VectorIndexD32b, 0b1001, mnemonic> {
bit idx;
@@ -11226,7 +11254,7 @@ multiclass sve2_luti4_vector_index<string mnemonic> {
i32, timm32_0_3, !cast<Instruction>(NAME # _H)>;
}
-// FP8 Look up table read with 4-bit indices (two contiguous registers)
+// Look up table read with 4-bit indices (two contiguous registers)
multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
def NAME : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexS32b, {?, 0b101}, mnemonic> {
bits<2> idx;
@@ -11250,6 +11278,29 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> {
nxv16i8:$Op3, timm32_0_3:$Op4))>;
}
+// Look up table read with 6-bit indices
+multiclass sve2_luti6_vector_index<string mnemonic> {
+ def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, mnemonic> {
+ bit idx;
+ let Inst{23} = idx;
+ }
+}
+
+// Look up table
+class sve2_luti6_vector<string mnemonic>
+ : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm),
+ mnemonic, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01000101001;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b101011;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
@@ -11280,3 +11331,49 @@ class sve_int_mla_cpa<string asm>
let ElementSize = ZPR64.ElementSize;
}
+
+//===----------------------------------------------------------------------===//
+// FP to Int down-converts
+//===----------------------------------------------------------------------===//
+class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, bits<2> size, bit U>
+ : I<(outs ZdRC:$Zd), (ins ZSrcOp:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<4> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = size;
+ let Inst{21-11} = 0b00110100110;
+ let Inst{10} = U;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp_to_int_downcvt<string asm, bit U> {
+ def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>;
+ def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>;
+ def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>;
+}
+
+//===----------------------------------------------------------------------===//
+// Int to FP up-converts
+//===----------------------------------------------------------------------===//
+class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC,
+ bits<2> size, bits<2> U>
+ : I<(outs ZdRC:$Zd), (ins ZnRC:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = size;
+ let Inst{21-12} = 0b0011000011;
+ let Inst{11-10} = U;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> {
+ def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>;
+ def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>;
+ def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>;
+}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index d6cb0e8..268a229 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -139,6 +139,13 @@ namespace llvm {
}
namespace llvm {
+namespace AArch64CMHPriorityHint {
+#define GET_CMHPRIORITYHINT_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64CMHPriorityHint
+} // namespace llvm
+
+namespace llvm {
namespace AArch64SysReg {
#define GET_SysRegsList_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -190,6 +197,32 @@ namespace AArch64TLBIP {
#define GET_TLBIPTable_IMPL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64TLBIP
+
+namespace AArch64MLBI {
+#define GET_MLBITable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64MLBI
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GIC {
+#define GET_GICTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GIC
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GICR {
+#define GET_GICRTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GICR
+} // namespace llvm
+
+namespace llvm {
+namespace AArch64GSB {
+#define GET_GSBTable_IMPL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GSB
} // namespace llvm
namespace llvm {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index fea33ef..27812e9 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,6 +409,16 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
+struct SysAliasOptionalReg : SysAlias {
+ bool NeedsReg;
+ bool OptionalReg;
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
+ : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
+ FeatureBitset F)
+ : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
+};
+
struct SysAliasImm : SysAlias {
uint16_t ImmValue;
constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
@@ -677,6 +687,14 @@ namespace AArch64BTIHint {
#include "AArch64GenSystemOperands.inc"
}
+namespace AArch64CMHPriorityHint {
+struct CMHPriorityHint : SysAlias {
+ using SysAlias::SysAlias;
+};
+#define GET_CMHPRIORITYHINT_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64CMHPriorityHint
+
namespace AArch64SME {
enum ToggleCondition : unsigned {
Always,
@@ -788,21 +806,53 @@ namespace AArch64SysReg {
}
namespace AArch64TLBI {
- struct TLBI : SysAliasReg {
- using SysAliasReg::SysAliasReg;
- };
- #define GET_TLBITable_DECL
- #include "AArch64GenSystemOperands.inc"
+struct TLBI : SysAliasOptionalReg {
+ using SysAliasOptionalReg::SysAliasOptionalReg;
+};
+#define GET_TLBITable_DECL
+#include "AArch64GenSystemOperands.inc"
}
namespace AArch64TLBIP {
-struct TLBIP : SysAliasReg {
- using SysAliasReg::SysAliasReg;
+struct TLBIP : SysAliasOptionalReg {
+ using SysAliasOptionalReg::SysAliasOptionalReg;
};
#define GET_TLBIPTable_DECL
#include "AArch64GenSystemOperands.inc"
} // namespace AArch64TLBIP
+namespace AArch64MLBI {
+struct MLBI : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_MLBITable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64MLBI
+
+namespace AArch64GIC {
+struct GIC : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_GICTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GIC
+
+namespace AArch64GICR {
+struct GICR : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
+};
+#define GET_GICRTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GICR
+
+namespace AArch64GSB {
+struct GSB : SysAlias {
+ using SysAlias::SysAlias;
+};
+#define GET_GSBTable_DECL
+#include "AArch64GenSystemOperands.inc"
+} // namespace AArch64GSB
+
namespace AArch64II {
/// Target Operand Flag enum.
enum TOF {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index d71f728..085c8588 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -75,8 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
}
void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI) {
- RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ RTLIB::LibcallImpl Impl = RTLCI.getSupportedLibcallImpl(FuncName);
if (Impl == RTLIB::Unsupported)
return;
unsigned KnownAttrs = SMEAttrs::Normal;
@@ -124,21 +124,22 @@ bool SMECallAttrs::requiresSMChange() const {
return true;
}
-SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
+SMECallAttrs::SMECallAttrs(const CallBase &CB,
+ const RTLIB::RuntimeLibcallsInfo *RTLCI)
: CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
if (auto *CalledFunction = CB.getCalledFunction())
- CalledFn = SMEAttrs(*CalledFunction, TLI);
-
- // An `invoke` of an agnostic ZA function may not return normally (it may
- // resume in an exception block). In this case, it acts like a private ZA
- // callee and may require a ZA save to be set up before it is called.
- if (isa<InvokeInst>(CB))
- CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
+ CalledFn = SMEAttrs(*CalledFunction, RTLCI);
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
assert((IsIndirect ||
((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) &&
"SME attributes at callsite do not match declaration");
+
+ // An `invoke` of an agnostic ZA function may not return normally (it may
+ // resume in an exception block). In this case, it acts like a private ZA
+ // callee and may require a ZA save to be set up before it is called.
+ if (isa<InvokeInst>(CB))
+ CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index d26e3cd..28c397e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -12,8 +12,9 @@
#include "llvm/IR/Function.h"
namespace llvm {
-
-class AArch64TargetLowering;
+namespace RTLIB {
+struct RuntimeLibcallsInfo;
+}
class Function;
class CallBase;
@@ -52,14 +53,14 @@ public:
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function &F, const AArch64TargetLowering *TLI = nullptr)
+ SMEAttrs(const Function &F, const RTLIB::RuntimeLibcallsInfo *RTLCI = nullptr)
: SMEAttrs(F.getAttributes()) {
- if (TLI)
- addKnownFunctionAttrs(F.getName(), *TLI);
+ if (RTLCI)
+ addKnownFunctionAttrs(F.getName(), *RTLCI);
}
SMEAttrs(const AttributeList &L);
- SMEAttrs(StringRef FuncName, const AArch64TargetLowering &TLI) {
- addKnownFunctionAttrs(FuncName, TLI);
+ SMEAttrs(StringRef FuncName, const RTLIB::RuntimeLibcallsInfo &RTLCI) {
+ addKnownFunctionAttrs(FuncName, RTLCI);
};
void set(unsigned M, bool Enable = true) {
@@ -157,7 +158,7 @@ public:
private:
void addKnownFunctionAttrs(StringRef FuncName,
- const AArch64TargetLowering &TLI);
+ const RTLIB::RuntimeLibcallsInfo &RTLCI);
void validate() const;
};
@@ -175,7 +176,7 @@ public:
SMEAttrs Callsite = SMEAttrs::Normal)
: CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {}
- SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI);
+ SMECallAttrs(const CallBase &CB, const RTLIB::RuntimeLibcallsInfo *RTLCI);
SMEAttrs &caller() { return CallerFn; }
SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; }