aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp41
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp27
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp10
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp15
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp12
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp127
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h12
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h3
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp9
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp29
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp3
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVCallingConv.cpp16
-rw-r--r--llvm/lib/Target/RISCV/RISCVCallingConv.h8
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td35
-rw-r--r--llvm/lib/Target/RISCV/RISCVGISel.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp136
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp586
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td13
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormatsC.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp14
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td15
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoA.td131
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td192
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td4
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoP.td816
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td37
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td10
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td141
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZa.td4
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td17
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td31
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td45
-rw-r--r--llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp90
-rw-r--r--llvm/lib/Target/RISCV/RISCVMoveMerger.cpp128
-rw-r--r--llvm/lib/Target/RISCV/RISCVProcessors.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp240
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.h7
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedAndes45.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td200
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp41
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp25
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h12
-rw-r--r--llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp136
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp14
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h2
57 files changed, 2454 insertions, 1037 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d71c42c..cd83928 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -121,7 +121,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew,
unsigned &Lmul, bool &Fractional, bool &TailAgnostic,
- bool &MaskAgnostic);
+ bool &MaskAgnostic, bool &AltFmt);
bool generateVTypeError(SMLoc ErrorLoc);
bool generateXSfmmVTypeError(SMLoc ErrorLoc);
@@ -903,6 +903,7 @@ public:
VK == RISCV::S_QC_ABS20;
}
+ bool isSImm8Unsigned() const { return isSImm<8>() || isUImm<8>(); }
bool isSImm10Unsigned() const { return isSImm<10>() || isUImm<10>(); }
bool isUImm20LUI() const {
@@ -1199,6 +1200,14 @@ public:
addExpr(Inst, getImm(), isRV64Imm());
}
+ void addSImm8UnsignedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int64_t Imm;
+ [[maybe_unused]] bool IsConstant = evaluateConstantImm(getImm(), Imm);
+ assert(IsConstant);
+ Inst.addOperand(MCOperand::createImm(SignExtend64<8>(Imm)));
+ }
+
void addSImm10UnsignedOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int64_t Imm;
@@ -1547,6 +1556,9 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 9) - 8,
"immediate must be a multiple of 8 bytes in the range");
+ case Match_InvalidSImm8Unsigned:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 7),
+ (1 << 8) - 1);
case Match_InvalidSImm10:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 9),
(1 << 9) - 1);
@@ -2249,14 +2261,23 @@ ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State,
unsigned &Sew, unsigned &Lmul,
bool &Fractional, bool &TailAgnostic,
- bool &MaskAgnostic) {
+ bool &MaskAgnostic, bool &AltFmt) {
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef Identifier = Tok.getIdentifier();
if (State < VTypeState::SeenSew && Identifier.consume_front("e")) {
- if (Identifier.getAsInteger(10, Sew))
- return true;
+ if (Identifier.getAsInteger(10, Sew)) {
+ if (Identifier == "16alt") {
+ AltFmt = true;
+ Sew = 16;
+ } else if (Identifier == "8alt") {
+ AltFmt = true;
+ Sew = 8;
+ } else {
+ return true;
+ }
+ }
if (!RISCVVType::isValidSEW(Sew))
return true;
@@ -2328,11 +2349,12 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
bool Fractional = false;
bool TailAgnostic = false;
bool MaskAgnostic = false;
+ bool AltFmt = false;
VTypeState State = VTypeState::SeenNothingYet;
do {
if (parseVTypeToken(getTok(), State, Sew, Lmul, Fractional, TailAgnostic,
- MaskAgnostic)) {
+ MaskAgnostic, AltFmt)) {
// The first time, errors return NoMatch rather than Failure
if (State == VTypeState::SeenNothingYet)
return ParseStatus::NoMatch;
@@ -2358,12 +2380,17 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
}
unsigned VTypeI =
- RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic);
+ RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic, AltFmt);
Operands.push_back(RISCVOperand::createVType(VTypeI, S));
return ParseStatus::Success;
}
bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) {
+ if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa))
+ return Error(
+ ErrorLoc,
+ "operand must be "
+ "e[8|8alt|16|16alt|32|64],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
return Error(
ErrorLoc,
"operand must be "
@@ -4053,4 +4080,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVAsmParser() {
RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
+ RegisterMCAsmParser<RISCVAsmParser> A(getTheRISCV32beTarget());
+ RegisterMCAsmParser<RISCVAsmParser> B(getTheRISCV64beTarget());
}
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 67cc01e..dbb16fc 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -25,6 +26,7 @@
#include "llvm/Support/Endian.h"
using namespace llvm;
+using namespace llvm::MCD;
#define DEBUG_TYPE "riscv-disassembler"
@@ -72,6 +74,10 @@ LLVMInitializeRISCVDisassembler() {
createRISCVDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheRISCV64Target(),
createRISCVDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheRISCV32beTarget(),
+ createRISCVDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheRISCV64beTarget(),
+ createRISCVDisassembler);
}
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
@@ -552,16 +558,6 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm,
return decodeZcmpRlist(Inst, Imm, Address, Decoder);
}
-static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-#include "RISCVGenDisassemblerTables.inc"
-
static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -602,6 +598,8 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
return S;
}
+#include "RISCVGenDisassemblerTables.inc"
+
// Add implied SP operand for C.*SP compressed instructions. The SP operand
// isn't explicitly encoded in the instruction.
void RISCVDisassembler::addSPOperands(MCInst &MI) const {
@@ -672,8 +670,13 @@ static constexpr FeatureBitset XAndesGroup = {
RISCV::FeatureVendorXAndesVSIntLoad, RISCV::FeatureVendorXAndesVPackFPH,
RISCV::FeatureVendorXAndesVDot};
+static constexpr FeatureBitset XSMTGroup = {RISCV::FeatureVendorXSMTVDot};
+
static constexpr DecoderListEntry DecoderList32[]{
// Vendor Extensions
+ {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
+ {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
+ {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
{DecoderTableXVentana32,
{RISCV::FeatureVendorXVentanaCondOps},
"XVentanaCondOps"},
@@ -689,10 +692,8 @@ static constexpr DecoderListEntry DecoderList32[]{
{RISCV::FeatureVendorXMIPSCBOP},
"MIPS mips.pref"},
{DecoderTableXAndes32, XAndesGroup, "Andes extensions"},
+ {DecoderTableXSMT32, XSMTGroup, "SpacemiT extensions"},
// Standard Extensions
- {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
- {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
- {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
{DecoderTable32, {}, "standard 32-bit instructions"},
{DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"},
{DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"},
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index d2b75a6..34026ed 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -45,8 +45,8 @@ public:
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
- if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, Info.IsFixed,
- IsRet, Info.Ty))
+ if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet,
+ Info.Ty))
return true;
StackSize = State.getStackSize();
@@ -196,8 +196,8 @@ public:
if (LocVT.isScalableVector())
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
- if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State,
- /*IsFixed=*/true, IsRet, Info.Ty))
+ if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet,
+ Info.Ty))
return true;
StackSize = State.getStackSize();
@@ -454,7 +454,7 @@ bool RISCVCallLowering::canLowerReturn(MachineFunction &MF,
for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
MVT VT = MVT::getVT(Outs[I].Ty);
if (CC_RISCV(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo,
- /*IsFixed=*/true, /*isRet=*/true, nullptr))
+ /*isRet=*/true, nullptr))
return false;
}
return true;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index f83c2b6..51ea3fc 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -736,7 +736,6 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
case TargetOpcode::G_FCONSTANT: {
// TODO: Use constant pool for complex constants.
- // TODO: Optimize +0.0 to use fcvt.d.w for s64 on rv32.
Register DstReg = MI.getOperand(0).getReg();
const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF();
APInt Imm = FPimm.bitcastToAPInt();
@@ -753,8 +752,22 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
if (!FMV.constrainAllUses(TII, TRI, RBI))
return false;
} else {
+ // s64 on rv32
assert(Size == 64 && !Subtarget->is64Bit() &&
"Unexpected size or subtarget");
+
+ if (Imm.isNonNegative() && Imm.isZero()) {
+ // Optimize +0.0 to use fcvt.d.w
+ MachineInstrBuilder FCVT =
+ MIB.buildInstr(RISCV::FCVT_D_W, {DstReg}, {Register(RISCV::X0)})
+ .addImm(RISCVFPRndMode::RNE);
+ if (!FCVT.constrainAllUses(TII, TRI, RBI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
+
// Split into two pieces and build through the stack.
Register GPRRegHigh = MRI->createVirtualRegister(&RISCV::GPRRegClass);
Register GPRRegLow = MRI->createVirtualRegister(&RISCV::GPRRegClass);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index e88f33d..9fd9639 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -26,6 +26,8 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Type.h"
using namespace llvm;
@@ -152,7 +154,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
// TODO: Use Vector Single-Width Saturating Instructions for vector types.
- getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
+ getActionDefinitionsBuilder(
+ {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
.lower();
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
@@ -692,6 +695,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+ getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
+ .legalFor(ST.hasStdExtA(), {{sXLen, p0}})
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen);
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -729,6 +737,8 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::riscv_masked_atomicrmw_add:
+ return true;
}
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 95ec42f..41a9c92 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -32,10 +32,17 @@ static cl::opt<bool> ULEB128Reloc(
"riscv-uleb128-reloc", cl::init(true), cl::Hidden,
cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
+static cl::opt<bool>
+ AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden,
+ cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 "
+ "bytes of NOPs even in norvc code"));
+
RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
- bool Is64Bit, const MCTargetOptions &Options)
- : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit), TargetOptions(Options) {
+ bool Is64Bit, bool IsLittleEndian,
+ const MCTargetOptions &Options)
+ : MCAsmBackend(IsLittleEndian ? llvm::endianness::little
+ : llvm::endianness::big),
+ STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
@@ -306,12 +313,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
// If conditions are met, compute the padding size and create a fixup encoding
// the padding size in the addend.
bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
- // Use default handling unless linker relaxation is enabled and the alignment
- // is larger than the nop size.
- const MCSubtargetInfo *STI = F.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
+ // Alignments before the first linker-relaxable instruction have fixed sizes
+ // and do not require relocations. Alignments after a linker-relaxable
+ // instruction require a relocation, even if the STI specifies norelax.
+ //
+ // firstLinkerRelaxable is the layout order within the subsection, which may
+ // be smaller than the section's order. Therefore, alignments in a
+ // lower-numbered subsection may be unnecessarily treated as linker-relaxable.
+ auto *Sec = F.getParent();
+ if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable())
return false;
- unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
+
+ // Use default handling unless the alignment is larger than the nop size.
+ const MCSubtargetInfo *STI = F.getSubtargetInfo();
+ unsigned MinNopLen =
+ AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
if (F.getAlignment() <= MinNopLen)
return false;
@@ -321,16 +337,12 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
F.setVarFixups({Fixup});
F.setLinkerRelaxable();
- F.getParent()->setLinkerRelaxable();
return true;
}
-bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
- bool &WasRelaxed) const {
+bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F) const {
int64_t LineDelta = F.getDwarfLineDelta();
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
- size_t OldSize = F.getVarSize();
-
int64_t Value;
// If the label difference can be resolved, use the default handling, which
// utilizes a shorter special opcode.
@@ -361,7 +373,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
} else {
PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
- support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+ support::endian::write<uint16_t>(OS, 0, Endian);
}
auto Offset = OS.tell() - PCBytes;
@@ -376,15 +388,12 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
F.setVarContents(Data);
F.setVarFixups({MCFixup::create(Offset, &AddrDelta,
MCFixup::getDataKindForSize(PCBytes))});
- WasRelaxed = OldSize != Data.size();
return true;
}
-bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
+bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F) const {
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
SmallVector<MCFixup, 2> Fixups;
- size_t OldSize = F.getVarSize();
-
int64_t Value;
if (AddrDelta.evaluateAsAbsolute(Value, *Asm))
return false;
@@ -397,7 +406,6 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
if (Value == 0) {
F.clearVarContents();
F.clearVarFixups();
- WasRelaxed = OldSize != 0;
return true;
}
@@ -415,23 +423,21 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
} else if (isUInt<8>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc1);
- support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
+ support::endian::write<uint8_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
} else if (isUInt<16>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+ support::endian::write<uint16_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
} else if (isUInt<32>(Value)) {
OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
+ support::endian::write<uint32_t>(OS, 0, Endian);
AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
} else {
llvm_unreachable("unsupported CFA encoding");
}
F.setVarContents(Data);
F.setVarFixups(Fixups);
-
- WasRelaxed = OldSize != Data.size();
return true;
}
@@ -471,9 +477,12 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
Count -= 1;
}
+ // TODO: emit a mapping symbol right here
+
if (Count % 4 == 2) {
- // The canonical nop with Zca is c.nop.
- OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2);
+ // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte
+ // c.nop even in a norvc region.
+ OS.write("\x01\0", 2);
Count -= 2;
}
@@ -803,6 +812,23 @@ void RISCVAsmBackend::maybeAddVendorReloc(const MCFragment &F,
Asm->getWriter().recordRelocation(F, VendorFixup, VendorTarget, VendorValue);
}
+static bool relaxableFixupNeedsRelocation(const MCFixupKind Kind) {
+ // Some Fixups are marked as LinkerRelaxable by
+ // `RISCVMCCodeEmitter::getImmOpValue` only because they may be
+ // (assembly-)relaxed into a linker-relaxable instruction. This function
+ // should return `false` for those fixups so they do not get a `R_RISCV_RELAX`
+ // relocation emitted in addition to the relocation.
+ switch (Kind) {
+ default:
+ break;
+ case RISCV::fixup_riscv_rvc_jump:
+ case RISCV::fixup_riscv_rvc_branch:
+ case RISCV::fixup_riscv_jal:
+ return false;
+ }
+ return true;
+}
+
bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint64_t &FixedValue,
bool IsResolved) {
@@ -845,30 +871,53 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
return false;
}
- // If linker relaxation is enabled and supported by the current relocation,
- // generate a relocation and then append a RELAX.
- if (Fixup.isLinkerRelaxable())
+ // If linker relaxation is enabled and supported by the current fixup, then we
+ // always want to generate a relocation.
+ bool NeedsRelax = Fixup.isLinkerRelaxable() &&
+ relaxableFixupNeedsRelocation(Fixup.getKind());
+ if (NeedsRelax)
IsResolved = false;
+
if (IsResolved && Fixup.isPCRel())
IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
if (!IsResolved) {
- // Some Fixups require a vendor relocation, record it (directly) before we
+ // Some Fixups require a VENDOR relocation, record it (directly) before we
// add the relocation.
maybeAddVendorReloc(F, Fixup);
Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
- }
- if (Fixup.isLinkerRelaxable()) {
- auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_RISCV_RELAX);
- Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
- FixedValueA);
+ if (NeedsRelax) {
+ // Some Fixups get a RELAX relocation, record it (directly) after we add
+ // the relocation.
+ MCFixup RelaxFixup =
+ MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_RISCV_RELAX);
+ MCValue RelaxTarget = MCValue::get(nullptr);
+ uint64_t RelaxValue;
+ Asm->getWriter().recordRelocation(F, RelaxFixup, RelaxTarget, RelaxValue);
+ }
}
return false;
}
+// Data fixups should be swapped for big endian cores.
+// Instruction fixups should not be swapped as RISC-V instructions
+// are always little-endian.
+static bool isDataFixup(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ return true;
+ }
+}
+
void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
@@ -892,8 +941,11 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
+ // For big endian cores, data fixup should be swapped.
+ bool SwapValue = Endian == llvm::endianness::big && isDataFixup(Kind);
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ unsigned Idx = SwapValue ? (NumBytes - 1 - i) : i;
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
@@ -908,5 +960,6 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
+ return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), TT.isLittleEndian(),
+ Options);
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index adec1ec..5152d05 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -35,7 +35,7 @@ class RISCVAsmBackend : public MCAsmBackend {
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
- const MCTargetOptions &Options);
+ bool IsLittleEndian, const MCTargetOptions &Options);
~RISCVAsmBackend() override = default;
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
@@ -65,8 +65,8 @@ public:
const MCSubtargetInfo &STI) const override;
bool relaxAlign(MCFragment &F, unsigned &Size) override;
- bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override;
- bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override;
+ bool relaxDwarfLineAddr(MCFragment &) const override;
+ bool relaxDwarfCFA(MCFragment &) const override;
std::pair<bool, bool> relaxLEB128(MCFragment &LF,
int64_t &Value) const override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index bddea43..fcea23a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -139,6 +139,9 @@ enum {
// 3 -> SEW * 4
DestEEWShift = ElementsDependOnMaskShift + 1,
DestEEWMask = 3ULL << DestEEWShift,
+
+ ReadsPastVLShift = DestEEWShift + 2,
+ ReadsPastVLMask = 1ULL << ReadsPastVLShift,
};
// Helper functions to read TSFlags.
@@ -195,6 +198,12 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) {
return TSFlags & ElementsDependOnMaskMask;
}
+/// \returns true if the instruction may read elements past VL, e.g.
+/// vslidedown/vrgather
+static inline bool readsPastVL(uint64_t TSFlags) {
+ return TSFlags & ReadsPastVLMask;
+}
+
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
@@ -337,8 +346,11 @@ enum OperandType : unsigned {
OPERAND_SIMM5_PLUS1,
OPERAND_SIMM6,
OPERAND_SIMM6_NONZERO,
+ OPERAND_SIMM8,
+ OPERAND_SIMM8_UNSIGNED,
OPERAND_SIMM10,
OPERAND_SIMM10_LSB0000_NONZERO,
+ OPERAND_SIMM10_UNSIGNED,
OPERAND_SIMM11,
OPERAND_SIMM12,
OPERAND_SIMM12_LSB00000,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 543c4c5..37fe325 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -36,6 +36,12 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
setFlagsFromFeatures(STI);
}
+RISCVELFStreamer::RISCVELFStreamer(MCContext &C,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> MOW,
+ std::unique_ptr<MCCodeEmitter> MCE)
+ : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+
RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() {
return static_cast<RISCVELFStreamer &>(Streamer);
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 98948cd..26da244 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -28,8 +28,7 @@ class RISCVELFStreamer : public MCELFStreamer {
public:
RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> MOW,
- std::unique_ptr<MCCodeEmitter> MCE)
- : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+ std::unique_ptr<MCCodeEmitter> MCE);
void changeSection(MCSection *Section, uint32_t Subsection) override;
void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 8c9ab8e..50f5a5d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -75,7 +75,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (PrintAliases && !NoAliases)
Res = RISCVRVC::uncompress(UncompressedMI, *MI, STI);
if (Res)
- NewMI = const_cast<MCInst *>(&UncompressedMI);
+ NewMI = &UncompressedMI;
if (!PrintAliases || NoAliases || !printAliasInstr(NewMI, Address, STI, O))
printInstruction(NewMI, Address, STI, O);
printAnnotation(O, Annot);
@@ -216,9 +216,12 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
// Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
- // or non-zero in bits 8 and above.
+ // altfmt=1 without zvfbfa extension, or non-zero in bits 9 and above.
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED ||
- RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) {
+ RISCVVType::getSEW(Imm) > 64 ||
+ (RISCVVType::isAltFmt(Imm) &&
+ !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) ||
+ (Imm >> 9) != 0) {
O << formatImm(Imm);
return;
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 090d331..77f65d8 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
+ IsLittleEndian = TT.isLittleEndian();
CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
CommentString = "#";
AlignmentIsInBytes = false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index cbeabdd..717fba6 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -576,8 +576,21 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
"getImmOpValue expects only expressions or immediates");
const MCExpr *Expr = MO.getExpr();
MCExpr::ExprKind Kind = Expr->getKind();
- unsigned FixupKind = RISCV::fixup_riscv_invalid;
+
+ // `RelaxCandidate` must be set to `true` in two cases:
+ // - The fixup's relocation gets a R_RISCV_RELAX relocation
+ // - The underlying instruction may be relaxed to an instruction that gets a
+ // `R_RISCV_RELAX` relocation.
+ //
+ // The actual emission of `R_RISCV_RELAX` will be handled in
+ // `RISCVAsmBackend::applyFixup`.
bool RelaxCandidate = false;
+ auto AsmRelaxToLinkerRelaxableWithFeature = [&](unsigned Feature) -> void {
+ if (!STI.hasFeature(RISCV::FeatureExactAssembly) && STI.hasFeature(Feature))
+ RelaxCandidate = true;
+ };
+
+ unsigned FixupKind = RISCV::fixup_riscv_invalid;
if (Kind == MCExpr::Specifier) {
const auto *RVExpr = cast<MCSpecifierExpr>(Expr);
FixupKind = RVExpr->getSpecifier();
@@ -644,18 +657,26 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
// FIXME: Sub kind binary exprs have chance of underflow.
if (MIFrm == RISCVII::InstFormatJ) {
FixupKind = RISCV::fixup_riscv_jal;
+ AsmRelaxToLinkerRelaxableWithFeature(RISCV::FeatureVendorXqcilb);
} else if (MIFrm == RISCVII::InstFormatB) {
FixupKind = RISCV::fixup_riscv_branch;
+ // This might be assembler relaxed to `b<cc>; jal` but we cannot relax
+ // the `jal` again in the assembler.
} else if (MIFrm == RISCVII::InstFormatCJ) {
FixupKind = RISCV::fixup_riscv_rvc_jump;
+ AsmRelaxToLinkerRelaxableWithFeature(RISCV::FeatureVendorXqcilb);
} else if (MIFrm == RISCVII::InstFormatCB) {
FixupKind = RISCV::fixup_riscv_rvc_branch;
+ // This might be assembler relaxed to `b<cc>; jal` but we cannot relax
+ // the `jal` again in the assembler.
} else if (MIFrm == RISCVII::InstFormatCI) {
FixupKind = RISCV::fixup_riscv_rvc_imm;
} else if (MIFrm == RISCVII::InstFormatI) {
FixupKind = RISCV::fixup_riscv_12_i;
} else if (MIFrm == RISCVII::InstFormatQC_EB) {
FixupKind = RISCV::fixup_riscv_qc_e_branch;
+ // This might be assembler relaxed to `qc.e.b<cc>; jal` but we cannot
+ // relax the `jal` again in the assembler.
} else if (MIFrm == RISCVII::InstFormatQC_EAI) {
FixupKind = RISCV::fixup_riscv_qc_e_32;
RelaxCandidate = true;
@@ -670,9 +691,9 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
assert(FixupKind != RISCV::fixup_riscv_invalid && "Unhandled expression!");
addFixup(Fixups, 0, Expr, FixupKind);
- // If linker relaxation is enabled and supported by this relocation, set
- // a bit so that if fixup is unresolved, a R_RISCV_RELAX relocation will be
- // appended.
+ // If linker relaxation is enabled and supported by this relocation, set a bit
+ // so that the assembler knows the size of the instruction is not fixed/known,
+ // and the relocation will need a R_RISCV_RELAX relocation.
if (EnableRelax && RelaxCandidate)
Fixups.back().setLinkerRelaxable();
++MCNumFixups;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 61ecfb2..d917ef4 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -376,7 +376,8 @@ static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVTargetMC() {
- for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
+ for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target(),
+ &getTheRISCV32beTarget(), &getTheRISCV64beTarget()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 2f32e2a..83566b1 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -611,6 +611,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVAsmPrinter() {
RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target());
+ RegisterAsmPrinter<RISCVAsmPrinter> A(getTheRISCV32beTarget());
+ RegisterAsmPrinter<RISCVAsmPrinter> B(getTheRISCV64beTarget());
}
void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index cb6117e..78f4779 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -324,7 +324,7 @@ static MCRegister allocateRVVReg(MVT ValVT, unsigned ValNo, CCState &State,
// Implements the RISC-V calling convention. Returns true upon failure.
bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
+ CCState &State, bool IsRet, Type *OrigTy) {
const MachineFunction &MF = State.getMachineFunction();
const DataLayout &DL = MF.getDataLayout();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
@@ -379,12 +379,12 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
break;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
- UseGPRForF16_F32 = !IsFixed;
+ UseGPRForF16_F32 = ArgFlags.isVarArg();
break;
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64D:
- UseGPRForF16_F32 = !IsFixed;
- UseGPRForF64 = !IsFixed;
+ UseGPRForF16_F32 = ArgFlags.isVarArg();
+ UseGPRForF64 = ArgFlags.isVarArg();
break;
}
@@ -465,7 +465,7 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
// currently if we are using ILP32E calling convention. This behavior may be
// changed when RV32E/ILP32E is ratified.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
- if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
+ if (ArgFlags.isVarArg() && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
ABI != RISCVABI::ABI_ILP32E) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
@@ -620,8 +620,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
// benchmark. But theoretically, it may have benefit for some cases.
bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State,
- bool IsFixed, bool IsRet, Type *OrigTy) {
+ ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet,
+ Type *OrigTy) {
const MachineFunction &MF = State.getMachineFunction();
const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
const RISCVTargetLowering &TLI = *Subtarget.getTargetLowering();
@@ -741,7 +741,7 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State) {
+ Type *OrigTy, CCState &State) {
if (ArgFlags.isNest()) {
report_fatal_error(
"Attribute 'nest' is not supported in GHC calling convention");
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.h b/llvm/lib/Target/RISCV/RISCVCallingConv.h
index bf823b7..0847dd6 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.h
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.h
@@ -21,19 +21,19 @@ namespace llvm {
typedef bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State,
- bool IsFixed, bool IsRet, Type *OrigTy);
+ bool IsRet, Type *OrigTy);
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, bool IsFixed, bool IsRet, Type *OrigTy);
+ CCState &State, bool IsRet, Type *OrigTy);
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, bool IsFixed, bool IsRet, Type *OrigTy);
+ CCState &State, bool IsRet, Type *OrigTy);
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State);
+ Type *OrigTy, CCState &State);
namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index a7329d2..fa8272b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -234,7 +234,7 @@ def FeatureStdExtZtso
def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">,
AssemblerPredicate<(all_of FeatureStdExtZtso),
"'Ztso' (Memory Model - Total Store Order)">;
-def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">;
+def NoStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">;
def FeatureStdExtZa64rs
: RISCVExtension<1, 0, "Reservation Set Size of at Most 64 Bytes">;
@@ -473,7 +473,7 @@ def FeatureStdExtZba
def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
AssemblerPredicate<(all_of FeatureStdExtZba),
"'Zba' (Address Generation Instructions)">;
-def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
+def NoStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
def FeatureStdExtZbb
: RISCVExtension<1, 0, "Basic Bit-Manipulation">,
@@ -680,6 +680,13 @@ def FeatureStdExtV
[FeatureStdExtZvl128b, FeatureStdExtZve64d]>,
RISCVExtensionBitmask<0, 21>;
+def FeatureStdExtZvfbfa
+ : RISCVExperimentalExtension<0, 1, "Additional BF16 vector compute support",
+ [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>;
+def HasStdExtZvfbfa : Predicate<"Subtarget->hasStdExtZvfbfa()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvfbfa),
+ "'Zvfbfa' (Additional BF16 vector compute support)">;
+
def FeatureStdExtZvfbfmin
: RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>;
def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
@@ -1055,13 +1062,13 @@ def FeatureStdExtSupm
"Indicates User-mode Pointer Masking">;
def FeatureStdExtSmctr
- : RISCVExperimentalExtension<1, 0,
- "Control Transfer Records Machine Level",
- [FeatureStdExtSscsrind]>;
+ : RISCVExtension<1, 0,
+ "Control Transfer Records Machine Level",
+ [FeatureStdExtSscsrind]>;
def FeatureStdExtSsctr
- : RISCVExperimentalExtension<1, 0,
- "Control Transfer Records Supervisor Level",
- [FeatureStdExtSscsrind]>;
+ : RISCVExtension<1, 0,
+ "Control Transfer Records Supervisor Level",
+ [FeatureStdExtSscsrind]>;
def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">,
AssemblerPredicate<(any_of FeatureStdExtSmctr, FeatureStdExtSsctr),
"'Smctr' (Control Transfer Records Machine Level) or "
@@ -1069,7 +1076,7 @@ def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">,
// Packed SIMD Extensions
def FeatureStdExtP
- : RISCVExperimentalExtension<0, 14,
+ : RISCVExperimentalExtension<0, 15,
"'Base P' (Packed SIMD)">;
def HasStdExtP : Predicate<"Subtarget->hasStdExtP()">,
AssemblerPredicate<(all_of FeatureStdExtP),
@@ -1408,7 +1415,7 @@ def HasVendorXMIPSCBOP
: Predicate<"Subtarget->hasVendorXMIPSCBOP()">,
AssemblerPredicate<(all_of FeatureVendorXMIPSCBOP),
"'Xmipscbop' (MIPS hardware prefetch)">;
-def NotHasVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">;
+def NoVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">;
// WCH / Nanjing Qinheng Microelectronics Extension(s)
@@ -1642,6 +1649,14 @@ def HasVendorXAndesVDot
AssemblerPredicate<(all_of FeatureVendorXAndesVDot),
"'XAndesVDot' (Andes Vector Dot Product Extension)">;
+def FeatureVendorXSMTVDot
+ : RISCVExtension<1, 0, "SpacemiT Vector Dot Product Extension",
+ [FeatureStdExtZve32f]>;
+def HasVendorXSMTVDot
+ : Predicate<"Subtarget->hasVendorXSMTVDot()">,
+ AssemblerPredicate<(all_of FeatureVendorXSMTVDot),
+ "'XSMTVDot' (SpacemiT Vector Dot Product Extension)">;
+
//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 20ade6c..791efca 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -147,7 +147,7 @@ def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>;
def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
}
-let Predicates = [IsRV64, NotHasStdExtZba] in
+let Predicates = [IsRV64, NoStdExtZba] in
def : Pat<(zext (i32 GPR:$src)), (SRLI (i64 (SLLI GPR:$src, 32)), 32)>;
let Predicates = [IsRV32, NoStdExtZbb, NoStdExtZbkb] in
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5998653..f9f35f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -18,6 +18,7 @@
#include "RISCVInstrInfo.h"
#include "RISCVSelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
@@ -681,40 +682,86 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
if (!Subtarget->hasVendorXqcibm())
return false;
- auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
- if (!N1C)
+ using namespace SDPatternMatch;
+
+ SDValue X;
+ APInt MaskImm;
+ if (!sd_match(Node, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
return false;
- int32_t C1 = N1C->getSExtValue();
- if (!isShiftedMask_32(C1) || isInt<12>(C1))
+ unsigned ShAmt, Width;
+ if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
return false;
- // INSBI will clobber the input register in N0. Bail out if we need a copy to
- // preserve this value.
- SDValue N0 = Node->getOperand(0);
- if (!N0.hasOneUse())
+ // If Zbs is enabled and it is a single bit set we can use BSETI which
+ // can be compressed to C_BSETI when Xqcibm is enabled.
+ if (Width == 1 && Subtarget->hasStdExtZbs())
return false;
// If C1 is a shifted mask (but can't be formed as an ORI),
// use a bitfield insert of -1.
// Transform (or x, C1)
// -> (qc.insbi x, -1, width, shift)
- const unsigned Leading = llvm::countl_zero((uint32_t)C1);
- const unsigned Trailing = llvm::countr_zero((uint32_t)C1);
- const unsigned Width = 32 - Leading - Trailing;
+ SDLoc DL(Node);
+ MVT VT = Node->getSimpleValueType(0);
- // If Zbs is enabled and it is a single bit set we can use BSETI which
- // can be compressed to C_BSETI when Xqcibm in enabled.
- if (Width == 1 && Subtarget->hasStdExtZbs())
+ SDValue Ops[] = {X, CurDAG->getSignedTargetConstant(-1, DL, VT),
+ CurDAG->getTargetConstant(Width, DL, VT),
+ CurDAG->getTargetConstant(ShAmt, DL, VT)};
+ SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
+ ReplaceNode(Node, BitIns);
+ return true;
+}
+
+// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
+// being inserted only sets known zero bits.
+bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromOrAndImm(SDNode *Node) {
+ // Supported only in Xqcibm for now.
+ if (!Subtarget->hasVendorXqcibm())
+ return false;
+
+ using namespace SDPatternMatch;
+
+ SDValue And;
+ APInt MaskImm, OrImm;
+ if (!sd_match(Node, m_Or(m_OneUse(m_And(m_Value(And), m_ConstInt(MaskImm))),
+ m_ConstInt(OrImm))))
+ return false;
+
+ // Compute the Known Zero for the AND as this allows us to catch more general
+ // cases than just looking for AND with imm.
+ KnownBits Known = CurDAG->computeKnownBits(Node->getOperand(0));
+
+ // The bits being inserted must only set those bits that are known to be zero.
+ if (!OrImm.isSubsetOf(Known.Zero)) {
+ // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
+ // currently handle this case.
+ return false;
+ }
+
+ unsigned ShAmt, Width;
+ // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
+ if (!Known.Zero.isShiftedMask(ShAmt, Width))
return false;
+ // QC_INSB(I) dst, src, #width, #shamt.
SDLoc DL(Node);
MVT VT = Node->getSimpleValueType(0);
+ SDValue ImmNode;
+ auto Opc = RISCV::QC_INSB;
- SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT),
- CurDAG->getTargetConstant(Width, DL, VT),
- CurDAG->getTargetConstant(Trailing, DL, VT)};
- SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
+ int32_t LIImm = OrImm.getSExtValue() >> ShAmt;
+
+ if (isInt<5>(LIImm)) {
+ Opc = RISCV::QC_INSBI;
+ ImmNode = CurDAG->getSignedTargetConstant(LIImm, DL, MVT::i32);
+ } else {
+ ImmNode = selectImm(CurDAG, DL, MVT::i32, LIImm, *Subtarget);
+ }
+
+ SDValue Ops[] = {And, ImmNode, CurDAG->getTargetConstant(Width, DL, VT),
+ CurDAG->getTargetConstant(ShAmt, DL, VT)};
+ SDNode *BitIns = CurDAG->getMachineNode(Opc, DL, VT, Ops);
ReplaceNode(Node, BitIns);
return true;
}
@@ -772,6 +819,49 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
return false;
}
+// (xor X, (and (xor X, C1), C2))
+// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt)
+// where C2 is a shifted mask with width=Width and shift=ShAmt
+bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromXor(SDNode *Node) {
+
+ if (!Subtarget->hasVendorXqcibm())
+ return false;
+
+ using namespace SDPatternMatch;
+
+ SDValue X;
+ APInt CImm, CMask;
+ if (!sd_match(
+ Node,
+ m_Xor(m_Value(X),
+ m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))),
+ m_ConstInt(CMask))))))
+ return false;
+
+ unsigned Width, ShAmt;
+ if (!CMask.isShiftedMask(ShAmt, Width))
+ return false;
+
+ int64_t Imm = CImm.getSExtValue();
+ Imm >>= ShAmt;
+
+ SDLoc DL(Node);
+ SDValue ImmNode;
+ auto Opc = RISCV::QC_INSB;
+
+ if (isInt<5>(Imm)) {
+ Opc = RISCV::QC_INSBI;
+ ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32);
+ } else {
+ ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget);
+ }
+ SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32),
+ CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)};
+ ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops));
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb,
@@ -1340,6 +1430,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (trySignedBitfieldInsertInMask(Node))
return;
+ if (tryBitfieldInsertOpFromOrAndImm(Node))
+ return;
+
if (tryShrinkShlLogicImm(Node))
return;
@@ -1349,6 +1442,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (tryShrinkShlLogicImm(Node))
return;
+ if (tryBitfieldInsertOpFromXor(Node))
+ return;
+
break;
case ISD::AND: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
@@ -1644,7 +1740,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// available.
// Transform (and x, C1)
// -> (<bfextract> x, msb, lsb)
- if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
+ if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
+ !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
+ !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
const unsigned Msb = llvm::bit_width(C1) - 1;
if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
return;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index ee3a86e..c329a4c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -75,6 +75,8 @@ public:
bool trySignedBitfieldExtract(SDNode *Node);
bool trySignedBitfieldInsertInSign(SDNode *Node);
bool trySignedBitfieldInsertInMask(SDNode *Node);
+ bool tryBitfieldInsertOpFromXor(SDNode *Node);
+ bool tryBitfieldInsertOpFromOrAndImm(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb, unsigned Lsb);
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 03e54b3..9115c13 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -109,7 +109,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
switch (ABI) {
default:
- report_fatal_error("Don't know how to lower this ABI");
+ reportFatalUsageError("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_ILP32E:
case RISCVABI::ABI_LP64E:
@@ -1800,15 +1800,20 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (Intrinsic) {
default:
return false;
- case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
- case Intrinsic::riscv_masked_atomicrmw_add_i32:
- case Intrinsic::riscv_masked_atomicrmw_sub_i32:
- case Intrinsic::riscv_masked_atomicrmw_nand_i32:
- case Intrinsic::riscv_masked_atomicrmw_max_i32:
- case Intrinsic::riscv_masked_atomicrmw_min_i32:
- case Intrinsic::riscv_masked_atomicrmw_umax_i32:
- case Intrinsic::riscv_masked_atomicrmw_umin_i32:
- case Intrinsic::riscv_masked_cmpxchg_i32:
+ case Intrinsic::riscv_masked_atomicrmw_xchg:
+ case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
+ case Intrinsic::riscv_masked_atomicrmw_nand:
+ case Intrinsic::riscv_masked_atomicrmw_max:
+ case Intrinsic::riscv_masked_atomicrmw_min:
+ case Intrinsic::riscv_masked_atomicrmw_umax:
+ case Intrinsic::riscv_masked_atomicrmw_umin:
+ case Intrinsic::riscv_masked_cmpxchg:
+ // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
+ // narrow atomic operation. These will be expanded to an LR/SC loop that
+ // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
+ // will be used to modify the appropriate part of the 4 byte data and
+ // preserve the rest.
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
@@ -1844,6 +1849,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
/*IsStore*/ true,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ // Operands are (vec, ..., vec, ptr, offset, mask, vl)
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_vlm:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
/*IsStore*/ false,
@@ -2512,11 +2528,11 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
break;
case ISD::SETUGT:
- if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1) &&
- C != -1) {
+ if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
// We have a branch immediate instruction for SETUGE but not SETUGT.
- // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
- RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
+ // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
+ // immediate.
+ RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
CC = ISD::SETUGE;
return;
}
@@ -7289,7 +7305,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
- report_fatal_error("unimplemented operand");
+ reportFatalInternalError(
+ "Unimplemented RISCVTargetLowering::LowerOperation Case");
case ISD::PREFETCH:
return LowerPREFETCH(Op, Subtarget, DAG);
case ISD::ATOMIC_FENCE:
@@ -7487,7 +7504,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
- report_fatal_error("Support for VLEN==32 is incomplete.");
+ reportFatalInternalError("Support for VLEN==32 is incomplete.");
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
uint64_t Val = Op.getConstantOperandVal(0);
@@ -8176,6 +8193,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
return DAG.getLogicalNOT(DL, SetCC, VT);
}
+ // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
+ if (CCVal == ISD::SETUGT && Imm == 2047) {
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
+ DAG.getShiftAmountConstant(11, OpVT, DL));
+ return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
+ ISD::SETNE);
+ }
}
// Not a constant we could handle, swap the operands and condition code to
@@ -8500,7 +8524,7 @@ SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
- llvm::report_fatal_error("Trampolines only implemented for RV64");
+ llvm::reportFatalUsageError("Trampolines only implemented for RV64");
// Create an MCCodeEmitter to encode instructions.
TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
@@ -8660,7 +8684,7 @@ SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
- llvm::report_fatal_error("Trampolines only implemented for RV64");
+ llvm::reportFatalUsageError("Trampolines only implemented for RV64");
return Op.getOperand(0);
}
@@ -8795,7 +8819,7 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
switch (getTargetMachine().getCodeModel()) {
default:
- report_fatal_error("Unsupported code model for lowering");
+ reportFatalUsageError("Unsupported code model for lowering");
case CodeModel::Small: {
// Generate a sequence for accessing addresses within the first 2 GiB of
// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
@@ -8931,10 +8955,7 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
// Prepare argument list to generate call.
ArgListTy Args;
- ArgListEntry Entry;
- Entry.Node = Load;
- Entry.Ty = CallTy;
- Args.push_back(Entry);
+ Args.emplace_back(Load, CallTy);
// Setup call to __tls_get_addr.
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -8976,7 +8997,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
CallingConv::GHC)
- report_fatal_error("In GHC calling convention TLS is not supported");
+ reportFatalUsageError("In GHC calling convention TLS is not supported");
SDValue Addr;
switch (Model) {
@@ -9266,19 +9287,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- const int TrueValCost = RISCVMatInt::getIntMatCost(
- TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- const int FalseValCost = RISCVMatInt::getIntMatCost(
- FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
+ // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
+ // a constant in a register
+ if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
+ }
+ if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
+ CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
+ }
+
+ auto getCost = [&](const APInt &Delta, const APInt &Addend) {
+ const int DeltaCost = RISCVMatInt::getIntMatCost(
+ Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ // Does the addend fold into an ADDI?
+ if (Addend.isSignedIntN(12))
+ return DeltaCost;
+ const int AddendCost = RISCVMatInt::getIntMatCost(
+ Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ return AddendCost + DeltaCost;
+ };
+ bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
+ getCost(TrueVal - FalseVal, FalseVal);
SDValue LHSVal = DAG.getConstant(
IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
- SDValue RHSVal =
- DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
SDValue CMOV =
DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
DL, VT, LHSVal, CondV);
- return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
}
// (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
@@ -9313,7 +9353,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
+ SDNodeFlags::Disjoint);
}
if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
@@ -10724,11 +10765,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_mopr:
- return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_moprr: {
- return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::riscv_clmul:
@@ -10825,7 +10866,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
!isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
!isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
- report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
return Op;
}
// EGS * EEW >= 256 bits
@@ -10833,7 +10874,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_vsm3me: {
if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
!isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
- report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
+ reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
return Op;
}
// zvknha(SEW=32)/zvknhb(SEW=[32|64])
@@ -10842,11 +10883,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_vsha2ms: {
if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
!Subtarget.hasStdExtZvknhb())
- report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
+ reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
!isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
!isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
- report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
+ reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
return Op;
}
case Intrinsic::riscv_sf_vc_v_x:
@@ -11084,69 +11125,118 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntNo = Op.getConstantOperandVal(1);
+static SDValue
+lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ bool IsStrided;
switch (IntNo) {
- default:
- break;
case Intrinsic::riscv_seg2_store_mask:
case Intrinsic::riscv_seg3_store_mask:
case Intrinsic::riscv_seg4_store_mask:
case Intrinsic::riscv_seg5_store_mask:
case Intrinsic::riscv_seg6_store_mask:
case Intrinsic::riscv_seg7_store_mask:
- case Intrinsic::riscv_seg8_store_mask: {
- SDLoc DL(Op);
- static const Intrinsic::ID VssegInts[] = {
- Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
- Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
- Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
- Intrinsic::riscv_vsseg8_mask};
+ case Intrinsic::riscv_seg8_store_mask:
+ IsStrided = false;
+ break;
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ IsStrided = true;
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ }
- // Operands: (chain, int_id, vec*, ptr, mask, vl)
- unsigned NF = Op->getNumOperands() - 5;
- assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
- MVT XLenVT = Subtarget.getXLenVT();
- MVT VT = Op->getOperand(2).getSimpleValueType();
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
- unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
- ContainerVT.getScalarSizeInBits();
- EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+ SDLoc DL(Op);
+ static const Intrinsic::ID VssegInts[] = {
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask};
+ static const Intrinsic::ID VsssegInts[] = {
+ Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
+ Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
+ Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
+ Intrinsic::riscv_vssseg8_mask};
+
+ // Operands: (chain, int_id, vec*, ptr, mask, vl) or
+ // (chain, int_id, vec*, ptr, stride, mask, vl)
+ unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getOperand(2).getSimpleValueType();
+ MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+ ContainerVT.getScalarSizeInBits();
+ EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
- SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
- SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
- MVT MaskVT = Mask.getSimpleValueType();
- MVT MaskContainerVT =
- ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
- Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+ MVT MaskVT = Mask.getSimpleValueType();
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
- SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
- SDValue Ptr = Op->getOperand(NF + 2);
+ SDValue IntID = DAG.getTargetConstant(
+ IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
+ SDValue Ptr = Op->getOperand(NF + 2);
- auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+ auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
- SDValue StoredVal = DAG.getUNDEF(VecTupTy);
- for (unsigned i = 0; i < NF; i++)
- StoredVal = DAG.getNode(
- RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
- convertToScalableVector(
- ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
- DAG.getTargetConstant(i, DL, MVT::i32));
+ SDValue StoredVal = DAG.getUNDEF(VecTupTy);
+ for (unsigned i = 0; i < NF; i++)
+ StoredVal = DAG.getNode(
+ RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
+ convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
+ DAG, Subtarget),
+ DAG.getTargetConstant(i, DL, MVT::i32));
+
+ SmallVector<SDValue, 10> Ops = {
+ FixedIntrinsic->getChain(),
+ IntID,
+ StoredVal,
+ Ptr,
+ Mask,
+ VL,
+ DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+ // Insert the stride operand.
+ if (IsStrided)
+ Ops.insert(std::next(Ops.begin(), 4),
+ Op.getOperand(Op.getNumOperands() - 3));
+
+ return DAG.getMemIntrinsicNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+ FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+}
+
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask:
+ case Intrinsic::riscv_sseg2_store_mask:
+ case Intrinsic::riscv_sseg3_store_mask:
+ case Intrinsic::riscv_sseg4_store_mask:
+ case Intrinsic::riscv_sseg5_store_mask:
+ case Intrinsic::riscv_sseg6_store_mask:
+ case Intrinsic::riscv_sseg7_store_mask:
+ case Intrinsic::riscv_sseg8_store_mask:
+ return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
- SDValue Ops[] = {
- FixedIntrinsic->getChain(),
- IntID,
- StoredVal,
- Ptr,
- Mask,
- VL,
- DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
-
- return DAG.getMemIntrinsicNode(
- ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
- FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
- }
case Intrinsic::riscv_sf_vc_xv_se:
return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
case Intrinsic::riscv_sf_vc_iv_se:
@@ -14273,7 +14363,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
MakeLibCallOptions CallOptions;
EVT OpVT = Op0.getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Result;
std::tie(Result, Chain) =
@@ -14308,7 +14398,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
MakeLibCallOptions CallOptions;
EVT OpVT = Op0.getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
Results.push_back(Result);
@@ -14814,7 +14904,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res = DAG.getNode(
- RISCVISD::MOPR, DL, MVT::i64, NewOp,
+ RISCVISD::MOP_R, DL, MVT::i64, NewOp,
DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -14827,7 +14917,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
SDValue Res = DAG.getNode(
- RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
+ RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -15996,9 +16086,10 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
Cond);
- SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
- Cond);
- SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
+ SDValue NewN1 =
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
+ SDValue NewOr =
+ DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}
@@ -16531,8 +16622,10 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
-static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16548,6 +16641,20 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
+ // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
+ if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
+ unsigned ShiftBits = AndRHSC.countr_zero();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, VT));
+ return DAG.getSetCC(dl, VT, Shift, N1, Cond);
+ }
+ }
+
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
@@ -16582,27 +16689,39 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
}
static SDValue
-performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned Opc = Src.getOpcode();
+ SDLoc DL(N);
// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
// Don't do this with Zhinx. We need to explicitly sign extend the GPR.
if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
Subtarget.hasStdExtZfhmin())
- return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
- Src.getOperand(0));
+ return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
// Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
- return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
+ return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
Src.getOperand(1));
+ // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
+ if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+
+ // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
+ if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
+ isAllOnesConstant(Src.getOperand(1)) &&
+ Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
+ return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+
return SDValue();
}
@@ -17461,7 +17580,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
return SDValue();
SmallVector<SDNode *> Worklist;
- SmallSet<SDNode *, 8> Inserted;
+ SmallPtrSet<SDNode *, 8> Inserted;
Worklist.push_back(N);
Inserted.insert(N);
SmallVector<CombineResult> CombinesToApply;
@@ -20022,9 +20141,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
case ISD::SETCC:
- return performSETCCCombine(N, DAG, Subtarget);
+ return performSETCCCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG:
- return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
+ return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
@@ -20580,10 +20699,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
// vfmv.f.s is represented as extract element from 0. Match it late to avoid
// any illegal types.
- if (Val.getOpcode() == RISCVISD::VMV_X_S ||
- (DCI.isAfterLegalizeDAG() &&
- Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isNullConstant(Val.getOperand(1)))) {
+ if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
+ (DCI.isAfterLegalizeDAG() &&
+ Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Val.getOperand(1)))) &&
+ Val.hasOneUse()) {
SDValue Src = Val.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
// VecVT should be scalable and memory VT should match the element type.
@@ -20673,12 +20793,22 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
isNullConstant(Src.getOperand(1)) &&
Src.getOperand(0).getValueType().isScalableVector()) {
EVT VT = N->getValueType(0);
- EVT SrcVT = Src.getOperand(0).getValueType();
- assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+ SDValue EVSrc = Src.getOperand(0);
+ EVT EVSrcVT = EVSrc.getValueType();
+ assert(EVSrcVT.getVectorElementType() == VT.getVectorElementType());
// Widths match, just return the original vector.
- if (SrcVT == VT)
- return Src.getOperand(0);
- // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+ if (EVSrcVT == VT)
+ return EVSrc;
+ SDLoc DL(N);
+ // Width is narrower; use insert_subvector.
+ if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ EVSrc,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ }
+ // Width is wider; use extract_subvector.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
[[fallthrough]];
}
@@ -21018,9 +21148,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
- // Bail if we might break a sh{1,2,3}add pattern.
- if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
- C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ bool IsShXAdd =
+ (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
+ C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
+ bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
+ C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
+
+ // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
+ if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
N->user_begin()->getOpcode() == ISD::ADD &&
!isUsedByLdSt(*N->user_begin(), nullptr) &&
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
@@ -21244,6 +21379,15 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::SRAW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
@@ -21349,8 +21493,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
if (Tmp < 33) return 1;
return 33;
}
+ case RISCVISD::SRAW: {
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // sraw produces at least 33 sign bits. If the input already has more than
+ // 33 sign bits sraw, will preserve them.
+ // TODO: A more precise answer could be calculated depending on known bits
+ // in the shift amount.
+ return std::max(Tmp, 33U);
+ }
case RISCVISD::SLLW:
- case RISCVISD::SRAW:
case RISCVISD::SRLW:
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
@@ -21361,9 +21513,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
case RISCVISD::STRICT_FCVT_WU_RV64:
- // TODO: As the result is sign-extended, this is conservatively correct. A
- // more precise answer could be calculated for SRAW depending on known
- // bits in the shift amount.
+ // TODO: As the result is sign-extended, this is conservatively correct.
return 33;
case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
@@ -21382,24 +21532,23 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
switch (IntNo) {
default:
break;
- case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
- case Intrinsic::riscv_masked_atomicrmw_add_i64:
- case Intrinsic::riscv_masked_atomicrmw_sub_i64:
- case Intrinsic::riscv_masked_atomicrmw_nand_i64:
- case Intrinsic::riscv_masked_atomicrmw_max_i64:
- case Intrinsic::riscv_masked_atomicrmw_min_i64:
- case Intrinsic::riscv_masked_atomicrmw_umax_i64:
- case Intrinsic::riscv_masked_atomicrmw_umin_i64:
- case Intrinsic::riscv_masked_cmpxchg_i64:
+ case Intrinsic::riscv_masked_atomicrmw_xchg:
+ case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
+ case Intrinsic::riscv_masked_atomicrmw_nand:
+ case Intrinsic::riscv_masked_atomicrmw_max:
+ case Intrinsic::riscv_masked_atomicrmw_min:
+ case Intrinsic::riscv_masked_atomicrmw_umax:
+ case Intrinsic::riscv_masked_atomicrmw_umin:
+ case Intrinsic::riscv_masked_cmpxchg:
// riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
// narrow atomic operation. These are implemented using atomic
// operations at the minimum supported atomicrmw/cmpxchg width whose
// result is then sign extended to XLEN. With +A, the minimum width is
// 32 for both 64 and 32.
- assert(Subtarget.getXLen() == 64);
assert(getMinCmpXchgSizeInBits() == 32);
assert(Subtarget.hasStdExtA());
- return 33;
+ return Op.getValueSizeInBits() - 31;
}
break;
}
@@ -21447,6 +21596,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
+ case RISCVISD::SLLW:
+ case RISCVISD::SRAW:
+ case RISCVISD::SRLW:
+ case RISCVISD::RORW:
+ case RISCVISD::ROLW:
+ // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
+ // amount is in bounds.
+ return false;
case RISCVISD::SELECT_CC:
// Integer comparisons cannot create poison.
assert(Op.getOperand(0).getValueType().isInteger() &&
@@ -22234,8 +22391,8 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
if (!Subtarget.is64Bit())
- report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
- "supported on 64-bit targets");
+ reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
+ "supported on 64-bit targets");
return emitPatchPoint(MI, BB);
}
}
@@ -22270,20 +22427,12 @@ void RISCVTargetLowering::analyzeInputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
RISCVCCAssignFn Fn) const {
- FunctionType *FType = MF.getFunction().getFunctionType();
-
for (const auto &[Idx, In] : enumerate(Ins)) {
MVT ArgVT = In.VT;
ISD::ArgFlagsTy ArgFlags = In.Flags;
- Type *ArgTy = nullptr;
- if (IsRet)
- ArgTy = FType->getReturnType();
- else if (In.isOrigArg())
- ArgTy = FType->getParamType(In.getOrigArgIndex());
-
- if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
- /*IsFixed=*/true, IsRet, ArgTy)) {
+ if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
+ In.OrigTy)) {
LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
<< ArgVT << '\n');
llvm_unreachable(nullptr);
@@ -22298,10 +22447,9 @@ void RISCVTargetLowering::analyzeOutputArgs(
for (const auto &[Idx, Out] : enumerate(Outs)) {
MVT ArgVT = Out.VT;
ISD::ArgFlagsTy ArgFlags = Out.Flags;
- Type *OrigTy = CLI ? CLI->getArgs()[Out.OrigArgIndex].Ty : nullptr;
- if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Out.IsFixed,
- IsRet, OrigTy)) {
+ if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
+ Out.OrigTy)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
<< ArgVT << "\n");
llvm_unreachable(nullptr);
@@ -22477,7 +22625,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
switch (CallConv) {
default:
- report_fatal_error("Unsupported calling convention");
+ reportFatalUsageError("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::SPIR_KERNEL:
@@ -22501,17 +22649,17 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
break;
case CallingConv::GHC:
if (Subtarget.hasStdExtE())
- report_fatal_error("GHC calling convention is not supported on RVE!");
+ reportFatalUsageError("GHC calling convention is not supported on RVE!");
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
- report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
- "(Zdinx/D) instruction set extensions");
+ reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
+ "(Zdinx/D) instruction set extensions");
}
const Function &Func = MF.getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.arg_empty())
- report_fatal_error(
- "Functions with the interrupt attribute cannot have arguments!");
+ reportFatalUsageError(
+ "Functions with the interrupt attribute cannot have arguments!");
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
@@ -22527,11 +22675,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
"SiFive-CLIC-preemptible-stack-swap",
};
if (!llvm::is_contained(SupportedInterruptKinds, Kind))
- report_fatal_error(
- "Function interrupt attribute argument not supported!");
+ reportFatalUsageError(
+ "Function interrupt attribute argument not supported!");
if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
- report_fatal_error("'qci-*' interrupt kinds require Xqciint extension");
+ reportFatalUsageError(
+ "'qci-*' interrupt kinds require Xqciint extension");
if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
reportFatalUsageError(
@@ -22769,7 +22918,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CallConv == CallingConv::GHC) {
if (Subtarget.hasStdExtE())
- report_fatal_error("GHC calling convention is not supported on RVE!");
+ reportFatalUsageError("GHC calling convention is not supported on RVE!");
ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
} else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
@@ -22783,8 +22932,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall)
++NumTailCalls;
else if (CLI.CB && CLI.CB->isMustTailCall())
- report_fatal_error("failed to perform tail call elimination on a call "
- "site marked musttail");
+ reportFatalInternalError("failed to perform tail call elimination on a "
+ "call site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getStackSize();
@@ -23083,7 +23232,7 @@ bool RISCVTargetLowering::CanLowerReturn(
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
- /*IsFixed=*/true, /*IsRet=*/true, nullptr))
+ /*IsRet=*/true, Outs[i].OrigTy))
return false;
}
return true;
@@ -23109,7 +23258,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
nullptr, CC_RISCV);
if (CallConv == CallingConv::GHC && !RVLocs.empty())
- report_fatal_error("GHC functions return void only");
+ reportFatalUsageError("GHC functions return void only");
SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -23175,7 +23324,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const Function &Func = DAG.getMachineFunction().getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.getReturnType()->isVoidTy())
- report_fatal_error(
+ reportFatalUsageError(
"Functions with the interrupt attribute must have void return type!");
MachineFunction &MF = DAG.getMachineFunction();
@@ -23343,6 +23492,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
&RISCV::VRN2M4RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
+
+ if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+ if (TRI->isTypeLegalForClass(*RC, ContainerVT))
+ return std::make_pair(0U, RC);
+ }
}
} else if (Constraint == "vd") {
for (const auto *RC :
@@ -23356,10 +23511,24 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
&RISCV::VRN2M4NoV0RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
+
+ if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+ if (TRI->isTypeLegalForClass(*RC, ContainerVT))
+ return std::make_pair(0U, RC);
+ }
}
} else if (Constraint == "vm") {
if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
return std::make_pair(0U, &RISCV::VMV0RegClass);
+
+ if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+ // VT here might be coerced to a vector with i8 elements, so we need to
+ // check if this is an M1 register here instead of checking VMV0RegClass.
+ if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
+ return std::make_pair(0U, &RISCV::VMV0RegClass);
+ }
} else if (Constraint == "cr") {
if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
return std::make_pair(0U, &RISCV::GPRF16CRegClass);
@@ -23679,53 +23848,26 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
- if (XLen == 32) {
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Xchg:
- return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
- case AtomicRMWInst::Add:
- return Intrinsic::riscv_masked_atomicrmw_add_i32;
- case AtomicRMWInst::Sub:
- return Intrinsic::riscv_masked_atomicrmw_sub_i32;
- case AtomicRMWInst::Nand:
- return Intrinsic::riscv_masked_atomicrmw_nand_i32;
- case AtomicRMWInst::Max:
- return Intrinsic::riscv_masked_atomicrmw_max_i32;
- case AtomicRMWInst::Min:
- return Intrinsic::riscv_masked_atomicrmw_min_i32;
- case AtomicRMWInst::UMax:
- return Intrinsic::riscv_masked_atomicrmw_umax_i32;
- case AtomicRMWInst::UMin:
- return Intrinsic::riscv_masked_atomicrmw_umin_i32;
- }
- }
-
- if (XLen == 64) {
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Xchg:
- return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
- case AtomicRMWInst::Add:
- return Intrinsic::riscv_masked_atomicrmw_add_i64;
- case AtomicRMWInst::Sub:
- return Intrinsic::riscv_masked_atomicrmw_sub_i64;
- case AtomicRMWInst::Nand:
- return Intrinsic::riscv_masked_atomicrmw_nand_i64;
- case AtomicRMWInst::Max:
- return Intrinsic::riscv_masked_atomicrmw_max_i64;
- case AtomicRMWInst::Min:
- return Intrinsic::riscv_masked_atomicrmw_min_i64;
- case AtomicRMWInst::UMax:
- return Intrinsic::riscv_masked_atomicrmw_umax_i64;
- case AtomicRMWInst::UMin:
- return Intrinsic::riscv_masked_atomicrmw_umin_i64;
- }
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin;
}
-
- llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
@@ -23750,7 +23892,7 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
unsigned XLen = Subtarget.getXLen();
Value *Ordering =
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
- Type *Tys[] = {AlignedAddr->getType()};
+ Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
AI->getModule(),
getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
@@ -23806,14 +23948,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
unsigned XLen = Subtarget.getXLen();
Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
- Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
if (XLen == 64) {
CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
- CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
}
- Type *Tys[] = {AlignedAddr->getType()};
+ Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
Value *Result = Builder.CreateIntrinsic(
CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
if (XLen == 64)
@@ -24237,7 +24378,12 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
return true;
}
- if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
+ if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
+ PartVT.isScalableVector()) {
+ if (ValueVT.isFixedLengthVector()) {
+ ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
+ Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
+ }
LLVMContext &Context = *DAG.getContext();
EVT ValueEltVT = ValueVT.getVectorElementType();
EVT PartEltVT = PartVT.getVectorElementType();
@@ -24307,12 +24453,17 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
return Val;
}
- if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
+ if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
+ PartVT.isScalableVector()) {
LLVMContext &Context = *DAG.getContext();
SDValue Val = Parts[0];
EVT ValueEltVT = ValueVT.getVectorElementType();
EVT PartEltVT = PartVT.getVectorElementType();
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
+ if (ValueVT.isFixedLengthVector())
+ ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
+ .getSizeInBits()
+ .getKnownMinValue();
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
if (PartVTBitSize % ValueVTBitSize == 0) {
assert(PartVTBitSize >= ValueVTBitSize);
@@ -24330,7 +24481,10 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
}
- Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
+ if (ValueVT.isFixedLengthVector())
+ Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
+ else
+ Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
return Val;
}
}
@@ -24437,8 +24591,8 @@ RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
- report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
- StringRef(RegName) + "\"."));
+ reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
+ StringRef(RegName) + "\"."));
return Reg;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 433b8be..4581c11 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -431,12 +431,12 @@ public:
bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
ArrayRef<ShuffleVectorInst *> Shuffles,
- ArrayRef<unsigned> Indices,
- unsigned Factor) const override;
+ ArrayRef<unsigned> Indices, unsigned Factor,
+ const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index d9c6101..c2667b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -261,17 +261,18 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
// Indicates the EEW of a vector instruction's destination operand.
EEW DestEEW = EEWSEWx1;
let TSFlags{25-24} = DestEEW.Value;
+
+ // Some vector instructions like vslidedown/vrgather will read elements past
+ // VL, and should be marked to make sure RISCVVLOptimizer doesn't reduce their
+ // operands' VLs.
+ bit ReadsPastVL = 0;
+ let TSFlags{26} = ReadsPastVL;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
field bits<32> Inst;
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
let Size = 4;
}
@@ -279,7 +280,6 @@ class RVInst48<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
field bits<48> Inst;
- field bits<48> SoftFail = 0;
let Size = 6;
}
@@ -287,7 +287,6 @@ class RVInst64<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
field bits<64> Inst;
- field bits<64> SoftFail = 0;
let Size = 8;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 5e16061..209c3fa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -14,11 +14,6 @@ class RVInst16<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
: RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
field bits<16> Inst;
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<16> SoftFail = 0;
let Size = 2;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 085064e..7b4a1de 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
const TargetRegisterClass *RegClass) const {
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
@@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector(
return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
- auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
- uint16_t Encoding) {
- MCRegister Reg = RISCV::V0 + Encoding;
- if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
- return Reg;
- return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
- };
+
while (I != NumRegs) {
// For non-segment copying, we only do this once as the registers are always
// aligned.
@@ -470,9 +464,9 @@ void RISCVInstrInfo::copyPhysRegVector(
// Emit actual copying.
// For reversed copying, the encoding should be decreased.
- MCRegister ActualSrcReg = FindRegWithEncoding(
+ MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
- MCRegister ActualDstReg = FindRegWithEncoding(
+ MCRegister ActualDstReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8bd3830..23f5a84 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1694,6 +1694,16 @@ multiclass SelectCC_GPR_riirr<DAGOperand valty, DAGOperand imm> {
valty:$truev, valty:$falsev), []>;
}
+let Predicates = [IsRV32] in {
+def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
+}
+let Predicates = [IsRV64] in {
+def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
+ (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
+def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
+def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
+}
+
/// Branches and jumps
// Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
@@ -2129,14 +2139,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
/// RV64 patterns
-let Predicates = [IsRV64, NotHasStdExtZba] in {
+let Predicates = [IsRV64, NoStdExtZba] in {
def : Pat<(i64 (and GPR:$rs1, 0xffffffff)), (SRLI (i64 (SLLI GPR:$rs1, 32)), 32)>;
// If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2
// shifts instead of 3. This can occur when unsigned is used to index an array.
def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (i64 (SLLI GPR:$rs1, 32)), (ImmSubFrom32 uimm5:$shamt))>;
-}
+} // Predicates = [IsRV64, NoStdExtZba]
class binop_allhusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
@@ -2367,6 +2377,7 @@ include "RISCVInstrInfoXqccmp.td"
include "RISCVInstrInfoXMips.td"
include "RISCVInstrInfoXRivos.td"
include "RISCVInstrInfoXAndes.td"
+include "RISCVInstrInfoXSpacemiT.td"
//===----------------------------------------------------------------------===//
// Global ISel
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 5fa7d41..59f5aeb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -190,7 +190,7 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
list<Predicate> ExtraPreds = []> {
-let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in {
+let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
!cast<RVInst>(BaseInst), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
@@ -277,13 +277,30 @@ class PseudoMaskedAMOUMinUMax
let hasSideEffects = 0;
}
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoAMOPat<string AtomicOp, Pseudo AMOInst, ValueType vt = XLenVT> {
+ def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_monotonic") GPR:$addr, GPR:$incr)),
+ (AMOInst GPR:$addr, GPR:$incr, 2)>;
+ def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_acquire") GPR:$addr, GPR:$incr)),
+ (AMOInst GPR:$addr, GPR:$incr, 4)>;
+ def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_release") GPR:$addr, GPR:$incr)),
+ (AMOInst GPR:$addr, GPR:$incr, 5)>;
+ def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_acq_rel") GPR:$addr, GPR:$incr)),
+ (AMOInst GPR:$addr, GPR:$incr, 6)>;
+ def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_seq_cst") GPR:$addr, GPR:$incr)),
+ (AMOInst GPR:$addr, GPR:$incr, 7)>;
+}
+
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
- : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
+ : Pat<(XLenVT (intrin (XLenVT GPR:$addr), (XLenVT GPR:$incr),
+ (XLenVT GPR:$mask), (XLenVT timm:$ordering))),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
- : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
- timm:$ordering),
+ : Pat<(XLenVT (intrin (XLenVT GPR:$addr), (XLenVT GPR:$incr),
+ (XLenVT GPR:$mask), (XLenVT GPR:$shiftamt),
+ (XLenVT timm:$ordering))),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
@@ -291,50 +308,40 @@ let Predicates = [HasStdExtA] in {
let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(XLenVT (atomic_load_nand_i32_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(XLenVT (atomic_load_nand_i32_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(XLenVT (atomic_load_nand_i32_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(XLenVT (atomic_load_nand_i32_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(XLenVT (atomic_load_nand_i32_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
-
-let Size = 28 in
-def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+defm : PseudoAMOPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>;
+
+let Size = 28 in {
+ def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+ def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+ def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+}
+let Size = 32 in {
+ def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+}
+let Size = 44 in {
+ def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+}
+let Size = 36 in {
+ def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+ def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+}
+
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg,
PseudoMaskedAtomicSwap32>;
-let Size = 28 in
-def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add,
PseudoMaskedAtomicLoadAdd32>;
-let Size = 28 in
-def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub,
PseudoMaskedAtomicLoadSub32>;
-let Size = 32 in
-def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand,
PseudoMaskedAtomicLoadNand32>;
-let Size = 44 in
-def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max,
PseudoMaskedAtomicLoadMax32>;
-let Size = 44 in
-def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min,
PseudoMaskedAtomicLoadMin32>;
-let Size = 36 in
-def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax,
PseudoMaskedAtomicLoadUMax32>;
-let Size = 36 in
-def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin,
PseudoMaskedAtomicLoadUMin32>;
} // Predicates = [HasStdExtA]
@@ -342,35 +349,7 @@ let Predicates = [HasStdExtA, IsRV64] in {
let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(i64 (atomic_load_nand_i64_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(i64 (atomic_load_nand_i64_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(i64 (atomic_load_nand_i64_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(i64 (atomic_load_nand_i64_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(i64 (atomic_load_nand_i64_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
-
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
- PseudoMaskedAtomicSwap32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
- PseudoMaskedAtomicLoadAdd32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
- PseudoMaskedAtomicLoadSub32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
- PseudoMaskedAtomicLoadNand32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
- PseudoMaskedAtomicLoadMax32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
- PseudoMaskedAtomicLoadMin32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
- PseudoMaskedAtomicLoadUMax32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
- PseudoMaskedAtomicLoadUMin32>;
+defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>;
} // Predicates = [HasStdExtA, IsRV64]
@@ -424,15 +403,9 @@ def PseudoMaskedCmpXchg32
let Size = 32;
}
-def : Pat<(int_riscv_masked_cmpxchg_i32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+def : Pat<(XLenVT (int_riscv_masked_cmpxchg
+ (XLenVT GPR:$addr), (XLenVT GPR:$cmpval), (XLenVT GPR:$newval),
+ (XLenVT GPR:$mask), (XLenVT timm:$ordering))),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
} // Predicates = [HasStdExtA]
-
-let Predicates = [HasStdExtA, IsRV64] in {
-def : Pat<(int_riscv_masked_cmpxchg_i64
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
- (PseudoMaskedCmpXchg32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-} // Predicates = [HasStdExtA, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index c5551fb..bfc766d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -301,14 +301,6 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
let Inst{5} = imm{3};
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
- Sched<[WriteFLD64, ReadFMemBase]> {
- bits<8> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6-5} = imm{7-6};
-}
-
def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
Sched<[WriteLDW, ReadMemBase]> {
bits<7> imm;
@@ -326,16 +318,6 @@ def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>,
let Inst{5} = imm{6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
- Sched<[WriteFLD32, ReadFMemBase]> {
- bits<7> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6} = imm{2};
- let Inst{5} = imm{6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
Sched<[WriteLDD, ReadMemBase]> {
@@ -344,14 +326,6 @@ def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
let Inst{6-5} = imm{7-6};
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
- Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
- bits<8> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6-5} = imm{7-6};
-}
-
def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
bits<7> imm;
@@ -369,16 +343,6 @@ def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>,
let Inst{5} = imm{6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
- Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
- bits<7> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6} = imm{2};
- let Inst{5} = imm{6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>,
Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
@@ -500,12 +464,6 @@ def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb),
let Constraints = "$rd = $rd_wb";
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
- Sched<[WriteFLD64, ReadFMemBase]> {
- let Inst{4-2} = imm{8-6};
-}
-
def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
Sched<[WriteLDW, ReadMemBase]> {
let Inst{3-2} = imm{7-6};
@@ -517,13 +475,6 @@ def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>,
let Inst{3-2} = imm{7-6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
- Sched<[WriteFLD32, ReadFMemBase]> {
- let Inst{3-2} = imm{7-6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>,
Sched<[WriteLDD, ReadMemBase]> {
@@ -560,12 +511,6 @@ def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd),
let Constraints = "$rs1 = $rd";
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
- Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
- let Inst{9-7} = imm{8-6};
-}
-
def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
let Inst{8-7} = imm{7-6};
@@ -577,13 +522,6 @@ def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>,
let Inst{8-7} = imm{7-6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
- Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
- let Inst{8-7} = imm{7-6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>,
Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
@@ -600,6 +538,61 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
} // Predicates = [HasStdExtZca]
+let DecoderNamespace = "RV32Only",
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
+ def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFLD32, ReadFMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+ }
+
+ def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+ }
+
+ def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFLD32, ReadFMemBase]> {
+ let Inst{3-2} = imm{7-6};
+ }
+
+ def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
+ let Inst{8-7} = imm{7-6};
+ }
+} // DecoderNamespace = "RV32Only", Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
+ def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFLD64, ReadFMemBase]> {
+ bits<8> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6-5} = imm{7-6};
+ }
+
+ def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
+ bits<8> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6-5} = imm{7-6};
+ }
+
+ def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFLD64, ReadFMemBase]> {
+ let Inst{4-2} = imm{8-6};
+ }
+
+ def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
+ let Inst{9-7} = imm{8-6};
+ }
+} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
+
//===----------------------------------------------------------------------===//
// HINT Instructions
//===----------------------------------------------------------------------===//
@@ -767,20 +760,17 @@ def : InstAlias<".insn_cj $opcode, $funct3, $imm11",
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
-// Patterns are defined in the same order the compressed instructions appear
+// Zca patterns are defined in the same order the compressed instructions appear
// under the "RVC Instruction Set Listings" section of the ISA manual.
+// Zca Instructions
+
// Quadrant 0
let Predicates = [HasStdExtZca] in {
def : CompressPat<(ADDI GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm),
(C_ADDI4SPN GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
- (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
@@ -790,21 +780,11 @@ def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
- (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
(C_LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
} // Predicates = [HasStdExtZca, IsRV64]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
- (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
@@ -814,11 +794,6 @@ def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
- (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
(C_SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
@@ -907,11 +882,6 @@ def : CompressPat<(SLLI GPRNoX0:$rs1, GPRNoX0:$rs1, uimmlog2xlennonzero:$imm),
(C_SLLI GPRNoX0:$rs1, uimmlog2xlennonzero:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
- (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
@@ -921,11 +891,6 @@ def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
- (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(LD GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
(C_LDSP GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
@@ -953,11 +918,6 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1),
(C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
- (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
@@ -967,12 +927,38 @@ def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
- (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(SD GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
(C_SDSP GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
} // Predicates = [HasStdExtZca, IsRV64]
+
+// Zcf Instructions
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
+ // Quadrant 0
+ def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+ def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+ // Quadrant 2
+ def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
+ def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
+} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
+
+// Zcd Instructions
+let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
+ // Quadrant 0
+ def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
+ (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
+ def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
+ (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
+
+ // Quadrant 2
+ def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
+ (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
+ def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
+ (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
+} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
+
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index c1f67f7..fdf0195 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -111,11 +111,11 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
-let Predicates = [HasStdExtZmmul, IsRV64, NotHasStdExtZba] in {
+let Predicates = [HasStdExtZmmul, IsRV64, NoStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (i64 (SLLI GPR:$rs1, 32)), (i64 (SLLI GPR:$rs2, 32)))>;
-} // Predicates = [HasStdExtZmmul, IsRV64, NotHasStdExtZba]
+} // Predicates = [HasStdExtZmmul, IsRV64, NoStdExtZba]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 8297d50..c342b41 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -18,7 +18,26 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def simm10 : RISCVSImmLeafOp<10>;
+def simm10 : RISCVSImmOp<10>;
+
+def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
+ let RenderMethod = "addSImm8UnsignedOperands";
+}
+
+// An 8-bit signed immediate allowing range [-128, 255]
+// but represented as [-128, 127].
+def simm8_unsigned : RISCVOp {
+ let ParserMatchClass = SImm8UnsignedAsmOperand;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<8>";
+ let OperandType = "OPERAND_SIMM8_UNSIGNED";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isInt<8>(Imm);
+ }];
+}
def SImm10UnsignedAsmOperand : SImmAsmOperand<10, "Unsigned"> {
let RenderMethod = "addSImm10UnsignedOperands";
@@ -30,7 +49,7 @@ def simm10_unsigned : RISCVOp {
let ParserMatchClass = SImm10UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<10>";
- let OperandType = "OPERAND_SIMM10";
+ let OperandType = "OPERAND_SIMM10_UNSIGNED";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -43,49 +62,40 @@ def simm10_unsigned : RISCVOp {
// Instruction class templates
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class PLI_i<bits<7> funct7, string opcodestr>
- : RVInst<(outs GPR:$rd), (ins simm10:$imm10), opcodestr, "$rd, $imm10", [],
+// Common base for pli.b/h/w and plui.h/w
+class RVPLoadImm_i<bits<7> funct7, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<(outs GPR:$rd), ins, opcodestr, argstr, [],
InstFormatOther> {
- bits<10> imm10;
bits<5> rd;
let Inst{31-25} = funct7;
- let Inst{24-16} = imm10{8-0};
- let Inst{15} = imm10{9};
let Inst{14-12} = 0b010;
let Inst{11-7} = rd;
let Inst{6-0} = OPC_OP_IMM_32.Value;
+
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class PLUI_i<bits<7> funct7, string opcodestr>
- : RVInst<(outs GPR:$rd), (ins simm10_unsigned:$imm10), opcodestr,
- "$rd, $imm10", [], InstFormatOther> {
+// Base for pli.h/w.
+class PLI_i<bits<7> funct7, string opcodestr>
+ : RVPLoadImm_i<funct7, (ins simm10:$imm10), opcodestr, "$rd, $imm10"> {
bits<10> imm10;
- bits<5> rd;
- let Inst{31-25} = funct7;
- let Inst{24} = imm10{0};
- let Inst{23-15} = imm10{9-1};
- let Inst{14-12} = 0b010;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_OP_IMM_32.Value;
+ let Inst{24-16} = imm10{8-0};
+ let Inst{15} = imm10{9};
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class PLI_B_i<bits<8> funct8, string opcodestr>
- : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [],
- InstFormatOther> {
- bits<8> uimm8;
- bits<5> rd;
+// Base for plui.h/w.
+class PLUI_i<bits<7> funct7, string opcodestr>
+ : RVPLoadImm_i<funct7, (ins simm10_unsigned:$imm10), opcodestr,
+ "$rd, $imm10"> {
+ bits<10> imm10;
- let Inst{31-24} = funct8;
- let Inst{23-16} = uimm8;
- let Inst{15} = 0b0;
- let Inst{14-12} = 0b010;
- let Inst{11-7} = rd;
- let Inst{6-0} = OPC_OP_IMM_32.Value;
+ let Inst{24} = imm10{0};
+ let Inst{23-15} = imm10{9-1};
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -98,6 +108,14 @@ class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType>
let Inst{27} = 0b0;
}
+class RVPShiftD_ri<bits<3> f, bits<3> funct3, string opcodestr>
+ : RVPShift_ri<f, funct3, opcodestr, uimm6> {
+ bits<6> shamt;
+
+ let Inst{26} = 0b1;
+ let Inst{25-20} = shamt;
+}
+
class RVPShiftW_ri<bits<3> f, bits<3> funct3, string opcodestr>
: RVPShift_ri<f, funct3, opcodestr, uimm5> {
bits<5> shamt;
@@ -131,59 +149,743 @@ class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr>
let Inst{24-20} = uf;
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPBinaryScalar_rr<bits<3> f, bits<2> w, bits<3> funct3, string opcodestr>
+ : RVInstRBase<funct3, OPC_OP_IMM_32, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let Inst{31} = 0b1;
+ let Inst{30-28} = f;
+ let Inst{27} = 0b1;
+ let Inst{26-25} = w;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPBinary_rr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr>
+ : RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let Inst{31} = 0b1;
+ let Inst{30-27} = f;
+ let Inst{26-25} = w;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr>
+ : RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr,
+ "$rd, $rs1, $rs2"> {
+ let Inst{31} = 0b1;
+ let Inst{30-27} = f;
+ let Inst{26-25} = w;
+
+ let Constraints = "$rd = $rd_wb";
+}
+
+// Common base for pli.db/dh and plui.dh
+class RVPPairLoadImm_i<bits<7> funct7, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<(outs GPRPairRV32:$rd), ins, opcodestr, argstr, [],
+ InstFormatOther> {
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{14-12} = 0b010;
+ let Inst{11-8} = rd{4-1};
+ let Inst{7} = 0b0;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
+
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtP] in {
-let IsSignExtendingOpW = 1 in
-def CLS : Unary_r<0b011000000011, 0b001, "cls">;
-def ABS : Unary_r<0b011000000111, 0b001, "abs">;
+ let IsSignExtendingOpW = 1 in
+ def CLS : Unary_r<0b011000000011, 0b001, "cls">;
+ def ABS : Unary_r<0b011000000111, 0b001, "abs">;
} // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32] in
-def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">;
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">;
+} // Predicates = [HasStdExtP, IsRV32]
let Predicates = [HasStdExtP, IsRV64] in {
-def REV16 : Unary_r<0b011010110000, 0b101, "rev16">;
-def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">;
+ def REV16 : Unary_r<0b011010110000, 0b101, "rev16">;
+ def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">;
-let IsSignExtendingOpW = 1 in {
-def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">;
-def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">;
-}
+ let IsSignExtendingOpW = 1 in {
+ def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">;
+ def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">;
+ }
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in {
-def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">;
-def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">;
-def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">;
+ def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">;
+ def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">;
+ def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">;
} // Predicates = [HasStdExtP]
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtP, IsRV32] in
-def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">;
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
let Predicates = [HasStdExtP, IsRV64] in {
-def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">;
-def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">;
+ def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">;
+ def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
def PLI_H : PLI_i<0b1011000, "pli.h">;
let Predicates = [HasStdExtP, IsRV64] in
def PLI_W : PLI_i<0b1011001, "pli.w">;
-let Predicates = [HasStdExtP] in
-def PLI_B : PLI_B_i<0b10110100, "pli.b">;
+let Predicates = [HasStdExtP] in {
+ def PLI_B : RVPLoadImm_i<0b1011010, (ins simm8_unsigned:$imm8), "pli.b",
+ "$rd, $imm8"> {
+ bits<8> imm8;
+
+ let Inst{24} = 0b0;
+ let Inst{23-16} = imm8;
+ let Inst{15} = 0b0;
+ }
+}
let Predicates = [HasStdExtP] in {
-def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">;
-def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">;
-def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">;
+ def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">;
+ def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">;
+ def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV64] in {
-def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">;
-def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">;
+ def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">;
+ def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
def PLUI_H : PLUI_i<0b1111000, "plui.h">;
let Predicates = [HasStdExtP, IsRV64] in
def PLUI_W : PLUI_i<0b1111001, "plui.w">;
+
+let Predicates = [HasStdExtP] in {
+ def PSLL_HS : RVPBinaryScalar_rr<0b000, 0b00, 0b010, "psll.hs">;
+ def PSLL_BS : RVPBinaryScalar_rr<0b000, 0b10, 0b010, "psll.bs">;
+
+ def PADD_HS : RVPBinaryScalar_rr<0b001, 0b00, 0b010, "padd.hs">;
+ def PADD_BS : RVPBinaryScalar_rr<0b001, 0b10, 0b010, "padd.bs">;
+
+ def PSSHA_HS : RVPBinaryScalar_rr<0b110, 0b00, 0b010, "pssha.hs">;
+
+ def PSSHAR_HS : RVPBinaryScalar_rr<0b111, 0b00, 0b010, "psshar.hs">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def SSHA : RVPBinaryScalar_rr<0b110, 0b01, 0b010, "ssha">;
+
+ def SSHAR : RVPBinaryScalar_rr<0b111, 0b01, 0b010, "sshar">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PSLL_WS : RVPBinaryScalar_rr<0b000, 0b01, 0b010, "psll.ws">;
+
+ def PADD_WS : RVPBinaryScalar_rr<0b001, 0b01, 0b010, "padd.ws">;
+
+ def PSSHA_WS : RVPBinaryScalar_rr<0b110, 0b01, 0b010, "pssha.ws">;
+ def SHA : RVPBinaryScalar_rr<0b110, 0b11, 0b010, "sha">;
+
+ def PSSHAR_WS : RVPBinaryScalar_rr<0b111, 0b01, 0b010, "psshar.ws">;
+ def SHAR : RVPBinaryScalar_rr<0b111, 0b11, 0b010, "shar">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PSRLI_B : RVPShiftB_ri<0b000, 0b100, "psrli.b">;
+ def PSRLI_H : RVPShiftH_ri<0b000, 0b100, "psrli.h">;
+
+ def PUSATI_H : RVPShiftH_ri<0b010, 0b100, "pusati.h">;
+
+ def PSRAI_B : RVPShiftB_ri<0b100, 0b100, "psrai.b">;
+ def PSRAI_H : RVPShiftH_ri<0b100, 0b100, "psrai.h">;
+
+ def PSRARI_H : RVPShiftH_ri<0b101, 0b100, "psrari.h">;
+
+ def PSATI_H : RVPShiftH_ri<0b110, 0b100, "psati.h">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def USATI_RV32 : RVPShiftW_ri<0b010, 0b100, "usati">;
+
+ def SRARI_RV32 : RVPShiftW_ri<0b101, 0b100, "srari">;
+
+ def SATI_RV32 : RVPShiftW_ri<0b110, 0b100, "sati">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PSRLI_W : RVPShiftW_ri<0b000, 0b100, "psrli.w">;
+ def PSRAI_W : RVPShiftW_ri<0b100, 0b100, "psrai.w">;
+
+ def PUSATI_W : RVPShiftW_ri<0b010, 0b100, "pusati.w">;
+ def USATI_RV64 : RVPShiftD_ri<0b010, 0b100, "usati">;
+
+ def PSRARI_W : RVPShiftW_ri<0b101, 0b100, "psrari.w">;
+ def SRARI_RV64 : RVPShiftD_ri<0b101, 0b100, "srari">;
+
+ def PSATI_W : RVPShiftW_ri<0b110, 0b100, "psati.w">;
+ def SATI_RV64 : RVPShiftD_ri<0b110, 0b100, "sati">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PSRL_HS : RVPBinaryScalar_rr<0b000, 0b00, 0b100, "psrl.hs">;
+ def PSRL_BS : RVPBinaryScalar_rr<0b000, 0b10, 0b100, "psrl.bs">;
+
+ def PREDSUM_HS : RVPBinaryScalar_rr<0b001, 0b00, 0b100, "predsum.hs">;
+ def PREDSUM_BS : RVPBinaryScalar_rr<0b001, 0b10, 0b100, "predsum.bs">;
+
+ def PREDSUMU_HS : RVPBinaryScalar_rr<0b011, 0b00, 0b100, "predsumu.hs">;
+ def PREDSUMU_BS : RVPBinaryScalar_rr<0b011, 0b10, 0b100, "predsumu.bs">;
+
+ def PSRA_HS : RVPBinaryScalar_rr<0b100, 0b00, 0b100, "psra.hs">;
+ def PSRA_BS : RVPBinaryScalar_rr<0b100, 0b10, 0b100, "psra.bs">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PSRL_WS : RVPBinaryScalar_rr<0b000, 0b01, 0b100, "psrl.ws">;
+
+ def PREDSUM_WS : RVPBinaryScalar_rr<0b001, 0b01, 0b100, "predsum.ws">;
+
+ def PREDSUMU_WS : RVPBinaryScalar_rr<0b011, 0b01, 0b100, "predsumu.ws">;
+
+ def PSRA_WS : RVPBinaryScalar_rr<0b100, 0b01, 0b100, "psra.ws">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PADD_H : RVPBinary_rr<0b0000, 0b00, 0b000, "padd.h">;
+ def PADD_B : RVPBinary_rr<0b0000, 0b10, 0b000, "padd.b">;
+
+ def PSADD_H : RVPBinary_rr<0b0010, 0b00, 0b000, "psadd.h">;
+ def PSADD_B : RVPBinary_rr<0b0010, 0b10, 0b000, "psadd.b">;
+
+ def PAADD_H : RVPBinary_rr<0b0011, 0b00, 0b000, "paadd.h">;
+ def PAADD_B : RVPBinary_rr<0b0011, 0b10, 0b000, "paadd.b">;
+
+ def PSADDU_H : RVPBinary_rr<0b0110, 0b00, 0b000, "psaddu.h">;
+ def PSADDU_B : RVPBinary_rr<0b0110, 0b10, 0b000, "psaddu.b">;
+
+ def PAADDU_H : RVPBinary_rr<0b0111, 0b00, 0b000, "paaddu.h">;
+ def PAADDU_B : RVPBinary_rr<0b0111, 0b10, 0b000, "paaddu.b">;
+
+ def PSUB_H : RVPBinary_rr<0b1000, 0b00, 0b000, "psub.h">;
+ def PSUB_B : RVPBinary_rr<0b1000, 0b10, 0b000, "psub.b">;
+
+ def PDIF_H : RVPBinary_rr<0b1001, 0b00, 0b000, "pdif.h">;
+ def PDIF_B : RVPBinary_rr<0b1001, 0b10, 0b000, "pdif.b">;
+
+ def PSSUB_H : RVPBinary_rr<0b1010, 0b00, 0b000, "pssub.h">;
+ def PSSUB_B : RVPBinary_rr<0b1010, 0b10, 0b000, "pssub.b">;
+
+ def PASUB_H : RVPBinary_rr<0b1011, 0b00, 0b000, "pasub.h">;
+ def PASUB_B : RVPBinary_rr<0b1011, 0b10, 0b000, "pasub.b">;
+
+ def PDIFU_H : RVPBinary_rr<0b1101, 0b00, 0b000, "pdifu.h">;
+ def PDIFU_B : RVPBinary_rr<0b1101, 0b10, 0b000, "pdifu.b">;
+
+ def PSSUBU_H : RVPBinary_rr<0b1110, 0b00, 0b000, "pssubu.h">;
+ def PSSUBU_B : RVPBinary_rr<0b1110, 0b10, 0b000, "pssubu.b">;
+
+ def PASUBU_H : RVPBinary_rr<0b1111, 0b00, 0b000, "pasubu.h">;
+ def PASUBU_B : RVPBinary_rr<0b1111, 0b10, 0b000, "pasubu.b">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def SADD : RVPBinary_rr<0b0010, 0b01, 0b000, "sadd">;
+
+ def AADD : RVPBinary_rr<0b0011, 0b01, 0b000, "aadd">;
+
+ def SADDU : RVPBinary_rr<0b0110, 0b01, 0b000, "saddu">;
+
+ def AADDU : RVPBinary_rr<0b0111, 0b01, 0b000, "aaddu">;
+
+ def SSUB : RVPBinary_rr<0b1010, 0b01, 0b000, "ssub">;
+
+ def ASUB : RVPBinary_rr<0b1011, 0b01, 0b000, "asub">;
+
+ def SSUBU : RVPBinary_rr<0b1110, 0b01, 0b000, "ssubu">;
+
+ def ASUBU : RVPBinary_rr<0b1111, 0b01, 0b000, "asubu">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PADD_W : RVPBinary_rr<0b0000, 0b01, 0b000, "padd.w">;
+
+ def PSADD_W : RVPBinary_rr<0b0010, 0b01, 0b000, "psadd.w">;
+
+ def PAADD_W : RVPBinary_rr<0b0011, 0b01, 0b000, "paadd.w">;
+
+ def PSADDU_W : RVPBinary_rr<0b0110, 0b01, 0b000, "psaddu.w">;
+
+ def PAADDU_W : RVPBinary_rr<0b0111, 0b01, 0b000, "paaddu.w">;
+
+ def PSUB_W : RVPBinary_rr<0b1000, 0b01, 0b000, "psub.w">;
+
+ def PSSUB_W : RVPBinary_rr<0b1010, 0b01, 0b000, "pssub.w">;
+
+ def PASUB_W : RVPBinary_rr<0b1011, 0b01, 0b000, "pasub.w">;
+
+ def PSSUBU_W : RVPBinary_rr<0b1110, 0b01, 0b000, "pssubu.w">;
+
+ def PASUBU_W : RVPBinary_rr<0b1111, 0b01, 0b000, "pasubu.w">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def SLX : RVPBinary_rr<0b0001, 0b11, 0b001, "slx">;
+
+ def PMUL_H_B01 : RVPBinary_rr<0b0010, 0b00, 0b001, "pmul.h.b01">;
+
+ def MVM : RVPTernary_rrr<0b0101, 0b00, 0b001, "mvm">;
+ def MVMN : RVPTernary_rrr<0b0101, 0b01, 0b001, "mvmn">;
+ def MERGE : RVPTernary_rrr<0b0101, 0b10, 0b001, "merge">;
+ def SRX : RVPTernary_rrr<0b0101, 0b11, 0b001, "srx">;
+
+ def PMULU_H_B01 : RVPBinary_rr<0b0110, 0b00, 0b001, "pmulu.h.b01">;
+ def PDIFSUMU_B : RVPBinary_rr<0b0110, 0b10, 0b001, "pdifsumu.b">;
+
+ def PDIFSUMAU_B : RVPTernary_rrr<0b0111, 0b10, 0b001, "pdifsumau.b">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def MUL_H01 : RVPBinary_rr<0b0010, 0b01, 0b001, "mul.h01">;
+
+ def MACC_H01 : RVPTernary_rrr<0b0011, 0b01, 0b001, "macc.h01">;
+
+ def MULU_H01 : RVPBinary_rr<0b0110, 0b01, 0b001, "mulu.h01">;
+
+ def MACCU_H01 : RVPTernary_rrr<0b0111, 0b01, 0b001, "maccu.h01">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PMUL_W_H01 : RVPBinary_rr<0b0010, 0b01, 0b001, "pmul.w.h01">;
+ def MUL_W01 : RVPBinary_rr<0b0010, 0b11, 0b001, "mul.w01">;
+
+ def PMACC_W_H01 : RVPTernary_rrr<0b0011, 0b01, 0b001, "pmacc.w.h01">;
+ def MACC_W01 : RVPTernary_rrr<0b0011, 0b11, 0b001, "macc.w01">;
+
+ def PMULU_W_H01 : RVPBinary_rr<0b0110, 0b01, 0b001, "pmulu.w.h01">;
+ def MULU_W01 : RVPBinary_rr<0b0110, 0b11, 0b001, "mulu.w01">;
+
+ def PMACCU_W_H01 : RVPTernary_rrr<0b0111, 0b01, 0b001, "pmaccu.w.h01">;
+ def MACCU_W01 : RVPTernary_rrr<0b0111, 0b11, 0b001, "maccu.w01">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+// Note the spec has a 3-bit f field in bits 30:28 with 0 in bit 27.
+// Here we include the 0 in the f field to reduce number of tablegen classes.
+let Predicates = [HasStdExtP] in {
+ def PSH1ADD_H : RVPBinary_rr<0b0100, 0b00, 0b010, "psh1add.h">;
+
+ def PSSH1SADD_H : RVPBinary_rr<0b0110, 0b00, 0b010, "pssh1sadd.h">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def SSH1SADD : RVPBinary_rr<0b0110, 0b01, 0b010, "ssh1sadd">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PSH1ADD_W : RVPBinary_rr<0b0100, 0b01, 0b010, "psh1add.w">;
+
+ def PSSH1SADD_W : RVPBinary_rr<0b0110, 0b01, 0b010, "pssh1sadd.w">;
+
+ def UNZIP8P : RVPBinary_rr<0b1100, 0b00, 0b010, "unzip8p">;
+ def UNZIP16P : RVPBinary_rr<0b1100, 0b01, 0b010, "unzip16p">;
+ def UNZIP8HP : RVPBinary_rr<0b1100, 0b10, 0b010, "unzip8hp">;
+ def UNZIP16HP : RVPBinary_rr<0b1100, 0b11, 0b010, "unzip16hp">;
+
+ def ZIP8P : RVPBinary_rr<0b1110, 0b00, 0b010, "zip8p">;
+ def ZIP16P : RVPBinary_rr<0b1110, 0b01, 0b010, "zip16p">;
+ def ZIP8HP : RVPBinary_rr<0b1110, 0b10, 0b010, "zip8hp">;
+ def ZIP16HP : RVPBinary_rr<0b1110, 0b11, 0b010, "zip16hp">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PMUL_H_B00 : RVPBinary_rr<0b0000, 0b00, 0b011, "pmul.h.b00">;
+
+ def PMUL_H_B11 : RVPBinary_rr<0b0010, 0b00, 0b011, "pmul.h.b11">;
+
+ def PMULU_H_B00 : RVPBinary_rr<0b0100, 0b00, 0b011, "pmulu.h.b00">;
+
+ def PMULU_H_B11 : RVPBinary_rr<0b0110, 0b00, 0b011, "pmulu.h.b11">;
+
+ def PMULSU_H_B00 : RVPBinary_rr<0b1100, 0b00, 0b011, "pmulsu.h.b00">;
+
+ def PMULSU_H_B11 : RVPBinary_rr<0b1110, 0b00, 0b011, "pmulsu.h.b11">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def MUL_H00 : RVPBinary_rr<0b0000, 0b01, 0b011, "mul.h00">;
+
+ def MACC_H00 : RVPTernary_rrr<0b0001, 0b01, 0b011, "macc.h00">;
+
+ def MUL_H11 : RVPBinary_rr<0b0010, 0b01, 0b011, "mul.h11">;
+
+ def MACC_H11 : RVPTernary_rrr<0b0011, 0b01, 0b011, "macc.h11">;
+
+ def MULU_H00 : RVPBinary_rr<0b0100, 0b01, 0b011, "mulu.h00">;
+
+ def MACCU_H00 : RVPTernary_rrr<0b0101, 0b01, 0b011, "maccu.h00">;
+
+ def MULU_H11 : RVPBinary_rr<0b0110, 0b01, 0b011, "mulu.h11">;
+
+ def MACCU_H11 : RVPTernary_rrr<0b0111, 0b01, 0b011, "maccu.h11">;
+
+ def MULSU_H00 : RVPBinary_rr<0b1100, 0b01, 0b011, "mulsu.h00">;
+
+ def MACCSU_H00 : RVPTernary_rrr<0b1101, 0b01, 0b011, "maccsu.h00">;
+
+ def MULSU_H11 : RVPBinary_rr<0b1110, 0b01, 0b011, "mulsu.h11">;
+
+ def MACCSU_H11 : RVPTernary_rrr<0b1111, 0b01, 0b011, "maccsu.h11">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PMUL_W_H00 : RVPBinary_rr<0b0000, 0b01, 0b011, "pmul.w.h00">;
+ def MUL_W00 : RVPBinary_rr<0b0000, 0b11, 0b011, "mul.w00">;
+
+ def PMACC_W_H00 : RVPTernary_rrr<0b0001, 0b01, 0b011, "pmacc.w.h00">;
+ def MACC_W00 : RVPTernary_rrr<0b0001, 0b11, 0b011, "macc.w00">;
+
+ def PMUL_W_H11 : RVPBinary_rr<0b0010, 0b01, 0b011, "pmul.w.h11">;
+ def MUL_W11 : RVPBinary_rr<0b0010, 0b11, 0b011, "mul.w11">;
+
+ def PMACC_W_H11 : RVPTernary_rrr<0b0011, 0b01, 0b011, "pmacc.w.h11">;
+ def MACC_W11 : RVPTernary_rrr<0b0011, 0b11, 0b011, "macc.w11">;
+
+ def PMULU_W_H00 : RVPBinary_rr<0b0100, 0b01, 0b011, "pmulu.w.h00">;
+ def MULU_W00 : RVPBinary_rr<0b0100, 0b11, 0b011, "mulu.w00">;
+
+ def PMACCU_W_H00 : RVPTernary_rrr<0b0101, 0b01, 0b011, "pmaccu.w.h00">;
+ def MACCU_W00 : RVPTernary_rrr<0b0101, 0b11, 0b011, "maccu.w00">;
+
+ def PMULU_W_H11 : RVPBinary_rr<0b0110, 0b01, 0b011, "pmulu.w.h11">;
+ def MULU_W11 : RVPBinary_rr<0b0110, 0b11, 0b011, "mulu.w11">;
+
+ def PMACCU_W_H11 : RVPTernary_rrr<0b0111, 0b01, 0b011, "pmaccu.w.h11">;
+ def MACCU_W11 : RVPTernary_rrr<0b0111, 0b11, 0b011, "maccu.w11">;
+
+ def PMULSU_W_H00 : RVPBinary_rr<0b1100, 0b01, 0b011, "pmulsu.w.h00">;
+ def MULSU_W00 : RVPBinary_rr<0b1100, 0b11, 0b011, "mulsu.w00">;
+
+ def PMACCSU_W_H00 : RVPTernary_rrr<0b1101, 0b01, 0b011, "pmaccsu.w.h00">;
+ def MACCSU_W00 : RVPTernary_rrr<0b1101, 0b11, 0b011, "maccsu.w00">;
+
+ def PMULSU_W_H11 : RVPBinary_rr<0b1110, 0b01, 0b011, "pmulsu.w.h11">;
+ def MULSU_W11 : RVPBinary_rr<0b1110, 0b11, 0b011, "mulsu.w11">;
+
+ def PMACCSU_W_H11 : RVPTernary_rrr<0b1111, 0b01, 0b011, "pmaccsu.w.h11">;
+ def MACCSU_W11 : RVPTernary_rrr<0b1111, 0b11, 0b011, "maccsu.w11">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+// Note the spec has a 3-bit f field in bits 30:28 with 0 in bit 27.
+// Here we include the 0 in the f field to reduce number of tablegen classes.
+let Predicates = [HasStdExtP] in {
+ def PPACK_H : RVPBinary_rr<0b0000, 0b00, 0b100, "ppack.h">;
+
+ def PPACKBT_H : RVPBinary_rr<0b0010, 0b00, 0b100, "ppackbt.h">;
+
+ def PPACKTB_H : RVPBinary_rr<0b0100, 0b00, 0b100, "ppacktb.h">;
+
+ def PPACKT_H : RVPBinary_rr<0b0110, 0b00, 0b100, "ppackt.h">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def PACKBT_RV32 : RVPBinary_rr<0b0010, 0b01, 0b100, "packbt">;
+
+ def PACKTB_RV32 : RVPBinary_rr<0b0100, 0b01, 0b100, "packtb">;
+
+ def PACKT_RV32 : RVPBinary_rr<0b0110, 0b01, 0b100, "packt">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PPACK_W : RVPBinary_rr<0b0000, 0b01, 0b100, "ppack.w">;
+
+ def PPACKBT_W : RVPBinary_rr<0b0010, 0b01, 0b100, "ppackbt.w">;
+ def PACKBT_RV64 : RVPBinary_rr<0b0010, 0b11, 0b100, "packbt">;
+
+ def PPACKTB_W : RVPBinary_rr<0b0100, 0b01, 0b100, "ppacktb.w">;
+ def PACKTB_RV64 : RVPBinary_rr<0b0100, 0b11, 0b100, "packtb">;
+
+ def PPACKT_W : RVPBinary_rr<0b0110, 0b01, 0b100, "ppackt.w">;
+ def PACKT_RV64 : RVPBinary_rr<0b0110, 0b11, 0b100, "packt">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+  def PM2ADD_H : RVPBinary_rr<0b0000, 0b00, 0b101, "pm2add.h">;
+  def PM4ADD_B : RVPBinary_rr<0b0000, 0b10, 0b101, "pm4add.b">;
+
+  def PM2ADDA_H : RVPTernary_rrr<0b0001, 0b00, 0b101, "pm2adda.h">;
+  def PM4ADDA_B : RVPTernary_rrr<0b0001, 0b10, 0b101, "pm4adda.b">;
+
+  def PM2ADD_HX : RVPBinary_rr<0b0010, 0b00, 0b101, "pm2add.hx">;
+
+  def PM2ADDA_HX : RVPTernary_rrr<0b0011, 0b00, 0b101, "pm2adda.hx">;
+
+  def PM2ADDU_H : RVPBinary_rr<0b0100, 0b00, 0b101, "pm2addu.h">;
+  def PM4ADDU_B : RVPBinary_rr<0b0100, 0b10, 0b101, "pm4addu.b">;
+
+  def PM2ADDAU_H : RVPTernary_rrr<0b0101, 0b00, 0b101, "pm2addau.h">;
+  def PM4ADDAU_B : RVPTernary_rrr<0b0101, 0b10, 0b101, "pm4addau.b">;
+
+  def PMQ2ADD_H : RVPBinary_rr<0b0110, 0b00, 0b101, "pmq2add.h">;
+  def PMQR2ADD_H : RVPBinary_rr<0b0110, 0b10, 0b101, "pmqr2add.h">;
+
+  def PMQ2ADDA_H : RVPTernary_rrr<0b0111, 0b00, 0b101, "pmq2adda.h">;
+  def PMQR2ADDA_H : RVPTernary_rrr<0b0111, 0b10, 0b101, "pmqr2adda.h">;
+
+  def PM2SUB_H : RVPBinary_rr<0b1000, 0b00, 0b101, "pm2sub.h">;
+  def PM2SADD_H : RVPBinary_rr<0b1000, 0b10, 0b101, "pm2sadd.h">;
+
+  def PM2SUBA_H : RVPTernary_rrr<0b1001, 0b00, 0b101, "pm2suba.h">;
+
+  def PM2SUB_HX : RVPBinary_rr<0b1010, 0b00, 0b101, "pm2sub.hx">;
+  def PM2SADD_HX : RVPBinary_rr<0b1010, 0b10, 0b101, "pm2sadd.hx">;
+
+  def PM2SUBA_HX : RVPTernary_rrr<0b1011, 0b00, 0b101, "pm2suba.hx">;
+
+  def PM2ADDSU_H : RVPBinary_rr<0b1100, 0b00, 0b101, "pm2addsu.h">;
+  def PM4ADDSU_B : RVPBinary_rr<0b1100, 0b10, 0b101, "pm4addsu.b">;
+
+  def PM2ADDASU_H : RVPTernary_rrr<0b1101, 0b00, 0b101, "pm2addasu.h">;
+  def PM4ADDASU_B : RVPTernary_rrr<0b1101, 0b10, 0b101, "pm4addasu.b">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def MQACC_H01 : RVPTernary_rrr<0b1111, 0b00, 0b101, "mqacc.h01">;
+ def MQRACC_H01 : RVPTernary_rrr<0b1111, 0b10, 0b101, "mqracc.h01">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+  def PM2ADD_W : RVPBinary_rr<0b0000, 0b01, 0b101, "pm2add.w">;
+  def PM4ADD_H : RVPBinary_rr<0b0000, 0b11, 0b101, "pm4add.h">;
+
+  def PM2ADDA_W : RVPTernary_rrr<0b0001, 0b01, 0b101, "pm2adda.w">;
+  def PM4ADDA_H : RVPTernary_rrr<0b0001, 0b11, 0b101, "pm4adda.h">;
+
+  def PM2ADD_WX : RVPBinary_rr<0b0010, 0b01, 0b101, "pm2add.wx">;
+
+  def PM2ADDA_WX : RVPTernary_rrr<0b0011, 0b01, 0b101, "pm2adda.wx">;
+
+  def PM2ADDU_W : RVPBinary_rr<0b0100, 0b01, 0b101, "pm2addu.w">;
+  def PM4ADDU_H : RVPBinary_rr<0b0100, 0b11, 0b101, "pm4addu.h">;
+
+  def PM2ADDAU_W : RVPTernary_rrr<0b0101, 0b01, 0b101, "pm2addau.w">;
+  def PM4ADDAU_H : RVPTernary_rrr<0b0101, 0b11, 0b101, "pm4addau.h">;
+
+  def PMQ2ADD_W : RVPBinary_rr<0b0110, 0b01, 0b101, "pmq2add.w">;
+  def PMQR2ADD_W : RVPBinary_rr<0b0110, 0b11, 0b101, "pmqr2add.w">;
+
+  def PMQ2ADDA_W : RVPTernary_rrr<0b0111, 0b01, 0b101, "pmq2adda.w">;
+  def PMQR2ADDA_W : RVPTernary_rrr<0b0111, 0b11, 0b101, "pmqr2adda.w">;
+
+  def PM2SUB_W : RVPBinary_rr<0b1000, 0b01, 0b101, "pm2sub.w">;
+
+  def PM2SUBA_W : RVPTernary_rrr<0b1001, 0b01, 0b101, "pm2suba.w">;
+
+  def PM2SUB_WX : RVPBinary_rr<0b1010, 0b01, 0b101, "pm2sub.wx">;
+
+  def PM2SUBA_WX : RVPTernary_rrr<0b1011, 0b01, 0b101, "pm2suba.wx">;
+
+  def PM2ADDSU_W : RVPBinary_rr<0b1100, 0b01, 0b101, "pm2addsu.w">;
+  def PM4ADDSU_H : RVPBinary_rr<0b1100, 0b11, 0b101, "pm4addsu.h">;
+
+  def PM2ADDASU_W : RVPTernary_rrr<0b1101, 0b01, 0b101, "pm2addasu.w">;
+  def PM4ADDASU_H : RVPTernary_rrr<0b1101, 0b11, 0b101, "pm4addasu.h">;
+
+  def PMQACC_W_H01 : RVPTernary_rrr<0b1111, 0b00, 0b101, "pmqacc.w.h01">;
+  def MQACC_W01 : RVPTernary_rrr<0b1111, 0b01, 0b101, "mqacc.w01">;
+
+  def PMQRACC_W_H01 : RVPTernary_rrr<0b1111, 0b10, 0b101, "pmqracc.w.h01">;
+  def MQRACC_W01 : RVPTernary_rrr<0b1111, 0b11, 0b101, "mqracc.w01">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PAS_HX : RVPBinary_rr<0b0000, 0b00, 0b110, "pas.hx">;
+ def PSA_HX : RVPBinary_rr<0b0000, 0b10, 0b110, "psa.hx">;
+
+ def PSAS_HX : RVPBinary_rr<0b0010, 0b00, 0b110, "psas.hx">;
+ def PSSA_HX : RVPBinary_rr<0b0010, 0b10, 0b110, "pssa.hx">;
+
+ def PAAS_HX : RVPBinary_rr<0b0011, 0b00, 0b110, "paas.hx">;
+ def PASA_HX : RVPBinary_rr<0b0011, 0b10, 0b110, "pasa.hx">;
+
+ def PMSEQ_H : RVPBinary_rr<0b1000, 0b00, 0b110, "pmseq.h">;
+ def PMSEQ_B : RVPBinary_rr<0b1000, 0b10, 0b110, "pmseq.b">;
+
+ def PMSLT_H : RVPBinary_rr<0b1010, 0b00, 0b110, "pmslt.h">;
+ def PMSLT_B : RVPBinary_rr<0b1010, 0b10, 0b110, "pmslt.b">;
+
+ def PMSLTU_H : RVPBinary_rr<0b1011, 0b00, 0b110, "pmsltu.h">;
+ def PMSLTU_B : RVPBinary_rr<0b1011, 0b10, 0b110, "pmsltu.b">;
+
+ def PMIN_H : RVPBinary_rr<0b1100, 0b00, 0b110, "pmin.h">;
+ def PMIN_B : RVPBinary_rr<0b1100, 0b10, 0b110, "pmin.b">;
+
+ def PMINU_H : RVPBinary_rr<0b1101, 0b00, 0b110, "pminu.h">;
+ def PMINU_B : RVPBinary_rr<0b1101, 0b10, 0b110, "pminu.b">;
+
+ def PMAX_H : RVPBinary_rr<0b1110, 0b00, 0b110, "pmax.h">;
+ def PMAX_B : RVPBinary_rr<0b1110, 0b10, 0b110, "pmax.b">;
+
+ def PMAXU_H : RVPBinary_rr<0b1111, 0b00, 0b110, "pmaxu.h">;
+ def PMAXU_B : RVPBinary_rr<0b1111, 0b10, 0b110, "pmaxu.b">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def MSEQ : RVPBinary_rr<0b1000, 0b01, 0b110, "mseq">;
+
+ def MSLT : RVPBinary_rr<0b1010, 0b01, 0b110, "mslt">;
+
+ def MSLTU : RVPBinary_rr<0b1011, 0b01, 0b110, "msltu">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PAS_WX : RVPBinary_rr<0b0000, 0b01, 0b110, "pas.wx">;
+ def PSA_WX : RVPBinary_rr<0b0000, 0b11, 0b110, "psa.wx">;
+
+ def PSAS_WX : RVPBinary_rr<0b0010, 0b01, 0b110, "psas.wx">;
+ def PSSA_WX : RVPBinary_rr<0b0010, 0b11, 0b110, "pssa.wx">;
+
+ def PAAS_WX : RVPBinary_rr<0b0011, 0b01, 0b110, "paas.wx">;
+ def PASA_WX : RVPBinary_rr<0b0011, 0b11, 0b110, "pasa.wx">;
+
+ def PMSEQ_W : RVPBinary_rr<0b1000, 0b01, 0b110, "pmseq.w">;
+
+ def PMSLT_W : RVPBinary_rr<0b1010, 0b01, 0b110, "pmslt.w">;
+
+ def PMSLTU_W : RVPBinary_rr<0b1011, 0b01, 0b110, "pmsltu.w">;
+
+ def PMIN_W : RVPBinary_rr<0b1100, 0b01, 0b110, "pmin.w">;
+
+ def PMINU_W : RVPBinary_rr<0b1101, 0b01, 0b110, "pminu.w">;
+
+ def PMAX_W : RVPBinary_rr<0b1110, 0b01, 0b110, "pmax.w">;
+
+ def PMAXU_W : RVPBinary_rr<0b1111, 0b01, 0b110, "pmaxu.w">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP] in {
+ def PMULH_H : RVPBinary_rr<0b0000, 0b00, 0b111, "pmulh.h">;
+ def PMULHR_H : RVPBinary_rr<0b0000, 0b10, 0b111, "pmulhr.h">;
+
+ def PMHACC_H : RVPTernary_rrr<0b0001, 0b00, 0b111, "pmhacc.h">;
+ def PMHRACC_H : RVPTernary_rrr<0b0001, 0b10, 0b111, "pmhracc.h">;
+
+ def PMULHU_H : RVPBinary_rr<0b0010, 0b00, 0b111, "pmulhu.h">;
+ def PMULHRU_H : RVPBinary_rr<0b0010, 0b10, 0b111, "pmulhru.h">;
+
+ def PMHACCU_H : RVPTernary_rrr<0b0011, 0b00, 0b111, "pmhaccu.h">;
+ def PMHRACCU_H : RVPTernary_rrr<0b0011, 0b10, 0b111, "pmhraccu.h">;
+
+ def PMULH_H_B0 : RVPBinary_rr<0b0100, 0b00, 0b111, "pmulh.h.b0">;
+ def PMULHSU_H_B0 : RVPBinary_rr<0b0100, 0b10, 0b111, "pmulhsu.h.b0">;
+
+ def PMHACC_H_B0 : RVPTernary_rrr<0b0101, 0b00, 0b111, "pmhacc.h.b0">;
+ def PMHACCSU_H_B0 : RVPTernary_rrr<0b0101, 0b10, 0b111, "pmhaccsu.h.b0">;
+
+ def PMULH_H_B1 : RVPBinary_rr<0b0110, 0b00, 0b111, "pmulh.h.b1">;
+ def PMULHSU_H_B1 : RVPBinary_rr<0b0110, 0b10, 0b111, "pmulhsu.h.b1">;
+
+ def PMHACC_H_B1 : RVPTernary_rrr<0b0111, 0b00, 0b111, "pmhacc.h.b1">;
+ def PMHACCSU_H_B1 : RVPTernary_rrr<0b0111, 0b10, 0b111, "pmhaccsu.h.b1">;
+
+ def PMULHSU_H : RVPBinary_rr<0b1000, 0b00, 0b111, "pmulhsu.h">;
+ def PMULHRSU_H : RVPBinary_rr<0b1000, 0b10, 0b111, "pmulhrsu.h">;
+
+ def PMHACCSU_H : RVPTernary_rrr<0b1001, 0b00, 0b111, "pmhaccsu.h">;
+ def PMHRACCSU_H : RVPTernary_rrr<0b1001, 0b10, 0b111, "pmhraccsu.h">;
+
+ def PMULQ_H : RVPBinary_rr<0b1010, 0b00, 0b111, "pmulq.h">;
+ def PMULQR_H : RVPBinary_rr<0b1010, 0b10, 0b111, "pmulqr.h">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def MULHR : RVPBinary_rr<0b0000, 0b11, 0b111, "mulhr">;
+
+ def MHACC : RVPTernary_rrr<0b0001, 0b01, 0b111, "mhacc">;
+ def MHRACC : RVPTernary_rrr<0b0001, 0b11, 0b111, "mhracc">;
+
+ def MULHRU : RVPBinary_rr<0b0010, 0b11, 0b111, "mulhru">;
+
+ def MHACCU : RVPTernary_rrr<0b0011, 0b01, 0b111, "mhaccu">;
+ def MHRACCU : RVPTernary_rrr<0b0011, 0b11, 0b111, "mhraccu">;
+
+ def MULH_H0 : RVPBinary_rr<0b0100, 0b01, 0b111, "mulh.h0">;
+ def MULHSU_H0 : RVPBinary_rr<0b0100, 0b11, 0b111, "mulhsu.h0">;
+
+ def MHACC_H0 : RVPTernary_rrr<0b0101, 0b01, 0b111, "mhacc.h0">;
+ def MHACCSU_H0 : RVPTernary_rrr<0b0101, 0b11, 0b111, "mhaccsu.h0">;
+
+ def MULH_H1 : RVPBinary_rr<0b0110, 0b01, 0b111, "mulh.h1">;
+ def MULHSU_H1 : RVPBinary_rr<0b0110, 0b11, 0b111, "mulhsu.h1">;
+
+ def MHACC_H1 : RVPTernary_rrr<0b0111, 0b01, 0b111, "mhacc.h1">;
+ def MHACCSU_H1 : RVPTernary_rrr<0b0111, 0b11, 0b111, "mhaccsu.h1">;
+
+ def MULHRSU : RVPBinary_rr<0b1000, 0b11, 0b111, "mulhrsu">;
+
+ def MHACCSU : RVPTernary_rrr<0b1001, 0b01, 0b111, "mhaccsu">;
+ def MHRACCSU : RVPTernary_rrr<0b1001, 0b11, 0b111, "mhraccsu">;
+
+ def MULQ : RVPBinary_rr<0b1010, 0b01, 0b111, "mulq">;
+ def MULQR : RVPBinary_rr<0b1010, 0b11, 0b111, "mulqr">;
+
+ def MQACC_H00 : RVPTernary_rrr<0b1101, 0b00, 0b111, "mqacc.h00">;
+ def MQRACC_H00 : RVPTernary_rrr<0b1101, 0b10, 0b111, "mqracc.h00">;
+
+ def MQACC_H11 : RVPTernary_rrr<0b1111, 0b00, 0b111, "mqacc.h11">;
+ def MQRACC_H11 : RVPTernary_rrr<0b1111, 0b10, 0b111, "mqracc.h11">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PMULH_W : RVPBinary_rr<0b0000, 0b01, 0b111, "pmulh.w">;
+ def PMULHR_W : RVPBinary_rr<0b0000, 0b11, 0b111, "pmulhr.w">;
+
+ def PMHACC_W : RVPTernary_rrr<0b0001, 0b01, 0b111, "pmhacc.w">;
+ def PMHRACC_W : RVPTernary_rrr<0b0001, 0b11, 0b111, "pmhracc.w">;
+
+ def PMULHU_W : RVPBinary_rr<0b0010, 0b01, 0b111, "pmulhu.w">;
+ def PMULHRU_W : RVPBinary_rr<0b0010, 0b11, 0b111, "pmulhru.w">;
+
+ def PMHACCU_W : RVPTernary_rrr<0b0011, 0b01, 0b111, "pmhaccu.w">;
+ def PMHRACCU_W : RVPTernary_rrr<0b0011, 0b11, 0b111, "pmhraccu.w">;
+
+ def PMULH_W_H0 : RVPBinary_rr<0b0100, 0b01, 0b111, "pmulh.w.h0">;
+ def PMULHSU_W_H0 : RVPBinary_rr<0b0100, 0b11, 0b111, "pmulhsu.w.h0">;
+
+ def PMHACC_W_H0 : RVPTernary_rrr<0b0101, 0b01, 0b111, "pmhacc.w.h0">;
+ def PMHACCSU_W_H0 : RVPTernary_rrr<0b0101, 0b11, 0b111, "pmhaccsu.w.h0">;
+
+ def PMULH_W_H1 : RVPBinary_rr<0b0110, 0b01, 0b111, "pmulh.w.h1">;
+ def PMULHSU_W_H1 : RVPBinary_rr<0b0110, 0b11, 0b111, "pmulhsu.w.h1">;
+
+ def PMHACC_W_H1 : RVPTernary_rrr<0b0111, 0b01, 0b111, "pmhacc.w.h1">;
+ def PMHACCSU_W_H1 : RVPTernary_rrr<0b0111, 0b11, 0b111, "pmhaccsu.w.h1">;
+
+ def PMULHSU_W : RVPBinary_rr<0b1000, 0b01, 0b111, "pmulhsu.w">;
+ def PMULHRSU_W : RVPBinary_rr<0b1000, 0b11, 0b111, "pmulhrsu.w">;
+
+ def PMHACCSU_W : RVPTernary_rrr<0b1001, 0b01, 0b111, "pmhaccsu.w">;
+ def PMHRACCSU_W : RVPTernary_rrr<0b1001, 0b11, 0b111, "pmhraccsu.w">;
+
+ def PMULQ_W : RVPBinary_rr<0b1010, 0b01, 0b111, "pmulq.w">;
+ def PMULQR_W : RVPBinary_rr<0b1010, 0b11, 0b111, "pmulqr.w">;
+
+ def PMQACC_W_H00 : RVPTernary_rrr<0b1101, 0b00, 0b111, "pmqacc.w.h00">;
+ def MQACC_W00 : RVPTernary_rrr<0b1101, 0b01, 0b111, "mqacc.w00">;
+ def PMQRACC_W_H00 : RVPTernary_rrr<0b1101, 0b10, 0b111, "pmqracc.w.h00">;
+ def MQRACC_W00 : RVPTernary_rrr<0b1101, 0b11, 0b111, "mqracc.w00">;
+
+ def PMQACC_W_H11 : RVPTernary_rrr<0b1111, 0b00, 0b111, "pmqacc.w.h11">;
+ def MQACC_W11 : RVPTernary_rrr<0b1111, 0b01, 0b111, "mqacc.w11">;
+ def PMQRACC_W_H11 : RVPTernary_rrr<0b1111, 0b10, 0b111, "pmqracc.w.h11">;
+ def MQRACC_W11 : RVPTernary_rrr<0b1111, 0b11, 0b111, "mqracc.w11">;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ def PLI_DH : RVPPairLoadImm_i<0b0011000, (ins simm10:$imm10), "pli.dh",
+ "$rd, $imm10"> {
+ bits<10> imm10;
+
+ let Inst{24-16} = imm10{8-0};
+ let Inst{15} = imm10{9};
+ }
+
+ def PLI_DB : RVPPairLoadImm_i<0b0011010, (ins simm8_unsigned:$imm8), "pli.db",
+ "$rd, $imm8"> {
+ bits<8> imm8;
+
+ let Inst{24} = 0b0;
+ let Inst{23-16} = imm8;
+ let Inst{15} = 0b0;
+ }
+
+ def PLUI_DH : RVPPairLoadImm_i<0b0111000, (ins simm10_unsigned:$imm10),
+ "plui.dh", "$rd, $imm10"> {
+ bits<10> imm10;
+
+ let Inst{24} = imm10{0};
+ let Inst{23-15} = imm10{9-1};
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 33c7138..8c0ebe6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -475,8 +475,8 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $vs1, $vm">;
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
-class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
- bit EarlyClobber = 0>
+class VMACVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
: RVInstVV<funct6, opv, (outs VR:$vd_wb),
(ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $vs1, $vs2$vm"> {
@@ -505,8 +505,8 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $rs1, $vm">;
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
-class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
- bit EarlyClobber = 0>
+class VMACVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
: RVInstVX<funct6, opv, (outs VR:$vd_wb),
(ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $rs1, $vs2$vm"> {
@@ -549,8 +549,8 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $vs2, $rs1$vm">;
// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
-class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr,
- bit EarlyClobber = 0>
+class VMACVF<bits<6> funct6, RISCVVFormat opv, string opcodestr,
+ bit EarlyClobber = 0>
: RVInstVX<funct6, opv, (outs VR:$vd_wb),
(ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $rs1, $vs2$vm"> {
@@ -628,17 +628,17 @@ multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> {
}
multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
+ def V : VMACVV<funct6, OPMVV, opcodestr # ".vv">,
SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
"ReadVIMulAddV">;
- def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
+ def X : VMACVX<funct6, OPMVX, opcodestr # ".vx">,
SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
"ReadVIMulAddV">;
}
multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
let RVVConstraint = WidenV in
- def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
+ def X : VMACVX<funct6, OPMVX, opcodestr # ".vx", EarlyClobber=1>,
SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
"ReadVIWMulAddV">;
}
@@ -646,7 +646,7 @@ multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6>
: VWMAC_MV_X<opcodestr, funct6> {
let RVVConstraint = WidenV in
- def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>,
+ def V : VMACVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>,
SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
"ReadVIWMulAddV">;
}
@@ -743,20 +743,20 @@ multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> {
}
multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
+ def V : VMACVV<funct6, OPFVV, opcodestr # ".vv">,
SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
"ReadVFMulAddV">;
- def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
+ def F : VMACVF<funct6, OPFVF, opcodestr # ".vf">,
SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
"ReadVFMulAddV">;
}
multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> {
let RVVConstraint = WidenV in {
- def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>,
+ def V : VMACVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>,
SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV",
"ReadVFWMulAddV">;
- def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>,
+ def F : VMACVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>,
SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF",
"ReadVFWMulAddV">;
}
@@ -1703,8 +1703,9 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, /*slidesUp=*/true>;
defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
+let ReadsPastVL = 1 in
defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>;
-let ElementsDependOn = EltDepsVL in
+let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in
defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasVInstructions]
@@ -1712,19 +1713,19 @@ let Predicates = [HasVInstructionsAnyF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-let ElementsDependOn = EltDepsVL in
+let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in
defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasVInstructionsAnyF]
let Predicates = [HasVInstructions] in {
// Vector Register Gather Instruction
-let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, ReadsPastVL = 1 in {
defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>;
def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
SchedBinaryMC<"WriteVRGatherEI16VV",
"ReadVRGatherEI16VV_data",
"ReadVRGatherEI16VV_index">;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, ReadsPastVL = 1
// Vector Compress Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress, ElementsDependOn = EltDepsVLMask in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index c75addd9..1fb30a0b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -420,7 +420,7 @@ class NDSRVInstVD4DOT<bits<6> funct6, string opcodestr>
}
class NDSRVInstVBFHCvt<bits<5> vs1, string opcodestr>
- : RVInst<(outs VR:$vd), (ins VR:$vs2, VMaskOp:$vm),
+ : RVInst<(outs VR:$vd), (ins VR:$vs2),
opcodestr, "$vd, $vs2", [], InstFormatR> {
bits<5> vs2;
bits<5> vd;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
index 0c8487c..889ea98 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
@@ -129,20 +129,20 @@ class Mips_prefetch_ri<dag outs, dag ins, string opcodestr, string argstr>
// MIPS extensions
//===----------------------------------------------------------------------===//
let Predicates = [HasVendorXMIPSCBOP] ,DecoderNamespace = "Xmipscbop" in {
- def MIPS_PREFETCH : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint),
- "mips.pref", "$hint, ${imm9}(${rs1})">,
- Sched<[]>;
+ def MIPS_PREF : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint),
+ "mips.pref", "$hint, ${imm9}(${rs1})">,
+ Sched<[]>;
}
let Predicates = [HasVendorXMIPSCBOP] in {
// Prefetch Data Write.
def : Pat<(prefetch (AddrRegImm9 (XLenVT GPR:$rs1), uimm9:$imm9),
(i32 1), timm, (i32 1)),
- (MIPS_PREFETCH GPR:$rs1, uimm9:$imm9, 9)>;
+ (MIPS_PREF GPR:$rs1, uimm9:$imm9, 9)>;
// Prefetch Data Read.
def : Pat<(prefetch (AddrRegImm9 (XLenVT GPR:$rs1), uimm9:$imm9),
(i32 0), timm, (i32 1)),
- (MIPS_PREFETCH GPR:$rs1, uimm9:$imm9, 8)>;
+ (MIPS_PREF GPR:$rs1, uimm9:$imm9, 8)>;
}
let Predicates = [HasVendorXMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
index ebcf079..3a6ce3c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -58,7 +58,7 @@ class CustomRivosXVI<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Predicates = [HasVendorXRivosVizip], DecoderNamespace = "XRivos",
Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather,
- Inst<6-0> = OPC_CUSTOM_2.Value in {
+ Inst<6-0> = OPC_CUSTOM_2.Value, ReadsPastVL = 1 in {
defm RI_VZIPEVEN_V : VALU_IV_V<"ri.vzipeven", 0b001100>;
defm RI_VZIPODD_V : VALU_IV_V<"ri.vzipodd", 0b011100>;
defm RI_VZIP2A_V : VALU_IV_V<"ri.vzip2a", 0b000100>;
@@ -126,6 +126,7 @@ def RI_VINSERT : CustomRivosVXI<0b010000, OPMVX, (outs VR:$vd_wb),
(ins VR:$vd, GPR:$rs1, uimm5:$imm),
"ri.vinsert.v.x", "$vd, $rs1, $imm">;
+let ReadsPastVL = 1 in
def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd),
(ins VR:$vs2, uimm5:$imm),
"ri.vextract.x.v", "$rd, $vs2, $imm">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index a47dfe3..b546339 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -74,6 +74,7 @@ class RVInstVCCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
let Uses = [VL, VTYPE];
let RVVConstraint = NoConstraint;
let ElementsDependOn = EltDepsVLMask;
+ let ReadsPastVL = 1;
}
class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
@@ -98,6 +99,7 @@ class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
let Uses = [VL, VTYPE];
let RVVConstraint = NoConstraint;
let ElementsDependOn = EltDepsVLMask;
+ let ReadsPastVL = 1;
}
class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index 66cb2d5..a5ee701 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -65,6 +65,7 @@ class SFInstTileMemOp<dag outs, dag ins, bits<3> nf, RISCVOpcode opcode,
let Inst{6-0} = opcode.Value;
let Uses = [VTYPE, VL];
+ let ReadsPastVL = 1;
}
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
@@ -94,6 +95,7 @@ class SFInstTileMoveOp<bits<6> funct6, dag outs, dag ins, string opcodestr,
let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
+ let ReadsPastVL = 1;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -113,6 +115,7 @@ class SFInstMatmulF<dag outs, dag ins, string opcodestr, string argstr>
let Inst{6-0} = OPC_OP_VE.Value;
let Uses = [VTYPE, VL];
+ let ReadsPastVL = 1;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -135,6 +138,7 @@ class SFInstMatmulF8<bit a, bit b, dag outs, dag ins,
let Inst{6-0} = OPC_OP_VE.Value;
let Uses = [VTYPE, VL];
+ let ReadsPastVL = 1;
}
@@ -167,6 +171,7 @@ class SFInstMatmulI8<bit funct6_1, bit a, bit b, dag outs, dag ins,
let Inst{6-0} = OPC_OP_VE.Value;
let Uses = [VTYPE, VL];
+ let ReadsPastVL = 1;
}
class I8Encode<bit encoding, string name> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td
new file mode 100644
index 0000000..0f9b795
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td
@@ -0,0 +1,141 @@
+//===-- RISCVInstrInfoXSpacemiT.td -------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the vendor extensions defined by SpacemiT.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand definitions.
+//===----------------------------------------------------------------------===//
+
+class SMTVDotOpcode<bits<7> val> {
+ bits<7> Value = val;
+}
+
+class SMTVEncoding2<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def OPMMA : SMTVDotOpcode<0b1110001>;
+def OPMMA_SLIDE : SMTVDotOpcode<0b1110011>;
+
+//===----------------------------------------------------------------------===//
+// Vector Dot-Product Sign Encoding
+// Defines the signed/unsigned mixing modes for vector dot-product operations.
+// Encoding format: [1:0] bits
+// 00: UU (Unsigned x Unsigned)
+// 01: US (Unsigned x Signed)
+// 10: SU (Signed x Unsigned)
+// 11: SS (Signed x Signed)
+//===----------------------------------------------------------------------===//
+def SMT_VDot_UU : SMTVEncoding2<0b00>;
+def SMT_VDot_US : SMTVEncoding2<0b01>;
+def SMT_VDot_SU : SMTVEncoding2<0b10>;
+def SMT_VDot_SS : SMTVEncoding2<0b11>;
+
+//===----------------------------------------------------------------------===//
+// Vector Dot-Product Sliding Window Modes
+// Encoding format: [1:0] bits
+// 00: Slide1 (1-element sliding stride)
+// 01: Slide2 (2-element sliding stride)
+// 10: Slide3 (3-element sliding stride)
+// 11: Reserved
+//
+// Used in sliding-window dot-product operations:
+// vd = vs1 • vs2.slide{1|2|3} // • = dot product
+//===----------------------------------------------------------------------===//
+def SMT_VDot_Slide1 : SMTVEncoding2<0b00>;
+def SMT_VDot_Slide2 : SMTVEncoding2<0b01>;
+def SMT_VDot_Slide3 : SMTVEncoding2<0b10>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// Base vector dot product (no slide) format.
+class RVInstSMTVDot<SMTVEncoding2 sign, string opcodestr>
+ : RVInst<(outs VRM2:$vd), (ins VR:$vs1, VR:$vs2), opcodestr,
+ "$vd, $vs1, $vs2", [], InstFormatR> {
+ bits<5> vd;
+ bits<5> vs1;
+ bits<5> vs2;
+
+ let Inst{31-25} = OPMMA.Value;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = vs1;
+ let Inst{14} = 0b0;
+ let Inst{13-12} = sign.Value;
+ let Inst{11-8} = vd{4-1};
+ let Inst{7} = 0b0;
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+}
+
+// Sliding-window vector dot product format.
+class RVInstSMTVDotSlide<SMTVEncoding2 funct2, SMTVEncoding2 sign, string opcodestr>
+ : RVInst<(outs VRM2:$vd), (ins VRM2:$vs1, VR:$vs2), opcodestr,
+ "$vd, $vs1, $vs2", [], InstFormatR> {
+ bits<5> vd;
+ bits<5> vs1;
+ bits<5> vs2;
+
+ let Inst{31-25} = OPMMA_SLIDE.Value;
+ let Inst{24-20} = vs2;
+ let Inst{19-16} = vs1{4-1};
+ let Inst{15-14} = funct2.Value;
+ let Inst{13-12} = sign.Value;
+ let Inst{11-8} = vd{4-1};
+ let Inst{7} = 0b0;
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let DecoderNamespace = "XSMT" in {
+
+let Predicates = [HasVendorXSMTVDot], ElementsDependOn = EltDepsVL in {
+// Base vector dot product (no slide) instructions
+// NOTE: Destination registers (vd) MUST be even-numbered (v0, v2, ..., v30)
+// due to hardware alignment constraints. Using odd registers may cause undefined behavior.
+def SMT_VMADOT : RVInstSMTVDot<SMT_VDot_SS, "smt.vmadot">;
+def SMT_VMADOTU : RVInstSMTVDot<SMT_VDot_UU, "smt.vmadotu">;
+def SMT_VMADOTSU : RVInstSMTVDot<SMT_VDot_SU, "smt.vmadotsu">;
+def SMT_VMADOTUS : RVInstSMTVDot<SMT_VDot_US, "smt.vmadotus">;
+
+//===----------------------------------------------------------------------===//
+// Sliding-window Vector Dot Product Instructions
+//
+// The numeric suffix (1, 2, 3) specifies the stride of the sliding window:
+// 1: Window slides by 1 element per operation
+// 2: Window slides by 2 elements per operation
+// 3: Window slides by 3 elements per operation
+//
+// These instructions compute dot products with overlapping operand windows
+// where the window position increments by <N> elements between computations.
+//===----------------------------------------------------------------------===//
+// NOTE: Destination registers (vd) and first source register (vs1) MUST be
+// even-numbered (v0, v2, ..., v30) due to hardware alignment constraints.
+// Using odd registers may cause undefined behavior.
+def SMT_VMADOT1 : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_SS, "smt.vmadot1">;
+def SMT_VMADOT1U : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_UU, "smt.vmadot1u">;
+def SMT_VMADOT1SU : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_SU, "smt.vmadot1su">;
+def SMT_VMADOT1US : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_US, "smt.vmadot1us">;
+def SMT_VMADOT2 : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_SS, "smt.vmadot2">;
+def SMT_VMADOT2U : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_UU, "smt.vmadot2u">;
+def SMT_VMADOT2SU : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_SU, "smt.vmadot2su">;
+def SMT_VMADOT2US : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_US, "smt.vmadot2us">;
+def SMT_VMADOT3 : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_SS, "smt.vmadot3">;
+def SMT_VMADOT3U : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_UU, "smt.vmadot3u">;
+def SMT_VMADOT3SU : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_SU, "smt.vmadot3su">;
+def SMT_VMADOT3US : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_US, "smt.vmadot3us">;
+}
+} // DecoderNamespace = "XSMT"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index 77692f7..7cf6d5f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -70,7 +70,7 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>;
multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
list<Predicate> ExtraPreds = []> {
- let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in {
+ let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in {
def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
(vt GPR:$cmp),
(vt GPR:$new)),
@@ -91,7 +91,7 @@ multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
(vt GPR:$cmp),
(vt GPR:$new)),
(!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
- } // Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds)
+ } // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds)
let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in {
def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
(vt GPR:$cmp),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 413ad8b..2abd3e6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -399,7 +399,7 @@ def MAX : ALU_rr<0b0000101, 0b110, "max", Commutable=1>,
Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>,
Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
-} // Predicates = [HasStdExtZbb]
+} // Predicates = [HasStdExtZbbOrP]
let Predicates = [HasStdExtZbkbOrP] in
def PACK : ALU_rr<0b0000100, 0b100, "pack">,
@@ -692,6 +692,21 @@ def : Pat<(binop_allwusers<or>
(shl GPR:$op1rs1, (XLenVT 24))),
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+
+def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)),
+ (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
+ (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))),
+ (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+
+// Match a pattern of 2 halfwords being inserted into bits [63:32], with
+// bits [31:0] coming from a zero extended value. We can use pack with packw for
+// bits [63:32]. If bits [63:31] can also be a packw, it can be matched
+// separately.
+def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
+ (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
+ (zexti32 (i64 GPR:$rs1))),
+ (PACK (XLenVT GPR:$rs1),
+ (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
let Predicates = [HasStdExtZbb, IsRV32] in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
index 32e7f96..76dc027 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
@@ -22,5 +22,5 @@ class CMOPInst<bits<3> imm3, string opcodestr>
foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in {
let Predicates = [HasStdExtZcmop] in
- def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
+ def C_MOP_ # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 0723b2f..3ddcb1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -58,11 +58,11 @@ let Predicates = [HasStdExtZicboz] in {
def CBO_ZERO : CBO_r<0b000000000100, "cbo.zero">, Sched<[]>;
} // Predicates = [HasStdExtZicboz]
-let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in {
+let Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] in {
def PREFETCH_I : Prefetch_ri<0b00000, "prefetch.i">, Sched<[]>;
def PREFETCH_R : Prefetch_ri<0b00001, "prefetch.r">, Sched<[]>;
def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
-} // Predicates = [HasStdExtZicbop]
+} // Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP]
//===----------------------------------------------------------------------===//
// Patterns
@@ -70,7 +70,7 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">;
-let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in {
+let Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] in {
def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
timm, timm, (i32 0)),
(PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>;
@@ -80,4 +80,4 @@ let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in {
def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12),
(i32 1), timm, (i32 1)),
(PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>;
-}
+} // Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
index 960f566..0d08176 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
@@ -33,13 +33,13 @@ class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcod
}
// May-Be-Operations
-def riscv_mopr : RVSDNode<"MOPR",
- SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>>;
-def riscv_moprr : RVSDNode<"MOPRR",
- SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>]>>;
+def riscv_mop_r : RVSDNode<"MOP_R",
+ SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>>;
+def riscv_mop_rr : RVSDNode<"MOP_RR",
+ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
@@ -50,31 +50,32 @@ class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3,
RISCVOpcode opcode, string opcodestr>
- : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+ : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2),
opcodestr, "$rd, $rs1, $rs2">;
foreach i = 0...31 in {
let Predicates = [HasStdExtZimop] in
- def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
- Sched<[]>;
+ def MOP_R_#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
+ Sched<[]>;
}
foreach i = 0...7 in {
let Predicates = [HasStdExtZimop] in
- def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
+ def MOP_RR_#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
Sched<[]>;
}
let Predicates = [HasStdExtZimop] in {
// Zimop instructions
foreach i = 0...31 in {
- def : Pat<(XLenVT (riscv_mopr GPR:$rs1, (XLenVT i))),
- (!cast<Instruction>("MOPR"#i) GPR:$rs1)>;
+ def : Pat<(XLenVT (riscv_mop_r GPR:$rs1, (XLenVT i))),
+ (!cast<Instruction>("MOP_R_"#i) GPR:$rs1)>;
}
foreach i = 0...7 in {
- def : Pat<(XLenVT (riscv_moprr GPR:$rs1, GPR:$rs2, (XLenVT i))),
- (!cast<Instruction>("MOPRR"#i) GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(XLenVT (riscv_mop_rr GPR:$rs1, GPR:$rs2, (XLenVT i))),
+ (!cast<Instruction>("MOP_RR_"#i) GPR:$rs1, GPR:$rs2)>;
}
} // Predicates = [HasStdExtZimop]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
index 27959ea..64fd508 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
@@ -17,16 +17,39 @@
// Instructions
//===----------------------------------------------------------------------===//
+class VQDOTVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $vs1$vm"> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$vd = $vd_wb";
+}
+
+class VQDOTVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs2, GPR:$rs1, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $rs1$vm"> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$vd = $vd_wb";
+}
+
let Predicates = [HasStdExtZvqdotq] in {
- def VQDOT_VV : VALUVV<0b101100, OPMVV, "vqdot.vv">;
- def VQDOT_VX : VALUVX<0b101100, OPMVX, "vqdot.vx">;
- def VQDOTU_VV : VALUVV<0b101000, OPMVV, "vqdotu.vv">;
- def VQDOTU_VX : VALUVX<0b101000, OPMVX, "vqdotu.vx">;
- def VQDOTSU_VV : VALUVV<0b101010, OPMVV, "vqdotsu.vv">;
- def VQDOTSU_VX : VALUVX<0b101010, OPMVX, "vqdotsu.vx">;
- def VQDOTUS_VX : VALUVX<0b101110, OPMVX, "vqdotus.vx">;
+ def VQDOT_VV : VQDOTVV<0b101100, OPMVV, "vqdot.vv">;
+ def VQDOT_VX : VQDOTVX<0b101100, OPMVX, "vqdot.vx">;
+ def VQDOTU_VV : VQDOTVV<0b101000, OPMVV, "vqdotu.vv">;
+ def VQDOTU_VX : VQDOTVX<0b101000, OPMVX, "vqdotu.vx">;
+ def VQDOTSU_VV : VQDOTVV<0b101010, OPMVV, "vqdotsu.vv">;
+ def VQDOTSU_VX : VQDOTVX<0b101010, OPMVX, "vqdotsu.vx">;
+ def VQDOTUS_VX : VQDOTVX<0b101110, OPMVX, "vqdotus.vx">;
} // Predicates = [HasStdExtZvqdotq]
+//===----------------------------------------------------------------------===//
+// Helpers to define the VL patterns.
+//===----------------------------------------------------------------------===//
let HasPassthruOp = true, HasMaskOp = true in {
def riscv_vqdot_vl : RVSDNode<"VQDOT_VL", SDT_RISCVIntBinOp_VL>;
@@ -34,6 +57,10 @@ let HasPassthruOp = true, HasMaskOp = true in {
def riscv_vqdotsu_vl : RVSDNode<"VQDOTSU_VL", SDT_RISCVIntBinOp_VL>;
} // let HasPassthruOp = true, HasMaskOp = true
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for CodeGen
+//===----------------------------------------------------------------------===//
+
multiclass VPseudoVQDOT_VV_VX {
foreach m = MxSet<32>.m in {
defm "" : VPseudoBinaryV_VV<m>,
@@ -54,6 +81,10 @@ let Predicates = [HasStdExtZvqdotq], mayLoad = 0, mayStore = 0,
defm PseudoVQDOTSU : VPseudoVQDOT_VV_VX;
}
+//===----------------------------------------------------------------------===//
+// Patterns.
+//===----------------------------------------------------------------------===//
+
defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8];
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>;
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 726920e..5e10631 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -63,6 +63,12 @@ static const Intrinsic::ID FixedVlsegIntrIds[] = {
Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
Intrinsic::riscv_seg8_load_mask};
+static const Intrinsic::ID FixedVlssegIntrIds[] = {
+ Intrinsic::riscv_sseg2_load_mask, Intrinsic::riscv_sseg3_load_mask,
+ Intrinsic::riscv_sseg4_load_mask, Intrinsic::riscv_sseg5_load_mask,
+ Intrinsic::riscv_sseg6_load_mask, Intrinsic::riscv_sseg7_load_mask,
+ Intrinsic::riscv_sseg8_load_mask};
+
static const Intrinsic::ID ScalableVlsegIntrIds[] = {
Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
@@ -75,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
Intrinsic::riscv_seg8_store_mask};
+static const Intrinsic::ID FixedVsssegIntrIds[] = {
+ Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
+ Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
+ Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
+ Intrinsic::riscv_sseg8_store_mask};
+
static const Intrinsic::ID ScalableVssegIntrIds[] = {
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -197,9 +209,15 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
- ArrayRef<unsigned> Indices, unsigned Factor) const {
+ ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {
assert(Indices.size() == Shuffles.size());
+ assert(GapMask.getBitWidth() == Factor);
+ // We only support cases where the skipped fields are the trailing ones.
+ // TODO: Lower to strided load if there is only a single active field.
+ unsigned MaskFactor = GapMask.popcount();
+ if (MaskFactor < 2 || !GapMask.isMask())
+ return false;
IRBuilder<> Builder(Load);
const DataLayout &DL = Load->getDataLayout();
@@ -208,20 +226,37 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Value *Ptr, *VL;
Align Alignment;
- if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+ if (!getMemOperands(MaskFactor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
- if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
return false;
- CallInst *VlsegN = Builder.CreateIntrinsic(
- FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
+ CallInst *SegLoad = nullptr;
+ if (MaskFactor < Factor) {
+ // Lower to strided segmented load.
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
+ Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
+ SegLoad = Builder.CreateIntrinsic(FixedVlssegIntrIds[MaskFactor - 2],
+ {VTy, PtrTy, XLenTy, XLenTy},
+ {Ptr, Stride, Mask, VL});
+ } else {
+ // Lower to normal segmented load.
+ SegLoad = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
+ {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
+ }
for (unsigned i = 0; i < Shuffles.size(); i++) {
- Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
- Shuffles[i]->replaceAllUsesWith(SubVec);
+ unsigned FactorIdx = Indices[i];
+ if (FactorIdx >= MaskFactor) {
+ // Replace masked-off factors (that are still extracted) with poison.
+ Shuffles[i]->replaceAllUsesWith(PoisonValue::get(VTy));
+ } else {
+ Value *SubVec = Builder.CreateExtractValue(SegLoad, FactorIdx);
+ Shuffles[i]->replaceAllUsesWith(SubVec);
+ }
}
return true;
@@ -246,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
+ assert(GapMask.getBitWidth() == Factor);
+
+ // We only support cases where the skipped fields are the trailing ones.
+ // TODO: Lower to strided store if there is only a single active field.
+ unsigned MaskFactor = GapMask.popcount();
+ if (MaskFactor < 2 || !GapMask.isMask())
+ return false;
+
IRBuilder<> Builder(Store);
const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
@@ -258,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *Ptr, *VL;
Align Alignment;
- if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
+ if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
+ Alignment))
return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
- if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
return false;
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+ Function *SegStoreFunc;
+ if (MaskFactor < Factor)
+ // Strided segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
+ {VTy, PtrTy, XLenTy, XLenTy});
+ else
+ // Normal segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVssegIntrIds[Factor - 2],
+ {VTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;
- for (unsigned i = 0; i < Factor; i++) {
+ for (unsigned i = 0; i < MaskFactor; i++) {
// Collect shuffle mask for this lane.
for (unsigned j = 0; j < VTy->getNumElements(); j++)
NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -283,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
NewShuffleMask.clear();
}
- Ops.append({Ptr, LaneMask, VL});
- Builder.CreateCall(VssegNFunc, Ops);
+ Ops.push_back(Ptr);
+ if (MaskFactor < Factor) {
+ // Insert the stride argument.
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
+ Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
+ }
+ Ops.append({LaneMask, VL});
+ Builder.CreateCall(SegStoreFunc, Ops);
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
index 7a2541a..efea1b4 100644
--- a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
@@ -26,6 +26,7 @@ struct RISCVMoveMerge : public MachineFunctionPass {
RISCVMoveMerge() : MachineFunctionPass(ID) {}
+ const RISCVSubtarget *ST;
const RISCVInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -37,15 +38,15 @@ struct RISCVMoveMerge : public MachineFunctionPass {
// Merge the two instructions indicated into a single pair instruction.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, unsigned Opcode);
+ MachineBasicBlock::iterator Paired, bool MoveFromSToA);
// Look for C.MV instruction that can be combined with
// the given instruction into CM.MVA01S or CM.MVSA01. Return the matching
// instruction if one exists.
MachineBasicBlock::iterator
- findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode,
+ findMatchingInst(MachineBasicBlock::iterator &MBBI, bool MoveFromSToA,
const DestSourcePair &RegPair);
- bool mergeMoveSARegPair(const RISCVSubtarget &STI, MachineBasicBlock &MBB);
+ bool mergeMoveSARegPair(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &Fn) override;
StringRef getPassName() const override { return RISCV_MOVE_MERGE_NAME; }
@@ -58,41 +59,21 @@ char RISCVMoveMerge::ID = 0;
INITIALIZE_PASS(RISCVMoveMerge, "riscv-move-merge", RISCV_MOVE_MERGE_NAME,
false, false)
-static bool isMoveFromAToS(unsigned Opcode) {
- switch (Opcode) {
- case RISCV::CM_MVA01S:
- case RISCV::QC_CM_MVA01S:
- return true;
- default:
- return false;
- }
-}
-
-static unsigned getMoveFromAToSOpcode(const RISCVSubtarget &STI) {
- if (STI.hasStdExtZcmp())
+static unsigned getMoveFromSToAOpcode(const RISCVSubtarget &ST) {
+ if (ST.hasStdExtZcmp())
return RISCV::CM_MVA01S;
- if (STI.hasVendorXqccmp())
+ if (ST.hasVendorXqccmp())
return RISCV::QC_CM_MVA01S;
llvm_unreachable("Unhandled subtarget with paired A to S move.");
}
-static bool isMoveFromSToA(unsigned Opcode) {
- switch (Opcode) {
- case RISCV::CM_MVSA01:
- case RISCV::QC_CM_MVSA01:
- return true;
- default:
- return false;
- }
-}
-
-static unsigned getMoveFromSToAOpcode(const RISCVSubtarget &STI) {
- if (STI.hasStdExtZcmp())
+static unsigned getMoveFromAToSOpcode(const RISCVSubtarget &ST) {
+ if (ST.hasStdExtZcmp())
return RISCV::CM_MVSA01;
- if (STI.hasVendorXqccmp())
+ if (ST.hasVendorXqccmp())
return RISCV::QC_CM_MVSA01;
llvm_unreachable("Unhandled subtarget with paired S to A move");
@@ -123,20 +104,22 @@ bool RISCVMoveMerge::isCandidateToMergeMVSA01(const DestSourcePair &RegPair) {
MachineBasicBlock::iterator
RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- unsigned Opcode) {
+ bool MoveFromSToA) {
const MachineOperand *Sreg1, *Sreg2;
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value();
DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value();
- Register ARegInFirstPair = isMoveFromAToS(Opcode)
- ? FirstPair.Destination->getReg()
- : FirstPair.Source->getReg();
if (NextI == Paired)
NextI = next_nodbg(NextI, E);
DebugLoc DL = I->getDebugLoc();
+ // Make a copy so we can update the kill flag in the MoveFromSToA case. The
+ // copied operand needs to be scoped outside the if since we make a pointer
+ // to it.
+ MachineOperand PairedSource = *PairedRegs.Source;
+
// The order of S-reg depends on which instruction holds A0, instead of
// the order of register pair.
// e,g.
@@ -145,13 +128,26 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
//
// mv a0, s2
// mv a1, s1 => cm.mva01s s2,s1
- bool StartWithX10 = ARegInFirstPair == RISCV::X10;
- if (isMoveFromAToS(Opcode)) {
- Sreg1 = StartWithX10 ? FirstPair.Source : PairedRegs.Source;
- Sreg2 = StartWithX10 ? PairedRegs.Source : FirstPair.Source;
+ unsigned Opcode;
+ if (MoveFromSToA) {
+ // We are moving one of the copies earlier so its kill flag may become
+ // invalid. Clear the copied kill flag if there are any reads of the
+ // register between the new location and the old location.
+ for (auto It = std::next(I); It != Paired && PairedSource.isKill(); ++It)
+ if (It->readsRegister(PairedSource.getReg(), TRI))
+ PairedSource.setIsKill(false);
+
+ Opcode = getMoveFromSToAOpcode(*ST);
+ Sreg1 = FirstPair.Source;
+ Sreg2 = &PairedSource;
+ if (FirstPair.Destination->getReg() != RISCV::X10)
+ std::swap(Sreg1, Sreg2);
} else {
- Sreg1 = StartWithX10 ? FirstPair.Destination : PairedRegs.Destination;
- Sreg2 = StartWithX10 ? PairedRegs.Destination : FirstPair.Destination;
+ Opcode = getMoveFromAToSOpcode(*ST);
+ Sreg1 = FirstPair.Destination;
+ Sreg2 = PairedRegs.Destination;
+ if (FirstPair.Source->getReg() != RISCV::X10)
+ std::swap(Sreg1, Sreg2);
}
BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
@@ -163,7 +159,7 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI,
- unsigned InstOpcode,
+ bool MoveFromSToA,
const DestSourcePair &RegPair) {
MachineBasicBlock::iterator E = MBBI->getParent()->end();
@@ -181,26 +177,20 @@ RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI,
Register SourceReg = SecondPair->Source->getReg();
Register DestReg = SecondPair->Destination->getReg();
- if (isMoveFromAToS(InstOpcode) && isCandidateToMergeMVA01S(*SecondPair)) {
- // If register pair is valid and destination registers are different.
- if ((RegPair.Destination->getReg() == DestReg))
+ bool IsCandidate = MoveFromSToA ? isCandidateToMergeMVA01S(*SecondPair)
+ : isCandidateToMergeMVSA01(*SecondPair);
+ if (IsCandidate) {
+ // Second destination must be different.
+ if (RegPair.Destination->getReg() == DestReg)
return E;
- // If paired destination register was modified or used, the source reg
- // was modified, there is no possibility of finding matching
- // instruction so exit early.
- if (!ModifiedRegUnits.available(DestReg) ||
- !UsedRegUnits.available(DestReg) ||
- !ModifiedRegUnits.available(SourceReg))
- return E;
-
- return I;
- } else if (isMoveFromSToA(InstOpcode) &&
- isCandidateToMergeMVSA01(*SecondPair)) {
- if ((RegPair.Source->getReg() == SourceReg) ||
- (RegPair.Destination->getReg() == DestReg))
+ // For AtoS the source must also be different.
+ if (!MoveFromSToA && RegPair.Source->getReg() == SourceReg)
return E;
+ // If paired destination register was modified or used, the source reg
+ // was modified, there is no possibility of finding matching
+ // instruction so exit early.
if (!ModifiedRegUnits.available(DestReg) ||
!UsedRegUnits.available(DestReg) ||
!ModifiedRegUnits.available(SourceReg))
@@ -217,8 +207,7 @@ RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI,
// Finds instructions, which could be represented as C.MV instructions and
// merged into CM.MVA01S or CM.MVSA01.
-bool RISCVMoveMerge::mergeMoveSARegPair(const RISCVSubtarget &STI,
- MachineBasicBlock &MBB) {
+bool RISCVMoveMerge::mergeMoveSARegPair(MachineBasicBlock &MBB) {
bool Modified = false;
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
@@ -227,22 +216,17 @@ bool RISCVMoveMerge::mergeMoveSARegPair(const RISCVSubtarget &STI,
// can, return Dest/Src register pair.
auto RegPair = TII->isCopyInstrImpl(*MBBI);
if (RegPair.has_value()) {
- unsigned Opcode = 0;
-
- if (isCandidateToMergeMVA01S(*RegPair))
- Opcode = getMoveFromAToSOpcode(STI);
- else if (isCandidateToMergeMVSA01(*RegPair))
- Opcode = getMoveFromSToAOpcode(STI);
- else {
+ bool MoveFromSToA = isCandidateToMergeMVA01S(*RegPair);
+ if (!MoveFromSToA && !isCandidateToMergeMVSA01(*RegPair)) {
++MBBI;
continue;
}
MachineBasicBlock::iterator Paired =
- findMatchingInst(MBBI, Opcode, RegPair.value());
+ findMatchingInst(MBBI, MoveFromSToA, RegPair.value());
// If matching instruction can be found merge them.
if (Paired != E) {
- MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+ MBBI = mergePairedInsns(MBBI, Paired, MoveFromSToA);
Modified = true;
continue;
}
@@ -256,12 +240,12 @@ bool RISCVMoveMerge::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
- const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
- if (!(Subtarget->hasStdExtZcmp() || Subtarget->hasVendorXqccmp()))
+ ST = &Fn.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasStdExtZcmp() && !ST->hasVendorXqccmp())
return false;
- TII = Subtarget->getInstrInfo();
- TRI = Subtarget->getRegisterInfo();
+ TII = ST->getInstrInfo();
+ TRI = ST->getRegisterInfo();
// Resize the modified and used register unit trackers. We do this once
// per function and then clear the register units each time we optimize a
// move.
@@ -269,7 +253,7 @@ bool RISCVMoveMerge::runOnMachineFunction(MachineFunction &Fn) {
UsedRegUnits.init(*TRI);
bool Modified = false;
for (auto &MBB : Fn)
- Modified |= mergeMoveSARegPair(*Subtarget, MBB);
+ Modified |= mergeMoveSARegPair(MBB);
return Modified;
}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 31d2b3a..f89d94f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -673,6 +673,7 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60",
FeatureStdExtZvfh,
FeatureStdExtZvkt,
FeatureStdExtZvl256b,
+ FeatureVendorXSMTVDot,
FeatureUnalignedScalarMem]),
[TuneDLenFactor2,
TuneOptimizedNF2SegmentLoadStore,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 7e58b6f..40b6416 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -170,7 +170,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
if (Subtarget.hasStdExtE())
- report_fatal_error("Graal reserved registers do not exist in RVE");
+ reportFatalUsageError("Graal reserved registers do not exist in RVE");
markSuperRegs(Reserved, RISCV::X23_H);
markSuperRegs(Reserved, RISCV::X27_H);
}
@@ -216,7 +216,7 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
const int64_t NumOfVReg = Offset.getScalable() / 8;
const int64_t FixedOffset = NumOfVReg * VLENB;
if (!isInt<32>(FixedOffset)) {
- report_fatal_error(
+ reportFatalUsageError(
"Frame size outside of the signed 32-bit range not supported");
}
Offset = StackOffset::getFixed(FixedOffset + Offset.getFixed());
@@ -389,9 +389,25 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by
-// LMUL*VLENB bytes.
-void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
+static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned>
+getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) {
+ if (NumRemaining >= 8 && RegEncoding % 8 == 0)
+ return {RISCVVType::LMUL_8, RISCV::VRM8RegClass,
+ IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V};
+ if (NumRemaining >= 4 && RegEncoding % 4 == 0)
+ return {RISCVVType::LMUL_4, RISCV::VRM4RegClass,
+ IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V};
+ if (NumRemaining >= 2 && RegEncoding % 2 == 0)
+ return {RISCVVType::LMUL_2, RISCV::VRM2RegClass,
+ IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V};
+ return {RISCVVType::LMUL_1, RISCV::VRRegClass,
+ IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V};
+}
+
+// Split a VSPILLx_Mx/VRELOADx_Mx pseudo into multiple whole register stores
+// or loads separated by LMUL*VLENB bytes.
+void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+ bool IsSpill) const {
DebugLoc DL = II->getDebugLoc();
MachineBasicBlock &MBB = *II->getParent();
MachineFunction &MF = *MBB.getParent();
@@ -403,47 +419,11 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
unsigned NF = ZvlssegInfo->first;
unsigned LMUL = ZvlssegInfo->second;
- assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
- unsigned Opcode, SubRegIdx;
- switch (LMUL) {
- default:
- llvm_unreachable("LMUL must be 1, 2, or 4.");
- case 1:
- Opcode = RISCV::VS1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- break;
- case 2:
- Opcode = RISCV::VS2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- break;
- case 4:
- Opcode = RISCV::VS4R_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- break;
- }
- static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
- "Unexpected subreg numbering");
-
- Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- // Optimize for constant VLEN.
- if (auto VLEN = STI.getRealVLen()) {
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * LMUL;
- STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
- } else {
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
- }
+ unsigned NumRegs = NF * LMUL;
+ assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
- Register SrcReg = II->getOperand(0).getReg();
+ Register Reg = II->getOperand(0).getReg();
+ uint16_t RegEncoding = TRI->getEncodingValue(Reg);
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
@@ -451,100 +431,63 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
- auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
- for (unsigned I = 0; I < NF; ++I) {
- // Adding implicit-use of super register to describe we are using part of
- // super register, that prevents machine verifier complaining when part of
- // subreg is undef, see comment in MachineVerifier::checkLiveness for more
- // detail.
- BuildMI(MBB, II, DL, TII->get(Opcode))
- .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
- .addReg(Base, getKillRegState(I == NF - 1))
- .addMemOperand(NewMMO)
- .addReg(SrcReg, RegState::Implicit);
- if (I != NF - 1)
+ TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
+
+ Register VLENB = 0;
+ unsigned PreHandledNum = 0;
+ unsigned I = 0;
+ while (I != NumRegs) {
+ auto [LMulHandled, RegClass, Opcode] =
+ getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
+ auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
+ bool IsLast = I + RegNumHandled == NumRegs;
+ if (PreHandledNum) {
+ Register Step;
+ // Optimize for constant VLEN.
+ if (auto VLEN = STI.getRealVLen()) {
+ int64_t Offset = *VLEN / 8 * PreHandledNum;
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
+ } else {
+ if (!VLENB) {
+ VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
+ }
+ uint32_t ShiftAmount = Log2_32(PreHandledNum);
+ if (ShiftAmount == 0)
+ Step = VLENB;
+ else {
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
+ .addReg(VLENB, getKillRegState(IsLast))
+ .addImm(ShiftAmount);
+ }
+ }
+
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(VL, getKillRegState(I == NF - 2));
- Base = NewBase;
- }
- II->eraseFromParent();
-}
+ .addReg(Step, getKillRegState(Step != VLENB || IsLast));
+ Base = NewBase;
+ }
-// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by
-// LMUL*VLENB bytes.
-void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
- DebugLoc DL = II->getDebugLoc();
- MachineBasicBlock &MBB = *II->getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
- const TargetInstrInfo *TII = STI.getInstrInfo();
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, II, DL, TII->get(Opcode))
+ .addReg(ActualReg, getDefRegState(!IsSpill))
+ .addReg(Base, getKillRegState(IsLast))
+ .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
+ VRegSize * RegNumHandled));
- auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
- unsigned NF = ZvlssegInfo->first;
- unsigned LMUL = ZvlssegInfo->second;
- assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
- unsigned Opcode, SubRegIdx;
- switch (LMUL) {
- default:
- llvm_unreachable("LMUL must be 1, 2, or 4.");
- case 1:
- Opcode = RISCV::VL1RE8_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- break;
- case 2:
- Opcode = RISCV::VL2RE8_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- break;
- case 4:
- Opcode = RISCV::VL4RE8_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- break;
- }
- static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
- "Unexpected subreg numbering");
-
- Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- // Optimize for constant VLEN.
- if (auto VLEN = STI.getRealVLen()) {
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * LMUL;
- STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
- } else {
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
- }
+ // Adding implicit-use of super register to describe we are using part of
+ // super register, that prevents machine verifier complaining when part of
+ // subreg is undef, see comment in MachineVerifier::checkLiveness for more
+ // detail.
+ if (IsSpill)
+ MIB.addReg(Reg, RegState::Implicit);
- Register DestReg = II->getOperand(0).getReg();
- Register Base = II->getOperand(1).getReg();
- bool IsBaseKill = II->getOperand(1).isKill();
- Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- auto *OldMMO = *(II->memoperands_begin());
- LocationSize OldLoc = OldMMO->getSize();
- assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
- auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
- for (unsigned I = 0; I < NF; ++I) {
- BuildMI(MBB, II, DL, TII->get(Opcode),
- TRI->getSubReg(DestReg, SubRegIdx + I))
- .addReg(Base, getKillRegState(I == NF - 1))
- .addMemOperand(NewMMO);
- if (I != NF - 1)
- BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
- .addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(VL, getKillRegState(I == NF - 2));
- Base = NewBase;
+ PreHandledNum = RegNumHandled;
+ RegEncoding += RegNumHandled;
+ I += RegNumHandled;
}
II->eraseFromParent();
}
@@ -568,7 +511,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
if (!isInt<32>(Offset.getFixed())) {
- report_fatal_error(
+ reportFatalUsageError(
"Frame offsets outside of the signed 32-bit range not supported");
}
@@ -589,7 +532,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
(Lo12 & 0b11111) != 0) {
// Prefetch instructions require the offset to be 32 byte aligned.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
- } else if (Opc == RISCV::MIPS_PREFETCH && !isUInt<9>(Val)) {
+ } else if (Opc == RISCV::MIPS_PREF && !isUInt<9>(Val)) {
// MIPS Prefetch instructions require the offset to be 9 bits encoded.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
@@ -635,9 +578,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Handle spill/fill of synthetic register classes for segment operations to
- // ensure correctness in the edge case one gets spilled. There are many
- // possible optimizations here, but given the extreme rarity of such spills,
- // we prefer simplicity of implementation for now.
+ // ensure correctness in the edge case one gets spilled.
switch (MI.getOpcode()) {
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVSPILL2_M2:
@@ -650,7 +591,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVSPILL8_M1:
- lowerVSPILL(II);
+ lowerSegmentSpillReload(II, /*IsSpill=*/true);
return true;
case RISCV::PseudoVRELOAD2_M1:
case RISCV::PseudoVRELOAD2_M2:
@@ -663,7 +604,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVRELOAD6_M1:
case RISCV::PseudoVRELOAD7_M1:
case RISCV::PseudoVRELOAD8_M1:
- lowerVRELOAD(II);
+ lowerSegmentSpillReload(II, /*IsSpill=*/false);
return true;
}
@@ -1025,7 +966,9 @@ bool RISCVRegisterInfo::getRegAllocationHints(
}
}
- // Add a hint if it would allow auipc/lui+addi(w) fusion.
+ // Add a hint if it would allow auipc/lui+addi(w) fusion. We do this even
+ // without the fusions explicitly enabled as the impact is rarely negative
+ // and some cores do implement this fusion.
if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) &&
MI.getOperand(1).isReg()) {
const MachineBasicBlock &MBB = *MI.getParent();
@@ -1033,9 +976,7 @@ bool RISCVRegisterInfo::getRegAllocationHints(
// Is the previous instruction a LUI or AUIPC that can be fused?
if (I != MBB.begin()) {
I = skipDebugInstructionsBackward(std::prev(I), MBB.begin());
- if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) ||
- (I->getOpcode() == RISCV::AUIPC &&
- Subtarget.hasAUIPCADDIFusion())) &&
+ if ((I->getOpcode() == RISCV::LUI || I->getOpcode() == RISCV::AUIPC) &&
I->getOperand(0).getReg() == MI.getOperand(1).getReg()) {
if (OpIdx == 0)
tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false);
@@ -1052,3 +993,12 @@ bool RISCVRegisterInfo::getRegAllocationHints(
return BaseImplRetVal;
}
+
+Register
+RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
+ uint16_t Encoding) const {
+ MCRegister Reg = RISCV::V0 + Encoding;
+ if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
+ return Reg;
+ return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index b368399..2810139 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
- void lowerVSPILL(MachineBasicBlock::iterator II) const;
- void lowerVRELOAD(MachineBasicBlock::iterator II) const;
+ void lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+ bool IsSpill) const;
Register getFrameRegister(const MachineFunction &MF) const override;
@@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+ Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
+ uint16_t Encoding) const;
+
static bool isVRRegClass(const TargetRegisterClass *RC) {
return RISCVRI::isVRegClass(RC->TSFlags) &&
RISCVRI::getNF(RC->TSFlags) == 1;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
index 5ef858a..8cf15fa 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -24,7 +24,7 @@ let SchedModel = Andes45Model in {
//===----------------------------------------------------------------------===//
// Andes 45 series CPU
-// - 2 Interger Arithmetic and Logical Units (ALU)
+// - 2 Integer Arithmetic and Logical Units (ALU)
// - Multiply / Divide Unit (MDU)
// - Load Store Unit (LSU)
// - Control and Status Register Unit (CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index bf23812..24ebbc3 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -13,78 +13,113 @@
//
//===----------------------------------------------------------------------===//
-class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
- string LLMUL = LargestLMUL<MxList>.r;
- bit c = !eq(mx, LLMUL);
-}
+//===----------------------------------------------------------------------===//
+// Helpers
+
+// Maps LMUL string to corresponding value from the Values array
+// LMUL values map to array indices as follows:
+// MF8 -> Values[0], MF4 -> Values[1], MF2 -> Values[2], M1 -> Values[3],
+// M2 -> Values[4], M4 -> Values[5], M8 -> Values[6]
+// Shorter lists are allowed, e.g., widening instructions don't work on M8
+class GetLMULValue<list<int> Values, string LMUL> {
+ defvar Index = !cond(
+ !eq(LMUL, "MF8"): 0,
+ !eq(LMUL, "MF4"): 1,
+ !eq(LMUL, "MF2"): 2,
+ !eq(LMUL, "M1"): 3,
+ !eq(LMUL, "M2"): 4,
+ !eq(LMUL, "M4"): 5,
+ !eq(LMUL, "M8"): 6,
+ );
-class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
- string LLMUL = LargestLMUL<MxList>.r;
- int SSEW = SmallestSEW<mx, isF>.r;
- bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+ assert !lt(Index, !size(Values)),
+ "Missing LMUL value for '" # LMUL # "'. " #
+ "Expected at least " # !add(Index, 1) # " elements, but got " #
+ !size(Values) # ".";
+
+ int c = Values[Index];
}
-defvar SMX60VLEN = 256;
-defvar SMX60DLEN = !div(SMX60VLEN, 2);
+// Returns BaseValue for LMUL values before startLMUL, Value for startLMUL,
+// then doubles Value for each subsequent LMUL
+// Example: ConstValueUntilLMULThenDoubleBase<"M1", 2, 4, currentLMUL> yields,
+// as currentLMUL varies:
+// MF8->2, MF4->2, MF2->2, M1->4, M2->8, M4->16, M8->32
+class ConstValueUntilLMULThenDoubleBase<string startLMUL, int BaseValue, int Value, string currentLMUL> {
+ assert !le(BaseValue, Value), "BaseValue must be less-equal to Value";
+ defvar startPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], startLMUL>.c;
+ defvar currentPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], currentLMUL>.c;
+
+ // Calculate the difference in positions
+ defvar posDiff = !sub(currentPos, startPos);
-class Get1248Latency<string mx> {
+ // Calculate Value * (2^posDiff)
int c = !cond(
- !eq(mx, "M2") : 2,
- !eq(mx, "M4") : 4,
- !eq(mx, "M8") : 8,
- true: 1
+ !eq(posDiff, 0) : Value,
+ !eq(posDiff, 1) : !mul(Value, 2),
+ !eq(posDiff, 2) : !mul(Value, 4),
+ !eq(posDiff, 3) : !mul(Value, 8),
+ !eq(posDiff, 4) : !mul(Value, 16),
+ !eq(posDiff, 5) : !mul(Value, 32),
+ !eq(posDiff, 6) : !mul(Value, 64),
+ true : BaseValue
);
}
-// Used for: logical opsz, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides
-class Get4816Latency<string mx> {
- int c = !cond(
- !eq(mx, "M4") : 8,
- !eq(mx, "M8") : 16,
- true: 4
- );
+// Same as the previous function but BaseValue == Value
+class ConstValueUntilLMULThenDouble<string startLMUL, int Value, string currentLMUL> {
+ int c = ConstValueUntilLMULThenDoubleBase<startLMUL, Value, Value, currentLMUL>.c;
+}
+
+// Returns MF8->1, MF4->1, MF2->2, M1->4, M2->8, M4->16, M8->32
+class ConstOneUntilMF4ThenDouble<string mx> {
+ int c = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
}
+// Returns MF8->1, MF4->1, MF2->1, M1->2, M2->4, M4->8, M8->16
+class ConstOneUntilMF2ThenDouble<string mx> {
+ int c = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
+}
+
+// Returns MF8->1, MF4->1, MF2->1, M1->1, M2->2, M4->4, M8->8
+class ConstOneUntilM1ThenDouble<string mx> {
+ int c = ConstValueUntilLMULThenDouble<"M1", 1, mx>.c;
+}
+
+//===----------------------------------------------------------------------===//
+// Latency helper classes
+
// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max
-class Get458Latency<string mx> {
- int c = !cond(
- !eq(mx, "M4") : 5,
- !eq(mx, "M8") : 8,
- true: 4
- );
+class Get4458Latency<string mx> {
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/4, /*M4=*/5, /*M8=*/8], mx>.c;
}
-// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs
-// Used for: widening operations
+// Used for: widening operations (no M8)
class Get4588Latency<string mx> {
- int c = !cond(
- !eq(mx, "M2") : 5,
- !eq(mx, "M4") : 8,
- !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback
- true: 4
- );
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/5, /*M4=*/8], mx>.c;
}
// Used for: mask-producing comparisons, carry ops with mask, FP comparisons
class Get461018Latency<string mx> {
- int c = !cond(
- !eq(mx, "M2") : 6,
- !eq(mx, "M4") : 10,
- !eq(mx, "M8") : 18,
- true: 4
- );
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c;
}
-// Used for: e64 multiply pattern, complex ops
-class Get781632Latency<string mx> {
- int c = !cond(
- !eq(mx, "M2") : 8,
- !eq(mx, "M4") : 16,
- !eq(mx, "M8") : 32,
- true: 7
- );
+//===----------------------------------------------------------------------===//
+
+class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
+ string LLMUL = LargestLMUL<MxList>.r;
+ bit c = !eq(mx, LLMUL);
}
+class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
+ string LLMUL = LargestLMUL<MxList>.r;
+ int SSEW = SmallestSEW<mx, isF>.r;
+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+}
+
+defvar SMX60VLEN = 256;
+defvar SMX60DLEN = !div(SMX60VLEN, 2);
+
def SpacemitX60Model : SchedMachineModel {
let IssueWidth = 2; // dual-issue
let MicroOpBufferSize = 0; // in-order
@@ -383,12 +418,13 @@ foreach LMul = [1, 2, 4, 8] in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
}
- let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ let Latency = VIALULat, ReleaseAtCycles = [4] in {
// Pattern of vadd, vsub, vrsub: 4/4/5/8
// Pattern of vand, vor, vxor: 4/4/8/16
// They are grouped together, so we used the worst case 4/4/8/16
@@ -425,7 +461,7 @@ foreach mx = SchedMxList in {
// Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
// e64 = 7,8,16,32. We use the worst-case until we can split the SEW.
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
- let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in {
+ let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -461,15 +497,8 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- // Slightly reduced for fractional LMULs
- defvar Multiplier = !cond(
- !eq(mx, "MF8") : 12,
- !eq(mx, "MF4") : 12,
- !eq(mx, "MF2") : 12,
- true: 24
- );
-
- let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in {
+ defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
+ let Latency = VIDivLat, ReleaseAtCycles = [12] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
}
@@ -480,14 +509,8 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- // Slightly increased for integer LMULs
- defvar Multiplier = !cond(
- !eq(mx, "M2") : 2,
- !eq(mx, "M4") : 2,
- true: 1
- );
-
- let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in {
+ defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
+ let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -501,16 +524,33 @@ foreach mx = SchedMxListW in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ // Latency of vsmul: e8/e16 = 4/4/5/8, e32 = 5/5/5/8, e64 = 7/8/16/32
+ // We use the worst-case until we can split the SEW.
+ defvar VSMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
+  // Occupancy (ReleaseAtCycles) of vsmul: e8/e16/e32 = 1/2/4/8, e64 = 4/8/16/32
+ // We use the worst-case until we can split the SEW.
+ defvar VSMulOcc = ConstValueUntilLMULThenDoubleBase<"M1", 1, 4, mx>.c;
+ // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites
+ let Latency = VSMulLat, ReleaseAtCycles = [VSMulOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ defvar VSShiftLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VSShiftOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VSShiftLat, ReleaseAtCycles = [VSShiftOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
}
// 13. Vector Floating-Point Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 66ce134..d70b1d0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -38,7 +38,6 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h"
#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
#include <optional>
using namespace llvm;
@@ -107,6 +106,8 @@ static cl::opt<bool>
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+ RegisterTargetMachine<RISCVTargetMachine> A(getTheRISCV32beTarget());
+ RegisterTargetMachine<RISCVTargetMachine> B(getTheRISCV64beTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
initializeRISCVO0PreLegalizerCombinerPass(*PR);
@@ -140,21 +141,37 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVAsmPrinterPass(*PR);
}
-static StringRef computeDataLayout(const Triple &TT,
- const TargetOptions &Options) {
- StringRef ABIName = Options.MCOptions.getABIName();
- if (TT.isArch64Bit()) {
- if (ABIName == "lp64e")
- return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64";
+static std::string computeDataLayout(const Triple &TT,
+ const TargetOptions &Opts) {
+ std::string Ret;
+
+ if (TT.isLittleEndian())
+ Ret += "e";
+ else
+ Ret += "E";
+
+ Ret += "-m:e";
- return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+ // Pointer and integer sizes.
+ if (TT.isArch64Bit()) {
+ Ret += "-p:64:64-i64:64-i128:128";
+ Ret += "-n32:64";
+ } else {
+ assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+ Ret += "-p:32:32-i64:64";
+ Ret += "-n32";
}
- assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
- if (ABIName == "ilp32e")
- return "e-m:e-p:32:32-i64:64-n32-S32";
+ // Stack alignment based on ABI.
+ StringRef ABI = Opts.MCOptions.getABIName();
+ if (ABI == "ilp32e")
+ Ret += "-S32";
+ else if (ABI == "lp64e")
+ Ret += "-S64";
+ else
+ Ret += "-S128";
- return "e-m:e-p:32:32-i64:64-n32-S128";
+ return Ret;
}
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 67f924a..5e30018 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1431,7 +1431,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::ctlz:
case Intrinsic::ctpop: {
auto LT = getTypeLegalizationCost(RetTy);
- if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
+ if (ST->hasStdExtZvbb() && LT.second.isVector()) {
unsigned Op;
switch (ICA.getID()) {
case Intrinsic::cttz:
@@ -1629,6 +1629,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
// scalarized if the legalized Src and Dst are not equal sized.
const DataLayout &DL = this->getDataLayout();
if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
+ !SrcLT.first.isValid() || !DstLT.first.isValid() ||
!TypeSize::isKnownLE(DL.getTypeSizeInBits(Src),
SrcLT.second.getSizeInBits()) ||
!TypeSize::isKnownLE(DL.getTypeSizeInBits(Dst),
@@ -2414,6 +2415,24 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseCost + SlideCost;
}
+InstructionCost
+RISCVTTIImpl::getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) const {
+ if (isa<FixedVectorType>(Val))
+ return BaseT::getIndexedVectorInstrCostFromEnd(Opcode, Val, CostKind,
+ Index);
+
+ // TODO: This code replicates what LoopVectorize.cpp used to do when asking
+ // for the cost of extracting the last lane of a scalable vector. It probably
+ // needs a more accurate cost.
+ ElementCount EC = cast<VectorType>(Val)->getElementCount();
+ assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
+ return getVectorInstrCost(Opcode, Val, CostKind,
+ EC.getKnownMinValue() - 1 - Index, nullptr,
+ nullptr);
+}
+
InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
@@ -2712,6 +2731,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const {
return RVVMinTripCount;
}
+bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const {
+ return ST->enableUnalignedVectorMem();
+}
+
TTI::AddressingModeKind
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 05d504c..06fd8bb 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -114,6 +114,9 @@ public:
bool enableScalableVectorization() const override {
return ST->hasVInstructions();
}
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override {
+ return ST->hasVInstructions();
+ }
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
@@ -129,7 +132,7 @@ public:
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
- bool preferAlternateOpcodeVectorization() const override { return false; }
+ bool preferAlternateOpcodeVectorization() const override;
bool preferEpilogueVectorization() const override {
// Epilogue vectorization is usually unprofitable - tail folding or
@@ -240,6 +243,11 @@ public:
unsigned Index, const Value *Op0,
const Value *Op1) const override;
+ InstructionCost
+ getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) const override;
+
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
@@ -364,6 +372,8 @@ public:
switch (RdxDesc.getRecurrenceKind()) {
case RecurKind::Add:
+ case RecurKind::Sub:
+ case RecurKind::AddChainWithSubs:
case RecurKind::And:
case RecurKind::Or:
case RecurKind::Xor:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 37a71e8..01aef86 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -491,8 +491,42 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
// vfirst find-first-set mask bit
case RISCV::VCPOP_M:
case RISCV::VFIRST_M:
+ // Vector Bit-manipulation Instructions (Zvbb)
+ // Vector And-Not
+ case RISCV::VANDN_VV:
+ case RISCV::VANDN_VX:
+ // Vector Reverse Bits in Elements
+ case RISCV::VBREV_V:
+ // Vector Reverse Bits in Bytes
+ case RISCV::VBREV8_V:
+ // Vector Reverse Bytes
+ case RISCV::VREV8_V:
+ // Vector Count Leading Zeros
+ case RISCV::VCLZ_V:
+ // Vector Count Trailing Zeros
+ case RISCV::VCTZ_V:
+ // Vector Population Count
+ case RISCV::VCPOP_V:
+ // Vector Rotate Left
+ case RISCV::VROL_VV:
+ case RISCV::VROL_VX:
+ // Vector Rotate Right
+ case RISCV::VROR_VI:
+ case RISCV::VROR_VV:
+ case RISCV::VROR_VX:
+ // Vector Carry-less Multiplication Instructions (Zvbc)
+ // Vector Carry-less Multiply
+ case RISCV::VCLMUL_VV:
+ case RISCV::VCLMUL_VX:
+ // Vector Carry-less Multiply Return High Half
+ case RISCV::VCLMULH_VV:
+ case RISCV::VCLMULH_VX:
return MILog2SEW;
+ // Vector Widening Shift Left Logical (Zvbb)
+ case RISCV::VWSLL_VI:
+ case RISCV::VWSLL_VX:
+ case RISCV::VWSLL_VV:
// Vector Widening Integer Add/Subtract
// Def uses EEW=2*SEW . Operands use EEW=SEW.
case RISCV::VWADDU_VV:
@@ -503,9 +537,6 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VWADD_VX:
case RISCV::VWSUB_VV:
case RISCV::VWSUB_VX:
- case RISCV::VWSLL_VI:
- case RISCV::VWSLL_VX:
- case RISCV::VWSLL_VV:
// Vector Widening Integer Multiply Instructions
// Destination EEW=2*SEW. Source EEW=SEW.
case RISCV::VWMUL_VV:
@@ -816,13 +847,7 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VLUXEI64_V:
- case RISCV::VLOXEI64_V: {
- for (const MachineMemOperand *MMO : MI.memoperands())
- if (MMO->isVolatile())
- return false;
- return true;
- }
-
+ case RISCV::VLOXEI64_V:
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
case RISCV::VADD_VV:
@@ -1020,12 +1045,40 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VNCLIP_WV:
case RISCV::VNCLIP_WX:
case RISCV::VNCLIP_WI:
-
- // Vector Crypto
+ // Vector Bit-manipulation Instructions (Zvbb)
+ // Vector And-Not
+ case RISCV::VANDN_VV:
+ case RISCV::VANDN_VX:
+ // Vector Reverse Bits in Elements
+ case RISCV::VBREV_V:
+ // Vector Reverse Bits in Bytes
+ case RISCV::VBREV8_V:
+ // Vector Reverse Bytes
+ case RISCV::VREV8_V:
+ // Vector Count Leading Zeros
+ case RISCV::VCLZ_V:
+ // Vector Count Trailing Zeros
+ case RISCV::VCTZ_V:
+ // Vector Population Count
+ case RISCV::VCPOP_V:
+ // Vector Rotate Left
+ case RISCV::VROL_VV:
+ case RISCV::VROL_VX:
+ // Vector Rotate Right
+ case RISCV::VROR_VI:
+ case RISCV::VROR_VV:
+ case RISCV::VROR_VX:
+ // Vector Widening Shift Left Logical
case RISCV::VWSLL_VI:
case RISCV::VWSLL_VX:
case RISCV::VWSLL_VV:
-
+ // Vector Carry-less Multiplication Instructions (Zvbc)
+ // Vector Carry-less Multiply
+ case RISCV::VCLMUL_VV:
+ case RISCV::VCLMUL_VX:
+ // Vector Carry-less Multiply Return High Half
+ case RISCV::VCLMULH_VV:
+ case RISCV::VCLMULH_VX:
// Vector Mask Instructions
// Vector Mask-Register Logical Instructions
// vmsbf.m set-before-first mask bit
@@ -1213,34 +1266,6 @@ static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO) {
}
}
-/// Return true if MI may read elements past VL.
-static bool mayReadPastVL(const MachineInstr &MI) {
- const RISCVVPseudosTable::PseudoInfo *RVV =
- RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
- if (!RVV)
- return true;
-
- switch (RVV->BaseInstr) {
- // vslidedown instructions may read elements past VL. They are handled
- // according to current tail policy.
- case RISCV::VSLIDEDOWN_VI:
- case RISCV::VSLIDEDOWN_VX:
- case RISCV::VSLIDE1DOWN_VX:
- case RISCV::VFSLIDE1DOWN_VF:
-
- // vrgather instructions may read the source vector at any index < VLMAX,
- // regardless of VL.
- case RISCV::VRGATHER_VI:
- case RISCV::VRGATHER_VV:
- case RISCV::VRGATHER_VX:
- case RISCV::VRGATHEREI16_VV:
- return true;
-
- default:
- return false;
- }
-}
-
bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
@@ -1261,6 +1286,13 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return false;
}
+ for (const MachineMemOperand *MMO : MI.memoperands()) {
+ if (MMO->isVolatile()) {
+ LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n");
+ return false;
+ }
+ }
+
// Some instructions that produce vectors have semantics that make it more
// difficult to determine whether the VL can be reduced. For example, some
// instructions, such as reductions, may write lanes past VL to a scalar
@@ -1274,7 +1306,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
// TODO: Use a better approach than a white-list, such as adding
// properties to instructions using something like TSFlags.
if (!isSupportedInstr(MI)) {
- LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n");
+ LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
+ << MI);
return false;
}
@@ -1296,13 +1329,14 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
const MCInstrDesc &Desc = UserMI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
- LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
+ LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
" use VLMAX\n");
return std::nullopt;
}
- if (mayReadPastVL(UserMI)) {
- LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
+ if (RISCVII::readsPastVL(
+ TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
+ LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
return std::nullopt;
}
@@ -1319,7 +1353,7 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
auto DemandedVL = DemandedVLs.lookup(&UserMI);
if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
- LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
+ LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
"instruction with demanded tail\n");
return std::nullopt;
}
@@ -1416,7 +1450,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
}
bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
- LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
MachineOperand &VLOp = MI.getOperand(VLOpNum);
@@ -1436,13 +1470,13 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
"Expected VL to be an Imm or virtual Reg");
if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
- LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
+ LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
return false;
}
if (CommonVL->isIdenticalTo(VLOp)) {
LLVM_DEBUG(
- dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
+ dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
return false;
}
@@ -1453,8 +1487,10 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return true;
}
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
- if (!MDT->dominates(VLMI, &MI))
+ if (!MDT->dominates(VLMI, &MI)) {
+ LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
return false;
+ }
LLVM_DEBUG(
dbgs() << " Reduce VL from " << VLOp << " to "
<< printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index fc0965d..7b0afe4 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -21,10 +21,24 @@ Target &llvm::getTheRISCV64Target() {
return TheRISCV64Target;
}
+Target &llvm::getTheRISCV32beTarget() {
+ static Target TheRISCV32beTarget;
+ return TheRISCV32beTarget;
+}
+
+Target &llvm::getTheRISCV64beTarget() {
+ static Target TheRISCV64beTarget;
+ return TheRISCV64beTarget;
+}
+
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeRISCVTargetInfo() {
RegisterTarget<Triple::riscv32, /*HasJIT=*/true> X(
getTheRISCV32Target(), "riscv32", "32-bit RISC-V", "RISCV");
RegisterTarget<Triple::riscv64, /*HasJIT=*/true> Y(
getTheRISCV64Target(), "riscv64", "64-bit RISC-V", "RISCV");
+ RegisterTarget<Triple::riscv32be> A(getTheRISCV32beTarget(), "riscv32be",
+ "32-bit big endian RISC-V", "RISCV");
+ RegisterTarget<Triple::riscv64be> B(getTheRISCV64beTarget(), "riscv64be",
+ "64-bit big endian RISC-V", "RISCV");
}
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
index ed00a01..9b9fd2c 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -15,6 +15,8 @@ class Target;
Target &getTheRISCV32Target();
Target &getTheRISCV64Target();
+Target &getTheRISCV32beTarget();
+Target &getTheRISCV64beTarget();
} // namespace llvm