diff options
Diffstat (limited to 'llvm/lib/Target/RISCV')
57 files changed, 2454 insertions, 1037 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d71c42c..cd83928 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -121,7 +121,7 @@ class RISCVAsmParser : public MCTargetAsmParser { bool parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew, unsigned &Lmul, bool &Fractional, bool &TailAgnostic, - bool &MaskAgnostic); + bool &MaskAgnostic, bool &AltFmt); bool generateVTypeError(SMLoc ErrorLoc); bool generateXSfmmVTypeError(SMLoc ErrorLoc); @@ -903,6 +903,7 @@ public: VK == RISCV::S_QC_ABS20; } + bool isSImm8Unsigned() const { return isSImm<8>() || isUImm<8>(); } bool isSImm10Unsigned() const { return isSImm<10>() || isUImm<10>(); } bool isUImm20LUI() const { @@ -1199,6 +1200,14 @@ public: addExpr(Inst, getImm(), isRV64Imm()); } + void addSImm8UnsignedOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int64_t Imm; + [[maybe_unused]] bool IsConstant = evaluateConstantImm(getImm(), Imm); + assert(IsConstant); + Inst.addOperand(MCOperand::createImm(SignExtend64<8>(Imm))); + } + void addSImm10UnsignedOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); int64_t Imm; @@ -1547,6 +1556,9 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError( Operands, ErrorInfo, 0, (1 << 9) - 8, "immediate must be a multiple of 8 bytes in the range"); + case Match_InvalidSImm8Unsigned: + return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 7), + (1 << 8) - 1); case Match_InvalidSImm10: return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 9), (1 << 9) - 1); @@ -2249,14 +2261,23 @@ ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) { bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew, unsigned &Lmul, bool &Fractional, 
bool &TailAgnostic, - bool &MaskAgnostic) { + bool &MaskAgnostic, bool &AltFmt) { if (Tok.isNot(AsmToken::Identifier)) return true; StringRef Identifier = Tok.getIdentifier(); if (State < VTypeState::SeenSew && Identifier.consume_front("e")) { - if (Identifier.getAsInteger(10, Sew)) - return true; + if (Identifier.getAsInteger(10, Sew)) { + if (Identifier == "16alt") { + AltFmt = true; + Sew = 16; + } else if (Identifier == "8alt") { + AltFmt = true; + Sew = 8; + } else { + return true; + } + } if (!RISCVVType::isValidSEW(Sew)) return true; @@ -2328,11 +2349,12 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { bool Fractional = false; bool TailAgnostic = false; bool MaskAgnostic = false; + bool AltFmt = false; VTypeState State = VTypeState::SeenNothingYet; do { if (parseVTypeToken(getTok(), State, Sew, Lmul, Fractional, TailAgnostic, - MaskAgnostic)) { + MaskAgnostic, AltFmt)) { // The first time, errors return NoMatch rather than Failure if (State == VTypeState::SeenNothingYet) return ParseStatus::NoMatch; @@ -2358,12 +2380,17 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { } unsigned VTypeI = - RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic); + RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic, AltFmt); Operands.push_back(RISCVOperand::createVType(VTypeI, S)); return ParseStatus::Success; } bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) { + if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa)) + return Error( + ErrorLoc, + "operand must be " + "e[8|8alt|16|16alt|32|64],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]"); return Error( ErrorLoc, "operand must be " @@ -4053,4 +4080,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmParser() { RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target()); RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target()); + RegisterMCAsmParser<RISCVAsmParser> A(getTheRISCV32beTarget()); + RegisterMCAsmParser<RISCVAsmParser> 
B(getTheRISCV64beTarget()); } diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 67cc01e..dbb16fc 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDecoder.h" #include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" @@ -25,6 +26,7 @@ #include "llvm/Support/Endian.h" using namespace llvm; +using namespace llvm::MCD; #define DEBUG_TYPE "riscv-disassembler" @@ -72,6 +74,10 @@ LLVMInitializeRISCVDisassembler() { createRISCVDisassembler); TargetRegistry::RegisterMCDisassembler(getTheRISCV64Target(), createRISCVDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheRISCV32beTarget(), + createRISCVDisassembler); + TargetRegistry::RegisterMCDisassembler(getTheRISCV64beTarget(), + createRISCVDisassembler); } static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, @@ -552,16 +558,6 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm, return decodeZcmpRlist(Inst, Imm, Address, Decoder); } -static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -#include "RISCVGenDisassemblerTables.inc" - static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { @@ -602,6 +598,8 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, return S; } +#include "RISCVGenDisassemblerTables.inc" + // Add implied SP operand for C.*SP compressed instructions. The SP operand // isn't explicitly encoded in the instruction. 
void RISCVDisassembler::addSPOperands(MCInst &MI) const { @@ -672,8 +670,13 @@ static constexpr FeatureBitset XAndesGroup = { RISCV::FeatureVendorXAndesVSIntLoad, RISCV::FeatureVendorXAndesVPackFPH, RISCV::FeatureVendorXAndesVDot}; +static constexpr FeatureBitset XSMTGroup = {RISCV::FeatureVendorXSMTVDot}; + static constexpr DecoderListEntry DecoderList32[]{ // Vendor Extensions + {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, + {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, + {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, {DecoderTableXVentana32, {RISCV::FeatureVendorXVentanaCondOps}, "XVentanaCondOps"}, @@ -689,10 +692,8 @@ static constexpr DecoderListEntry DecoderList32[]{ {RISCV::FeatureVendorXMIPSCBOP}, "MIPS mips.pref"}, {DecoderTableXAndes32, XAndesGroup, "Andes extensions"}, + {DecoderTableXSMT32, XSMTGroup, "SpacemiT extensions"}, // Standard Extensions - {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, - {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, - {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, {DecoderTable32, {}, "standard 32-bit instructions"}, {DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"}, {DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"}, diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index d2b75a6..34026ed 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -45,8 +45,8 @@ public: CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, CCState &State) override { - if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, Info.IsFixed, - IsRet, Info.Ty)) + if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet, + Info.Ty)) return true; StackSize = State.getStackSize(); @@ -196,8 +196,8 @@ public: if (LocVT.isScalableVector()) 
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); - if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, - /*IsFixed=*/true, IsRet, Info.Ty)) + if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet, + Info.Ty)) return true; StackSize = State.getStackSize(); @@ -454,7 +454,7 @@ bool RISCVCallLowering::canLowerReturn(MachineFunction &MF, for (unsigned I = 0, E = Outs.size(); I < E; ++I) { MVT VT = MVT::getVT(Outs[I].Ty); if (CC_RISCV(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo, - /*IsFixed=*/true, /*isRet=*/true, nullptr)) + /*isRet=*/true, nullptr)) return false; } return true; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index f83c2b6..51ea3fc 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -736,7 +736,6 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { } case TargetOpcode::G_FCONSTANT: { // TODO: Use constant pool for complex constants. - // TODO: Optimize +0.0 to use fcvt.d.w for s64 on rv32. Register DstReg = MI.getOperand(0).getReg(); const APFloat &FPimm = MI.getOperand(1).getFPImm()->getValueAPF(); APInt Imm = FPimm.bitcastToAPInt(); @@ -753,8 +752,22 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { if (!FMV.constrainAllUses(TII, TRI, RBI)) return false; } else { + // s64 on rv32 assert(Size == 64 && !Subtarget->is64Bit() && "Unexpected size or subtarget"); + + if (Imm.isNonNegative() && Imm.isZero()) { + // Optimize +0.0 to use fcvt.d.w + MachineInstrBuilder FCVT = + MIB.buildInstr(RISCV::FCVT_D_W, {DstReg}, {Register(RISCV::X0)}) + .addImm(RISCVFPRndMode::RNE); + if (!FCVT.constrainAllUses(TII, TRI, RBI)) + return false; + + MI.eraseFromParent(); + return true; + } + // Split into two pieces and build through the stack. 
Register GPRRegHigh = MRI->createVirtualRegister(&RISCV::GPRRegClass); Register GPRRegLow = MRI->createVirtualRegister(&RISCV::GPRRegClass); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index e88f33d..9fd9639 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -26,6 +26,8 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -152,7 +154,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower(); // TODO: Use Vector Single-Width Saturating Instructions for vector types. - getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) + getActionDefinitionsBuilder( + {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT}) .lower(); getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) @@ -692,6 +695,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))); + getActionDefinitionsBuilder(G_ATOMICRMW_ADD) + .legalFor(ST.hasStdExtA(), {{sXLen, p0}}) + .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}}) + .clampScalar(0, sXLen, sXLen); + getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } @@ -729,6 +737,8 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::riscv_masked_atomicrmw_add: + return true; } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 95ec42f..41a9c92 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp 
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -32,10 +32,17 @@ static cl::opt<bool> ULEB128Reloc( "riscv-uleb128-reloc", cl::init(true), cl::Hidden, cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); +static cl::opt<bool> + AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden, + cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 " + "bytes of NOPs even in norvc code")); + RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, - bool Is64Bit, const MCTargetOptions &Options) - : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI), - Is64Bit(Is64Bit), TargetOptions(Options) { + bool Is64Bit, bool IsLittleEndian, + const MCTargetOptions &Options) + : MCAsmBackend(IsLittleEndian ? llvm::endianness::little + : llvm::endianness::big), + STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) { RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits()); } @@ -306,12 +313,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, // If conditions are met, compute the padding size and create a fixup encoding // the padding size in the addend. bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { - // Use default handling unless linker relaxation is enabled and the alignment - // is larger than the nop size. - const MCSubtargetInfo *STI = F.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) + // Alignments before the first linker-relaxable instruction have fixed sizes + // and do not require relocations. Alignments after a linker-relaxable + // instruction require a relocation, even if the STI specifies norelax. + // + // firstLinkerRelaxable is the layout order within the subsection, which may + // be smaller than the section's order. Therefore, alignments in a + // lower-numbered subsection may be unnecessarily treated as linker-relaxable. 
+ auto *Sec = F.getParent(); + if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable()) return false; - unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; + + // Use default handling unless the alignment is larger than the nop size. + const MCSubtargetInfo *STI = F.getSubtargetInfo(); + unsigned MinNopLen = + AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; if (F.getAlignment() <= MinNopLen) return false; @@ -321,16 +337,12 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); F.setVarFixups({Fixup}); F.setLinkerRelaxable(); - F.getParent()->setLinkerRelaxable(); return true; } -bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, - bool &WasRelaxed) const { +bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F) const { int64_t LineDelta = F.getDwarfLineDelta(); const MCExpr &AddrDelta = F.getDwarfAddrDelta(); - size_t OldSize = F.getVarSize(); - int64_t Value; // If the label difference can be resolved, use the default handling, which // utilizes a shorter special opcode. 
@@ -361,7 +373,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, } else { PCBytes = 2; OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); - support::endian::write<uint16_t>(OS, 0, llvm::endianness::little); + support::endian::write<uint16_t>(OS, 0, Endian); } auto Offset = OS.tell() - PCBytes; @@ -376,15 +388,12 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, F.setVarContents(Data); F.setVarFixups({MCFixup::create(Offset, &AddrDelta, MCFixup::getDataKindForSize(PCBytes))}); - WasRelaxed = OldSize != Data.size(); return true; } -bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const { +bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F) const { const MCExpr &AddrDelta = F.getDwarfAddrDelta(); SmallVector<MCFixup, 2> Fixups; - size_t OldSize = F.getVarSize(); - int64_t Value; if (AddrDelta.evaluateAsAbsolute(Value, *Asm)) return false; @@ -397,7 +406,6 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const { if (Value == 0) { F.clearVarContents(); F.clearVarFixups(); - WasRelaxed = OldSize != 0; return true; } @@ -415,23 +423,21 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const { AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6}); } else if (isUInt<8>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); - support::endian::write<uint8_t>(OS, 0, llvm::endianness::little); + support::endian::write<uint8_t>(OS, 0, Endian); AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8}); } else if (isUInt<16>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc2); - support::endian::write<uint16_t>(OS, 0, llvm::endianness::little); + support::endian::write<uint16_t>(OS, 0, Endian); AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16}); } else if (isUInt<32>(Value)) { OS << uint8_t(dwarf::DW_CFA_advance_loc4); - support::endian::write<uint32_t>(OS, 0, llvm::endianness::little); + support::endian::write<uint32_t>(OS, 0, Endian); AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32}); } else { 
llvm_unreachable("unsupported CFA encoding"); } F.setVarContents(Data); F.setVarFixups(Fixups); - - WasRelaxed = OldSize != Data.size(); return true; } @@ -471,9 +477,12 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, Count -= 1; } + // TODO: emit a mapping symbol right here + if (Count % 4 == 2) { - // The canonical nop with Zca is c.nop. - OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2); + // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte + // c.nop even in a norvc region. + OS.write("\x01\0", 2); Count -= 2; } @@ -803,6 +812,23 @@ void RISCVAsmBackend::maybeAddVendorReloc(const MCFragment &F, Asm->getWriter().recordRelocation(F, VendorFixup, VendorTarget, VendorValue); } +static bool relaxableFixupNeedsRelocation(const MCFixupKind Kind) { + // Some Fixups are marked as LinkerRelaxable by + // `RISCVMCCodeEmitter::getImmOpValue` only because they may be + // (assembly-)relaxed into a linker-relaxable instruction. This function + // should return `false` for those fixups so they do not get a `R_RISCV_RELAX` + // relocation emitted in addition to the relocation. + switch (Kind) { + default: + break; + case RISCV::fixup_riscv_rvc_jump: + case RISCV::fixup_riscv_rvc_branch: + case RISCV::fixup_riscv_jal: + return false; + } + return true; +} + bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup, const MCValue &Target, uint64_t &FixedValue, bool IsResolved) { @@ -845,30 +871,53 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup, return false; } - // If linker relaxation is enabled and supported by the current relocation, - // generate a relocation and then append a RELAX. - if (Fixup.isLinkerRelaxable()) + // If linker relaxation is enabled and supported by the current fixup, then we + // always want to generate a relocation. 
+ bool NeedsRelax = Fixup.isLinkerRelaxable() && + relaxableFixupNeedsRelocation(Fixup.getKind()); + if (NeedsRelax) IsResolved = false; + if (IsResolved && Fixup.isPCRel()) IsResolved = isPCRelFixupResolved(Target.getAddSym(), F); if (!IsResolved) { - // Some Fixups require a vendor relocation, record it (directly) before we + // Some Fixups require a VENDOR relocation, record it (directly) before we // add the relocation. maybeAddVendorReloc(F, Fixup); Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue); - } - if (Fixup.isLinkerRelaxable()) { - auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_RISCV_RELAX); - Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr), - FixedValueA); + if (NeedsRelax) { + // Some Fixups get a RELAX relocation, record it (directly) after we add + // the relocation. + MCFixup RelaxFixup = + MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_RISCV_RELAX); + MCValue RelaxTarget = MCValue::get(nullptr); + uint64_t RelaxValue; + Asm->getWriter().recordRelocation(F, RelaxFixup, RelaxTarget, RelaxValue); + } } return false; } +// Data fixups should be swapped for big endian cores. +// Instruction fixups should not be swapped as RISC-V instructions +// are always little-endian. +static bool isDataFixup(unsigned Kind) { + switch (Kind) { + default: + return false; + + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + return true; + } +} + void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, const MCValue &Target, uint8_t *Data, uint64_t Value, bool IsResolved) { @@ -892,8 +941,11 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. + // For big endian cores, data fixup should be swapped. 
+ bool SwapValue = Endian == llvm::endianness::big && isDataFixup(Kind); for (unsigned i = 0; i != NumBytes; ++i) { - Data[i] |= uint8_t((Value >> (i * 8)) & 0xff); + unsigned Idx = SwapValue ? (NumBytes - 1 - i) : i; + Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff); } } @@ -908,5 +960,6 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T, const MCTargetOptions &Options) { const Triple &TT = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options); + return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), TT.isLittleEndian(), + Options); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index adec1ec..5152d05 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -35,7 +35,7 @@ class RISCVAsmBackend : public MCAsmBackend { public: RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, - const MCTargetOptions &Options); + bool IsLittleEndian, const MCTargetOptions &Options); ~RISCVAsmBackend() override = default; std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &, @@ -65,8 +65,8 @@ public: const MCSubtargetInfo &STI) const override; bool relaxAlign(MCFragment &F, unsigned &Size) override; - bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override; - bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override; + bool relaxDwarfLineAddr(MCFragment &) const override; + bool relaxDwarfCFA(MCFragment &) const override; std::pair<bool, bool> relaxLEB128(MCFragment &LF, int64_t &Value) const override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index bddea43..fcea23a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ 
-139,6 +139,9 @@ enum { // 3 -> SEW * 4 DestEEWShift = ElementsDependOnMaskShift + 1, DestEEWMask = 3ULL << DestEEWShift, + + ReadsPastVLShift = DestEEWShift + 2, + ReadsPastVLMask = 1ULL << ReadsPastVLShift, }; // Helper functions to read TSFlags. @@ -195,6 +198,12 @@ static inline bool elementsDependOnMask(uint64_t TSFlags) { return TSFlags & ElementsDependOnMaskMask; } +/// \returns true if the instruction may read elements past VL, e.g. +/// vslidedown/vrgather +static inline bool readsPastVL(uint64_t TSFlags) { + return TSFlags & ReadsPastVLMask; +} + static inline unsigned getVLOpNum(const MCInstrDesc &Desc) { const uint64_t TSFlags = Desc.TSFlags; // This method is only called if we expect to have a VL operand, and all @@ -337,8 +346,11 @@ enum OperandType : unsigned { OPERAND_SIMM5_PLUS1, OPERAND_SIMM6, OPERAND_SIMM6_NONZERO, + OPERAND_SIMM8, + OPERAND_SIMM8_UNSIGNED, OPERAND_SIMM10, OPERAND_SIMM10_LSB0000_NONZERO, + OPERAND_SIMM10_UNSIGNED, OPERAND_SIMM11, OPERAND_SIMM12, OPERAND_SIMM12_LSB00000, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 543c4c5..37fe325 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -36,6 +36,12 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, setFlagsFromFeatures(STI); } +RISCVELFStreamer::RISCVELFStreamer(MCContext &C, + std::unique_ptr<MCAsmBackend> MAB, + std::unique_ptr<MCObjectWriter> MOW, + std::unique_ptr<MCCodeEmitter> MCE) + : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} + RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() { return static_cast<RISCVELFStreamer &>(Streamer); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index 98948cd..26da244 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -28,8 +28,7 @@ class RISCVELFStreamer : public MCELFStreamer { public: RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> MOW, - std::unique_ptr<MCCodeEmitter> MCE) - : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} + std::unique_ptr<MCCodeEmitter> MCE); void changeSection(MCSection *Section, uint32_t Subsection) override; void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 8c9ab8e..50f5a5d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -75,7 +75,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (PrintAliases && !NoAliases) Res = RISCVRVC::uncompress(UncompressedMI, *MI, STI); if (Res) - NewMI = const_cast<MCInst *>(&UncompressedMI); + NewMI = &UncompressedMI; if (!PrintAliases || NoAliases || !printAliasInstr(NewMI, Address, STI, O)) printInstruction(NewMI, Address, STI, O); printAnnotation(O, Annot); @@ -216,9 +216,12 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx, - // or non-zero in bits 8 and above. + // altfmt=1 without zvfbfa extension, or non-zero in bits 9 and above. 
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED || - RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) { + RISCVVType::getSEW(Imm) > 64 || + (RISCVVType::isAltFmt(Imm) && + !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) || + (Imm >> 9) != 0) { O << formatImm(Imm); return; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index 090d331..77f65d8 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -21,6 +21,7 @@ using namespace llvm; void RISCVMCAsmInfo::anchor() {} RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { + IsLittleEndian = TT.isLittleEndian(); CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; CommentString = "#"; AlignmentIsInBytes = false; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index cbeabdd..717fba6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -576,8 +576,21 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, "getImmOpValue expects only expressions or immediates"); const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); - unsigned FixupKind = RISCV::fixup_riscv_invalid; + + // `RelaxCandidate` must be set to `true` in two cases: + // - The fixup's relocation gets a R_RISCV_RELAX relocation + // - The underlying instruction may be relaxed to an instruction that gets a + // `R_RISCV_RELAX` relocation. + // + // The actual emission of `R_RISCV_RELAX` will be handled in + // `RISCVAsmBackend::applyFixup`. 
bool RelaxCandidate = false; + auto AsmRelaxToLinkerRelaxableWithFeature = [&](unsigned Feature) -> void { + if (!STI.hasFeature(RISCV::FeatureExactAssembly) && STI.hasFeature(Feature)) + RelaxCandidate = true; + }; + + unsigned FixupKind = RISCV::fixup_riscv_invalid; if (Kind == MCExpr::Specifier) { const auto *RVExpr = cast<MCSpecifierExpr>(Expr); FixupKind = RVExpr->getSpecifier(); @@ -644,18 +657,26 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, // FIXME: Sub kind binary exprs have chance of underflow. if (MIFrm == RISCVII::InstFormatJ) { FixupKind = RISCV::fixup_riscv_jal; + AsmRelaxToLinkerRelaxableWithFeature(RISCV::FeatureVendorXqcilb); } else if (MIFrm == RISCVII::InstFormatB) { FixupKind = RISCV::fixup_riscv_branch; + // This might be assembler relaxed to `b<cc>; jal` but we cannot relax + // the `jal` again in the assembler. } else if (MIFrm == RISCVII::InstFormatCJ) { FixupKind = RISCV::fixup_riscv_rvc_jump; + AsmRelaxToLinkerRelaxableWithFeature(RISCV::FeatureVendorXqcilb); } else if (MIFrm == RISCVII::InstFormatCB) { FixupKind = RISCV::fixup_riscv_rvc_branch; + // This might be assembler relaxed to `b<cc>; jal` but we cannot relax + // the `jal` again in the assembler. } else if (MIFrm == RISCVII::InstFormatCI) { FixupKind = RISCV::fixup_riscv_rvc_imm; } else if (MIFrm == RISCVII::InstFormatI) { FixupKind = RISCV::fixup_riscv_12_i; } else if (MIFrm == RISCVII::InstFormatQC_EB) { FixupKind = RISCV::fixup_riscv_qc_e_branch; + // This might be assembler relaxed to `qc.e.b<cc>; jal` but we cannot + // relax the `jal` again in the assembler. 
} else if (MIFrm == RISCVII::InstFormatQC_EAI) { FixupKind = RISCV::fixup_riscv_qc_e_32; RelaxCandidate = true; @@ -670,9 +691,9 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, assert(FixupKind != RISCV::fixup_riscv_invalid && "Unhandled expression!"); addFixup(Fixups, 0, Expr, FixupKind); - // If linker relaxation is enabled and supported by this relocation, set - // a bit so that if fixup is unresolved, a R_RISCV_RELAX relocation will be - // appended. + // If linker relaxation is enabled and supported by this relocation, set a bit + // so that the assembler knows the size of the instruction is not fixed/known, + // and the relocation will need a R_RISCV_RELAX relocation. if (EnableRelax && RelaxCandidate) Fixups.back().setLinkerRelaxable(); ++MCNumFixups; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 61ecfb2..d917ef4 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -376,7 +376,8 @@ static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) { extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() { - for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) { + for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target(), + &getTheRISCV32beTarget(), &getTheRISCV64beTarget()}) { TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo); TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo); TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo); diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 2f32e2a..83566b1 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -611,6 +611,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmPrinter() { 
RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target()); RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target()); + RegisterAsmPrinter<RISCVAsmPrinter> A(getTheRISCV32beTarget()); + RegisterAsmPrinter<RISCVAsmPrinter> B(getTheRISCV64beTarget()); } void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp index cb6117e..78f4779 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp @@ -324,7 +324,7 @@ static MCRegister allocateRVVReg(MVT ValVT, unsigned ValNo, CCState &State, // Implements the RISC-V calling convention. Returns true upon failure. bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { + CCState &State, bool IsRet, Type *OrigTy) { const MachineFunction &MF = State.getMachineFunction(); const DataLayout &DL = MF.getDataLayout(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); @@ -379,12 +379,12 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: - UseGPRForF16_F32 = !IsFixed; + UseGPRForF16_F32 = ArgFlags.isVarArg(); break; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: - UseGPRForF16_F32 = !IsFixed; - UseGPRForF64 = !IsFixed; + UseGPRForF16_F32 = ArgFlags.isVarArg(); + UseGPRForF64 = ArgFlags.isVarArg(); break; } @@ -465,7 +465,7 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, // currently if we are using ILP32E calling convention. This behavior may be // changed when RV32E/ILP32E is ratified. 
unsigned TwoXLenInBytes = (2 * XLen) / 8; - if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && + if (ArgFlags.isVarArg() && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && ABI != RISCVABI::ABI_ILP32E) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); @@ -620,8 +620,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, // benchmark. But theoretically, it may have benefit for some cases. bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State, - bool IsFixed, bool IsRet, Type *OrigTy) { + ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, + Type *OrigTy) { const MachineFunction &MF = State.getMachineFunction(); const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); const RISCVTargetLowering &TLI = *Subtarget.getTargetLowering(); @@ -741,7 +741,7 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State) { + Type *OrigTy, CCState &State) { if (ArgFlags.isNest()) { report_fatal_error( "Attribute 'nest' is not supported in GHC calling convention"); diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.h b/llvm/lib/Target/RISCV/RISCVCallingConv.h index bf823b7..0847dd6 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.h +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.h @@ -21,19 +21,19 @@ namespace llvm { typedef bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, - bool IsFixed, bool IsRet, Type *OrigTy); + bool IsRet, Type *OrigTy); bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet, Type *OrigTy); + CCState &State, bool IsRet, Type *OrigTy); bool CC_RISCV_FastCC(unsigned 
ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet, Type *OrigTy); + CCState &State, bool IsRet, Type *OrigTy); bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); namespace RISCV { diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index a7329d2..fa8272b 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -234,7 +234,7 @@ def FeatureStdExtZtso def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZtso()">, AssemblerPredicate<(all_of FeatureStdExtZtso), "'Ztso' (Memory Model - Total Store Order)">; -def NotHasStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">; +def NoStdExtZtso : Predicate<"!Subtarget->hasStdExtZtso()">; def FeatureStdExtZa64rs : RISCVExtension<1, 0, "Reservation Set Size of at Most 64 Bytes">; @@ -473,7 +473,7 @@ def FeatureStdExtZba def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">, AssemblerPredicate<(all_of FeatureStdExtZba), "'Zba' (Address Generation Instructions)">; -def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">; +def NoStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">; def FeatureStdExtZbb : RISCVExtension<1, 0, "Basic Bit-Manipulation">, @@ -680,6 +680,13 @@ def FeatureStdExtV [FeatureStdExtZvl128b, FeatureStdExtZve64d]>, RISCVExtensionBitmask<0, 21>; +def FeatureStdExtZvfbfa + : RISCVExperimentalExtension<0, 1, "Additional BF16 vector compute support", + [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>; +def HasStdExtZvfbfa : Predicate<"Subtarget->hasStdExtZvfbfa()">, + AssemblerPredicate<(all_of FeatureStdExtZvfbfa), + "'Zvfbfa' (Additional BF16 vector compute support)">; + def FeatureStdExtZvfbfmin : RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>; def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, @@ 
-1055,13 +1062,13 @@ def FeatureStdExtSupm "Indicates User-mode Pointer Masking">; def FeatureStdExtSmctr - : RISCVExperimentalExtension<1, 0, - "Control Transfer Records Machine Level", - [FeatureStdExtSscsrind]>; + : RISCVExtension<1, 0, + "Control Transfer Records Machine Level", + [FeatureStdExtSscsrind]>; def FeatureStdExtSsctr - : RISCVExperimentalExtension<1, 0, - "Control Transfer Records Supervisor Level", - [FeatureStdExtSscsrind]>; + : RISCVExtension<1, 0, + "Control Transfer Records Supervisor Level", + [FeatureStdExtSscsrind]>; def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">, AssemblerPredicate<(any_of FeatureStdExtSmctr, FeatureStdExtSsctr), "'Smctr' (Control Transfer Records Machine Level) or " @@ -1069,7 +1076,7 @@ def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">, // Packed SIMD Extensions def FeatureStdExtP - : RISCVExperimentalExtension<0, 14, + : RISCVExperimentalExtension<0, 15, "'Base P' (Packed SIMD)">; def HasStdExtP : Predicate<"Subtarget->hasStdExtP()">, AssemblerPredicate<(all_of FeatureStdExtP), @@ -1408,7 +1415,7 @@ def HasVendorXMIPSCBOP : Predicate<"Subtarget->hasVendorXMIPSCBOP()">, AssemblerPredicate<(all_of FeatureVendorXMIPSCBOP), "'Xmipscbop' (MIPS hardware prefetch)">; -def NotHasVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">; +def NoVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">; // WCH / Nanjing Qinheng Microelectronics Extension(s) @@ -1642,6 +1649,14 @@ def HasVendorXAndesVDot AssemblerPredicate<(all_of FeatureVendorXAndesVDot), "'XAndesVDot' (Andes Vector Dot Product Extension)">; +def FeatureVendorXSMTVDot + : RISCVExtension<1, 0, "SpacemiT Vector Dot Product Extension", + [FeatureStdExtZve32f]>; +def HasVendorXSMTVDot + : Predicate<"Subtarget->hasVendorXSMTVDot()">, + AssemblerPredicate<(all_of FeatureVendorXSMTVDot), + "'XSMTVDot' (SpacemiT Vector Dot Product Extension)">; + 
//===----------------------------------------------------------------------===// // LLVM specific features and extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 20ade6c..791efca 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -147,7 +147,7 @@ def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>; def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; } -let Predicates = [IsRV64, NotHasStdExtZba] in +let Predicates = [IsRV64, NoStdExtZba] in def : Pat<(zext (i32 GPR:$src)), (SRLI (i64 (SLLI GPR:$src, 32)), 32)>; let Predicates = [IsRV32, NoStdExtZbb, NoStdExtZbkb] in diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 5998653..f9f35f6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -18,6 +18,7 @@ #include "RISCVInstrInfo.h" #include "RISCVSelectionDAGInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" @@ -681,40 +682,86 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) { if (!Subtarget->hasVendorXqcibm()) return false; - auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); - if (!N1C) + using namespace SDPatternMatch; + + SDValue X; + APInt MaskImm; + if (!sd_match(Node, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm)))) return false; - int32_t C1 = N1C->getSExtValue(); - if (!isShiftedMask_32(C1) || isInt<12>(C1)) + unsigned ShAmt, Width; + if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12)) return false; - // INSBI will clobber the input register in N0. Bail out if we need a copy to - // preserve this value. 
- SDValue N0 = Node->getOperand(0); - if (!N0.hasOneUse()) + // If Zbs is enabled and it is a single bit set we can use BSETI which + // can be compressed to C_BSETI when Xqcibm in enabled. + if (Width == 1 && Subtarget->hasStdExtZbs()) return false; // If C1 is a shifted mask (but can't be formed as an ORI), // use a bitfield insert of -1. // Transform (or x, C1) // -> (qc.insbi x, -1, width, shift) - const unsigned Leading = llvm::countl_zero((uint32_t)C1); - const unsigned Trailing = llvm::countr_zero((uint32_t)C1); - const unsigned Width = 32 - Leading - Trailing; + SDLoc DL(Node); + MVT VT = Node->getSimpleValueType(0); - // If Zbs is enabled and it is a single bit set we can use BSETI which - // can be compressed to C_BSETI when Xqcibm in enabled. - if (Width == 1 && Subtarget->hasStdExtZbs()) + SDValue Ops[] = {X, CurDAG->getSignedTargetConstant(-1, DL, VT), + CurDAG->getTargetConstant(Width, DL, VT), + CurDAG->getTargetConstant(ShAmt, DL, VT)}; + SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops); + ReplaceNode(Node, BitIns); + return true; +} + +// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value +// being inserted only sets known zero bits. +bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromOrAndImm(SDNode *Node) { + // Supported only in Xqcibm for now. + if (!Subtarget->hasVendorXqcibm()) + return false; + + using namespace SDPatternMatch; + + SDValue And; + APInt MaskImm, OrImm; + if (!sd_match(Node, m_Or(m_OneUse(m_And(m_Value(And), m_ConstInt(MaskImm))), + m_ConstInt(OrImm)))) + return false; + + // Compute the Known Zero for the AND as this allows us to catch more general + // cases than just looking for AND with imm. + KnownBits Known = CurDAG->computeKnownBits(Node->getOperand(0)); + + // The bits being inserted must only set those bits that are known to be zero. 
+ if (!OrImm.isSubsetOf(Known.Zero)) { + // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't + // currently handle this case. + return false; + } + + unsigned ShAmt, Width; + // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). + if (!Known.Zero.isShiftedMask(ShAmt, Width)) return false; + // QC_INSB(I) dst, src, #width, #shamt. SDLoc DL(Node); MVT VT = Node->getSimpleValueType(0); + SDValue ImmNode; + auto Opc = RISCV::QC_INSB; - SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT), - CurDAG->getTargetConstant(Width, DL, VT), - CurDAG->getTargetConstant(Trailing, DL, VT)}; - SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops); + int32_t LIImm = OrImm.getSExtValue() >> ShAmt; + + if (isInt<5>(LIImm)) { + Opc = RISCV::QC_INSBI; + ImmNode = CurDAG->getSignedTargetConstant(LIImm, DL, MVT::i32); + } else { + ImmNode = selectImm(CurDAG, DL, MVT::i32, LIImm, *Subtarget); + } + + SDValue Ops[] = {And, ImmNode, CurDAG->getTargetConstant(Width, DL, VT), + CurDAG->getTargetConstant(ShAmt, DL, VT)}; + SDNode *BitIns = CurDAG->getMachineNode(Opc, DL, VT, Ops); ReplaceNode(Node, BitIns); return true; } @@ -772,6 +819,49 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) { return false; } +// (xor X, (and (xor X, C1), C2)) +// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt) +// where C2 is a shifted mask with width=Width and shift=ShAmt +bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromXor(SDNode *Node) { + + if (!Subtarget->hasVendorXqcibm()) + return false; + + using namespace SDPatternMatch; + + SDValue X; + APInt CImm, CMask; + if (!sd_match( + Node, + m_Xor(m_Value(X), + m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))), + m_ConstInt(CMask)))))) + return false; + + unsigned Width, ShAmt; + if (!CMask.isShiftedMask(ShAmt, Width)) + return false; + + int64_t Imm = CImm.getSExtValue(); + Imm >>= ShAmt; + + SDLoc DL(Node); + SDValue ImmNode; + auto Opc = RISCV::QC_INSB; + + 
if (isInt<5>(Imm)) { + Opc = RISCV::QC_INSBI; + ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32); + } else { + ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget); + } + SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32), + CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)}; + ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops)); + + return true; +} + bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, @@ -1340,6 +1430,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (trySignedBitfieldInsertInMask(Node)) return; + if (tryBitfieldInsertOpFromOrAndImm(Node)) + return; + if (tryShrinkShlLogicImm(Node)) return; @@ -1349,6 +1442,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (tryShrinkShlLogicImm(Node)) return; + if (tryBitfieldInsertOpFromXor(Node)) + return; + break; case ISD::AND: { auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); @@ -1644,7 +1740,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // available. 
// Transform (and x, C1) // -> (<bfextract> x, msb, lsb) - if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) { + if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) && + !(C1 == 0xffff && Subtarget->hasStdExtZbb()) && + !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) { const unsigned Msb = llvm::bit_width(C1) - 1; if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) return; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index ee3a86e..c329a4c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -75,6 +75,8 @@ public: bool trySignedBitfieldExtract(SDNode *Node); bool trySignedBitfieldInsertInSign(SDNode *Node); bool trySignedBitfieldInsertInMask(SDNode *Node); + bool tryBitfieldInsertOpFromXor(SDNode *Node); + bool tryBitfieldInsertOpFromOrAndImm(SDNode *Node); bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb); bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03e54b3..9115c13 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -109,7 +109,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, switch (ABI) { default: - report_fatal_error("Don't know how to lower this ABI"); + reportFatalUsageError("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64E: @@ -1800,15 +1800,20 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, switch (Intrinsic) { default: return false; - case Intrinsic::riscv_masked_atomicrmw_xchg_i32: - case Intrinsic::riscv_masked_atomicrmw_add_i32: - case Intrinsic::riscv_masked_atomicrmw_sub_i32: - case Intrinsic::riscv_masked_atomicrmw_nand_i32: - case 
Intrinsic::riscv_masked_atomicrmw_max_i32: - case Intrinsic::riscv_masked_atomicrmw_min_i32: - case Intrinsic::riscv_masked_atomicrmw_umax_i32: - case Intrinsic::riscv_masked_atomicrmw_umin_i32: - case Intrinsic::riscv_masked_cmpxchg_i32: + case Intrinsic::riscv_masked_atomicrmw_xchg: + case Intrinsic::riscv_masked_atomicrmw_add: + case Intrinsic::riscv_masked_atomicrmw_sub: + case Intrinsic::riscv_masked_atomicrmw_nand: + case Intrinsic::riscv_masked_atomicrmw_max: + case Intrinsic::riscv_masked_atomicrmw_min: + case Intrinsic::riscv_masked_atomicrmw_umax: + case Intrinsic::riscv_masked_atomicrmw_umin: + case Intrinsic::riscv_masked_cmpxchg: + // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated + // narrow atomic operation. These will be expanded to an LR/SC loop that + // reads/writes to/from an aligned 4 byte location. And, or, shift, etc. + // will be used to modify the appropriate part of the 4 byte data and + // preserve the rest. Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); @@ -1844,6 +1849,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, /*IsStore*/ true, /*IsUnitStrided*/ false, /*UsePtrVal*/ true); + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + // Operands are (vec, ..., vec, ptr, offset, mask, vl) + return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, + /*IsStore*/ true, + /*IsUnitStrided*/ false, /*UsePtrVal*/ true); case Intrinsic::riscv_vlm: return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, @@ -2512,11 +2528,11 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, } break; case ISD::SETUGT: - if (Subtarget.hasVendorXqcibi() 
&& C != INT64_MAX && isInt<16>(C + 1) && - C != -1) { + if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) { // We have a branch immediate instruction for SETUGE but not SETUGT. - // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate. - RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType()); + // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned + // immediate. + RHS = DAG.getConstant(C + 1, DL, RHS.getValueType()); CC = ISD::SETUGE; return; } @@ -7289,7 +7305,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: - report_fatal_error("unimplemented operand"); + reportFatalInternalError( + "Unimplemented RISCVTargetLowering::LowerOperation Case"); case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG); case ISD::ATOMIC_FENCE: @@ -7487,7 +7504,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, // vscale as VLENB / 8. static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) - report_fatal_error("Support for VLEN==32 is incomplete."); + reportFatalInternalError("Support for VLEN==32 is incomplete."); // We assume VLENB is a multiple of 8. We manually choose the best shift // here because SimplifyDemandedBits isn't always able to simplify it. uint64_t Val = Op.getConstantOperandVal(0); @@ -8176,6 +8193,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal); return DAG.getLogicalNOT(DL, SetCC, VT); } + // Lower (setugt X, 2047) as (setne (srl X, 11), 0). 
+ if (CCVal == ISD::SETUGT && Imm == 2047) { + SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS, + DAG.getShiftAmountConstant(11, OpVT, DL)); + return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT), + ISD::SETNE); + } } // Not a constant we could handle, swap the operands and condition code to @@ -8500,7 +8524,7 @@ SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget.is64Bit()) - llvm::report_fatal_error("Trampolines only implemented for RV64"); + llvm::reportFatalUsageError("Trampolines only implemented for RV64"); // Create an MCCodeEmitter to encode instructions. TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering(); @@ -8660,7 +8684,7 @@ SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op, SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget.is64Bit()) - llvm::report_fatal_error("Trampolines only implemented for RV64"); + llvm::reportFatalUsageError("Trampolines only implemented for RV64"); return Op.getOperand(0); } @@ -8795,7 +8819,7 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, switch (getTargetMachine().getCodeModel()) { default: - report_fatal_error("Unsupported code model for lowering"); + reportFatalUsageError("Unsupported code model for lowering"); case CodeModel::Small: { // Generate a sequence for accessing addresses within the first 2 GiB of // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). @@ -8931,10 +8955,7 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, // Prepare argument list to generate call. ArgListTy Args; - ArgListEntry Entry; - Entry.Node = Load; - Entry.Ty = CallTy; - Args.push_back(Entry); + Args.emplace_back(Load, CallTy); // Setup call to __tls_get_addr. 
TargetLowering::CallLoweringInfo CLI(DAG); @@ -8976,7 +8997,7 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, if (DAG.getMachineFunction().getFunction().getCallingConv() == CallingConv::GHC) - report_fatal_error("In GHC calling convention TLS is not supported"); + reportFatalUsageError("In GHC calling convention TLS is not supported"); SDValue Addr; switch (Model) { @@ -9266,19 +9287,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - const int TrueValCost = RISCVMatInt::getIntMatCost( - TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - const int FalseValCost = RISCVMatInt::getIntMatCost( - FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - bool IsCZERO_NEZ = TrueValCost <= FalseValCost; + // Use SHL/ADDI (and possible XORI) to avoid having to materialize + // a constant in register + if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff); + } + if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT); + CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0)); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff); + } + + auto getCost = [&](const APInt &Delta, const APInt &Addend) { + const int DeltaCost = RISCVMatInt::getIntMatCost( + Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + // Does the addend fold into an ADDI + if (Addend.isSignedIntN(12)) + return DeltaCost; + const int AddendCost = RISCVMatInt::getIntMatCost( + Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + return AddendCost + DeltaCost; + }; + bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <= + 
getCost(TrueVal - FalseVal, FalseVal); SDValue LHSVal = DAG.getConstant( IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); - SDValue RHSVal = - DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); SDValue CMOV = DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, DL, VT, LHSVal, CondV); - return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal); + return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV); } // (select c, c1, t) -> (add (czero_nez t - c1, c), c1) @@ -9313,7 +9353,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::OR, DL, VT, DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV), + SDNodeFlags::Disjoint); } if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) @@ -10724,11 +10765,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } case Intrinsic::riscv_mopr: - return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::riscv_moprr: { - return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } case Intrinsic::riscv_clmul: @@ -10825,7 +10866,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) - report_fatal_error("EGW should be greater than or equal to 4 * SEW."); + reportFatalUsageError("EGW should be greater than or equal to 4 * SEW."); return Op; } // EGS * EEW >= 256 bits @@ -10833,7 +10874,7 @@ SDValue 
RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_vsm3me: { if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) - report_fatal_error("EGW should be greater than or equal to 8 * SEW."); + reportFatalUsageError("EGW should be greater than or equal to 8 * SEW."); return Op; } // zvknha(SEW=32)/zvknhb(SEW=[32|64]) @@ -10842,11 +10883,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::riscv_vsha2ms: { if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && !Subtarget.hasStdExtZvknhb()) - report_fatal_error("SEW=64 needs Zvknhb to be enabled."); + reportFatalUsageError("SEW=64 needs Zvknhb to be enabled."); if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) - report_fatal_error("EGW should be greater than or equal to 4 * SEW."); + reportFatalUsageError("EGW should be greater than or equal to 4 * SEW."); return Op; } case Intrinsic::riscv_sf_vc_v_x: @@ -11084,69 +11125,118 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); } -SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, - SelectionDAG &DAG) const { - unsigned IntNo = Op.getConstantOperandVal(1); +static SDValue +lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, + const RISCVSubtarget &Subtarget, + SelectionDAG &DAG) { + bool IsStrided; switch (IntNo) { - default: - break; case Intrinsic::riscv_seg2_store_mask: case Intrinsic::riscv_seg3_store_mask: case Intrinsic::riscv_seg4_store_mask: case Intrinsic::riscv_seg5_store_mask: case Intrinsic::riscv_seg6_store_mask: case Intrinsic::riscv_seg7_store_mask: - case Intrinsic::riscv_seg8_store_mask: { - SDLoc DL(Op); - static const Intrinsic::ID VssegInts[] = { - Intrinsic::riscv_vsseg2_mask, 
Intrinsic::riscv_vsseg3_mask, - Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, - Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, - Intrinsic::riscv_vsseg8_mask}; + case Intrinsic::riscv_seg8_store_mask: + IsStrided = false; + break; + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + IsStrided = true; + break; + default: + llvm_unreachable("unexpected intrinsic ID"); + } - // Operands: (chain, int_id, vec*, ptr, mask, vl) - unsigned NF = Op->getNumOperands() - 5; - assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); - MVT XLenVT = Subtarget.getXLenVT(); - MVT VT = Op->getOperand(2).getSimpleValueType(); - MVT ContainerVT = getContainerForFixedLengthVector(VT); - unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * - ContainerVT.getScalarSizeInBits(); - EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); + SDLoc DL(Op); + static const Intrinsic::ID VssegInts[] = { + Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, + Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, + Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, + Intrinsic::riscv_vsseg8_mask}; + static const Intrinsic::ID VsssegInts[] = { + Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask, + Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask, + Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask, + Intrinsic::riscv_vssseg8_mask}; + + // Operands: (chain, int_id, vec*, ptr, mask, vl) or + // (chain, int_id, vec*, ptr, stride, mask, vl) + unsigned NF = Op->getNumOperands() - (IsStrided ? 
6 : 5); + assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op->getOperand(2).getSimpleValueType(); + MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); + unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * + ContainerVT.getScalarSizeInBits(); + EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); - SDValue VL = Op.getOperand(Op.getNumOperands() - 1); - SDValue Mask = Op.getOperand(Op.getNumOperands() - 2); - MVT MaskVT = Mask.getSimpleValueType(); - MVT MaskContainerVT = - ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); - Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); + SDValue VL = Op.getOperand(Op.getNumOperands() - 1); + SDValue Mask = Op.getOperand(Op.getNumOperands() - 2); + MVT MaskVT = Mask.getSimpleValueType(); + MVT MaskContainerVT = + ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); + Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); - SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); - SDValue Ptr = Op->getOperand(NF + 2); + SDValue IntID = DAG.getTargetConstant( + IsStrided ? 
VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT); + SDValue Ptr = Op->getOperand(NF + 2); - auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); + auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); - SDValue StoredVal = DAG.getUNDEF(VecTupTy); - for (unsigned i = 0; i < NF; i++) - StoredVal = DAG.getNode( - RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, - convertToScalableVector( - ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget), - DAG.getTargetConstant(i, DL, MVT::i32)); + SDValue StoredVal = DAG.getUNDEF(VecTupTy); + for (unsigned i = 0; i < NF; i++) + StoredVal = DAG.getNode( + RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, + convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i), + DAG, Subtarget), + DAG.getTargetConstant(i, DL, MVT::i32)); + + SmallVector<SDValue, 10> Ops = { + FixedIntrinsic->getChain(), + IntID, + StoredVal, + Ptr, + Mask, + VL, + DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; + // Insert the stride operand. 
+ if (IsStrided) + Ops.insert(std::next(Ops.begin(), 4), + Op.getOperand(Op.getNumOperands() - 3)); + + return DAG.getMemIntrinsicNode( + ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, + FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); +} + +SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + break; + case Intrinsic::riscv_seg2_store_mask: + case Intrinsic::riscv_seg3_store_mask: + case Intrinsic::riscv_seg4_store_mask: + case Intrinsic::riscv_seg5_store_mask: + case Intrinsic::riscv_seg6_store_mask: + case Intrinsic::riscv_seg7_store_mask: + case Intrinsic::riscv_seg8_store_mask: + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG); - SDValue Ops[] = { - FixedIntrinsic->getChain(), - IntID, - StoredVal, - Ptr, - Mask, - VL, - DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; - - return DAG.getMemIntrinsicNode( - ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, - FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); - } case Intrinsic::riscv_sf_vc_xv_se: return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE); case Intrinsic::riscv_sf_vc_iv_se: @@ -14273,7 +14363,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); MakeLibCallOptions CallOptions; EVT OpVT = Op0.getValueType(); - CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0)); SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); SDValue Result; std::tie(Result, Chain) = @@ -14308,7 +14398,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; MakeLibCallOptions CallOptions; EVT OpVT = Op0.getValueType(); - CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); + CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64); SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); Results.push_back(Result); @@ -14814,7 +14904,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); SDValue Res = DAG.getNode( - RISCVISD::MOPR, DL, MVT::i64, NewOp, + RISCVISD::MOP_R, DL, MVT::i64, NewOp, DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -14827,7 +14917,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); SDValue Res = DAG.getNode( - RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, + RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1, DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -15996,9 +16086,10 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), Cond); - SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), - Cond); - SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); + SDValue NewN1 = + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond); + SDValue NewOr = + DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint); return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); } @@ -16531,8 +16622,10 @@ 
combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg // can become a sext.w instead of a shift pair. -static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performSETCCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { + SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -16548,6 +16641,20 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget)) return V; + // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI. + if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) && + N0.getOpcode() == ISD::AND && N0.hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) { + unsigned ShiftBits = AndRHSC.countr_zero(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, VT)); + return DAG.getSetCC(dl, VT, Shift, N1, Cond); + } + } + if (OpVT != MVT::i64 || !Subtarget.is64Bit()) return SDValue(); @@ -16582,27 +16689,39 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, } static SDValue -performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, +performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { + SelectionDAG &DAG = DCI.DAG; SDValue Src = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT(); unsigned Opc = Src.getOpcode(); + SDLoc DL(N); // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) // Don't do this with Zhinx. 
We need to explicitly sign extend the GPR. if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) && Subtarget.hasStdExtZfhmin()) - return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, - Src.getOperand(0)); + return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0)); // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 && VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) && DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5) - return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0), + return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0), Src.getOperand(1)); + // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc)) + if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG()) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); + + // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1) + if (Opc == ISD::XOR && SrcVT == MVT::i1 && + isAllOnesConstant(Src.getOperand(1)) && + Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG()) + return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0), + DAG.getAllOnesConstant(DL, VT)); + return SDValue(); } @@ -17461,7 +17580,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N, return SDValue(); SmallVector<SDNode *> Worklist; - SmallSet<SDNode *, 8> Inserted; + SmallPtrSet<SDNode *, 8> Inserted; Worklist.push_back(N); Inserted.insert(N); SmallVector<CombineResult> CombinesToApply; @@ -20022,9 +20141,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } case ISD::SETCC: - return performSETCCCombine(N, DAG, Subtarget); + return performSETCCCombine(N, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: - return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); + return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget); case ISD::ZERO_EXTEND: // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 
// type legalization. This is safe because fp_to_uint produces poison if @@ -20580,10 +20699,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. // vfmv.f.s is represented as extract element from 0. Match it late to avoid // any illegal types. - if (Val.getOpcode() == RISCVISD::VMV_X_S || - (DCI.isAfterLegalizeDAG() && - Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isNullConstant(Val.getOperand(1)))) { + if ((Val.getOpcode() == RISCVISD::VMV_X_S || + (DCI.isAfterLegalizeDAG() && + Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isNullConstant(Val.getOperand(1)))) && + Val.hasOneUse()) { SDValue Src = Val.getOperand(0); MVT VecVT = Src.getSimpleValueType(); // VecVT should be scalable and memory VT should match the element type. @@ -20673,12 +20793,22 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, isNullConstant(Src.getOperand(1)) && Src.getOperand(0).getValueType().isScalableVector()) { EVT VT = N->getValueType(0); - EVT SrcVT = Src.getOperand(0).getValueType(); - assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); + SDValue EVSrc = Src.getOperand(0); + EVT EVSrcVT = EVSrc.getValueType(); + assert(EVSrcVT.getVectorElementType() == VT.getVectorElementType()); // Widths match, just return the original vector. - if (SrcVT == VT) - return Src.getOperand(0); - // TODO: Use insert_subvector/extract_subvector to change widen/narrow? + if (EVSrcVT == VT) + return EVSrc; + SDLoc DL(N); + // Width is narrower, using insert_subvector. + if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) { + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), + EVSrc, + DAG.getConstant(0, DL, Subtarget.getXLenVT())); + } + // Width is wider, using extract_subvector. 
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc, + DAG.getConstant(0, DL, Subtarget.getXLenVT())); } [[fallthrough]]; } @@ -21018,9 +21148,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); - // Bail if we might break a sh{1,2,3}add pattern. - if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && - C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() && + bool IsShXAdd = + (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && + C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3; + bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 && + C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31; + + // Bail if we might break a sh{1,2,3}add/qc.shladd pattern. + if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() && N->user_begin()->getOpcode() == ISD::ADD && !isUsedByLdSt(*N->user_begin(), nullptr) && !isa<ConstantSDNode>(N->user_begin()->getOperand(1))) @@ -21244,6 +21379,15 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known = Known.sext(BitWidth); break; } + case RISCVISD::SRAW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } case RISCVISD::CTZW: { KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); @@ -21349,8 +21493,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( if (Tmp < 33) return 1; return 33; } + case RISCVISD::SRAW: { + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // sraw produces at least 33 sign bits. 
If the input already has more than + // 33 sign bits sraw, will preserve them. + // TODO: A more precise answer could be calculated depending on known bits + // in the shift amount. + return std::max(Tmp, 33U); + } case RISCVISD::SLLW: - case RISCVISD::SRAW: case RISCVISD::SRLW: case RISCVISD::DIVW: case RISCVISD::DIVUW: @@ -21361,9 +21513,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::FCVT_WU_RV64: case RISCVISD::STRICT_FCVT_W_RV64: case RISCVISD::STRICT_FCVT_WU_RV64: - // TODO: As the result is sign-extended, this is conservatively correct. A - // more precise answer could be calculated for SRAW depending on known - // bits in the shift amount. + // TODO: As the result is sign-extended, this is conservatively correct. return 33; case RISCVISD::VMV_X_S: { // The number of sign bits of the scalar result is computed by obtaining the @@ -21382,24 +21532,23 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( switch (IntNo) { default: break; - case Intrinsic::riscv_masked_atomicrmw_xchg_i64: - case Intrinsic::riscv_masked_atomicrmw_add_i64: - case Intrinsic::riscv_masked_atomicrmw_sub_i64: - case Intrinsic::riscv_masked_atomicrmw_nand_i64: - case Intrinsic::riscv_masked_atomicrmw_max_i64: - case Intrinsic::riscv_masked_atomicrmw_min_i64: - case Intrinsic::riscv_masked_atomicrmw_umax_i64: - case Intrinsic::riscv_masked_atomicrmw_umin_i64: - case Intrinsic::riscv_masked_cmpxchg_i64: + case Intrinsic::riscv_masked_atomicrmw_xchg: + case Intrinsic::riscv_masked_atomicrmw_add: + case Intrinsic::riscv_masked_atomicrmw_sub: + case Intrinsic::riscv_masked_atomicrmw_nand: + case Intrinsic::riscv_masked_atomicrmw_max: + case Intrinsic::riscv_masked_atomicrmw_min: + case Intrinsic::riscv_masked_atomicrmw_umax: + case Intrinsic::riscv_masked_atomicrmw_umin: + case Intrinsic::riscv_masked_cmpxchg: // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated // narrow atomic operation. 
These are implemented using atomic // operations at the minimum supported atomicrmw/cmpxchg width whose // result is then sign extended to XLEN. With +A, the minimum width is // 32 for both 64 and 32. - assert(Subtarget.getXLen() == 64); assert(getMinCmpXchgSizeInBits() == 32); assert(Subtarget.hasStdExtA()); - return 33; + return Op.getValueSizeInBits() - 31; } break; } @@ -21447,6 +21596,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( // TODO: Add more target nodes. switch (Op.getOpcode()) { + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: + case RISCVISD::RORW: + case RISCVISD::ROLW: + // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift + // amount is bounds. + return false; case RISCVISD::SELECT_CC: // Integer comparisons cannot create poison. assert(Op.getOperand(0).getValueType().isInteger() && @@ -22234,8 +22391,8 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: if (!Subtarget.is64Bit()) - report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " - "supported on 64-bit targets"); + reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only " + "supported on 64-bit targets"); return emitPatchPoint(MI, BB); } } @@ -22270,20 +22427,12 @@ void RISCVTargetLowering::analyzeInputArgs( MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, RISCVCCAssignFn Fn) const { - FunctionType *FType = MF.getFunction().getFunctionType(); - for (const auto &[Idx, In] : enumerate(Ins)) { MVT ArgVT = In.VT; ISD::ArgFlagsTy ArgFlags = In.Flags; - Type *ArgTy = nullptr; - if (IsRet) - ArgTy = FType->getReturnType(); - else if (In.isOrigArg()) - ArgTy = FType->getParamType(In.getOrigArgIndex()); - - if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, - /*IsFixed=*/true, IsRet, ArgTy)) { + if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet, + In.OrigTy)) { 
LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type " << ArgVT << '\n'); llvm_unreachable(nullptr); @@ -22298,10 +22447,9 @@ void RISCVTargetLowering::analyzeOutputArgs( for (const auto &[Idx, Out] : enumerate(Outs)) { MVT ArgVT = Out.VT; ISD::ArgFlagsTy ArgFlags = Out.Flags; - Type *OrigTy = CLI ? CLI->getArgs()[Out.OrigArgIndex].Ty : nullptr; - if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Out.IsFixed, - IsRet, OrigTy)) { + if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet, + Out.OrigTy)) { LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type " << ArgVT << "\n"); llvm_unreachable(nullptr); @@ -22477,7 +22625,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( switch (CallConv) { default: - report_fatal_error("Unsupported calling convention"); + reportFatalUsageError("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: case CallingConv::SPIR_KERNEL: @@ -22501,17 +22649,17 @@ SDValue RISCVTargetLowering::LowerFormalArguments( break; case CallingConv::GHC: if (Subtarget.hasStdExtE()) - report_fatal_error("GHC calling convention is not supported on RVE!"); + reportFatalUsageError("GHC calling convention is not supported on RVE!"); if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) - report_fatal_error("GHC calling convention requires the (Zfinx/F) and " - "(Zdinx/D) instruction set extensions"); + reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and " + "(Zdinx/D) instruction set extensions"); } const Function &Func = MF.getFunction(); if (Func.hasFnAttribute("interrupt")) { if (!Func.arg_empty()) - report_fatal_error( - "Functions with the interrupt attribute cannot have arguments!"); + reportFatalUsageError( + "Functions with the interrupt attribute cannot have arguments!"); StringRef Kind = MF.getFunction().getFnAttribute("interrupt").getValueAsString(); @@ -22527,11 +22675,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments( 
"SiFive-CLIC-preemptible-stack-swap", }; if (!llvm::is_contained(SupportedInterruptKinds, Kind)) - report_fatal_error( - "Function interrupt attribute argument not supported!"); + reportFatalUsageError( + "Function interrupt attribute argument not supported!"); if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint()) - report_fatal_error("'qci-*' interrupt kinds require Xqciint extension"); + reportFatalUsageError( + "'qci-*' interrupt kinds require Xqciint extension"); if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic()) reportFatalUsageError( @@ -22769,7 +22918,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CallConv == CallingConv::GHC) { if (Subtarget.hasStdExtE()) - report_fatal_error("GHC calling convention is not supported on RVE!"); + reportFatalUsageError("GHC calling convention is not supported on RVE!"); ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); } else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, @@ -22783,8 +22932,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsTailCall) ++NumTailCalls; else if (CLI.CB && CLI.CB->isMustTailCall()) - report_fatal_error("failed to perform tail call elimination on a call " - "site marked musttail"); + reportFatalInternalError("failed to perform tail call elimination on a " + "call site marked musttail"); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = ArgCCInfo.getStackSize(); @@ -23083,7 +23232,7 @@ bool RISCVTargetLowering::CanLowerReturn( MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo, - /*IsFixed=*/true, /*IsRet=*/true, nullptr)) + /*IsRet=*/true, Outs[i].OrigTy)) return false; } return true; @@ -23109,7 +23258,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, nullptr, CC_RISCV); if (CallConv == CallingConv::GHC && !RVLocs.empty()) - report_fatal_error("GHC functions return void only"); + reportFatalUsageError("GHC functions return void only"); SDValue Glue; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -23175,7 +23324,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const Function &Func = DAG.getMachineFunction().getFunction(); if (Func.hasFnAttribute("interrupt")) { if (!Func.getReturnType()->isVoidTy()) - report_fatal_error( + reportFatalUsageError( "Functions with the interrupt attribute must have void return type!"); MachineFunction &MF = DAG.getMachineFunction(); @@ -23343,6 +23492,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, &RISCV::VRN2M4RegClass}) { if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) return std::make_pair(0U, RC); + + if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) { + MVT ContainerVT = getContainerForFixedLengthVector(VT); + if (TRI->isTypeLegalForClass(*RC, ContainerVT)) + return std::make_pair(0U, RC); + } } } else if (Constraint == "vd") { for (const auto *RC : @@ -23356,10 +23511,24 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, &RISCV::VRN2M4NoV0RegClass}) { if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) return std::make_pair(0U, RC); + + if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) { + MVT ContainerVT = getContainerForFixedLengthVector(VT); + if (TRI->isTypeLegalForClass(*RC, ContainerVT)) + return 
std::make_pair(0U, RC); + } } } else if (Constraint == "vm") { if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) return std::make_pair(0U, &RISCV::VMV0RegClass); + + if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) { + MVT ContainerVT = getContainerForFixedLengthVector(VT); + // VT here might be coerced to vector with i8 elements, so we need to + // check if this is a M1 register here instead of checking VMV0RegClass. + if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT)) + return std::make_pair(0U, &RISCV::VMV0RegClass); + } } else if (Constraint == "cr") { if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) return std::make_pair(0U, &RISCV::GPRF16CRegClass); @@ -23679,53 +23848,26 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { - if (XLen == 32) { - switch (BinOp) { - default: - llvm_unreachable("Unexpected AtomicRMW BinOp"); - case AtomicRMWInst::Xchg: - return Intrinsic::riscv_masked_atomicrmw_xchg_i32; - case AtomicRMWInst::Add: - return Intrinsic::riscv_masked_atomicrmw_add_i32; - case AtomicRMWInst::Sub: - return Intrinsic::riscv_masked_atomicrmw_sub_i32; - case AtomicRMWInst::Nand: - return Intrinsic::riscv_masked_atomicrmw_nand_i32; - case AtomicRMWInst::Max: - return Intrinsic::riscv_masked_atomicrmw_max_i32; - case AtomicRMWInst::Min: - return Intrinsic::riscv_masked_atomicrmw_min_i32; - case AtomicRMWInst::UMax: - return Intrinsic::riscv_masked_atomicrmw_umax_i32; - case AtomicRMWInst::UMin: - return Intrinsic::riscv_masked_atomicrmw_umin_i32; - } - } - - if (XLen == 64) { - switch (BinOp) { - default: - llvm_unreachable("Unexpected AtomicRMW BinOp"); - case AtomicRMWInst::Xchg: - return Intrinsic::riscv_masked_atomicrmw_xchg_i64; - case AtomicRMWInst::Add: - return Intrinsic::riscv_masked_atomicrmw_add_i64; - case AtomicRMWInst::Sub: - return 
Intrinsic::riscv_masked_atomicrmw_sub_i64; - case AtomicRMWInst::Nand: - return Intrinsic::riscv_masked_atomicrmw_nand_i64; - case AtomicRMWInst::Max: - return Intrinsic::riscv_masked_atomicrmw_max_i64; - case AtomicRMWInst::Min: - return Intrinsic::riscv_masked_atomicrmw_min_i64; - case AtomicRMWInst::UMax: - return Intrinsic::riscv_masked_atomicrmw_umax_i64; - case AtomicRMWInst::UMin: - return Intrinsic::riscv_masked_atomicrmw_umin_i64; - } + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::riscv_masked_atomicrmw_xchg; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin; } - - llvm_unreachable("Unexpected XLen\n"); } Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( @@ -23750,7 +23892,7 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( unsigned XLen = Subtarget.getXLen(); Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); - Type *Tys[] = {AlignedAddr->getType()}; + Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()}; Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration( AI->getModule(), getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); @@ -23806,14 +23948,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { unsigned XLen = Subtarget.getXLen(); Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); - Intrinsic::ID CmpXchgIntrID = 
Intrinsic::riscv_masked_cmpxchg_i32; + Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg; if (XLen == 64) { CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); - CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; } - Type *Tys[] = {AlignedAddr->getType()}; + Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()}; Value *Result = Builder.CreateIntrinsic( CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); if (XLen == 64) @@ -24237,7 +24378,12 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( return true; } - if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { + if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) && + PartVT.isScalableVector()) { + if (ValueVT.isFixedLengthVector()) { + ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT()); + Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget); + } LLVMContext &Context = *DAG.getContext(); EVT ValueEltVT = ValueVT.getVectorElementType(); EVT PartEltVT = PartVT.getVectorElementType(); @@ -24307,12 +24453,17 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( return Val; } - if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { + if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) && + PartVT.isScalableVector()) { LLVMContext &Context = *DAG.getContext(); SDValue Val = Parts[0]; EVT ValueEltVT = ValueVT.getVectorElementType(); EVT PartEltVT = PartVT.getVectorElementType(); unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); + if (ValueVT.isFixedLengthVector()) + ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT()) + .getSizeInBits() + .getKnownMinValue(); unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); if (PartVTBitSize % ValueVTBitSize == 0) { assert(PartVTBitSize >= ValueVTBitSize); @@ -24330,7 +24481,10 @@ SDValue 
RISCVTargetLowering::joinRegisterPartsIntoValue( EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); } - Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0); + if (ValueVT.isFixedLengthVector()) + Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget); + else + Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0); return Val; } } @@ -24437,8 +24591,8 @@ RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) - report_fatal_error(Twine("Trying to obtain non-reserved register \"" + - StringRef(RegName) + "\".")); + reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" + + StringRef(RegName) + "\".")); return Reg; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 433b8be..4581c11 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -431,12 +431,12 @@ public: bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles, - ArrayRef<unsigned> Indices, - unsigned Factor) const override; + ArrayRef<unsigned> Indices, unsigned Factor, + const APInt &GapMask) const override; bool lowerInterleavedStore(Instruction *Store, Value *Mask, - ShuffleVectorInst *SVI, - unsigned Factor) const override; + ShuffleVectorInst *SVI, unsigned Factor, + const APInt &GapMask) const override; bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index d9c6101..c2667b0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -261,17 +261,18 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, 
string argstr, // Indicates the EEW of a vector instruction's destination operand. EEW DestEEW = EEWSEWx1; let TSFlags{25-24} = DestEEW.Value; + + // Some vector instructions like vslidedown/vrgather will read elements past + // VL, and should be marked to make sure RISCVVLOptimizer doesn't reduce its + // operands' VLs. + bit ReadsPastVL = 0; + let TSFlags{26} = ReadsPastVL; } class RVInst<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> { field bits<32> Inst; - // SoftFail is a field the disassembler can use to provide a way for - // instructions to not match without killing the whole decode process. It is - // mainly used for ARM, but Tablegen expects this field to exist or it fails - // to build the decode table. - field bits<32> SoftFail = 0; let Size = 4; } @@ -279,7 +280,6 @@ class RVInst48<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> { field bits<48> Inst; - field bits<48> SoftFail = 0; let Size = 6; } @@ -287,7 +287,6 @@ class RVInst64<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> { field bits<64> Inst; - field bits<64> SoftFail = 0; let Size = 8; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td index 5e16061..209c3fa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td @@ -14,11 +14,6 @@ class RVInst16<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> { field bits<16> Inst; - // SoftFail is a field the disassembler can use to provide a way for - // instructions to not match without killing the whole decode process. 
It is - // mainly used for ARM, but Tablegen expects this field to exist or it fails - // to build the decode table. - field bits<16> SoftFail = 0; let Size = 2; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 085064e..7b4a1de 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RegClass) const { - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const RISCVRegisterInfo *TRI = STI.getRegisterInfo(); RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags); unsigned NF = RISCVRI::getNF(RegClass->TSFlags); @@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector( return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1}; }; - auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass, - uint16_t Encoding) { - MCRegister Reg = RISCV::V0 + Encoding; - if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1) - return Reg; - return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); - }; + while (I != NumRegs) { // For non-segment copying, we only do this once as the registers are always // aligned. @@ -470,9 +464,9 @@ void RISCVInstrInfo::copyPhysRegVector( // Emit actual copying. // For reversed copying, the encoding should be decreased. - MCRegister ActualSrcReg = FindRegWithEncoding( + MCRegister ActualSrcReg = TRI->findVRegWithEncoding( RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding); - MCRegister ActualDstReg = FindRegWithEncoding( + MCRegister ActualDstReg = TRI->findVRegWithEncoding( RegClass, ReversedCopy ? 
(DstEncoding - NumCopied + 1) : DstEncoding); auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 8bd3830..23f5a84 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1694,6 +1694,16 @@ multiclass SelectCC_GPR_riirr<DAGOperand valty, DAGOperand imm> { valty:$truev, valty:$falsev), []>; } +let Predicates = [IsRV32] in { +def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible +} +let Predicates = [IsRV64] in { +def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)), + (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>; +def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible +def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>; +} + /// Branches and jumps // Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction. @@ -2129,14 +2139,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), /// RV64 patterns -let Predicates = [IsRV64, NotHasStdExtZba] in { +let Predicates = [IsRV64, NoStdExtZba] in { def : Pat<(i64 (and GPR:$rs1, 0xffffffff)), (SRLI (i64 (SLLI GPR:$rs1, 32)), 32)>; // If we're shifting a 32-bit zero extended value left by 0-31 bits, use 2 // shifts instead of 3. This can occur when unsigned is used to index an array. 
def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)), (SRLI (i64 (SLLI GPR:$rs1, 32)), (ImmSubFrom32 uimm5:$shamt))>; -} +} // Predicates = [IsRV64, NoStdExtZba] class binop_allhusers<SDPatternOperator operator> : PatFrag<(ops node:$lhs, node:$rhs), @@ -2367,6 +2377,7 @@ include "RISCVInstrInfoXqccmp.td" include "RISCVInstrInfoXMips.td" include "RISCVInstrInfoXRivos.td" include "RISCVInstrInfoXAndes.td" +include "RISCVInstrInfoXSpacemiT.td" //===----------------------------------------------------------------------===// // Global ISel diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 5fa7d41..59f5aeb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -190,7 +190,7 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { -let Predicates = !listconcat([HasStdExtA, NotHasStdExtZtso], ExtraPreds) in { +let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in { def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), !cast<RVInst>(BaseInst), vt>; def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), @@ -277,13 +277,30 @@ class PseudoMaskedAMOUMinUMax let hasSideEffects = 0; } +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. 
+multiclass PseudoAMOPat<string AtomicOp, Pseudo AMOInst, ValueType vt = XLenVT> { + def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_monotonic") GPR:$addr, GPR:$incr)), + (AMOInst GPR:$addr, GPR:$incr, 2)>; + def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_acquire") GPR:$addr, GPR:$incr)), + (AMOInst GPR:$addr, GPR:$incr, 4)>; + def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_release") GPR:$addr, GPR:$incr)), + (AMOInst GPR:$addr, GPR:$incr, 5)>; + def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_acq_rel") GPR:$addr, GPR:$incr)), + (AMOInst GPR:$addr, GPR:$incr, 6)>; + def : Pat<(vt (!cast<PatFrag>(AtomicOp#"_seq_cst") GPR:$addr, GPR:$incr)), + (AMOInst GPR:$addr, GPR:$incr, 7)>; +} + class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst> - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), + : Pat<(XLenVT (intrin (XLenVT GPR:$addr), (XLenVT GPR:$incr), + (XLenVT GPR:$mask), (XLenVT timm:$ordering))), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - timm:$ordering), + : Pat<(XLenVT (intrin (XLenVT GPR:$addr), (XLenVT GPR:$incr), + (XLenVT GPR:$mask), (XLenVT GPR:$shiftamt), + (XLenVT timm:$ordering))), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; @@ -291,50 +308,40 @@ let Predicates = [HasStdExtA] in { let Size = 20 in def PseudoAtomicLoadNand32 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. 
-def : Pat<(XLenVT (atomic_load_nand_i32_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(XLenVT (atomic_load_nand_i32_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(XLenVT (atomic_load_nand_i32_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(XLenVT (atomic_load_nand_i32_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(XLenVT (atomic_load_nand_i32_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; - -let Size = 28 in -def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, +defm : PseudoAMOPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>; + +let Size = 28 in { + def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; + def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO; + def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO; +} +let Size = 32 in { + def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO; +} +let Size = 44 in { + def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax; + def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax; +} +let Size = 36 in { + def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax; + def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; +} + +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg, PseudoMaskedAtomicSwap32>; -let Size = 28 in -def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32, +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add, PseudoMaskedAtomicLoadAdd32>; -let Size = 28 in -def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32, +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub, PseudoMaskedAtomicLoadSub32>; -let Size = 32 in -def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO; -def : 
PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32, +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand, PseudoMaskedAtomicLoadNand32>; -let Size = 44 in -def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32, +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max, PseudoMaskedAtomicLoadMax32>; -let Size = 44 in -def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32, +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min, PseudoMaskedAtomicLoadMin32>; -let Size = 36 in -def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32, +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax, PseudoMaskedAtomicLoadUMax32>; -let Size = 36 in -def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin, PseudoMaskedAtomicLoadUMin32>; } // Predicates = [HasStdExtA] @@ -342,35 +349,7 @@ let Predicates = [HasStdExtA, IsRV64] in { let Size = 20 in def PseudoAtomicLoadNand64 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. 
-def : Pat<(i64 (atomic_load_nand_i64_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(i64 (atomic_load_nand_i64_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(i64 (atomic_load_nand_i64_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(i64 (atomic_load_nand_i64_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(i64 (atomic_load_nand_i64_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; - -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, - PseudoMaskedAtomicSwap32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, - PseudoMaskedAtomicLoadAdd32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, - PseudoMaskedAtomicLoadSub32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, - PseudoMaskedAtomicLoadNand32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, - PseudoMaskedAtomicLoadMax32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, - PseudoMaskedAtomicLoadMin32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, - PseudoMaskedAtomicLoadUMax32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, - PseudoMaskedAtomicLoadUMin32>; +defm : PseudoAMOPat<"atomic_load_nand_i64", PseudoAtomicLoadNand64, i64>; } // Predicates = [HasStdExtA, IsRV64] @@ -424,15 +403,9 @@ def PseudoMaskedCmpXchg32 let Size = 32; } -def : Pat<(int_riscv_masked_cmpxchg_i32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), +def : Pat<(XLenVT (int_riscv_masked_cmpxchg + (XLenVT GPR:$addr), (XLenVT GPR:$cmpval), (XLenVT GPR:$newval), + (XLenVT GPR:$mask), (XLenVT timm:$ordering))), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } // Predicates = [HasStdExtA] - -let Predicates = [HasStdExtA, IsRV64] in { 
-def : Pat<(int_riscv_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), - (PseudoMaskedCmpXchg32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; -} // Predicates = [HasStdExtA, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index c5551fb..bfc766d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -301,14 +301,6 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), let Inst{5} = imm{3}; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, - Sched<[WriteFLD64, ReadFMemBase]> { - bits<8> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6-5} = imm{7-6}; -} - def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>, Sched<[WriteLDW, ReadMemBase]> { bits<7> imm; @@ -326,16 +318,6 @@ def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>, let Inst{5} = imm{6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, - Sched<[WriteFLD32, ReadFMemBase]> { - bits<7> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6} = imm{2}; - let Inst{5} = imm{6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, Sched<[WriteLDD, ReadMemBase]> { @@ -344,14 +326,6 @@ def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, let Inst{6-5} = imm{7-6}; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, - Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { - bits<8> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6-5} = imm{7-6}; -} - def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>, Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { bits<7> imm; @@ -369,16 +343,6 @@ def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>, let Inst{5} = 
imm{6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, - Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { - bits<7> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6} = imm{2}; - let Inst{5} = imm{6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { @@ -500,12 +464,6 @@ def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), let Constraints = "$rd = $rd_wb"; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, - Sched<[WriteFLD64, ReadFMemBase]> { - let Inst{4-2} = imm{8-6}; -} - def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>, Sched<[WriteLDW, ReadMemBase]> { let Inst{3-2} = imm{7-6}; @@ -517,13 +475,6 @@ def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>, let Inst{3-2} = imm{7-6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, - Sched<[WriteFLD32, ReadFMemBase]> { - let Inst{3-2} = imm{7-6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, Sched<[WriteLDD, ReadMemBase]> { @@ -560,12 +511,6 @@ def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd), let Constraints = "$rs1 = $rd"; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>, - Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { - let Inst{9-7} = imm{8-6}; -} - def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>, Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { let Inst{8-7} = imm{7-6}; @@ -577,13 +522,6 @@ def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>, let Inst{8-7} = imm{7-6}; } -let DecoderNamespace = "RV32Only", - 
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, - Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { - let Inst{8-7} = imm{7-6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { @@ -600,6 +538,61 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>, } // Predicates = [HasStdExtZca] +let DecoderNamespace = "RV32Only", + Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { + def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, + Sched<[WriteFLD32, ReadFMemBase]> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; + } + + def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, + Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; + } + + def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, + Sched<[WriteFLD32, ReadFMemBase]> { + let Inst{3-2} = imm{7-6}; + } + + def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, + Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { + let Inst{8-7} = imm{7-6}; + } +} // DecoderNamespace = "RV32Only", Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] + +let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { + def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, + Sched<[WriteFLD64, ReadFMemBase]> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; + } + + def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, + Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; + } + + def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, + Sched<[WriteFLD64, ReadFMemBase]> { + let Inst{4-2} = imm{8-6}; + } + + def C_FSDSP : CStackStore<0b101, 
"c.fsdsp", FPR64, uimm9_lsb000>, + Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { + let Inst{9-7} = imm{8-6}; + } +} // Predicates = [HasStdExtCOrZcd, HasStdExtD] in { + //===----------------------------------------------------------------------===// // HINT Instructions //===----------------------------------------------------------------------===// @@ -767,20 +760,17 @@ def : InstAlias<".insn_cj $opcode, $funct3, $imm11", // Compress Instruction tablegen backend. //===----------------------------------------------------------------------===// -// Patterns are defined in the same order the compressed instructions appear +// Zca patterns are defined in the same order the compressed instructions appear // under the "RVC Instruction Set Listings" section of the ISA manual. +// Zca Instructions + // Quadrant 0 let Predicates = [HasStdExtZca] in { def : CompressPat<(ADDI GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm), (C_ADDI4SPN GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), - (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; @@ -790,21 +780,11 @@ def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), - (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), (C_LD GPRC:$rd, GPRCMem:$rs1, 
uimm8_lsb000:$imm)>; } // Predicates = [HasStdExtZca, IsRV64] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), - (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; @@ -814,11 +794,6 @@ def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), - (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), (C_SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; @@ -907,11 +882,6 @@ def : CompressPat<(SLLI GPRNoX0:$rs1, GPRNoX0:$rs1, uimmlog2xlennonzero:$imm), (C_SLLI GPRNoX0:$rs1, uimmlog2xlennonzero:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm), - (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm), (C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; @@ -921,11 +891,6 @@ def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm), (C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm), - (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; -} // 
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(LD GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm), (C_LDSP GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; @@ -953,11 +918,6 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1), (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), - (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), (C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; @@ -967,12 +927,38 @@ def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), (C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), - (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(SD GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), (C_SDSP GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasStdExtZca, IsRV64] + +// Zcf Instructions +let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { + // Quadrant 0 + def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), + (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; + def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), + (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; + + // Quadrant 2 + def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm), + (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; + def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), + (C_FSWSP FPR32:$rs2, SPMem:$rs1, 
uimm8_lsb00:$imm)>; +} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] + +// Zcd Instructions +let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { + // Quadrant 0 + def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), + (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>; + def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), + (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; + + // Quadrant 2 + def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm), + (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; + def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), + (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; +} // Predicates = [HasStdExtCOrZcd, HasStdExtD] + diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index c1f67f7..fdf0195 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -111,11 +111,11 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))), (REMW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtM, IsRV64] -let Predicates = [HasStdExtZmmul, IsRV64, NotHasStdExtZba] in { +let Predicates = [HasStdExtZmmul, IsRV64, NoStdExtZba] in { // Special case for calculating the full 64-bit product of a 32x32 unsigned // multiply where the inputs aren't known to be zero extended. We can shift the // inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish // zeroing the upper 32 bits. 
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))), (MULHU (i64 (SLLI GPR:$rs1, 32)), (i64 (SLLI GPR:$rs2, 32)))>; -} // Predicates = [HasStdExtZmmul, IsRV64, NotHasStdExtZba] +} // Predicates = [HasStdExtZmmul, IsRV64, NoStdExtZba] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 8297d50..c342b41 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -18,7 +18,26 @@ // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// -def simm10 : RISCVSImmLeafOp<10>; +def simm10 : RISCVSImmOp<10>; + +def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> { + let RenderMethod = "addSImm8UnsignedOperands"; +} + +// A 8-bit signed immediate allowing range [-128, 255] +// but represented as [-128, 255]. +def simm8_unsigned : RISCVOp { + let ParserMatchClass = SImm8UnsignedAsmOperand; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmOperand<8>"; + let OperandType = "OPERAND_SIMM8_UNSIGNED"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isInt<8>(Imm); + }]; +} def SImm10UnsignedAsmOperand : SImmAsmOperand<10, "Unsigned"> { let RenderMethod = "addSImm10UnsignedOperands"; @@ -30,7 +49,7 @@ def simm10_unsigned : RISCVOp { let ParserMatchClass = SImm10UnsignedAsmOperand; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmOperand<10>"; - let OperandType = "OPERAND_SIMM10"; + let OperandType = "OPERAND_SIMM10_UNSIGNED"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) @@ -43,49 +62,40 @@ def simm10_unsigned : RISCVOp { // Instruction class templates //===----------------------------------------------------------------------===// -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class PLI_i<bits<7> funct7, string opcodestr> - : RVInst<(outs 
GPR:$rd), (ins simm10:$imm10), opcodestr, "$rd, $imm10", [], +// Common base for pli.b/h/w and plui.h/w +class RVPLoadImm_i<bits<7> funct7, dag ins, string opcodestr, + string argstr> + : RVInst<(outs GPR:$rd), ins, opcodestr, argstr, [], InstFormatOther> { - bits<10> imm10; bits<5> rd; let Inst{31-25} = funct7; - let Inst{24-16} = imm10{8-0}; - let Inst{15} = imm10{9}; let Inst{14-12} = 0b010; let Inst{11-7} = rd; let Inst{6-0} = OPC_OP_IMM_32.Value; + + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class PLUI_i<bits<7> funct7, string opcodestr> - : RVInst<(outs GPR:$rd), (ins simm10_unsigned:$imm10), opcodestr, - "$rd, $imm10", [], InstFormatOther> { +// Base for pli.h/w. +class PLI_i<bits<7> funct7, string opcodestr> + : RVPLoadImm_i<funct7, (ins simm10:$imm10), opcodestr, "$rd, $imm10"> { bits<10> imm10; - bits<5> rd; - let Inst{31-25} = funct7; - let Inst{24} = imm10{0}; - let Inst{23-15} = imm10{9-1}; - let Inst{14-12} = 0b010; - let Inst{11-7} = rd; - let Inst{6-0} = OPC_OP_IMM_32.Value; + let Inst{24-16} = imm10{8-0}; + let Inst{15} = imm10{9}; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class PLI_B_i<bits<8> funct8, string opcodestr> - : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [], - InstFormatOther> { - bits<8> uimm8; - bits<5> rd; +// Base for plui.h/w. 
+class PLUI_i<bits<7> funct7, string opcodestr> + : RVPLoadImm_i<funct7, (ins simm10_unsigned:$imm10), opcodestr, + "$rd, $imm10"> { + bits<10> imm10; - let Inst{31-24} = funct8; - let Inst{23-16} = uimm8; - let Inst{15} = 0b0; - let Inst{14-12} = 0b010; - let Inst{11-7} = rd; - let Inst{6-0} = OPC_OP_IMM_32.Value; + let Inst{24} = imm10{0}; + let Inst{23-15} = imm10{9-1}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -98,6 +108,14 @@ class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType> let Inst{27} = 0b0; } +class RVPShiftD_ri<bits<3> f, bits<3> funct3, string opcodestr> + : RVPShift_ri<f, funct3, opcodestr, uimm6> { + bits<6> shamt; + + let Inst{26} = 0b1; + let Inst{25-20} = shamt; +} + class RVPShiftW_ri<bits<3> f, bits<3> funct3, string opcodestr> : RVPShift_ri<f, funct3, opcodestr, uimm5> { bits<5> shamt; @@ -131,59 +149,743 @@ class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr> let Inst{24-20} = uf; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPBinaryScalar_rr<bits<3> f, bits<2> w, bits<3> funct3, string opcodestr> + : RVInstRBase<funct3, OPC_OP_IMM_32, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let Inst{31} = 0b1; + let Inst{30-28} = f; + let Inst{27} = 0b1; + let Inst{26-25} = w; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPBinary_rr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr> + : RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> { + let Inst{31} = 0b1; + let Inst{30-27} = f; + let Inst{26-25} = w; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr> + : RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd_wb), + (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, + "$rd, $rs1, $rs2"> { + let Inst{31} = 0b1; + let Inst{30-27} = f; + let Inst{26-25} = w; + + let Constraints = "$rd = $rd_wb"; +} + +// 
Common base for pli.db/h/w and plui.dh/w +class RVPPairLoadImm_i<bits<7> funct7, dag ins, string opcodestr, + string argstr> + : RVInst<(outs GPRPairRV32:$rd), ins, opcodestr, argstr, [], + InstFormatOther> { + bits<5> rd; + + let Inst{31-25} = funct7; + let Inst{14-12} = 0b010; + let Inst{11-8} = rd{4-1}; + let Inst{7} = 0b0; + let Inst{6-0} = OPC_OP_IMM_32.Value; + + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// let Predicates = [HasStdExtP] in { -let IsSignExtendingOpW = 1 in -def CLS : Unary_r<0b011000000011, 0b001, "cls">; -def ABS : Unary_r<0b011000000111, 0b001, "abs">; + let IsSignExtendingOpW = 1 in + def CLS : Unary_r<0b011000000011, 0b001, "cls">; + def ABS : Unary_r<0b011000000111, 0b001, "abs">; } // Predicates = [HasStdExtP] -let Predicates = [HasStdExtP, IsRV32] in -def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">; + +let Predicates = [HasStdExtP, IsRV32] in { + def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">; +} // Predicates = [HasStdExtP, IsRV32] let Predicates = [HasStdExtP, IsRV64] in { -def REV16 : Unary_r<0b011010110000, 0b101, "rev16">; -def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">; + def REV16 : Unary_r<0b011010110000, 0b101, "rev16">; + def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">; -let IsSignExtendingOpW = 1 in { -def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">; -def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">; -} + let IsSignExtendingOpW = 1 in { + def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">; + def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">; + } } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in { -def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">; -def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">; -def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">; + def PSLLI_B : 
RVPShiftB_ri<0b000, 0b010, "pslli.b">; + def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">; + def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">; } // Predicates = [HasStdExtP] -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtP, IsRV32] in -def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">; +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" let Predicates = [HasStdExtP, IsRV64] in { -def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">; -def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">; + def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">; + def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in def PLI_H : PLI_i<0b1011000, "pli.h">; let Predicates = [HasStdExtP, IsRV64] in def PLI_W : PLI_i<0b1011001, "pli.w">; -let Predicates = [HasStdExtP] in -def PLI_B : PLI_B_i<0b10110100, "pli.b">; +let Predicates = [HasStdExtP] in { + def PLI_B : RVPLoadImm_i<0b1011010, (ins simm8_unsigned:$imm8), "pli.b", + "$rd, $imm8"> { + bits<8> imm8; + + let Inst{24} = 0b0; + let Inst{23-16} = imm8; + let Inst{15} = 0b0; + } +} let Predicates = [HasStdExtP] in { -def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">; -def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">; -def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">; + def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">; + def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">; + def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">; } // Predicates = [HasStdExtP] let Predicates = [HasStdExtP, IsRV64] in { -def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">; -def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">; + def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">; + def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = 
[HasStdExtP] in def PLUI_H : PLUI_i<0b1111000, "plui.h">; let Predicates = [HasStdExtP, IsRV64] in def PLUI_W : PLUI_i<0b1111001, "plui.w">; + +let Predicates = [HasStdExtP] in { + def PSLL_HS : RVPBinaryScalar_rr<0b000, 0b00, 0b010, "psll.hs">; + def PSLL_BS : RVPBinaryScalar_rr<0b000, 0b10, 0b010, "psll.bs">; + + def PADD_HS : RVPBinaryScalar_rr<0b001, 0b00, 0b010, "padd.hs">; + def PADD_BS : RVPBinaryScalar_rr<0b001, 0b10, 0b010, "padd.bs">; + + def PSSHA_HS : RVPBinaryScalar_rr<0b110, 0b00, 0b010, "pssha.hs">; + + def PSSHAR_HS : RVPBinaryScalar_rr<0b111, 0b00, 0b010, "psshar.hs">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def SSHA : RVPBinaryScalar_rr<0b110, 0b01, 0b010, "ssha">; + + def SSHAR : RVPBinaryScalar_rr<0b111, 0b01, 0b010, "sshar">; +} // Predicates = [HasStdExtP, IsRV32] +let Predicates = [HasStdExtP, IsRV64] in { + def PSLL_WS : RVPBinaryScalar_rr<0b000, 0b01, 0b010, "psll.ws">; + + def PADD_WS : RVPBinaryScalar_rr<0b001, 0b01, 0b010, "padd.ws">; + + def PSSHA_WS : RVPBinaryScalar_rr<0b110, 0b01, 0b010, "pssha.ws">; + def SHA : RVPBinaryScalar_rr<0b110, 0b11, 0b010, "sha">; + + def PSSHAR_WS : RVPBinaryScalar_rr<0b111, 0b01, 0b010, "psshar.ws">; + def SHAR : RVPBinaryScalar_rr<0b111, 0b11, 0b010, "shar">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PSRLI_B : RVPShiftB_ri<0b000, 0b100, "psrli.b">; + def PSRLI_H : RVPShiftH_ri<0b000, 0b100, "psrli.h">; + + def PUSATI_H : RVPShiftH_ri<0b010, 0b100, "pusati.h">; + + def PSRAI_B : RVPShiftB_ri<0b100, 0b100, "psrai.b">; + def PSRAI_H : RVPShiftH_ri<0b100, 0b100, "psrai.h">; + + def PSRARI_H : RVPShiftH_ri<0b101, 0b100, "psrari.h">; + + def PSATI_H : RVPShiftH_ri<0b110, 0b100, "psati.h">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def USATI_RV32 : RVPShiftW_ri<0b010, 0b100, "usati">; + + def SRARI_RV32 : RVPShiftW_ri<0b101, 0b100, 
"srari">; + + def SATI_RV32 : RVPShiftW_ri<0b110, 0b100, "sati">; +} // Predicates = [HasStdExtP, IsRV32] +let Predicates = [HasStdExtP, IsRV64] in { + def PSRLI_W : RVPShiftW_ri<0b000, 0b100, "psrli.w">; + def PSRAI_W : RVPShiftW_ri<0b100, 0b100, "psrai.w">; + + def PUSATI_W : RVPShiftW_ri<0b010, 0b100, "pusati.w">; + def USATI_RV64 : RVPShiftD_ri<0b010, 0b100, "usati">; + + def PSRARI_W : RVPShiftW_ri<0b101, 0b100, "psrari.w">; + def SRARI_RV64 : RVPShiftD_ri<0b101, 0b100, "srari">; + + def PSATI_W : RVPShiftW_ri<0b110, 0b100, "psati.w">; + def SATI_RV64 : RVPShiftD_ri<0b110, 0b100, "sati">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PSRL_HS : RVPBinaryScalar_rr<0b000, 0b00, 0b100, "psrl.hs">; + def PSRL_BS : RVPBinaryScalar_rr<0b000, 0b10, 0b100, "psrl.bs">; + + def PREDSUM_HS : RVPBinaryScalar_rr<0b001, 0b00, 0b100, "predsum.hs">; + def PREDSUM_BS : RVPBinaryScalar_rr<0b001, 0b10, 0b100, "predsum.bs">; + + def PREDSUMU_HS : RVPBinaryScalar_rr<0b011, 0b00, 0b100, "predsumu.hs">; + def PREDSUMU_BS : RVPBinaryScalar_rr<0b011, 0b10, 0b100, "predsumu.bs">; + + def PSRA_HS : RVPBinaryScalar_rr<0b100, 0b00, 0b100, "psra.hs">; + def PSRA_BS : RVPBinaryScalar_rr<0b100, 0b10, 0b100, "psra.bs">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV64] in { + def PSRL_WS : RVPBinaryScalar_rr<0b000, 0b01, 0b100, "psrl.ws">; + + def PREDSUM_WS : RVPBinaryScalar_rr<0b001, 0b01, 0b100, "predsum.ws">; + + def PREDSUMU_WS : RVPBinaryScalar_rr<0b011, 0b01, 0b100, "predsumu.ws">; + + def PSRA_WS : RVPBinaryScalar_rr<0b100, 0b01, 0b100, "psra.ws">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PADD_H : RVPBinary_rr<0b0000, 0b00, 0b000, "padd.h">; + def PADD_B : RVPBinary_rr<0b0000, 0b10, 0b000, "padd.b">; + + def PSADD_H : RVPBinary_rr<0b0010, 0b00, 0b000, "psadd.h">; + def PSADD_B : RVPBinary_rr<0b0010, 0b10, 0b000, "psadd.b">; + + def PAADD_H : RVPBinary_rr<0b0011, 0b00, 0b000, 
"paadd.h">; + def PAADD_B : RVPBinary_rr<0b0011, 0b10, 0b000, "paadd.b">; + + def PSADDU_H : RVPBinary_rr<0b0110, 0b00, 0b000, "psaddu.h">; + def PSADDU_B : RVPBinary_rr<0b0110, 0b10, 0b000, "psaddu.b">; + + def PAADDU_H : RVPBinary_rr<0b0111, 0b00, 0b000, "paaddu.h">; + def PAADDU_B : RVPBinary_rr<0b0111, 0b10, 0b000, "paaddu.b">; + + def PSUB_H : RVPBinary_rr<0b1000, 0b00, 0b000, "psub.h">; + def PSUB_B : RVPBinary_rr<0b1000, 0b10, 0b000, "psub.b">; + + def PDIF_H : RVPBinary_rr<0b1001, 0b00, 0b000, "pdif.h">; + def PDIF_B : RVPBinary_rr<0b1001, 0b10, 0b000, "pdif.b">; + + def PSSUB_H : RVPBinary_rr<0b1010, 0b00, 0b000, "pssub.h">; + def PSSUB_B : RVPBinary_rr<0b1010, 0b10, 0b000, "pssub.b">; + + def PASUB_H : RVPBinary_rr<0b1011, 0b00, 0b000, "pasub.h">; + def PASUB_B : RVPBinary_rr<0b1011, 0b10, 0b000, "pasub.b">; + + def PDIFU_H : RVPBinary_rr<0b1101, 0b00, 0b000, "pdifu.h">; + def PDIFU_B : RVPBinary_rr<0b1101, 0b10, 0b000, "pdifu.b">; + + def PSSUBU_H : RVPBinary_rr<0b1110, 0b00, 0b000, "pssubu.h">; + def PSSUBU_B : RVPBinary_rr<0b1110, 0b10, 0b000, "pssubu.b">; + + def PASUBU_H : RVPBinary_rr<0b1111, 0b00, 0b000, "pasubu.h">; + def PASUBU_B : RVPBinary_rr<0b1111, 0b10, 0b000, "pasubu.b">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def SADD : RVPBinary_rr<0b0010, 0b01, 0b000, "sadd">; + + def AADD : RVPBinary_rr<0b0011, 0b01, 0b000, "aadd">; + + def SADDU : RVPBinary_rr<0b0110, 0b01, 0b000, "saddu">; + + def AADDU : RVPBinary_rr<0b0111, 0b01, 0b000, "aaddu">; + + def SSUB : RVPBinary_rr<0b1010, 0b01, 0b000, "ssub">; + + def ASUB : RVPBinary_rr<0b1011, 0b01, 0b000, "asub">; + + def SSUBU : RVPBinary_rr<0b1110, 0b01, 0b000, "ssubu">; + + def ASUBU : RVPBinary_rr<0b1111, 0b01, 0b000, "asubu">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PADD_W : RVPBinary_rr<0b0000, 0b01, 0b000, "padd.w">; + + def PSADD_W : 
RVPBinary_rr<0b0010, 0b01, 0b000, "psadd.w">; + + def PAADD_W : RVPBinary_rr<0b0011, 0b01, 0b000, "paadd.w">; + + def PSADDU_W : RVPBinary_rr<0b0110, 0b01, 0b000, "psaddu.w">; + + def PAADDU_W : RVPBinary_rr<0b0111, 0b01, 0b000, "paaddu.w">; + + def PSUB_W : RVPBinary_rr<0b1000, 0b01, 0b000, "psub.w">; + + def PSSUB_W : RVPBinary_rr<0b1010, 0b01, 0b000, "pssub.w">; + + def PASUB_W : RVPBinary_rr<0b1011, 0b01, 0b000, "pasub.w">; + + def PSSUBU_W : RVPBinary_rr<0b1110, 0b01, 0b000, "pssubu.w">; + + def PASUBU_W : RVPBinary_rr<0b1111, 0b01, 0b000, "pasubu.w">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def SLX : RVPBinary_rr<0b0001, 0b11, 0b001, "slx">; + + def PMUL_H_B01 : RVPBinary_rr<0b0010, 0b00, 0b001, "pmul.h.b01">; + + def MVM : RVPTernary_rrr<0b0101, 0b00, 0b001, "mvm">; + def MVMN : RVPTernary_rrr<0b0101, 0b01, 0b001, "mvmn">; + def MERGE : RVPTernary_rrr<0b0101, 0b10, 0b001, "merge">; + def SRX : RVPTernary_rrr<0b0101, 0b11, 0b001, "srx">; + + def PMULU_H_B01 : RVPBinary_rr<0b0110, 0b00, 0b001, "pmulu.h.b01">; + def PDIFSUMU_B : RVPBinary_rr<0b0110, 0b10, 0b001, "pdifsumu.b">; + + def PDIFSUMAU_B : RVPTernary_rrr<0b0111, 0b10, 0b001, "pdifsumau.b">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def MUL_H01 : RVPBinary_rr<0b0010, 0b01, 0b001, "mul.h01">; + + def MACC_H01 : RVPTernary_rrr<0b0011, 0b01, 0b001, "macc.h01">; + + def MULU_H01 : RVPBinary_rr<0b0110, 0b01, 0b001, "mulu.h01">; + + def MACCU_H01 : RVPTernary_rrr<0b0111, 0b01, 0b001, "maccu.h01">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PMUL_W_H01 : RVPBinary_rr<0b0010, 0b01, 0b001, "pmul.w.h01">; + def MUL_W01 : RVPBinary_rr<0b0010, 0b11, 0b001, "mul.w01">; + + def PMACC_W_H01 : RVPTernary_rrr<0b0011, 0b01, 0b001, "pmacc.w.h01">; + def MACC_W01 : RVPTernary_rrr<0b0011, 0b11, 0b001, "macc.w01">; + + def PMULU_W_H01 : 
RVPBinary_rr<0b0110, 0b01, 0b001, "pmulu.w.h01">; + def MULU_W01 : RVPBinary_rr<0b0110, 0b11, 0b001, "mulu.w01">; + + def PMACCU_W_H01 : RVPTernary_rrr<0b0111, 0b01, 0b001, "pmaccu.w.h01">; + def MACCU_W01 : RVPTernary_rrr<0b0111, 0b11, 0b001, "maccu.w01">; +} // Predicates = [HasStdExtP, IsRV64] + +// Note the spec has a 3-bit f field in bits 30:28 with 0 in bit 27. +// Here we include the 0 in the f field to reduce number of tablegen classes. +let Predicates = [HasStdExtP] in { + def PSH1ADD_H : RVPBinary_rr<0b0100, 0b00, 0b010, "psh1add.h">; + + def PSSH1SADD_H : RVPBinary_rr<0b0110, 0b00, 0b010, "pssh1sadd.h">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def SSH1SADD : RVPBinary_rr<0b0110, 0b01, 0b010, "ssh1sadd">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PSH1ADD_W : RVPBinary_rr<0b0100, 0b01, 0b010, "psh1add.w">; + + def PSSH1SADD_W : RVPBinary_rr<0b0110, 0b01, 0b010, "pssh1sadd.w">; + + def UNZIP8P : RVPBinary_rr<0b1100, 0b00, 0b010, "unzip8p">; + def UNZIP16P : RVPBinary_rr<0b1100, 0b01, 0b010, "unzip16p">; + def UNZIP8HP : RVPBinary_rr<0b1100, 0b10, 0b010, "unzip8hp">; + def UNZIP16HP : RVPBinary_rr<0b1100, 0b11, 0b010, "unzip16hp">; + + def ZIP8P : RVPBinary_rr<0b1110, 0b00, 0b010, "zip8p">; + def ZIP16P : RVPBinary_rr<0b1110, 0b01, 0b010, "zip16p">; + def ZIP8HP : RVPBinary_rr<0b1110, 0b10, 0b010, "zip8hp">; + def ZIP16HP : RVPBinary_rr<0b1110, 0b11, 0b010, "zip16hp">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PMUL_H_B00 : RVPBinary_rr<0b0000, 0b00, 0b011, "pmul.h.b00">; + + def PMUL_H_B11 : RVPBinary_rr<0b0010, 0b00, 0b011, "pmul.h.b11">; + + def PMULU_H_B00 : RVPBinary_rr<0b0100, 0b00, 0b011, "pmulu.h.b00">; + + def PMULU_H_B11 : RVPBinary_rr<0b0110, 0b00, 0b011, "pmulu.h.b11">; + + def PMULSU_H_B00 : RVPBinary_rr<0b1100, 0b00, 0b011, "pmulsu.h.b00">; + + def 
PMULSU_H_B11 : RVPBinary_rr<0b1110, 0b00, 0b011, "pmulsu.h.b11">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def MUL_H00 : RVPBinary_rr<0b0000, 0b01, 0b011, "mul.h00">; + + def MACC_H00 : RVPTernary_rrr<0b0001, 0b01, 0b011, "macc.h00">; + + def MUL_H11 : RVPBinary_rr<0b0010, 0b01, 0b011, "mul.h11">; + + def MACC_H11 : RVPTernary_rrr<0b0011, 0b01, 0b011, "macc.h11">; + + def MULU_H00 : RVPBinary_rr<0b0100, 0b01, 0b011, "mulu.h00">; + + def MACCU_H00 : RVPTernary_rrr<0b0101, 0b01, 0b011, "maccu.h00">; + + def MULU_H11 : RVPBinary_rr<0b0110, 0b01, 0b011, "mulu.h11">; + + def MACCU_H11 : RVPTernary_rrr<0b0111, 0b01, 0b011, "maccu.h11">; + + def MULSU_H00 : RVPBinary_rr<0b1100, 0b01, 0b011, "mulsu.h00">; + + def MACCSU_H00 : RVPTernary_rrr<0b1101, 0b01, 0b011, "maccsu.h00">; + + def MULSU_H11 : RVPBinary_rr<0b1110, 0b01, 0b011, "mulsu.h11">; + + def MACCSU_H11 : RVPTernary_rrr<0b1111, 0b01, 0b011, "maccsu.h11">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PMUL_W_H00 : RVPBinary_rr<0b0000, 0b01, 0b011, "pmul.w.h00">; + def MUL_W00 : RVPBinary_rr<0b0000, 0b11, 0b011, "mul.w00">; + + def PMACC_W_H00 : RVPTernary_rrr<0b0001, 0b01, 0b011, "pmacc.w.h00">; + def MACC_W00 : RVPTernary_rrr<0b0001, 0b11, 0b011, "macc.w00">; + + def PMUL_W_H11 : RVPBinary_rr<0b0010, 0b01, 0b011, "pmul.w.h11">; + def MUL_W11 : RVPBinary_rr<0b0010, 0b11, 0b011, "mul.w11">; + + def PMACC_W_H11 : RVPTernary_rrr<0b0011, 0b01, 0b011, "pmacc.w.h11">; + def MACC_W11 : RVPTernary_rrr<0b0011, 0b11, 0b011, "macc.w11">; + + def PMULU_W_H00 : RVPBinary_rr<0b0100, 0b01, 0b011, "pmulu.w.h00">; + def MULU_W00 : RVPBinary_rr<0b0100, 0b11, 0b011, "mulu.w00">; + + def PMACCU_W_H00 : RVPTernary_rrr<0b0101, 0b01, 0b011, "pmaccu.w.h00">; + def MACCU_W00 : RVPTernary_rrr<0b0101, 0b11, 0b011, "maccu.w00">; + + def PMULU_W_H11 : RVPBinary_rr<0b0110, 0b01, 0b011, "pmulu.w.h11">; + 
def MULU_W11 : RVPBinary_rr<0b0110, 0b11, 0b011, "mulu.w11">; + + def PMACCU_W_H11 : RVPTernary_rrr<0b0111, 0b01, 0b011, "pmaccu.w.h11">; + def MACCU_W11 : RVPTernary_rrr<0b0111, 0b11, 0b011, "maccu.w11">; + + def PMULSU_W_H00 : RVPBinary_rr<0b1100, 0b01, 0b011, "pmulsu.w.h00">; + def MULSU_W00 : RVPBinary_rr<0b1100, 0b11, 0b011, "mulsu.w00">; + + def PMACCSU_W_H00 : RVPTernary_rrr<0b1101, 0b01, 0b011, "pmaccsu.w.h00">; + def MACCSU_W00 : RVPTernary_rrr<0b1101, 0b11, 0b011, "maccsu.w00">; + + def PMULSU_W_H11 : RVPBinary_rr<0b1110, 0b01, 0b011, "pmulsu.w.h11">; + def MULSU_W11 : RVPBinary_rr<0b1110, 0b11, 0b011, "mulsu.w11">; + + def PMACCSU_W_H11 : RVPTernary_rrr<0b1111, 0b01, 0b011, "pmaccsu.w.h11">; + def MACCSU_W11 : RVPTernary_rrr<0b1111, 0b11, 0b011, "maccsu.w11">; +} // Predicates = [HasStdExtP, IsRV64] + +// Note the spec has a 3-bit f field in bits 30:28 with 0 in bit 27. +// Here we include the 0 in the f field to reduce number of tablegen classes. +let Predicates = [HasStdExtP] in { + def PPACK_H : RVPBinary_rr<0b0000, 0b00, 0b100, "ppack.h">; + + def PPACKBT_H : RVPBinary_rr<0b0010, 0b00, 0b100, "ppackbt.h">; + + def PPACKTB_H : RVPBinary_rr<0b0100, 0b00, 0b100, "ppacktb.h">; + + def PPACKT_H : RVPBinary_rr<0b0110, 0b00, 0b100, "ppackt.h">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def PACKBT_RV32 : RVPBinary_rr<0b0010, 0b01, 0b100, "packbt">; + + def PACKTB_RV32 : RVPBinary_rr<0b0100, 0b01, 0b100, "packtb">; + + def PACKT_RV32 : RVPBinary_rr<0b0110, 0b01, 0b100, "packt">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PPACK_W : RVPBinary_rr<0b0000, 0b01, 0b100, "ppack.w">; + + def PPACKBT_W : RVPBinary_rr<0b0010, 0b01, 0b100, "ppackbt.w">; + def PACKBT_RV64 : RVPBinary_rr<0b0010, 0b11, 0b100, "packbt">; + + def PPACKTB_W : RVPBinary_rr<0b0100, 0b01, 0b100, "ppacktb.w">; + def PACKTB_RV64 : RVPBinary_rr<0b0100, 
0b11, 0b100, "packtb">; + + def PPACKT_W : RVPBinary_rr<0b0110, 0b01, 0b100, "ppackt.w">; + def PACKT_RV64 : RVPBinary_rr<0b0110, 0b11, 0b100, "packt">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PM2ADD_H : RVPBinary_rr<0b0000, 0b00, 0b101, "pm2add.h">; + def PM4ADD_B : RVPBinary_rr<0b0000, 0b10, 0b101, "pm4add.b">; + + def PM2ADDA_H : RVPTernary_rrr<0b0001, 0b00, 0b101, "pm2adda.h">; + def PM4ADDA_B : RVPTernary_rrr<0b0001, 0b10, 0b101, "pm4adda.b">; + + def PM2ADD_HX : RVPBinary_rr<0b0010, 0b00, 0b101, "pm2add.hx">; + + def PM2ADDA_HX : RVPTernary_rrr<0b0011, 0b00, 0b101, "pm2adda.hx">; + + def PM2ADDU_H : RVPBinary_rr<0b0100, 0b00, 0b101, "pm2addu.h">; + def PM4ADDU_B : RVPBinary_rr<0b0100, 0b10, 0b101, "pm4addu.b">; + + def PM2ADDAU_H : RVPTernary_rrr<0b0101, 0b00, 0b101, "pm2addau.h">; + def PM4ADDAU_B : RVPTernary_rrr<0b0101, 0b10, 0b101, "pm4addau.b">; + + def PMQ2ADD_H : RVPBinary_rr<0b0110, 0b00, 0b101, "pmq2add.h">; + def PMQR2ADD_H : RVPBinary_rr<0b0110, 0b10, 0b101, "pmqr2add.h">; + + def PMQ2ADDA_H : RVPTernary_rrr<0b0111, 0b00, 0b101, "pmq2adda.h">; + def PMQR2ADDA_H : RVPTernary_rrr<0b0111, 0b10, 0b101, "pmqr2adda.h">; + + def PM2SUB_H : RVPBinary_rr<0b1000, 0b00, 0b101, "pm2sub.h">; + def PM2SADD_H : RVPBinary_rr<0b1000, 0b10, 0b101, "pm2sadd.h">; + + def PM2SUBA_H : RVPTernary_rrr<0b1001, 0b00, 0b101, "pm2suba.h">; + + def PM2SUB_HX : RVPBinary_rr<0b1010, 0b00, 0b101, "pm2sub.hx">; + def PM2SADD_HX : RVPBinary_rr<0b1010, 0b10, 0b101, "pm2sadd.hx">; + + def PM2SUBA_HX : RVPTernary_rrr<0b1011, 0b00, 0b101, "pm2suba.hx">; + + def PM2ADDSU_H : RVPBinary_rr<0b1100, 0b00, 0b101, "pm2addsu.h">; + def PM4ADDSU_B : RVPBinary_rr<0b1100, 0b10, 0b101, "pm4addsu.b">; + + def PM2ADDASU_H : RVPBinary_rr<0b1101, 0b00, 0b101, "pm2addasu.h">; + def PM4ADDASU_B : RVPBinary_rr<0b1101, 0b10, 0b101, "pm4addasu.b">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def 
MQACC_H01 : RVPTernary_rrr<0b1111, 0b00, 0b101, "mqacc.h01">; + def MQRACC_H01 : RVPTernary_rrr<0b1111, 0b10, 0b101, "mqracc.h01">; +} // // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PM2ADD_W : RVPBinary_rr<0b0000, 0b01, 0b101, "pm2add.w">; + def PM4ADD_H : RVPBinary_rr<0b0000, 0b11, 0b101, "pm4add.h">; + + def PM2ADDA_W : RVPTernary_rrr<0b0001, 0b01, 0b101, "pm2adda.w">; + def PM4ADDA_H : RVPTernary_rrr<0b0001, 0b11, 0b101, "pm4adda.h">; + + def PM2ADD_WX : RVPBinary_rr<0b0010, 0b01, 0b101, "pm2add.wx">; + + def PM2ADDA_WX : RVPTernary_rrr<0b0011, 0b01, 0b101, "pm2adda.wx">; + + def PM2ADDU_W : RVPBinary_rr<0b0100, 0b01, 0b101, "pm2addu.w">; + def PM4ADDU_H : RVPBinary_rr<0b0100, 0b11, 0b101, "pm4addu.h">; + + def PM2ADDAU_W : RVPTernary_rrr<0b0101, 0b01, 0b101, "pm2addau.w">; + def PM4ADDAU_H : RVPTernary_rrr<0b0101, 0b11, 0b101, "pm4addau.h">; + + def PMQ2ADD_W : RVPBinary_rr<0b0110, 0b01, 0b101, "pmq2add.w">; + def PMQR2ADD_W : RVPBinary_rr<0b0110, 0b11, 0b101, "pmqr2add.w">; + + def PMQ2ADDA_W : RVPTernary_rrr<0b0111, 0b01, 0b101, "pmq2adda.w">; + def PMQR2ADDA_W : RVPTernary_rrr<0b0111, 0b11, 0b101, "pmqr2adda.w">; + + def PM2SUB_W : RVPBinary_rr<0b1000, 0b01, 0b101, "pm2sub.w">; + + def PM2SUBA_W : RVPTernary_rrr<0b1001, 0b01, 0b101, "pm2suba.w">; + + def PM2SUB_WX : RVPBinary_rr<0b1010, 0b01, 0b101, "pm2sub.wx">; + + def PM2SUBA_WX : RVPBinary_rr<0b1011, 0b01, 0b101, "pm2suba.wx">; + + def PM2ADDSU_W : RVPBinary_rr<0b1100, 0b01, 0b101, "pm2addsu.w">; + def PM4ADDSU_H : RVPBinary_rr<0b1100, 0b11, 0b101, "pm4addsu.h">; + + def PM2ADDASU_W : RVPTernary_rrr<0b1101, 0b01, 0b101, "pm2addasu.w">; + def PM4ADDASU_H : RVPTernary_rrr<0b1101, 0b11, 0b101, "pm4addasu.h">; + + def PMQACC_W_H01 : RVPTernary_rrr<0b1111, 0b00, 0b101, "pmqacc.w.h01">; + def MQACC_W01 : RVPTernary_rrr<0b1111, 0b01, 0b101, "mqacc.w01">; + + def PMQRACC_W_H01 : RVPTernary_rrr<0b1111, 0b10, 0b101, "pmqracc.w.h01">; + def 
MQRACC_W01 : RVPTernary_rrr<0b1111, 0b11, 0b101, "mqracc.w01">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PAS_HX : RVPBinary_rr<0b0000, 0b00, 0b110, "pas.hx">; + def PSA_HX : RVPBinary_rr<0b0000, 0b10, 0b110, "psa.hx">; + + def PSAS_HX : RVPBinary_rr<0b0010, 0b00, 0b110, "psas.hx">; + def PSSA_HX : RVPBinary_rr<0b0010, 0b10, 0b110, "pssa.hx">; + + def PAAS_HX : RVPBinary_rr<0b0011, 0b00, 0b110, "paas.hx">; + def PASA_HX : RVPBinary_rr<0b0011, 0b10, 0b110, "pasa.hx">; + + def PMSEQ_H : RVPBinary_rr<0b1000, 0b00, 0b110, "pmseq.h">; + def PMSEQ_B : RVPBinary_rr<0b1000, 0b10, 0b110, "pmseq.b">; + + def PMSLT_H : RVPBinary_rr<0b1010, 0b00, 0b110, "pmslt.h">; + def PMSLT_B : RVPBinary_rr<0b1010, 0b10, 0b110, "pmslt.b">; + + def PMSLTU_H : RVPBinary_rr<0b1011, 0b00, 0b110, "pmsltu.h">; + def PMSLTU_B : RVPBinary_rr<0b1011, 0b10, 0b110, "pmsltu.b">; + + def PMIN_H : RVPBinary_rr<0b1100, 0b00, 0b110, "pmin.h">; + def PMIN_B : RVPBinary_rr<0b1100, 0b10, 0b110, "pmin.b">; + + def PMINU_H : RVPBinary_rr<0b1101, 0b00, 0b110, "pminu.h">; + def PMINU_B : RVPBinary_rr<0b1101, 0b10, 0b110, "pminu.b">; + + def PMAX_H : RVPBinary_rr<0b1110, 0b00, 0b110, "pmax.h">; + def PMAX_B : RVPBinary_rr<0b1110, 0b10, 0b110, "pmax.b">; + + def PMAXU_H : RVPBinary_rr<0b1111, 0b00, 0b110, "pmaxu.h">; + def PMAXU_B : RVPBinary_rr<0b1111, 0b10, 0b110, "pmaxu.b">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def MSEQ : RVPBinary_rr<0b1000, 0b01, 0b110, "mseq">; + + def MSLT : RVPBinary_rr<0b1010, 0b01, 0b110, "mslt">; + + def MSLTU : RVPBinary_rr<0b1011, 0b01, 0b110, "msltu">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" +let Predicates = [HasStdExtP, IsRV64] in { + def PAS_WX : RVPBinary_rr<0b0000, 0b01, 0b110, "pas.wx">; + def PSA_WX : RVPBinary_rr<0b0000, 0b11, 0b110, "psa.wx">; + + def PSAS_WX : RVPBinary_rr<0b0010, 0b01, 0b110, "psas.wx">; + def PSSA_WX : 
RVPBinary_rr<0b0010, 0b11, 0b110, "pssa.wx">; + + def PAAS_WX : RVPBinary_rr<0b0011, 0b01, 0b110, "paas.wx">; + def PASA_WX : RVPBinary_rr<0b0011, 0b11, 0b110, "pasa.wx">; + + def PMSEQ_W : RVPBinary_rr<0b1000, 0b01, 0b110, "pmseq.w">; + + def PMSLT_W : RVPBinary_rr<0b1010, 0b01, 0b110, "pmslt.w">; + + def PMSLTU_W : RVPBinary_rr<0b1011, 0b01, 0b110, "pmsltu.w">; + + def PMIN_W : RVPBinary_rr<0b1100, 0b01, 0b110, "pmin.w">; + + def PMINU_W : RVPBinary_rr<0b1101, 0b01, 0b110, "pminu.w">; + + def PMAX_W : RVPBinary_rr<0b1110, 0b01, 0b110, "pmax.w">; + + def PMAXU_W : RVPBinary_rr<0b1111, 0b01, 0b110, "pmaxu.w">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP] in { + def PMULH_H : RVPBinary_rr<0b0000, 0b00, 0b111, "pmulh.h">; + def PMULHR_H : RVPBinary_rr<0b0000, 0b10, 0b111, "pmulhr.h">; + + def PMHACC_H : RVPTernary_rrr<0b0001, 0b00, 0b111, "pmhacc.h">; + def PMHRACC_H : RVPTernary_rrr<0b0001, 0b10, 0b111, "pmhracc.h">; + + def PMULHU_H : RVPBinary_rr<0b0010, 0b00, 0b111, "pmulhu.h">; + def PMULHRU_H : RVPBinary_rr<0b0010, 0b10, 0b111, "pmulhru.h">; + + def PMHACCU_H : RVPTernary_rrr<0b0011, 0b00, 0b111, "pmhaccu.h">; + def PMHRACCU_H : RVPTernary_rrr<0b0011, 0b10, 0b111, "pmhraccu.h">; + + def PMULH_H_B0 : RVPBinary_rr<0b0100, 0b00, 0b111, "pmulh.h.b0">; + def PMULHSU_H_B0 : RVPBinary_rr<0b0100, 0b10, 0b111, "pmulhsu.h.b0">; + + def PMHACC_H_B0 : RVPTernary_rrr<0b0101, 0b00, 0b111, "pmhacc.h.b0">; + def PMHACCSU_H_B0 : RVPTernary_rrr<0b0101, 0b10, 0b111, "pmhaccsu.h.b0">; + + def PMULH_H_B1 : RVPBinary_rr<0b0110, 0b00, 0b111, "pmulh.h.b1">; + def PMULHSU_H_B1 : RVPBinary_rr<0b0110, 0b10, 0b111, "pmulhsu.h.b1">; + + def PMHACC_H_B1 : RVPTernary_rrr<0b0111, 0b00, 0b111, "pmhacc.h.b1">; + def PMHACCSU_H_B1 : RVPTernary_rrr<0b0111, 0b10, 0b111, "pmhaccsu.h.b1">; + + def PMULHSU_H : RVPBinary_rr<0b1000, 0b00, 0b111, "pmulhsu.h">; + def PMULHRSU_H : RVPBinary_rr<0b1000, 0b10, 0b111, "pmulhrsu.h">; + + def PMHACCSU_H : RVPTernary_rrr<0b1001, 0b00, 
0b111, "pmhaccsu.h">; + def PMHRACCSU_H : RVPTernary_rrr<0b1001, 0b10, 0b111, "pmhraccsu.h">; + + def PMULQ_H : RVPBinary_rr<0b1010, 0b00, 0b111, "pmulq.h">; + def PMULQR_H : RVPBinary_rr<0b1010, 0b10, 0b111, "pmulqr.h">; +} // Predicates = [HasStdExtP] +let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in { + def MULHR : RVPBinary_rr<0b0000, 0b11, 0b111, "mulhr">; + + def MHACC : RVPTernary_rrr<0b0001, 0b01, 0b111, "mhacc">; + def MHRACC : RVPTernary_rrr<0b0001, 0b11, 0b111, "mhracc">; + + def MULHRU : RVPBinary_rr<0b0010, 0b11, 0b111, "mulhru">; + + def MHACCU : RVPTernary_rrr<0b0011, 0b01, 0b111, "mhaccu">; + def MHRACCU : RVPTernary_rrr<0b0011, 0b11, 0b111, "mhraccu">; + + def MULH_H0 : RVPBinary_rr<0b0100, 0b01, 0b111, "mulh.h0">; + def MULHSU_H0 : RVPBinary_rr<0b0100, 0b11, 0b111, "mulhsu.h0">; + + def MHACC_H0 : RVPTernary_rrr<0b0101, 0b01, 0b111, "mhacc.h0">; + def MHACCSU_H0 : RVPTernary_rrr<0b0101, 0b11, 0b111, "mhaccsu.h0">; + + def MULH_H1 : RVPBinary_rr<0b0110, 0b01, 0b111, "mulh.h1">; + def MULHSU_H1 : RVPBinary_rr<0b0110, 0b11, 0b111, "mulhsu.h1">; + + def MHACC_H1 : RVPTernary_rrr<0b0111, 0b01, 0b111, "mhacc.h1">; + def MHACCSU_H1 : RVPTernary_rrr<0b0111, 0b11, 0b111, "mhaccsu.h1">; + + def MULHRSU : RVPBinary_rr<0b1000, 0b11, 0b111, "mulhrsu">; + + def MHACCSU : RVPTernary_rrr<0b1001, 0b01, 0b111, "mhaccsu">; + def MHRACCSU : RVPTernary_rrr<0b1001, 0b11, 0b111, "mhraccsu">; + + def MULQ : RVPBinary_rr<0b1010, 0b01, 0b111, "mulq">; + def MULQR : RVPBinary_rr<0b1010, 0b11, 0b111, "mulqr">; + + def MQACC_H00 : RVPTernary_rrr<0b1101, 0b00, 0b111, "mqacc.h00">; + def MQRACC_H00 : RVPTernary_rrr<0b1101, 0b10, 0b111, "mqracc.h00">; + + def MQACC_H11 : RVPTernary_rrr<0b1111, 0b00, 0b111, "mqacc.h11">; + def MQRACC_H11 : RVPTernary_rrr<0b1111, 0b10, 0b111, "mqracc.h11">; +} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in +let Predicates = [HasStdExtP, IsRV64] in { + def PMULH_W : RVPBinary_rr<0b0000, 0b01, 0b111, 
"pmulh.w">; + def PMULHR_W : RVPBinary_rr<0b0000, 0b11, 0b111, "pmulhr.w">; + + def PMHACC_W : RVPTernary_rrr<0b0001, 0b01, 0b111, "pmhacc.w">; + def PMHRACC_W : RVPTernary_rrr<0b0001, 0b11, 0b111, "pmhracc.w">; + + def PMULHU_W : RVPBinary_rr<0b0010, 0b01, 0b111, "pmulhu.w">; + def PMULHRU_W : RVPBinary_rr<0b0010, 0b11, 0b111, "pmulhru.w">; + + def PMHACCU_W : RVPTernary_rrr<0b0011, 0b01, 0b111, "pmhaccu.w">; + def PMHRACCU_W : RVPTernary_rrr<0b0011, 0b11, 0b111, "pmhraccu.w">; + + def PMULH_W_H0 : RVPBinary_rr<0b0100, 0b01, 0b111, "pmulh.w.h0">; + def PMULHSU_W_H0 : RVPBinary_rr<0b0100, 0b11, 0b111, "pmulhsu.w.h0">; + + def PMHACC_W_H0 : RVPTernary_rrr<0b0101, 0b01, 0b111, "pmhacc.w.h0">; + def PMHACCSU_W_H0 : RVPTernary_rrr<0b0101, 0b11, 0b111, "pmhaccsu.w.h0">; + + def PMULH_W_H1 : RVPBinary_rr<0b0110, 0b01, 0b111, "pmulh.w.h1">; + def PMULHSU_W_H1 : RVPBinary_rr<0b0110, 0b11, 0b111, "pmulhsu.w.h1">; + + def PMHACC_W_H1 : RVPTernary_rrr<0b0111, 0b01, 0b111, "pmhacc.w.h1">; + def PMHACCSU_W_H1 : RVPTernary_rrr<0b0111, 0b11, 0b111, "pmhaccsu.w.h1">; + + def PMULHSU_W : RVPBinary_rr<0b1000, 0b01, 0b111, "pmulhsu.w">; + def PMULHRSU_W : RVPBinary_rr<0b1000, 0b11, 0b111, "pmulhrsu.w">; + + def PMHACCSU_W : RVPTernary_rrr<0b1001, 0b01, 0b111, "pmhaccsu.w">; + def PMHRACCSU_W : RVPTernary_rrr<0b1001, 0b11, 0b111, "pmhraccsu.w">; + + def PMULQ_W : RVPBinary_rr<0b1010, 0b01, 0b111, "pmulq.w">; + def PMULQR_W : RVPBinary_rr<0b1010, 0b11, 0b111, "pmulqr.w">; + + def PMQACC_W_H00 : RVPTernary_rrr<0b1101, 0b00, 0b111, "pmqacc.w.h00">; + def MQACC_W00 : RVPTernary_rrr<0b1101, 0b01, 0b111, "mqacc.w00">; + def PMQRACC_W_H00 : RVPTernary_rrr<0b1101, 0b10, 0b111, "pmqracc.w.h00">; + def MQRACC_W00 : RVPTernary_rrr<0b1101, 0b11, 0b111, "mqracc.w00">; + + def PMQACC_W_H11 : RVPTernary_rrr<0b1111, 0b00, 0b111, "pmqacc.w.h11">; + def MQACC_W11 : RVPTernary_rrr<0b1111, 0b01, 0b111, "mqacc.w11">; + def PMQRACC_W_H11 : RVPTernary_rrr<0b1111, 0b10, 0b111, "pmqracc.w.h11">; + def 
MQRACC_W11 : RVPTernary_rrr<0b1111, 0b11, 0b111, "mqracc.w11">; +} // Predicates = [HasStdExtP, IsRV64] + +let Predicates = [HasStdExtP, IsRV32] in { + def PLI_DH : RVPPairLoadImm_i<0b0011000, (ins simm10:$imm10), "pli.dh", + "$rd, $imm10"> { + bits<10> imm10; + + let Inst{24-16} = imm10{8-0}; + let Inst{15} = imm10{9}; + } + + def PLI_DB : RVPPairLoadImm_i<0b0011010, (ins simm8_unsigned:$imm8), "pli.db", + "$rd, $imm8"> { + bits<8> imm8; + + let Inst{24} = 0b0; + let Inst{23-16} = imm8; + let Inst{15} = 0b0; + } + + def PLUI_DH : RVPPairLoadImm_i<0b0111000, (ins simm10_unsigned:$imm10), + "plui.dh", "$rd, $imm10"> { + bits<10> imm10; + + let Inst{24} = imm10{0}; + let Inst{23-15} = imm10{9-1}; + } +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 33c7138..8c0ebe6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -475,8 +475,8 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> opcodestr, "$vd, $vs2, $vs1, $vm">; // op vd, vs1, vs2, vm (reverse the order of vs1 and vs2) -class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr, - bit EarlyClobber = 0> +class VMACVV<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> : RVInstVV<funct6, opv, (outs VR:$vd_wb), (ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $vs1, $vs2$vm"> { @@ -505,8 +505,8 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> opcodestr, "$vd, $vs2, $rs1, $vm">; // op vd, rs1, vs2, vm (reverse the order of rs1 and vs2) -class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr, - bit EarlyClobber = 0> +class VMACVX<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> : RVInstVX<funct6, opv, (outs VR:$vd_wb), (ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $rs1, $vs2$vm"> { @@ -549,8 +549,8 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr> opcodestr, 
"$vd, $vs2, $rs1$vm">; // op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2) -class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr, - bit EarlyClobber = 0> +class VMACVF<bits<6> funct6, RISCVVFormat opv, string opcodestr, + bit EarlyClobber = 0> : RVInstVX<funct6, opv, (outs VR:$vd_wb), (ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, $rs1, $vs2$vm"> { @@ -628,17 +628,17 @@ multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> { } multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> { - def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">, + def V : VMACVV<funct6, OPMVV, opcodestr # ".vv">, SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV">; - def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">, + def X : VMACVX<funct6, OPMVX, opcodestr # ".vx">, SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX", "ReadVIMulAddV">; } multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> { let RVVConstraint = WidenV in - def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">, + def X : VMACVX<funct6, OPMVX, opcodestr # ".vx", EarlyClobber=1>, SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX", "ReadVIWMulAddV">; } @@ -646,7 +646,7 @@ multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> { multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6> : VWMAC_MV_X<opcodestr, funct6> { let RVVConstraint = WidenV in - def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>, + def V : VMACVV<funct6, OPMVV, opcodestr # ".vv", EarlyClobber=1>, SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV">; } @@ -743,20 +743,20 @@ multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> { } multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> { - def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">, + def V : VMACVV<funct6, OPFVV, opcodestr # ".vv">, SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV", 
"ReadVFMulAddV">; - def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">, + def F : VMACVF<funct6, OPFVF, opcodestr # ".vf">, SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", "ReadVFMulAddV">; } multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> { let RVVConstraint = WidenV in { - def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>, + def V : VMACVV<funct6, OPFVV, opcodestr # ".vv", EarlyClobber=1>, SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV">; - def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>, + def F : VMACVF<funct6, OPFVF, opcodestr # ".vf", EarlyClobber=1>, SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF", "ReadVFWMulAddV">; } @@ -1703,8 +1703,9 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, /*slidesUp=*/true>; defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp +let ReadsPastVL = 1 in defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>; -let ElementsDependOn = EltDepsVL in +let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>; } // Predicates = [HasVInstructions] @@ -1712,19 +1713,19 @@ let Predicates = [HasVInstructionsAnyF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -let ElementsDependOn = EltDepsVL in +let ElementsDependOn = EltDepsVL, ReadsPastVL = 1 in defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>; } // Predicates = [HasVInstructionsAnyF] let Predicates = [HasVInstructions] in { // Vector Register Gather Instruction -let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { +let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, ReadsPastVL 
= 1 in { defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>; def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, SchedBinaryMC<"WriteVRGatherEI16VV", "ReadVRGatherEI16VV_data", "ReadVRGatherEI16VV_index">; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather +} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, ReadsPastVL = 1 // Vector Compress Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress, ElementsDependOn = EltDepsVLMask in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td index c75addd9..1fb30a0b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td @@ -420,7 +420,7 @@ class NDSRVInstVD4DOT<bits<6> funct6, string opcodestr> } class NDSRVInstVBFHCvt<bits<5> vs1, string opcodestr> - : RVInst<(outs VR:$vd), (ins VR:$vs2, VMaskOp:$vm), + : RVInst<(outs VR:$vd), (ins VR:$vs2), opcodestr, "$vd, $vs2", [], InstFormatR> { bits<5> vs2; bits<5> vd; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td index 0c8487c..889ea98 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td @@ -129,20 +129,20 @@ class Mips_prefetch_ri<dag outs, dag ins, string opcodestr, string argstr> // MIPS extensions //===----------------------------------------------------------------------===// let Predicates = [HasVendorXMIPSCBOP] ,DecoderNamespace = "Xmipscbop" in { - def MIPS_PREFETCH : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint), - "mips.pref", "$hint, ${imm9}(${rs1})">, - Sched<[]>; + def MIPS_PREF : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint), + "mips.pref", "$hint, ${imm9}(${rs1})">, + Sched<[]>; } let Predicates = [HasVendorXMIPSCBOP] in { // Prefetch Data Write. 
def : Pat<(prefetch (AddrRegImm9 (XLenVT GPR:$rs1), uimm9:$imm9), (i32 1), timm, (i32 1)), - (MIPS_PREFETCH GPR:$rs1, uimm9:$imm9, 9)>; + (MIPS_PREF GPR:$rs1, uimm9:$imm9, 9)>; // Prefetch Data Read. def : Pat<(prefetch (AddrRegImm9 (XLenVT GPR:$rs1), uimm9:$imm9), (i32 0), timm, (i32 1)), - (MIPS_PREFETCH GPR:$rs1, uimm9:$imm9, 8)>; + (MIPS_PREF GPR:$rs1, uimm9:$imm9, 8)>; } let Predicates = [HasVendorXMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index ebcf079..3a6ce3c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -58,7 +58,7 @@ class CustomRivosXVI<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins, let Predicates = [HasVendorXRivosVizip], DecoderNamespace = "XRivos", Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather, - Inst<6-0> = OPC_CUSTOM_2.Value in { + Inst<6-0> = OPC_CUSTOM_2.Value, ReadsPastVL = 1 in { defm RI_VZIPEVEN_V : VALU_IV_V<"ri.vzipeven", 0b001100>; defm RI_VZIPODD_V : VALU_IV_V<"ri.vzipodd", 0b011100>; defm RI_VZIP2A_V : VALU_IV_V<"ri.vzip2a", 0b000100>; @@ -126,6 +126,7 @@ def RI_VINSERT : CustomRivosVXI<0b010000, OPMVX, (outs VR:$vd_wb), (ins VR:$vd, GPR:$rs1, uimm5:$imm), "ri.vinsert.v.x", "$vd, $rs1, $imm">; +let ReadsPastVL = 1 in def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd), (ins VR:$vs2, uimm5:$imm), "ri.vextract.x.v", "$rd, $vs2, $imm">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index a47dfe3..b546339 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -74,6 +74,7 @@ class RVInstVCCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins, let Uses = [VL, VTYPE]; let RVVConstraint = NoConstraint; let ElementsDependOn = EltDepsVLMask; + let ReadsPastVL = 1; } class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> 
funct3, dag outs, dag ins, @@ -98,6 +99,7 @@ class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins, let Uses = [VL, VTYPE]; let RVVConstraint = NoConstraint; let ElementsDependOn = EltDepsVLMask; + let ReadsPastVL = 1; } class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td index 66cb2d5..a5ee701 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td @@ -65,6 +65,7 @@ class SFInstTileMemOp<dag outs, dag ins, bits<3> nf, RISCVOpcode opcode, let Inst{6-0} = opcode.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in @@ -94,6 +95,7 @@ class SFInstTileMoveOp<bits<6> funct6, dag outs, dag ins, string opcodestr, let Inst{6-0} = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -113,6 +115,7 @@ class SFInstMatmulF<dag outs, dag ins, string opcodestr, string argstr> let Inst{6-0} = OPC_OP_VE.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -135,6 +138,7 @@ class SFInstMatmulF8<bit a, bit b, dag outs, dag ins, let Inst{6-0} = OPC_OP_VE.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } @@ -167,6 +171,7 @@ class SFInstMatmulI8<bit funct6_1, bit a, bit b, dag outs, dag ins, let Inst{6-0} = OPC_OP_VE.Value; let Uses = [VTYPE, VL]; + let ReadsPastVL = 1; } class I8Encode<bit encoding, string name> { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td new file mode 100644 index 0000000..0f9b795 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSpacemiT.td @@ -0,0 +1,141 @@ +//===-- RISCVInstrInfoXSpacemiT.td -------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the vendor extensions defined by SpacemiT. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand definitions. +//===----------------------------------------------------------------------===// + +class SMTVDotOpcode<bits<7> val> { + bits<7> Value = val; +} + +class SMTVEncoding2<bits<2> val> { + bits<2> Value = val; +} + +def OPMMA : SMTVDotOpcode<0b1110001>; +def OPMMA_SLIDE : SMTVDotOpcode<0b1110011>; + +//===----------------------------------------------------------------------===// +// Vector Dot-Product Sign Encoding +// Defines the signed/unsigned mixing modes for vector dot-product operations. +// Encoding format: [1:0] bits +// 00: UU (Unsigned x Unsigned) +// 01: US (Unsigned x Signed) +// 10: SU (Signed x Unsigned) +// 11: SS (Signed x Signed) +//===----------------------------------------------------------------------===// +def SMT_VDot_UU : SMTVEncoding2<0b00>; +def SMT_VDot_US : SMTVEncoding2<0b01>; +def SMT_VDot_SU : SMTVEncoding2<0b10>; +def SMT_VDot_SS : SMTVEncoding2<0b11>; + +//===----------------------------------------------------------------------===// +// Vector Dot-Product Sliding Window Modes +// Encoding format: [1:0] bits +// 00: Slide1 (1-element sliding stride) +// 01: Slide2 (2-element sliding stride) +// 10: Slide3 (3-element sliding stride) +// 11: Reserved +// +// Used in sliding-window dot-product operations: +// vd = vs1 • vs2.slide{1|2|3} // • = dot product +//===----------------------------------------------------------------------===// +def SMT_VDot_Slide1 : SMTVEncoding2<0b00>; +def SMT_VDot_Slide2 : SMTVEncoding2<0b01>; +def SMT_VDot_Slide3 : 
SMTVEncoding2<0b10>; + +//===----------------------------------------------------------------------===// +// Instruction formats +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +// Base vector dot product (no slide) format. +class RVInstSMTVDot<SMTVEncoding2 sign, string opcodestr> + : RVInst<(outs VRM2:$vd), (ins VR:$vs1, VR:$vs2), opcodestr, + "$vd, $vs1, $vs2", [], InstFormatR> { + bits<5> vd; + bits<5> vs1; + bits<5> vs2; + + let Inst{31-25} = OPMMA.Value; + let Inst{24-20} = vs2; + let Inst{19-15} = vs1; + let Inst{14} = 0b0; + let Inst{13-12} = sign.Value; + let Inst{11-8} = vd{4-1}; + let Inst{7} = 0b0; + let Inst{6-0} = OPC_CUSTOM_1.Value; +} + +// Sliding-window vector dot product format. +class RVInstSMTVDotSlide<SMTVEncoding2 funct2, SMTVEncoding2 sign, string opcodestr> + : RVInst<(outs VRM2:$vd), (ins VRM2:$vs1, VR:$vs2), opcodestr, + "$vd, $vs1, $vs2", [], InstFormatR> { + bits<5> vd; + bits<5> vs1; + bits<5> vs2; + + let Inst{31-25} = OPMMA_SLIDE.Value; + let Inst{24-20} = vs2; + let Inst{19-16} = vs1{4-1}; + let Inst{15-14} = funct2.Value; + let Inst{13-12} = sign.Value; + let Inst{11-8} = vd{4-1}; + let Inst{7} = 0b0; + let Inst{6-0} = OPC_CUSTOM_1.Value; +} +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let DecoderNamespace = "XSMT" in { + +let Predicates = [HasVendorXSMTVDot], ElementsDependOn = EltDepsVL in { +// Base vector dot product (no slide) instructions +// NOTE: Destination registers (vd) MUST be even-numbered (v0, v2, ..., v30) +// due to hardware alignment constraints. Using odd registers may cause undefined behavior. 
+def SMT_VMADOT : RVInstSMTVDot<SMT_VDot_SS, "smt.vmadot">; +def SMT_VMADOTU : RVInstSMTVDot<SMT_VDot_UU, "smt.vmadotu">; +def SMT_VMADOTSU : RVInstSMTVDot<SMT_VDot_SU, "smt.vmadotsu">; +def SMT_VMADOTUS : RVInstSMTVDot<SMT_VDot_US, "smt.vmadotus">; + +//===----------------------------------------------------------------------===// +// Sliding-window Vector Dot Product Instructions +// +// The numeric suffix (1, 2, 3) specifies the stride of the sliding window: +// 1: Window slides by 1 element per operation +// 2: Window slides by 2 elements per operation +// 3: Window slides by 3 elements per operation +// +// These instructions compute dot products with overlapping operand windows +// where the window position increments by <N> elements between computations. +//===----------------------------------------------------------------------===// +// NOTE: Destination registers (vd) and first source register (vs1) MUST be +// even-numbered (v0, v2, ..., v30) due to hardware alignment constraints. +// Using odd registers may cause undefined behavior. 
+def SMT_VMADOT1 : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_SS, "smt.vmadot1">; +def SMT_VMADOT1U : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_UU, "smt.vmadot1u">; +def SMT_VMADOT1SU : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_SU, "smt.vmadot1su">; +def SMT_VMADOT1US : RVInstSMTVDotSlide<SMT_VDot_Slide1, SMT_VDot_US, "smt.vmadot1us">; +def SMT_VMADOT2 : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_SS, "smt.vmadot2">; +def SMT_VMADOT2U : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_UU, "smt.vmadot2u">; +def SMT_VMADOT2SU : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_SU, "smt.vmadot2su">; +def SMT_VMADOT2US : RVInstSMTVDotSlide<SMT_VDot_Slide2, SMT_VDot_US, "smt.vmadot2us">; +def SMT_VMADOT3 : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_SS, "smt.vmadot3">; +def SMT_VMADOT3U : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_UU, "smt.vmadot3u">; +def SMT_VMADOT3SU : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_SU, "smt.vmadot3su">; +def SMT_VMADOT3US : RVInstSMTVDotSlide<SMT_VDot_Slide3, SMT_VDot_US, "smt.vmadot3us">; +} +} // DecoderNamespace = "XSMT" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index 77692f7..7cf6d5f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -70,7 +70,7 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { - let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in { + let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in { def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), @@ -91,7 +91,7 @@ multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, (vt GPR:$cmp), (vt GPR:$new)), (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; - } // Predicates = 
!listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) + } // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), (vt GPR:$cmp), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 413ad8b..2abd3e6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -399,7 +399,7 @@ def MAX : ALU_rr<0b0000101, 0b110, "max", Commutable=1>, Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>; def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>, Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>; -} // Predicates = [HasStdExtZbb] +} // Predicates = [HasStdExtZbbOrP] let Predicates = [HasStdExtZbkbOrP] in def PACK : ALU_rr<0b0000100, 0b100, "pack">, @@ -692,6 +692,21 @@ def : Pat<(binop_allwusers<or> (shl GPR:$op1rs1, (XLenVT 24))), (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + +def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), + (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + +// Match a pattern of 2 halfwords being inserted into bits [63:32], with bits +// bits [31:0] coming from a zero extended value. We can use pack with packw for +// bits [63:32]. If bits [63:31] can also be a packw, it can be matched +// separately. 
+def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)), + (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))), + (zexti32 (i64 GPR:$rs1))), + (PACK (XLenVT GPR:$rs1), + (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>; } // Predicates = [HasStdExtZbkb, IsRV64] let Predicates = [HasStdExtZbb, IsRV32] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td index 32e7f96..76dc027 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -22,5 +22,5 @@ class CMOPInst<bits<3> imm3, string opcodestr> foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in { let Predicates = [HasStdExtZcmop] in - def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>; + def C_MOP_ # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td index 0723b2f..3ddcb1d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td @@ -58,11 +58,11 @@ let Predicates = [HasStdExtZicboz] in { def CBO_ZERO : CBO_r<0b000000000100, "cbo.zero">, Sched<[]>; } // Predicates = [HasStdExtZicboz] -let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in { +let Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] in { def PREFETCH_I : Prefetch_ri<0b00000, "prefetch.i">, Sched<[]>; def PREFETCH_R : Prefetch_ri<0b00001, "prefetch.r">, Sched<[]>; def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>; -} // Predicates = [HasStdExtZicbop] +} // Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] //===----------------------------------------------------------------------===// // Patterns @@ -70,7 +70,7 @@ def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>; def AddrRegImmLsb00000 : ComplexPattern<iPTR, 2, "SelectAddrRegImmLsb00000">; -let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in { +let Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] in { def : 
Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12), timm, timm, (i32 0)), (PREFETCH_I GPR:$rs1, simm12_lsb00000:$imm12)>; @@ -80,4 +80,4 @@ let Predicates = [HasStdExtZicbop, NotHasVendorXMIPSCBOP] in { def : Pat<(prefetch (AddrRegImmLsb00000 (XLenVT GPR:$rs1), simm12_lsb00000:$imm12), (i32 1), timm, (i32 1)), (PREFETCH_W GPR:$rs1, simm12_lsb00000:$imm12)>; -} +} // Predicates = [HasStdExtZicbop, NoVendorXMIPSCBOP] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td index 960f566..0d08176 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td @@ -33,13 +33,13 @@ class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcod } // May-Be-Operations -def riscv_mopr : RVSDNode<"MOPR", - SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>>; -def riscv_moprr : RVSDNode<"MOPRR", - SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>>; +def riscv_mop_r : RVSDNode<"MOP_R", + SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>>; +def riscv_mop_rr : RVSDNode<"MOP_RR", + SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, @@ -50,31 +50,32 @@ class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode, string opcodestr> - : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), + : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; foreach i = 0...31 in { let Predicates = [HasStdExtZimop] in - def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, - 
Sched<[]>; + def MOP_R_#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, + Sched<[]>; } foreach i = 0...7 in { let Predicates = [HasStdExtZimop] in - def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, + def MOP_RR_#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, Sched<[]>; } let Predicates = [HasStdExtZimop] in { // Zimop instructions foreach i = 0...31 in { - def : Pat<(XLenVT (riscv_mopr GPR:$rs1, (XLenVT i))), - (!cast<Instruction>("MOPR"#i) GPR:$rs1)>; + def : Pat<(XLenVT (riscv_mop_r GPR:$rs1, (XLenVT i))), + (!cast<Instruction>("MOP_R_"#i) GPR:$rs1)>; } foreach i = 0...7 in { - def : Pat<(XLenVT (riscv_moprr GPR:$rs1, GPR:$rs2, (XLenVT i))), - (!cast<Instruction>("MOPRR"#i) GPR:$rs1, GPR:$rs2)>; + def : Pat<(XLenVT (riscv_mop_rr GPR:$rs1, GPR:$rs2, (XLenVT i))), + (!cast<Instruction>("MOP_RR_"#i) GPR:$rs1, GPR:$rs2)>; } } // Predicates = [HasStdExtZimop] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td index 27959ea..64fd508 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td @@ -17,16 +17,39 @@ // Instructions //===----------------------------------------------------------------------===// +class VQDOTVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : RVInstVV<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), + opcodestr, "$vd, $vs2, $vs1$vm"> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$vd = $vd_wb"; +} + +class VQDOTVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : RVInstVX<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs2, GPR:$rs1, VMaskOp:$vm), + opcodestr, "$vd, $vs2, $rs1$vm"> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$vd = $vd_wb"; +} + let Predicates = [HasStdExtZvqdotq] in { - def VQDOT_VV : VALUVV<0b101100, OPMVV, "vqdot.vv">; - def VQDOT_VX : VALUVX<0b101100, 
OPMVX, "vqdot.vx">; - def VQDOTU_VV : VALUVV<0b101000, OPMVV, "vqdotu.vv">; - def VQDOTU_VX : VALUVX<0b101000, OPMVX, "vqdotu.vx">; - def VQDOTSU_VV : VALUVV<0b101010, OPMVV, "vqdotsu.vv">; - def VQDOTSU_VX : VALUVX<0b101010, OPMVX, "vqdotsu.vx">; - def VQDOTUS_VX : VALUVX<0b101110, OPMVX, "vqdotus.vx">; + def VQDOT_VV : VQDOTVV<0b101100, OPMVV, "vqdot.vv">; + def VQDOT_VX : VQDOTVX<0b101100, OPMVX, "vqdot.vx">; + def VQDOTU_VV : VQDOTVV<0b101000, OPMVV, "vqdotu.vv">; + def VQDOTU_VX : VQDOTVX<0b101000, OPMVX, "vqdotu.vx">; + def VQDOTSU_VV : VQDOTVV<0b101010, OPMVV, "vqdotsu.vv">; + def VQDOTSU_VX : VQDOTVX<0b101010, OPMVX, "vqdotsu.vx">; + def VQDOTUS_VX : VQDOTVX<0b101110, OPMVX, "vqdotus.vx">; } // Predicates = [HasStdExtZvqdotq] +//===----------------------------------------------------------------------===// +// Helpers to define the VL patterns. +//===----------------------------------------------------------------------===// let HasPassthruOp = true, HasMaskOp = true in { def riscv_vqdot_vl : RVSDNode<"VQDOT_VL", SDT_RISCVIntBinOp_VL>; @@ -34,6 +57,10 @@ let HasPassthruOp = true, HasMaskOp = true in { def riscv_vqdotsu_vl : RVSDNode<"VQDOTSU_VL", SDT_RISCVIntBinOp_VL>; } // let HasPassthruOp = true, HasMaskOp = true +//===----------------------------------------------------------------------===// +// Pseudo Instructions for CodeGen +//===----------------------------------------------------------------------===// + multiclass VPseudoVQDOT_VV_VX { foreach m = MxSet<32>.m in { defm "" : VPseudoBinaryV_VV<m>, @@ -54,6 +81,10 @@ let Predicates = [HasStdExtZvqdotq], mayLoad = 0, mayStore = 0, defm PseudoVQDOTSU : VPseudoVQDOT_VV_VX; } +//===----------------------------------------------------------------------===// +// Patterns. 
+//===----------------------------------------------------------------------===// + defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8]; defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>; defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>; diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index 726920e..5e10631 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -63,6 +63,12 @@ static const Intrinsic::ID FixedVlsegIntrIds[] = { Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask, Intrinsic::riscv_seg8_load_mask}; +static const Intrinsic::ID FixedVlssegIntrIds[] = { + Intrinsic::riscv_sseg2_load_mask, Intrinsic::riscv_sseg3_load_mask, + Intrinsic::riscv_sseg4_load_mask, Intrinsic::riscv_sseg5_load_mask, + Intrinsic::riscv_sseg6_load_mask, Intrinsic::riscv_sseg7_load_mask, + Intrinsic::riscv_sseg8_load_mask}; + static const Intrinsic::ID ScalableVlsegIntrIds[] = { Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask, Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask, @@ -75,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = { Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, Intrinsic::riscv_seg8_store_mask}; +static const Intrinsic::ID FixedVsssegIntrIds[] = { + Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask, + Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask, + Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask, + Intrinsic::riscv_sseg8_store_mask}; + static const Intrinsic::ID ScalableVssegIntrIds[] = { Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, @@ -197,9 +209,15 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy, /// %vec1 = extractelement { <4 x i32>, <4 x i32> 
} %ld2, i32 1 bool RISCVTargetLowering::lowerInterleavedLoad( Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles, - ArrayRef<unsigned> Indices, unsigned Factor) const { + ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const { assert(Indices.size() == Shuffles.size()); + assert(GapMask.getBitWidth() == Factor); + // We only support cases where the skipped fields are the trailing ones. + // TODO: Lower to strided load if there is only a single active field. + unsigned MaskFactor = GapMask.popcount(); + if (MaskFactor < 2 || !GapMask.isMask()) + return false; IRBuilder<> Builder(Load); const DataLayout &DL = Load->getDataLayout(); @@ -208,20 +226,37 @@ bool RISCVTargetLowering::lowerInterleavedLoad( Value *Ptr, *VL; Align Alignment; - if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) + if (!getMemOperands(MaskFactor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) return false; Type *PtrTy = Ptr->getType(); unsigned AS = PtrTy->getPointerAddressSpace(); - if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL)) + if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL)) return false; - CallInst *VlsegN = Builder.CreateIntrinsic( - FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL}); + CallInst *SegLoad = nullptr; + if (MaskFactor < Factor) { + // Lower to strided segmented load. + unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); + Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); + SegLoad = Builder.CreateIntrinsic(FixedVlssegIntrIds[MaskFactor - 2], + {VTy, PtrTy, XLenTy, XLenTy}, + {Ptr, Stride, Mask, VL}); + } else { + // Lower to normal segmented load. 
+ SegLoad = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], + {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL}); + } for (unsigned i = 0; i < Shuffles.size(); i++) { - Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); - Shuffles[i]->replaceAllUsesWith(SubVec); + unsigned FactorIdx = Indices[i]; + if (FactorIdx >= MaskFactor) { + // Replace masked-off factors (that are still extracted) with poison. + Shuffles[i]->replaceAllUsesWith(PoisonValue::get(VTy)); + } else { + Value *SubVec = Builder.CreateExtractValue(SegLoad, FactorIdx); + Shuffles[i]->replaceAllUsesWith(SubVec); + } } return true; @@ -246,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad( bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, Value *LaneMask, ShuffleVectorInst *SVI, - unsigned Factor) const { + unsigned Factor, + const APInt &GapMask) const { + assert(GapMask.getBitWidth() == Factor); + + // We only support cases where the skipped fields are the trailing ones. + // TODO: Lower to strided store if there is only a single active field. 
+ unsigned MaskFactor = GapMask.popcount(); + if (MaskFactor < 2 || !GapMask.isMask()) + return false; + IRBuilder<> Builder(Store); const DataLayout &DL = Store->getDataLayout(); auto Mask = SVI->getShuffleMask(); @@ -258,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, Value *Ptr, *VL; Align Alignment; - if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment)) + if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL, + Alignment)) return false; Type *PtrTy = Ptr->getType(); unsigned AS = PtrTy->getPointerAddressSpace(); - if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL)) + if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL)) return false; - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); + Function *SegStoreFunc; + if (MaskFactor < Factor) + // Strided segmented store. + SegStoreFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2], + {VTy, PtrTy, XLenTy, XLenTy}); + else + // Normal segmented store. + SegStoreFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), FixedVssegIntrIds[Factor - 2], + {VTy, PtrTy, XLenTy}); SmallVector<Value *, 10> Ops; SmallVector<int, 16> NewShuffleMask; - for (unsigned i = 0; i < Factor; i++) { + for (unsigned i = 0; i < MaskFactor; i++) { // Collect shuffle mask for this lane. for (unsigned j = 0; j < VTy->getNumElements(); j++) NewShuffleMask.push_back(Mask[i + Factor * j]); @@ -283,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, NewShuffleMask.clear(); } - Ops.append({Ptr, LaneMask, VL}); - Builder.CreateCall(VssegNFunc, Ops); + Ops.push_back(Ptr); + if (MaskFactor < Factor) { + // Insert the stride argument. 
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); + Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes)); + } + Ops.append({LaneMask, VL}); + Builder.CreateCall(SegStoreFunc, Ops); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp index 7a2541a..efea1b4 100644 --- a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp +++ b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp @@ -26,6 +26,7 @@ struct RISCVMoveMerge : public MachineFunctionPass { RISCVMoveMerge() : MachineFunctionPass(ID) {} + const RISCVSubtarget *ST; const RISCVInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -37,15 +38,15 @@ struct RISCVMoveMerge : public MachineFunctionPass { // Merge the two instructions indicated into a single pair instruction. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, unsigned Opcode); + MachineBasicBlock::iterator Paired, bool MoveFromSToA); // Look for C.MV instruction that can be combined with // the given instruction into CM.MVA01S or CM.MVSA01. Return the matching // instruction if one exists. 
MachineBasicBlock::iterator - findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode, + findMatchingInst(MachineBasicBlock::iterator &MBBI, bool MoveFromSToA, const DestSourcePair &RegPair); - bool mergeMoveSARegPair(const RISCVSubtarget &STI, MachineBasicBlock &MBB); + bool mergeMoveSARegPair(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return RISCV_MOVE_MERGE_NAME; } @@ -58,41 +59,21 @@ char RISCVMoveMerge::ID = 0; INITIALIZE_PASS(RISCVMoveMerge, "riscv-move-merge", RISCV_MOVE_MERGE_NAME, false, false) -static bool isMoveFromAToS(unsigned Opcode) { - switch (Opcode) { - case RISCV::CM_MVA01S: - case RISCV::QC_CM_MVA01S: - return true; - default: - return false; - } -} - -static unsigned getMoveFromAToSOpcode(const RISCVSubtarget &STI) { - if (STI.hasStdExtZcmp()) +static unsigned getMoveFromSToAOpcode(const RISCVSubtarget &ST) { + if (ST.hasStdExtZcmp()) return RISCV::CM_MVA01S; - if (STI.hasVendorXqccmp()) + if (ST.hasVendorXqccmp()) return RISCV::QC_CM_MVA01S; llvm_unreachable("Unhandled subtarget with paired A to S move."); } -static bool isMoveFromSToA(unsigned Opcode) { - switch (Opcode) { - case RISCV::CM_MVSA01: - case RISCV::QC_CM_MVSA01: - return true; - default: - return false; - } -} - -static unsigned getMoveFromSToAOpcode(const RISCVSubtarget &STI) { - if (STI.hasStdExtZcmp()) +static unsigned getMoveFromAToSOpcode(const RISCVSubtarget &ST) { + if (ST.hasStdExtZcmp()) return RISCV::CM_MVSA01; - if (STI.hasVendorXqccmp()) + if (ST.hasVendorXqccmp()) return RISCV::QC_CM_MVSA01; llvm_unreachable("Unhandled subtarget with paired S to A move"); @@ -123,20 +104,22 @@ bool RISCVMoveMerge::isCandidateToMergeMVSA01(const DestSourcePair &RegPair) { MachineBasicBlock::iterator RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - unsigned Opcode) { + bool MoveFromSToA) { const MachineOperand *Sreg1, *Sreg2; 
MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator NextI = next_nodbg(I, E); DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value(); DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value(); - Register ARegInFirstPair = isMoveFromAToS(Opcode) - ? FirstPair.Destination->getReg() - : FirstPair.Source->getReg(); if (NextI == Paired) NextI = next_nodbg(NextI, E); DebugLoc DL = I->getDebugLoc(); + // Make a copy so we can update the kill flag in the MoveFromSToA case. The + // copied operand needs to be scoped outside the if since we make a pointer + // to it. + MachineOperand PairedSource = *PairedRegs.Source; + // The order of S-reg depends on which instruction holds A0, instead of // the order of register pair. // e,g. @@ -145,13 +128,26 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I, // // mv a0, s2 // mv a1, s1 => cm.mva01s s2,s1 - bool StartWithX10 = ARegInFirstPair == RISCV::X10; - if (isMoveFromAToS(Opcode)) { - Sreg1 = StartWithX10 ? FirstPair.Source : PairedRegs.Source; - Sreg2 = StartWithX10 ? PairedRegs.Source : FirstPair.Source; + unsigned Opcode; + if (MoveFromSToA) { + // We are moving one of the copies earlier so its kill flag may become + // invalid. Clear the copied kill flag if there are any reads of the + // register between the new location and the old location. + for (auto It = std::next(I); It != Paired && PairedSource.isKill(); ++It) + if (It->readsRegister(PairedSource.getReg(), TRI)) + PairedSource.setIsKill(false); + + Opcode = getMoveFromSToAOpcode(*ST); + Sreg1 = FirstPair.Source; + Sreg2 = &PairedSource; + if (FirstPair.Destination->getReg() != RISCV::X10) + std::swap(Sreg1, Sreg2); } else { - Sreg1 = StartWithX10 ? FirstPair.Destination : PairedRegs.Destination; - Sreg2 = StartWithX10 ? 
PairedRegs.Destination : FirstPair.Destination; + Opcode = getMoveFromAToSOpcode(*ST); + Sreg1 = FirstPair.Destination; + Sreg2 = PairedRegs.Destination; + if (FirstPair.Source->getReg() != RISCV::X10) + std::swap(Sreg1, Sreg2); } BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2); @@ -163,7 +159,7 @@ RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI, - unsigned InstOpcode, + bool MoveFromSToA, const DestSourcePair &RegPair) { MachineBasicBlock::iterator E = MBBI->getParent()->end(); @@ -181,26 +177,20 @@ RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI, Register SourceReg = SecondPair->Source->getReg(); Register DestReg = SecondPair->Destination->getReg(); - if (isMoveFromAToS(InstOpcode) && isCandidateToMergeMVA01S(*SecondPair)) { - // If register pair is valid and destination registers are different. - if ((RegPair.Destination->getReg() == DestReg)) + bool IsCandidate = MoveFromSToA ? isCandidateToMergeMVA01S(*SecondPair) + : isCandidateToMergeMVSA01(*SecondPair); + if (IsCandidate) { + // Second destination must be different. + if (RegPair.Destination->getReg() == DestReg) return E; - // If paired destination register was modified or used, the source reg - // was modified, there is no possibility of finding matching - // instruction so exit early. - if (!ModifiedRegUnits.available(DestReg) || - !UsedRegUnits.available(DestReg) || - !ModifiedRegUnits.available(SourceReg)) - return E; - - return I; - } else if (isMoveFromSToA(InstOpcode) && - isCandidateToMergeMVSA01(*SecondPair)) { - if ((RegPair.Source->getReg() == SourceReg) || - (RegPair.Destination->getReg() == DestReg)) + // For AtoS the source must also be different. 
+ if (!MoveFromSToA && RegPair.Source->getReg() == SourceReg) return E; + // If paired destination register was modified or used, the source reg + // was modified, there is no possibility of finding matching + // instruction so exit early. if (!ModifiedRegUnits.available(DestReg) || !UsedRegUnits.available(DestReg) || !ModifiedRegUnits.available(SourceReg)) @@ -217,8 +207,7 @@ RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI, // Finds instructions, which could be represented as C.MV instructions and // merged into CM.MVA01S or CM.MVSA01. -bool RISCVMoveMerge::mergeMoveSARegPair(const RISCVSubtarget &STI, - MachineBasicBlock &MBB) { +bool RISCVMoveMerge::mergeMoveSARegPair(MachineBasicBlock &MBB) { bool Modified = false; for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -227,22 +216,17 @@ bool RISCVMoveMerge::mergeMoveSARegPair(const RISCVSubtarget &STI, // can, return Dest/Src register pair. auto RegPair = TII->isCopyInstrImpl(*MBBI); if (RegPair.has_value()) { - unsigned Opcode = 0; - - if (isCandidateToMergeMVA01S(*RegPair)) - Opcode = getMoveFromAToSOpcode(STI); - else if (isCandidateToMergeMVSA01(*RegPair)) - Opcode = getMoveFromSToAOpcode(STI); - else { + bool MoveFromSToA = isCandidateToMergeMVA01S(*RegPair); + if (!MoveFromSToA && !isCandidateToMergeMVSA01(*RegPair)) { ++MBBI; continue; } MachineBasicBlock::iterator Paired = - findMatchingInst(MBBI, Opcode, RegPair.value()); + findMatchingInst(MBBI, MoveFromSToA, RegPair.value()); // If matching instruction can be found merge them. 
if (Paired != E) { - MBBI = mergePairedInsns(MBBI, Paired, Opcode); + MBBI = mergePairedInsns(MBBI, Paired, MoveFromSToA); Modified = true; continue; } @@ -256,12 +240,12 @@ bool RISCVMoveMerge::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; - const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>(); - if (!(Subtarget->hasStdExtZcmp() || Subtarget->hasVendorXqccmp())) + ST = &Fn.getSubtarget<RISCVSubtarget>(); + if (!ST->hasStdExtZcmp() && !ST->hasVendorXqccmp()) return false; - TII = Subtarget->getInstrInfo(); - TRI = Subtarget->getRegisterInfo(); + TII = ST->getInstrInfo(); + TRI = ST->getRegisterInfo(); // Resize the modified and used register unit trackers. We do this once // per function and then clear the register units each time we optimize a // move. @@ -269,7 +253,7 @@ bool RISCVMoveMerge::runOnMachineFunction(MachineFunction &Fn) { UsedRegUnits.init(*TRI); bool Modified = false; for (auto &MBB : Fn) - Modified |= mergeMoveSARegPair(*Subtarget, MBB); + Modified |= mergeMoveSARegPair(MBB); return Modified; } diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 31d2b3a..f89d94f 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -673,6 +673,7 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60", FeatureStdExtZvfh, FeatureStdExtZvkt, FeatureStdExtZvl256b, + FeatureVendorXSMTVDot, FeatureUnalignedScalarMem]), [TuneDLenFactor2, TuneOptimizedNF2SegmentLoadStore, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 7e58b6f..40b6416 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -170,7 +170,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { if (Subtarget.hasStdExtE()) - 
report_fatal_error("Graal reserved registers do not exist in RVE"); + reportFatalUsageError("Graal reserved registers do not exist in RVE"); markSuperRegs(Reserved, RISCV::X23_H); markSuperRegs(Reserved, RISCV::X27_H); } @@ -216,7 +216,7 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, const int64_t NumOfVReg = Offset.getScalable() / 8; const int64_t FixedOffset = NumOfVReg * VLENB; if (!isInt<32>(FixedOffset)) { - report_fatal_error( + reportFatalUsageError( "Frame size outside of the signed 32-bit range not supported"); } Offset = StackOffset::getFixed(FixedOffset + Offset.getFixed()); @@ -389,9 +389,25 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, .setMIFlag(Flag); } -// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by -// LMUL*VLENB bytes. -void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { +static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned> +getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) { + if (NumRemaining >= 8 && RegEncoding % 8 == 0) + return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, + IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V}; + if (NumRemaining >= 4 && RegEncoding % 4 == 0) + return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, + IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V}; + if (NumRemaining >= 2 && RegEncoding % 2 == 0) + return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, + IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V}; + return {RISCVVType::LMUL_1, RISCV::VRRegClass, + IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V}; +} + +// Split a VSPILLx_Mx/VRELOADx_Mx pseudo into multiple whole register stores/loads +// separated by LMUL*VLENB bytes. 
+void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, + bool IsSpill) const { DebugLoc DL = II->getDebugLoc(); MachineBasicBlock &MBB = *II->getParent(); MachineFunction &MF = *MBB.getParent(); @@ -403,47 +419,11 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode()); unsigned NF = ZvlssegInfo->first; unsigned LMUL = ZvlssegInfo->second; - assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations."); - unsigned Opcode, SubRegIdx; - switch (LMUL) { - default: - llvm_unreachable("LMUL must be 1, 2, or 4."); - case 1: - Opcode = RISCV::VS1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - break; - case 2: - Opcode = RISCV::VS2R_V; - SubRegIdx = RISCV::sub_vrm2_0; - break; - case 4: - Opcode = RISCV::VS4R_V; - SubRegIdx = RISCV::sub_vrm4_0; - break; - } - static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, - "Unexpected subreg numbering"); - - Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Optimize for constant VLEN. 
- if (auto VLEN = STI.getRealVLen()) { - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * LMUL; - STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); - } else { - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); - } + unsigned NumRegs = NF * LMUL; + assert(NumRegs <= 8 && "Invalid NF/LMUL combinations."); - Register SrcReg = II->getOperand(0).getReg(); + Register Reg = II->getOperand(0).getReg(); + uint16_t RegEncoding = TRI->getEncodingValue(Reg); Register Base = II->getOperand(1).getReg(); bool IsBaseKill = II->getOperand(1).isKill(); Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); @@ -451,100 +431,63 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { auto *OldMMO = *(II->memoperands_begin()); LocationSize OldLoc = OldMMO->getSize(); assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF); - auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize); - for (unsigned I = 0; I < NF; ++I) { - // Adding implicit-use of super register to describe we are using part of - // super register, that prevents machine verifier complaining when part of - // subreg is undef, see comment in MachineVerifier::checkLiveness for more - // detail. 
- BuildMI(MBB, II, DL, TII->get(Opcode)) - .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I)) - .addReg(Base, getKillRegState(I == NF - 1)) - .addMemOperand(NewMMO) - .addReg(SrcReg, RegState::Implicit); - if (I != NF - 1) + TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); + + Register VLENB = 0; + unsigned PreHandledNum = 0; + unsigned I = 0; + while (I != NumRegs) { + auto [LMulHandled, RegClass, Opcode] = + getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill); + auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled); + bool IsLast = I + RegNumHandled == NumRegs; + if (PreHandledNum) { + Register Step; + // Optimize for constant VLEN. + if (auto VLEN = STI.getRealVLen()) { + int64_t Offset = *VLEN / 8 * PreHandledNum; + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); + } else { + if (!VLENB) { + VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); + } + uint32_t ShiftAmount = Log2_32(PreHandledNum); + if (ShiftAmount == 0) + Step = VLENB; + else { + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) + .addReg(VLENB, getKillRegState(IsLast)) + .addImm(ShiftAmount); + } + } + BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(VL, getKillRegState(I == NF - 2)); - Base = NewBase; - } - II->eraseFromParent(); -} + .addReg(Step, getKillRegState(Step != VLENB || IsLast)); + Base = NewBase; + } -// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by -// LMUL*VLENB bytes. 
-void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { - DebugLoc DL = II->getDebugLoc(); - MachineBasicBlock &MBB = *II->getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); - const TargetInstrInfo *TII = STI.getInstrInfo(); - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding); + MachineInstrBuilder MIB = + BuildMI(MBB, II, DL, TII->get(Opcode)) + .addReg(ActualReg, getDefRegState(!IsSpill)) + .addReg(Base, getKillRegState(IsLast)) + .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), + VRegSize * RegNumHandled)); - auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode()); - unsigned NF = ZvlssegInfo->first; - unsigned LMUL = ZvlssegInfo->second; - assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations."); - unsigned Opcode, SubRegIdx; - switch (LMUL) { - default: - llvm_unreachable("LMUL must be 1, 2, or 4."); - case 1: - Opcode = RISCV::VL1RE8_V; - SubRegIdx = RISCV::sub_vrm1_0; - break; - case 2: - Opcode = RISCV::VL2RE8_V; - SubRegIdx = RISCV::sub_vrm2_0; - break; - case 4: - Opcode = RISCV::VL4RE8_V; - SubRegIdx = RISCV::sub_vrm4_0; - break; - } - static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, - "Unexpected subreg numbering"); - - Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Optimize for constant VLEN. 
- if (auto VLEN = STI.getRealVLen()) { - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * LMUL; - STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); - } else { - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); - } + // Adding implicit-use of super register to describe we are using part of + // super register, that prevents machine verifier complaining when part of + // subreg is undef, see comment in MachineVerifier::checkLiveness for more + // detail. + if (IsSpill) + MIB.addReg(Reg, RegState::Implicit); - Register DestReg = II->getOperand(0).getReg(); - Register Base = II->getOperand(1).getReg(); - bool IsBaseKill = II->getOperand(1).isKill(); - Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); - auto *OldMMO = *(II->memoperands_begin()); - LocationSize OldLoc = OldMMO->getSize(); - assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF); - auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize); - for (unsigned I = 0; I < NF; ++I) { - BuildMI(MBB, II, DL, TII->get(Opcode), - TRI->getSubReg(DestReg, SubRegIdx + I)) - .addReg(Base, getKillRegState(I == NF - 1)) - .addMemOperand(NewMMO); - if (I != NF - 1) - BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) - .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(VL, getKillRegState(I == NF - 2)); - Base = NewBase; + PreHandledNum = RegNumHandled; + RegEncoding += RegNumHandled; + I += RegNumHandled; } II->eraseFromParent(); } @@ -568,7 +511,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); if (!isInt<32>(Offset.getFixed())) { - report_fatal_error( + reportFatalUsageError( "Frame offsets outside of the signed 
32-bit range not supported"); } @@ -589,7 +532,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (Lo12 & 0b11111) != 0) { // Prefetch instructions require the offset to be 32 byte aligned. MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); - } else if (Opc == RISCV::MIPS_PREFETCH && !isUInt<9>(Val)) { + } else if (Opc == RISCV::MIPS_PREF && !isUInt<9>(Val)) { // MIPS Prefetch instructions require the offset to be 9 bits encoded. MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); } else if ((Opc == RISCV::PseudoRV32ZdinxLD || @@ -635,9 +578,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Handle spill/fill of synthetic register classes for segment operations to - // ensure correctness in the edge case one gets spilled. There are many - // possible optimizations here, but given the extreme rarity of such spills, - // we prefer simplicity of implementation for now. + // ensure correctness in the edge case one gets spilled. switch (MI.getOpcode()) { case RISCV::PseudoVSPILL2_M1: case RISCV::PseudoVSPILL2_M2: @@ -650,7 +591,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case RISCV::PseudoVSPILL6_M1: case RISCV::PseudoVSPILL7_M1: case RISCV::PseudoVSPILL8_M1: - lowerVSPILL(II); + lowerSegmentSpillReload(II, /*IsSpill=*/true); return true; case RISCV::PseudoVRELOAD2_M1: case RISCV::PseudoVRELOAD2_M2: @@ -663,7 +604,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case RISCV::PseudoVRELOAD6_M1: case RISCV::PseudoVRELOAD7_M1: case RISCV::PseudoVRELOAD8_M1: - lowerVRELOAD(II); + lowerSegmentSpillReload(II, /*IsSpill=*/false); return true; } @@ -1025,7 +966,9 @@ bool RISCVRegisterInfo::getRegAllocationHints( } } - // Add a hint if it would allow auipc/lui+addi(w) fusion. + // Add a hint if it would allow auipc/lui+addi(w) fusion. 
We do this even + // without the fusions explicitly enabled as the impact is rarely negative + // and some cores do implement this fusion. if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) && MI.getOperand(1).isReg()) { const MachineBasicBlock &MBB = *MI.getParent(); @@ -1033,9 +976,7 @@ bool RISCVRegisterInfo::getRegAllocationHints( // Is the previous instruction a LUI or AUIPC that can be fused? if (I != MBB.begin()) { I = skipDebugInstructionsBackward(std::prev(I), MBB.begin()); - if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) || - (I->getOpcode() == RISCV::AUIPC && - Subtarget.hasAUIPCADDIFusion())) && + if ((I->getOpcode() == RISCV::LUI || I->getOpcode() == RISCV::AUIPC) && I->getOperand(0).getReg() == MI.getOperand(1).getReg()) { if (OpIdx == 0) tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false); @@ -1052,3 +993,12 @@ bool RISCVRegisterInfo::getRegAllocationHints( return BaseImplRetVal; } + +Register +RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass, + uint16_t Encoding) const { + MCRegister Reg = RISCV::V0 + Encoding; + if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1) + return Reg; + return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index b368399..2810139 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override; - void lowerVSPILL(MachineBasicBlock::iterator II) const; - void lowerVRELOAD(MachineBasicBlock::iterator II) const; + void lowerSegmentSpillReload(MachineBasicBlock::iterator II, + bool IsSpill) const; Register getFrameRegister(const MachineFunction &MF) const override; @@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const 
MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + Register findVRegWithEncoding(const TargetRegisterClass &RegClass, + uint16_t Encoding) const; + static bool isVRRegClass(const TargetRegisterClass *RC) { return RISCVRI::isVRegClass(RC->TSFlags) && RISCVRI::getNF(RC->TSFlags) == 1; diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td index 5ef858a..8cf15fa 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td +++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td @@ -24,7 +24,7 @@ let SchedModel = Andes45Model in { //===----------------------------------------------------------------------===// // Andes 45 series CPU -// - 2 Interger Arithmetic and Logical Units (ALU) +// - 2 Integer Arithmetic and Logical Units (ALU) // - Multiply / Divide Unit (MDU) // - Load Store Unit (LSU) // - Control and Status Register Unit (CSR) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index bf23812..24ebbc3 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -13,78 +13,113 @@ // //===----------------------------------------------------------------------===// -class SMX60IsWorstCaseMX<string mx, list<string> MxList> { - string LLMUL = LargestLMUL<MxList>.r; - bit c = !eq(mx, LLMUL); -} +//===----------------------------------------------------------------------===// +// Helpers + +// Maps LMUL string to corresponding value from the Values array +// LMUL values map to array indices as follows: +// MF8 -> Values[0], MF4 -> Values[1], MF2 -> Values[2], M1 -> Values[3], +// M2 -> Values[4], M4 -> Values[5], M8 -> Values[6] +// Shorter lists are allowed, e.g., widening instructions don't work on M8 +class GetLMULValue<list<int> Values, string LMUL> { + defvar Index = !cond( + !eq(LMUL, "MF8"): 0, + !eq(LMUL, "MF4"): 1, + !eq(LMUL, "MF2"): 2, + !eq(LMUL, "M1"): 3, + !eq(LMUL, "M2"): 
4, + !eq(LMUL, "M4"): 5, + !eq(LMUL, "M8"): 6, + ); -class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> { - string LLMUL = LargestLMUL<MxList>.r; - int SSEW = SmallestSEW<mx, isF>.r; - bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); + assert !lt(Index, !size(Values)), + "Missing LMUL value for '" # LMUL # "'. " # + "Expected at least " # !add(Index, 1) # " elements, but got " # + !size(Values) # "."; + + int c = Values[Index]; } -defvar SMX60VLEN = 256; -defvar SMX60DLEN = !div(SMX60VLEN, 2); +// Returns BaseValue for LMUL values before startLMUL, Value for startLMUL, +// then doubles Value for each subsequent LMUL +// Example: ConstValueUntilLMULThenDoubleBase<"M1", 2, 4, "M8"> returns: +// MF8->2, MF4->2, MF2->2, M1->4, M2->8, M4->16, M8->32 +// This is useful for modeling scheduling parameters that scale with LMUL. +class ConstValueUntilLMULThenDoubleBase<string startLMUL, int BaseValue, int Value, string currentLMUL> { + assert !le(BaseValue, Value), "BaseValue must be less-equal to Value"; + defvar startPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], startLMUL>.c; + defvar currentPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], currentLMUL>.c; + + // Calculate the difference in positions + defvar posDiff = !sub(currentPos, startPos); -class Get1248Latency<string mx> { + // Calculate Value * (2^posDiff) int c = !cond( - !eq(mx, "M2") : 2, - !eq(mx, "M4") : 4, - !eq(mx, "M8") : 8, - true: 1 + !eq(posDiff, 0) : Value, + !eq(posDiff, 1) : !mul(Value, 2), + !eq(posDiff, 2) : !mul(Value, 4), + !eq(posDiff, 3) : !mul(Value, 8), + !eq(posDiff, 4) : !mul(Value, 16), + !eq(posDiff, 5) : !mul(Value, 32), + !eq(posDiff, 6) : !mul(Value, 64), + true : BaseValue ); } -// Used for: logical opsz, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides -class Get4816Latency<string mx> { - int c = !cond( - !eq(mx, "M4") : 8, - !eq(mx, "M8") : 16, - true: 4 - ); +// Same as the previous function but BaseValue == Value +class 
ConstValueUntilLMULThenDouble<string startLMUL, int Value, string currentLMUL> { + int c = ConstValueUntilLMULThenDoubleBase<startLMUL, Value, Value, currentLMUL>.c; +} + +// Returns MF8->1, MF4->1, MF2->2, M1->4, M2->8, M4->16, M8->32 +class ConstOneUntilMF4ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c; } +// Returns MF8->1, MF4->1, MF2->1, M1->2, M2->4, M4->8, M8->16 +class ConstOneUntilMF2ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c; +} + +// Returns MF8->1, MF4->1, MF2->1, M1->1, M2->2, M4->4, M8->8 +class ConstOneUntilM1ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"M1", 1, mx>.c; +} + +//===----------------------------------------------------------------------===// +// Latency helper classes + // Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max -class Get458Latency<string mx> { - int c = !cond( - !eq(mx, "M4") : 5, - !eq(mx, "M8") : 8, - true: 4 - ); +class Get4458Latency<string mx> { + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/4, /*M4=*/5, /*M8=*/8], mx>.c; } -// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs -// Used for: widening operations +// Used for: widening operations (no M8) class Get4588Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 5, - !eq(mx, "M4") : 8, - !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback - true: 4 - ); + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/5, /*M4=*/8], mx>.c; } // Used for: mask-producing comparisons, carry ops with mask, FP comparisons class Get461018Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 6, - !eq(mx, "M4") : 10, - !eq(mx, "M8") : 18, - true: 4 - ); + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c; } -// Used for: e64 multiply pattern, complex ops -class Get781632Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 8, - 
!eq(mx, "M4") : 16, - !eq(mx, "M8") : 32, - true: 7 - ); +//===----------------------------------------------------------------------===// + +class SMX60IsWorstCaseMX<string mx, list<string> MxList> { + string LLMUL = LargestLMUL<MxList>.r; + bit c = !eq(mx, LLMUL); } +class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> { + string LLMUL = LargestLMUL<MxList>.r; + int SSEW = SmallestSEW<mx, isF>.r; + bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); +} + +defvar SMX60VLEN = 256; +defvar SMX60DLEN = !div(SMX60VLEN, 2); + def SpacemitX60Model : SchedMachineModel { let IssueWidth = 2; // dual-issue let MicroOpBufferSize = 0; // in-order @@ -383,12 +418,13 @@ foreach LMul = [1, 2, 4, 8] in { foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in { + let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in { defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>; } - let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in { + defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + let Latency = VIALULat, ReleaseAtCycles = [4] in { // Pattern of vadd, vsub, vrsub: 4/4/5/8 // Pattern of vand, vor, vxor: 4/4/8/16 // They are grouped together, so we used the worst case 4/4/8/16 @@ -425,7 +461,7 @@ foreach mx = SchedMxList in { // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, // e64 = 7,8,16,32. We use the worst-case until we can split the SEW. 
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites - let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in { + let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in { defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>; @@ -461,15 +497,8 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - // Slightly reduced for fractional LMULs - defvar Multiplier = !cond( - !eq(mx, "MF8") : 12, - !eq(mx, "MF4") : 12, - !eq(mx, "MF2") : 12, - true: 24 - ); - - let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in { + defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c; + let Latency = VIDivLat, ReleaseAtCycles = [12] in { defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>; } @@ -480,14 +509,8 @@ foreach mx = SchedMxList in { foreach mx = SchedMxListW in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c; - // Slightly increased for integer LMULs - defvar Multiplier = !cond( - !eq(mx, "M2") : 2, - !eq(mx, "M4") : 2, - true: 1 - ); - - let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in { + defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c; + let Latency = VNarrowingLat, ReleaseAtCycles = [4] in { defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; @@ -501,16 +524,33 @@ foreach mx = SchedMxListW in { foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c; - 
defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>; + let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>; + } + + // Latency of vsmul: e8/e16 = 4/4/5/8, e32 = 5/5/5/8, e64 = 7/8/16/32 + // We use the worst-case until we can split the SEW. + defvar VSMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c; + // ReleaseAtCycles of vsmul: e8/e16/e32 = 1/2/4/8, e64 = 4/8/16/32 + // We use the worst-case until we can split the SEW. 
+ defvar VSMulOcc = ConstValueUntilLMULThenDoubleBase<"M1", 1, 4, mx>.c; + // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites + let Latency = VSMulLat, ReleaseAtCycles = [VSMulOcc] in { + defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>; + } + + defvar VSShiftLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + defvar VSShiftOcc = ConstOneUntilMF2ThenDouble<mx>.c; + let Latency = VSShiftLat, ReleaseAtCycles = [VSShiftOcc] in { + defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>; + } } // 13. Vector Floating-Point Instructions diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 66ce134..d70b1d0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -38,7 +38,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h" #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h" #include <optional> using namespace llvm; @@ -107,6 +106,8 @@ static cl::opt<bool> extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target()); RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target()); + RegisterTargetMachine<RISCVTargetMachine> A(getTheRISCV32beTarget()); + RegisterTargetMachine<RISCVTargetMachine> B(getTheRISCV64beTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeRISCVO0PreLegalizerCombinerPass(*PR); @@ -140,21 +141,37 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVAsmPrinterPass(*PR); 
} -static StringRef computeDataLayout(const Triple &TT, - const TargetOptions &Options) { - StringRef ABIName = Options.MCOptions.getABIName(); - if (TT.isArch64Bit()) { - if (ABIName == "lp64e") - return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64"; +static std::string computeDataLayout(const Triple &TT, + const TargetOptions &Opts) { + std::string Ret; + + if (TT.isLittleEndian()) + Ret += "e"; + else + Ret += "E"; + + Ret += "-m:e"; - return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; + // Pointer and integer sizes. + if (TT.isArch64Bit()) { + Ret += "-p:64:64-i64:64-i128:128"; + Ret += "-n32:64"; + } else { + assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); + Ret += "-p:32:32-i64:64"; + Ret += "-n32"; } - assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported"); - if (ABIName == "ilp32e") - return "e-m:e-p:32:32-i64:64-n32-S32"; + // Stack alignment based on ABI. + StringRef ABI = Opts.MCOptions.getABIName(); + if (ABI == "ilp32e") + Ret += "-S32"; + else if (ABI == "lp64e") + Ret += "-S64"; + else + Ret += "-S128"; - return "e-m:e-p:32:32-i64:64-n32-S128"; + return Ret; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 67f924a..5e30018 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1431,7 +1431,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::ctlz: case Intrinsic::ctpop: { auto LT = getTypeLegalizationCost(RetTy); - if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) { + if (ST->hasStdExtZvbb() && LT.second.isVector()) { unsigned Op; switch (ICA.getID()) { case Intrinsic::cttz: @@ -1629,6 +1629,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // scalarized if the legalized Src and Dst are not equal sized. 
const DataLayout &DL = this->getDataLayout(); if (!SrcLT.second.isVector() || !DstLT.second.isVector() || + !SrcLT.first.isValid() || !DstLT.first.isValid() || !TypeSize::isKnownLE(DL.getTypeSizeInBits(Src), SrcLT.second.getSizeInBits()) || !TypeSize::isKnownLE(DL.getTypeSizeInBits(Dst), @@ -2414,6 +2415,24 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, return BaseCost + SlideCost; } +InstructionCost +RISCVTTIImpl::getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, + TTI::TargetCostKind CostKind, + unsigned Index) const { + if (isa<FixedVectorType>(Val)) + return BaseT::getIndexedVectorInstrCostFromEnd(Opcode, Val, CostKind, + Index); + + // TODO: This code replicates what LoopVectorize.cpp used to do when asking + // for the cost of extracting the last lane of a scalable vector. It probably + // needs a more accurate cost. + ElementCount EC = cast<VectorType>(Val)->getElementCount(); + assert(Index < EC.getKnownMinValue() && "Unexpected reverse index"); + return getVectorInstrCost(Opcode, Val, CostKind, + EC.getKnownMinValue() - 1 - Index, nullptr, + nullptr); +} + InstructionCost RISCVTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, @@ -2712,6 +2731,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const { return RVVMinTripCount; } +bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const { + return ST->enableUnalignedVectorMem(); +} + TTI::AddressingModeKind RISCVTTIImpl::getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504c..06fd8bb 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -114,6 +114,9 @@ public: bool enableScalableVectorization() const override { return 
ST->hasVInstructions(); } + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override { + return ST->hasVInstructions(); + } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL @@ -129,7 +132,7 @@ public: unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override; - bool preferAlternateOpcodeVectorization() const override { return false; } + bool preferAlternateOpcodeVectorization() const override; bool preferEpilogueVectorization() const override { // Epilogue vectorization is usually unprofitable - tail folding or @@ -240,6 +243,11 @@ public: unsigned Index, const Value *Op0, const Value *Op1) const override; + InstructionCost + getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, + TTI::TargetCostKind CostKind, + unsigned Index) const override; + InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, @@ -364,6 +372,8 @@ public: switch (RdxDesc.getRecurrenceKind()) { case RecurKind::Add: + case RecurKind::Sub: + case RecurKind::AddChainWithSubs: case RecurKind::And: case RecurKind::Or: case RecurKind::Xor: diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 37a71e8..01aef86 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -491,8 +491,42 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { // vfirst find-first-set mask bit case RISCV::VCPOP_M: case RISCV::VFIRST_M: + // Vector Bit-manipulation Instructions (Zvbb) + // Vector And-Not + case RISCV::VANDN_VV: + case RISCV::VANDN_VX: + // Vector Reverse Bits in Elements + case RISCV::VBREV_V: + // Vector Reverse Bits in Bytes + case RISCV::VBREV8_V: + // Vector Reverse Bytes + case RISCV::VREV8_V: + // Vector Count Leading Zeros + case 
RISCV::VCLZ_V: + // Vector Count Trailing Zeros + case RISCV::VCTZ_V: + // Vector Population Count + case RISCV::VCPOP_V: + // Vector Rotate Left + case RISCV::VROL_VV: + case RISCV::VROL_VX: + // Vector Rotate Right + case RISCV::VROR_VI: + case RISCV::VROR_VV: + case RISCV::VROR_VX: + // Vector Carry-less Multiplication Instructions (Zvbc) + // Vector Carry-less Multiply + case RISCV::VCLMUL_VV: + case RISCV::VCLMUL_VX: + // Vector Carry-less Multiply Return High Half + case RISCV::VCLMULH_VV: + case RISCV::VCLMULH_VX: return MILog2SEW; + // Vector Widening Shift Left Logical (Zvbb) + case RISCV::VWSLL_VI: + case RISCV::VWSLL_VX: + case RISCV::VWSLL_VV: // Vector Widening Integer Add/Subtract // Def uses EEW=2*SEW . Operands use EEW=SEW. case RISCV::VWADDU_VV: @@ -503,9 +537,6 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VWADD_VX: case RISCV::VWSUB_VV: case RISCV::VWSUB_VX: - case RISCV::VWSLL_VI: - case RISCV::VWSLL_VX: - case RISCV::VWSLL_VV: // Vector Widening Integer Multiply Instructions // Destination EEW=2*SEW. Source EEW=SEW. 
case RISCV::VWMUL_VV: @@ -816,13 +847,7 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VLUXEI32_V: case RISCV::VLOXEI32_V: case RISCV::VLUXEI64_V: - case RISCV::VLOXEI64_V: { - for (const MachineMemOperand *MMO : MI.memoperands()) - if (MMO->isVolatile()) - return false; - return true; - } - + case RISCV::VLOXEI64_V: // Vector Single-Width Integer Add and Subtract case RISCV::VADD_VI: case RISCV::VADD_VV: @@ -1020,12 +1045,40 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VNCLIP_WV: case RISCV::VNCLIP_WX: case RISCV::VNCLIP_WI: - - // Vector Crypto + // Vector Bit-manipulation Instructions (Zvbb) + // Vector And-Not + case RISCV::VANDN_VV: + case RISCV::VANDN_VX: + // Vector Reverse Bits in Elements + case RISCV::VBREV_V: + // Vector Reverse Bits in Bytes + case RISCV::VBREV8_V: + // Vector Reverse Bytes + case RISCV::VREV8_V: + // Vector Count Leading Zeros + case RISCV::VCLZ_V: + // Vector Count Trailing Zeros + case RISCV::VCTZ_V: + // Vector Population Count + case RISCV::VCPOP_V: + // Vector Rotate Left + case RISCV::VROL_VV: + case RISCV::VROL_VX: + // Vector Rotate Right + case RISCV::VROR_VI: + case RISCV::VROR_VV: + case RISCV::VROR_VX: + // Vector Widening Shift Left Logical case RISCV::VWSLL_VI: case RISCV::VWSLL_VX: case RISCV::VWSLL_VV: - + // Vector Carry-less Multiplication Instructions (Zvbc) + // Vector Carry-less Multiply + case RISCV::VCLMUL_VV: + case RISCV::VCLMUL_VX: + // Vector Carry-less Multiply Return High Half + case RISCV::VCLMULH_VV: + case RISCV::VCLMULH_VX: // Vector Mask Instructions // Vector Mask-Register Logical Instructions // vmsbf.m set-before-first mask bit @@ -1213,34 +1266,6 @@ static bool isVectorOpUsedAsScalarOp(const MachineOperand &MO) { } } -/// Return true if MI may read elements past VL. 
-static bool mayReadPastVL(const MachineInstr &MI) { - const RISCVVPseudosTable::PseudoInfo *RVV = - RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); - if (!RVV) - return true; - - switch (RVV->BaseInstr) { - // vslidedown instructions may read elements past VL. They are handled - // according to current tail policy. - case RISCV::VSLIDEDOWN_VI: - case RISCV::VSLIDEDOWN_VX: - case RISCV::VSLIDE1DOWN_VX: - case RISCV::VFSLIDE1DOWN_VF: - - // vrgather instructions may read the source vector at any index < VLMAX, - // regardless of VL. - case RISCV::VRGATHER_VI: - case RISCV::VRGATHER_VV: - case RISCV::VRGATHER_VX: - case RISCV::VRGATHEREI16_VV: - return true; - - default: - return false; - } -} - bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { const MCInstrDesc &Desc = MI.getDesc(); if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) @@ -1261,6 +1286,13 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { return false; } + for (const MachineMemOperand *MMO : MI.memoperands()) { + if (MMO->isVolatile()) { + LLVM_DEBUG(dbgs() << "Not a candidate because contains volatile MMO\n"); + return false; + } + } + // Some instructions that produce vectors have semantics that make it more // difficult to determine whether the VL can be reduced. For example, some // instructions, such as reductions, may write lanes past VL to a scalar @@ -1274,7 +1306,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { // TODO: Use a better approach than a white-list, such as adding // properties to instructions using something like TSFlags. 
if (!isSupportedInstr(MI)) { - LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n"); + LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: " + << MI); return false; } @@ -1296,13 +1329,14 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { const MCInstrDesc &Desc = UserMI.getDesc(); if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { - LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" + LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" " use VLMAX\n"); return std::nullopt; } - if (mayReadPastVL(UserMI)) { - LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); + if (RISCVII::readsPastVL( + TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) { + LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); return std::nullopt; } @@ -1319,7 +1353,7 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); auto DemandedVL = DemandedVLs.lookup(&UserMI); if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { - LLVM_DEBUG(dbgs() << " Abort because user is passthru in " + LLVM_DEBUG(dbgs() << " Abort because user is passthru in " "instruction with demanded tail\n"); return std::nullopt; } @@ -1416,7 +1450,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { } bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { - LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n"); + LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI); unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc()); MachineOperand &VLOp = MI.getOperand(VLOpNum); @@ -1436,13 +1470,13 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { "Expected VL to be an Imm or virtual Reg"); if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) { - LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n"); + LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n"); 
return false; } if (CommonVL->isIdenticalTo(VLOp)) { LLVM_DEBUG( - dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n"); + dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n"); return false; } @@ -1453,8 +1487,10 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return true; } const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg()); - if (!MDT->dominates(VLMI, &MI)) + if (!MDT->dominates(VLMI, &MI)) { + LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n"); return false; + } LLVM_DEBUG( dbgs() << " Reduce VL from " << VLOp << " to " << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo()) diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp index fc0965d..7b0afe4 100644 --- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp +++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp @@ -21,10 +21,24 @@ Target &llvm::getTheRISCV64Target() { return TheRISCV64Target; } +Target &llvm::getTheRISCV32beTarget() { + static Target TheRISCV32beTarget; + return TheRISCV32beTarget; +} + +Target &llvm::getTheRISCV64beTarget() { + static Target TheRISCV64beTarget; + return TheRISCV64beTarget; +} + extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetInfo() { RegisterTarget<Triple::riscv32, /*HasJIT=*/true> X( getTheRISCV32Target(), "riscv32", "32-bit RISC-V", "RISCV"); RegisterTarget<Triple::riscv64, /*HasJIT=*/true> Y( getTheRISCV64Target(), "riscv64", "64-bit RISC-V", "RISCV"); + RegisterTarget<Triple::riscv32be> A(getTheRISCV32beTarget(), "riscv32be", + "32-bit big endian RISC-V", "RISCV"); + RegisterTarget<Triple::riscv64be> B(getTheRISCV64beTarget(), "riscv64be", + "64-bit big endian RISC-V", "RISCV"); } diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h index ed00a01..9b9fd2c 100644 --- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h +++ 
b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h @@ -15,6 +15,8 @@ class Target; Target &getTheRISCV32Target(); Target &getTheRISCV64Target(); +Target &getTheRISCV32beTarget(); +Target &getTheRISCV64beTarget(); } // namespace llvm |