diff options
Diffstat (limited to 'llvm/lib/Target/RISCV')
21 files changed, 395 insertions, 392 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index a143d85..d71c42c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -3849,9 +3849,14 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, switch (Inst.getOpcode()) { default: break; - case RISCV::PseudoC_ADDI_NOP: - emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP)); + case RISCV::PseudoC_ADDI_NOP: { + if (Inst.getOperand(2).getImm() == 0) + emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP)); + else + emitToStreamer( + Out, MCInstBuilder(RISCV::C_NOP_HINT).addOperand(Inst.getOperand(2))); return false; + } case RISCV::PseudoLLAImm: case RISCV::PseudoLAImm: case RISCV::PseudoLI: { diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index fa7bcfa..5e54b82 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -193,21 +193,19 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo, static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - if (RegNo == 0) { + if (RegNo == 0) return MCDisassembler::Fail; - } return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus -DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address, - const MCDisassembler *Decoder) { - if (RegNo == 2) { +static DecodeStatus DecodeGPRNoX2RegisterClass(MCInst &Inst, uint64_t RegNo, + uint32_t Address, + const MCDisassembler *Decoder) { + if (RegNo == 2) return MCDisassembler::Fail; - } - return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder); + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } static DecodeStatus DecodeGPRNoX31RegisterClass(MCInst &Inst, uint32_t RegNo, @@ -536,31 +534,6 @@ static DecodeStatus decodeRTZArg(MCInst &Inst, uint32_t Imm, int64_t Address, return MCDisassembler::Success; } -static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus -decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - -static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder); - static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder); @@ -579,18 +552,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, #include "RISCVGenDisassemblerTables.inc" -static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - if (!Check(S, DecodeGPRNoX0RegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - Inst.addOperand(Inst.getOperand(0)); - Inst.addOperand(MCOperand::createImm(0)); - return S; -} - static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { @@ -601,66 +562,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - uint32_t Imm = - fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); - [[maybe_unused]] DecodeStatus Result = - decodeSImmOperand<6>(Inst, Imm, Address, Decoder); - assert(Result == MCDisassembler::Success && "Invalid immediate"); - return MCDisassembler::Success; -} - -static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - uint32_t Imm = - fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); - return decodeCLUIImmOperand(Inst, Imm, Address, Decoder); -} - -static DecodeStatus -decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - Inst.addOperand(MCOperand::createReg(RISCV::X0)); - Inst.addOperand(Inst.getOperand(0)); - - uint32_t UImm6 = - fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); - return decodeUImmLog2XLenNonZeroOperand(Inst, UImm6, Address, Decoder); -} - -static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder))) - return MCDisassembler::Fail; - return S; -} - -static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn, - uint64_t Address, - const MCDisassembler *Decoder) { - DecodeStatus S = MCDisassembler::Success; - uint32_t Rd = fieldFromInstruction(Insn, 7, 5); - uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - Inst.addOperand(Inst.getOperand(0)); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder))) - return MCDisassembler::Fail; - return S; -} - static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 2c37c3b..82e3b5c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup Fixup = MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); F.setVarFixups({Fixup}); + F.setLinkerRelaxable(); F.getParent()->setLinkerRelaxable(); return true; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 7ad5d5f..bddea43 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -330,7 +330,6 @@ enum OperandType : unsigned { OPERAND_UIMM32, OPERAND_UIMM48, OPERAND_UIMM64, - OPERAND_ZERO, OPERAND_THREE, OPERAND_FOUR, OPERAND_SIMM5, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index baa508a..269b117 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -13,13 +13,7 @@ #include "MCTargetDesc/RISCVAsmBackend.h" #include "MCTargetDesc/RISCVMCAsmInfo.h" -#include "RISCVFixupKinds.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index d4f5d8f..2f32e2a 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -293,7 +293,7 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) { MCInst Hint; if (STI->hasStdExtZca()) - Hint.setOpcode(RISCV::C_ADD_HINT); + Hint.setOpcode(RISCV::C_ADD); else Hint.setOpcode(RISCV::ADD); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index f9c0b54..171940e 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1272,7 +1272,7 @@ def FeatureVendorXSfmm128t def FeatureVendorXSfvqmaccdod : RISCVExtension<1, 0, "SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2)", - [FeatureStdExtZve32x]>; + [FeatureStdExtZve32x, FeatureStdExtZvl128b]>; def HasVendorXSfvqmaccdod : Predicate<"Subtarget->hasVendorXSfvqmaccdod()">, AssemblerPredicate<(all_of FeatureVendorXSfvqmaccdod), @@ -1281,7 +1281,7 @@ def HasVendorXSfvqmaccdod def FeatureVendorXSfvqmaccqoq : RISCVExtension<1, 0, "SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4)", - [FeatureStdExtZve32x]>; + [FeatureStdExtZve32x, FeatureStdExtZvl256b]>; def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">, AssemblerPredicate<(all_of FeatureVendorXSfvqmaccqoq), @@ -1290,7 +1290,7 @@ def HasVendorXSfvqmaccqoq def FeatureVendorXSfvfwmaccqqq : RISCVExtension<1, 0, "SiFive Matrix Multiply Accumulate Instruction (4-by-4)", - [FeatureStdExtZvfbfmin]>; + [FeatureStdExtZvfbfmin, FeatureStdExtZvl128b]>; def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">, AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq), diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b1ab76a..9fc0d81 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1581,7 +1581,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // Set the register and all its subregisters. if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) { SavedRegs.set(CSReg); - llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); }); + for (unsigned Reg : SubRegs) + SavedRegs.set(Reg); } // Combine to super register if all of its subregisters are marked. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3918dd2..54845e5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1618,6 +1618,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } + // Customize load and store operation for bf16 if zfh isn't enabled. + if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) { + setOperationAction(ISD::LOAD, MVT::bf16, Custom); + setOperationAction(ISD::STORE, MVT::bf16, Custom); + } + // Function alignments. const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4); setMinFunctionAlignment(FunctionAlignment); @@ -7216,6 +7222,47 @@ static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); } +SDValue +RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() && + "Unexpected bfloat16 load lowering"); + + SDLoc DL(Op); + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + EVT MemVT = LD->getMemoryVT(); + SDValue Load = DAG.getExtLoad( + ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(), + LD->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), + LD->getMemOperand()); + // Using mask to make bf16 nan-boxing valid when we don't have flh + // instruction. -65536 would be treat as a small number and thus it can be + // directly used lui to get the constant. + SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT()); + SDValue OrSixteenOne = + DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask}); + SDValue ConvertedResult = + DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne); + return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL); +} + +SDValue +RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() && + "Unexpected bfloat16 store lowering"); + + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDLoc DL(Op); + SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL, + Subtarget.getXLenVT(), ST->getValue()); + return DAG.getTruncStore( + ST->getChain(), DL, FMV, ST->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()), + ST->getMemOperand()); +} + SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -7914,6 +7961,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return DAG.getMergeValues({Pair, Chain}, DL); } + if (VT == MVT::bf16) + return lowerXAndesBfHCvtBFloat16Load(Op, DAG); + // Handle normal vector tuple load. if (VT.isRISCVVectorTuple()) { SDLoc DL(Op); @@ -7998,6 +8048,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64, Store->getMemOperand()); } + + if (VT == MVT::bf16) + return lowerXAndesBfHCvtBFloat16Store(Op, DAG); + // Handle normal vector tuple store. if (VT.isRISCVVectorTuple()) { SDLoc DL(Op); @@ -16079,7 +16133,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt = CNode->getZExtValue(); // Don't do this if the Xqciac extension is enabled and the MulAmt in simm12. - if (Subtarget.hasVendorXqciac() && isInt<12>(MulAmt)) + if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue())) return SDValue(); const bool HasShlAdd = Subtarget.hasStdExtZba() || @@ -16184,10 +16238,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x)) for (uint64_t Offset : {3, 5, 9}) { if (isPowerOf2_64(MulAmt + Offset)) { + unsigned ShAmt = Log2_64(MulAmt + Offset); + if (ShAmt >= VT.getSizeInBits()) + continue; SDLoc DL(N); SDValue Shift1 = - DAG.getNode(ISD::SHL, DL, VT, X, - DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT)); + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT)); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Offset - 1), DL, VT), X); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index f0447e0..ca70c46 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -578,6 +578,9 @@ private: SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerXAndesBfHCvtBFloat16Load(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerXAndesBfHCvtBFloat16Store(SDValue Op, SelectionDAG &DAG) const; + bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVector<CCValAssign, 16> &ArgLocs) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 64f9e3e..085064e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2859,9 +2859,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_UIMM16_NONZERO: Ok = isUInt<16>(Imm) && (Imm != 0); break; - case RISCVOp::OPERAND_ZERO: - Ok = Imm == 0; - break; case RISCVOp::OPERAND_THREE: Ok = Imm == 3; break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 8252a9b..c5551fb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -57,12 +57,6 @@ def simm6nonzero : RISCVOp, }]; } -def immzero : RISCVOp, - ImmLeaf<XLenVT, [{return (Imm == 0);}]> { - let ParserMatchClass = ImmZeroAsmOperand; - let OperandType = "OPERAND_ZERO"; -} - def CLUIImmAsmOperand : AsmOperandClass { let Name = "CLUIImm"; let RenderMethod = "addImmOperands"; @@ -272,7 +266,7 @@ class Bcz<bits<3> funct3, string OpcodeStr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_right<bits<2> funct2, string OpcodeStr> : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1, uimmlog2xlennonzero:$imm), + (ins GPRC:$rs1, uimmlog2xlen:$imm), OpcodeStr, "$rs1, $imm"> { let Constraints = "$rs1 = $rd"; let Inst{12} = imm{5}; @@ -402,17 +396,19 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, simm6nonzero:$imm), + (ins GPRNoX0:$rd, simm6:$imm), "c.addi", "$rd, $imm">, Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; } -// Alternate syntax for c.nop. Converted to C_NOP by the assembler. +// Alternate syntax for c.nop. Converted to C_NOP/C_NOP_HINT by the assembler. let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in -def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, immzero:$imm), - [], "c.addi", "$rd, $imm">; +def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, simm6:$imm), + [], "c.addi", "$rd, $imm"> { + let Constraints = "$rs1 = $rd"; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RV32Only", Defs = [X1], @@ -430,7 +426,7 @@ def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), +def C_LI : RVInst16CI<0b010, 0b01, (outs GPR:$rd), (ins simm6:$imm), "c.li", "$rd, $imm">, Sched<[WriteIALU]>; @@ -449,7 +445,7 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), +def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX2:$rd), (ins c_lui_imm:$imm), "c.lui", "$rd, $imm">, Sched<[WriteIALU]>; @@ -497,8 +493,8 @@ def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>; def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm), +def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), + (ins GPR:$rd, uimmlog2xlen:$imm), "c.slli", "$rd, $imm">, Sched<[WriteShiftImm, ReadShiftImm]> { let Constraints = "$rd = $rd_wb"; @@ -544,7 +540,7 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1, isAsCheapAsAMove = 1 in -def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), +def C_MV : RVInst16CR<0b1000, 0b10, (outs GPR:$rs1), (ins GPRNoX0:$rs2), "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>; @@ -557,8 +553,8 @@ def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1), "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rd), - (ins GPRNoX0:$rs1, GPRNoX0:$rs2), +def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd), + (ins GPR:$rs1, GPRNoX0:$rs2), "c.add", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rd"; @@ -616,81 +612,6 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), let rd = 0; } -def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), - (ins GPRNoX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm">, - Sched<[WriteIALU, ReadIALU]> { - let Constraints = "$rd = $rd_wb"; - let imm = 0; - let DecoderMethod = "decodeRVCInstrRdRs1ImmZero"; -} - -def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm">, - Sched<[WriteIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdSImm6"; -} - -def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), - (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm">, - Sched<[WriteIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdCLUIImm"; -} - -def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> { - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs2"; -} - -def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rd), - (ins GPRX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, $rs2">, - Sched<[WriteIALU, ReadIALU, ReadIALU]> { - let Constraints = "$rs1 = $rd"; - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; -} - -def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), - (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli", "$rd, $imm">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rd = $rd_wb"; - let Inst{11-7} = 0; - let DecoderMethod = "decodeRVCInstrRdRs1UImmLog2XLenNonZero"; -} - -def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), - "c.slli64", "$rd">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rd = $rd_wb"; - let imm = 0; -} - -def C_SRLI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1), - "c.srli64", "$rs1">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rs1 = $rd"; - let Inst{6-2} = 0; - let Inst{11-10} = 0b00; - let Inst{12} = 0; -} - -def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), - (ins GPRC:$rs1), - "c.srai64", "$rs1">, - Sched<[WriteShiftImm, ReadShiftImm]> { - let Constraints = "$rs1 = $rd"; - let Inst{6-2} = 0; - let Inst{11-10} = 0b01; - let Inst{12} = 0; -} - } // Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0, // mayStore = 0 @@ -699,15 +620,17 @@ def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), //===----------------------------------------------------------------------===// let Predicates = [HasStdExtZca] in { -// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. -def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>; +// Legacy aliases. +def : InstAlias<"c.slli64 $rd", (C_SLLI GPR:$rd, 0), 0>; +def : InstAlias<"c.srli64 $rs1", (C_SRLI GPRC:$rs1, 0), 0>; +def : InstAlias<"c.srai64 $rs1", (C_SRAI GPRC:$rs1, 0), 0>; } let Predicates = [HasStdExtC, HasStdExtZihintntl] in { -def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>; -def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>; -def : InstAlias<"c.ntl.s1", (C_ADD_HINT X0, X4)>; -def : InstAlias<"c.ntl.all", (C_ADD_HINT X0, X5)>; +def : InstAlias<"c.ntl.p1", (C_ADD X0, X2)>; +def : InstAlias<"c.ntl.pall", (C_ADD X0, X3)>; +def : InstAlias<"c.ntl.s1", (C_ADD X0, X4)>; +def : InstAlias<"c.ntl.all", (C_ADD X0, X5)>; } // Predicates = [HasStdExtC, HasStdExtZihintntl] let EmitPriority = 0 in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 7f1077a..dd365cf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -89,39 +89,41 @@ class PLI_B_i<bits<8> funct8, string opcodestr> } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnary<bits<3> f, string opcodestr, dag operands, string argstr> - : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), operands, opcodestr, argstr> { +class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType> + : RVInstIBase<funct3, OPC_OP_IMM_32, (outs GPR:$rd), + (ins GPR:$rs1, ImmType:$shamt), opcodestr, + "$rd, $rs1, $shamt"> { let Inst{31} = 0b1; let Inst{30-28} = f; let Inst{27} = 0b0; } -class RVPUnaryImm5<bits<3> f, string opcodestr> - : RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm5:$uimm5), "$rd, $rs1, $uimm5"> { - bits<5> uimm5; +class RVPShiftW_ri<bits<3> f, bits<3> funct3, string opcodestr> + : RVPShift_ri<f, funct3, opcodestr, uimm5> { + bits<5> shamt; let Inst{26-25} = 0b01; - let Inst{24-20} = uimm5; + let Inst{24-20} = shamt; } -class RVPUnaryImm4<bits<3> f, string opcodestr> - : RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm4:$uimm4), "$rd, $rs1, $uimm4"> { - bits<4> uimm4; +class RVPShiftH_ri<bits<3> f, bits<3> funct3, string opcodestr> + : RVPShift_ri<f, funct3, opcodestr, uimm4> { + bits<4> shamt; let Inst{26-24} = 0b001; - let Inst{23-20} = uimm4; + let Inst{23-20} = shamt; } -class RVPUnaryImm3<bits<3> f, string opcodestr> - : RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm3:$uimm3), "$rd, $rs1, $uimm3"> { - bits<3> uimm3; +class RVPShiftB_ri<bits<3> f, bits<3> funct3, string opcodestr> + : RVPShift_ri<f, funct3, opcodestr, uimm3> { + bits<3> shamt; let Inst{26-23} = 0b0001; - let Inst{22-20} = uimm3; + let Inst{22-20} = shamt; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnaryWUF<bits<2> w, bits<5> uf, string opcodestr> +class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr> : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1), opcodestr, "$rd, $rs1"> { let Inst{31-27} = 0b11100; @@ -149,16 +151,16 @@ def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in { -def PSLLI_B : RVPUnaryImm3<0b000, "pslli.b">; -def PSLLI_H : RVPUnaryImm4<0b000, "pslli.h">; -def PSSLAI_H : RVPUnaryImm4<0b101, "psslai.h">; +def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">; +def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">; +def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">; } // Predicates = [HasStdExtP] let DecoderNamespace = "RV32Only", Predicates = [HasStdExtP, IsRV32] in -def SSLAI : RVPUnaryImm5<0b101, "sslai">; +def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">; let Predicates = [HasStdExtP, IsRV64] in { -def PSLLI_W : RVPUnaryImm5<0b000, "pslli.w">; -def PSSLAI_W : RVPUnaryImm5<0b101, "psslai.w">; +def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">; +def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in @@ -169,13 +171,13 @@ let Predicates = [HasStdExtP] in def PLI_B : PLI_B_i<0b10110100, "pli.b">; let Predicates = [HasStdExtP] in { -def PSEXT_H_B : RVPUnaryWUF<0b00, 0b00100, "psext.h.b">; -def PSABS_H : RVPUnaryWUF<0b00, 0b00111, "psabs.h">; -def PSABS_B : RVPUnaryWUF<0b10, 0b00111, "psabs.b">; +def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">; +def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">; +def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">; } // Predicates = [HasStdExtP] let Predicates = [HasStdExtP, IsRV64] in { -def PSEXT_W_B : RVPUnaryWUF<0b01, 0b00100, "psext.w.b">; -def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">; +def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">; +def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index dfa532a..6afc942d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -788,7 +788,7 @@ class VPseudoUSLoadNoMask<VReg RetClass, DAGOperand sewop = sew> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$vl, - sewop:$sew, vec_policy:$policy), []>, + sewop:$sew, vec_policy:$policy)>, RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -804,7 +804,7 @@ class VPseudoUSLoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew, - vec_policy:$policy), []>, + vec_policy:$policy)>, RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -821,7 +821,7 @@ class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> : RISCVVPseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$avl, - sew:$sew, vec_policy:$policy), []>, + sew:$sew, vec_policy:$policy)>, RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -837,7 +837,7 @@ class VPseudoUSLoadFFMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$avl, sew:$sew, - vec_policy:$policy), []>, + vec_policy:$policy)>, RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -854,7 +854,7 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, GPR:$rs2, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -870,7 +870,7 @@ class VPseudoSLoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, - sew:$sew, vec_policy:$policy), []>, + sew:$sew, vec_policy:$policy)>, RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -892,7 +892,7 @@ class VPseudoILoadNoMask<VReg RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, IdxClass:$rs2, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; @@ -914,7 +914,7 @@ class VPseudoILoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, IdxClass:$rs2, VMaskOp:$vm, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; @@ -933,7 +933,7 @@ class VPseudoUSStoreNoMask<VReg StClass, DAGOperand sewop = sew> : RISCVVPseudo<(outs), (ins StClass:$rd, GPRMemZeroOffset:$rs1, AVL:$vl, - sewop:$sew), []>, + sewop:$sew)>, RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -946,7 +946,7 @@ class VPseudoUSStoreMask<VReg StClass, int EEW> : RISCVVPseudo<(outs), (ins StClass:$rd, GPRMemZeroOffset:$rs1, - VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew)>, RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -960,7 +960,7 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW> : RISCVVPseudo<(outs), (ins StClass:$rd, GPRMemZeroOffset:$rs1, GPR:$rs2, - AVL:$vl, sew:$sew), []>, + AVL:$vl, sew:$sew)>, RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -973,7 +973,7 @@ class VPseudoSStoreMask<VReg StClass, int EEW> : RISCVVPseudo<(outs), (ins StClass:$rd, GPRMemZeroOffset:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew)>, RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -986,7 +986,7 @@ class VPseudoSStoreMask<VReg StClass, class VPseudoNullaryNoMask<VReg RegClass> : RISCVVPseudo<(outs RegClass:$rd), (ins RegClass:$passthru, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1015,7 +1015,7 @@ class VPseudoNullaryMask<VReg RegClass> : // Nullary for pseudo instructions. They are expanded in // RISCVExpandPseudoInsts pass. class VPseudoNullaryPseudoM<string BaseInst> : - RISCVVPseudo<(outs VR:$rd), (ins AVL:$vl, sew_mask:$sew), []> { + RISCVVPseudo<(outs VR:$rd), (ins AVL:$vl, sew_mask:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1031,7 +1031,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1047,7 +1047,7 @@ class VPseudoUnaryNoMaskNoPolicy<DAGOperand RetClass, string Constraint = "", bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), - (ins OpClass:$rs2, AVL:$vl, sew_mask:$sew), []> { + (ins OpClass:$rs2, AVL:$vl, sew_mask:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1063,7 +1063,7 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, vec_rm:$rm, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1084,7 +1084,7 @@ class VPseudoUnaryMask<VReg RetClass, DAGOperand sewop = sew> : RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, sewop:$sew, vec_policy:$policy), []> { + VMaskOp:$vm, AVL:$vl, sewop:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1104,7 +1104,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2, VMaskOp:$vm, vec_rm:$rm, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1139,7 +1139,7 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, class VPseudoUnaryNoMaskGPROut : RISCVVPseudo<(outs GPR:$rd), - (ins VR:$rs2, AVL:$vl, sew_mask:$sew), []> { + (ins VR:$rs2, AVL:$vl, sew_mask:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1149,7 +1149,7 @@ class VPseudoUnaryNoMaskGPROut : class VPseudoUnaryMaskGPROut : RISCVVPseudo<(outs GPR:$rd), - (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew_mask:$sew), []> { + (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew_mask:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1163,7 +1163,7 @@ class VPseudoUnaryAnyMask<VReg RetClass, VReg Op1Class> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, - VR:$vm, AVL:$vl, sew:$sew), []> { + VR:$vm, AVL:$vl, sew:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1197,7 +1197,7 @@ class VPseudoBinaryNoMaskPolicy<VReg RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1241,7 +1241,7 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass, (ins GetVRegNoV0<RetClass>.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, VMaskOp:$vm, vec_rm:$rm, AVL:$vl, - sew:$sew, vec_policy:$policy), []> { + sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1266,7 +1266,7 @@ class VPseudoTiedBinaryNoMask<VReg RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew, - vec_policy:$policy), []> { + vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1288,7 +1288,7 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass, (ins RetClass:$rs2, Op2Class:$rs1, vec_rm:$rm, AVL:$vl, sew:$sew, - vec_policy:$policy), []> { + vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1380,7 +1380,7 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass, Op1Class:$rs2, Op2Class:$rs1, VMaskOp:$vm, vec_rm:$rm, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1451,7 +1451,7 @@ class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass, Op2Class:$rs1, VMaskOp:$vm, vec_rm:$rm, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1480,7 +1480,7 @@ class VPseudoBinaryCarry<VReg RetClass, (ins Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, sew:$sew), (ins Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, sew:$sew)), []> { + AVL:$vl, sew:$sew))> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1498,7 +1498,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMV0:$carry, AVL:$vl, sew:$sew), []> { + VMV0:$carry, AVL:$vl, sew:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1516,7 +1516,7 @@ class VPseudoTernaryNoMask<VReg RetClass, string Constraint> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, sew:$sew), []> { + AVL:$vl, sew:$sew)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1532,7 +1532,7 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass, bits<2> TargetConstraintType = 1> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, sew:$sew, vec_policy:$policy), []> { + AVL:$vl, sew:$sew, vec_policy:$policy)> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1570,7 +1570,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, bits<4> NF> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$vl, - sew:$sew, vec_policy:$policy), []>, + sew:$sew, vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1587,7 +1587,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew, - vec_policy:$policy), []>, + vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1605,7 +1605,7 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass, bits<4> NF> : RISCVVPseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMemZeroOffset:$rs1, AVL:$avl, - sew:$sew, vec_policy:$policy), []>, + sew:$sew, vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1622,7 +1622,7 @@ class VPseudoUSSegLoadFFMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, VMaskOp:$vm, AVL:$avl, sew:$sew, - vec_policy:$policy), []>, + vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1640,7 +1640,7 @@ class VPseudoSSegLoadNoMask<VReg RetClass, bits<4> NF> : RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMemZeroOffset:$rs1, GPR:$offset, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1657,7 +1657,7 @@ class VPseudoSSegLoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, GPR:$offset, VMaskOp:$vm, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> { let mayLoad = 1; let mayStore = 0; @@ -1679,7 +1679,7 @@ class VPseudoISegLoadNoMask<VReg RetClass, RISCVVPseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMemZeroOffset:$rs1, IdxClass:$offset, AVL:$vl, sew:$sew, - vec_policy:$policy), []>, + vec_policy:$policy)>, RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; @@ -1701,7 +1701,7 @@ class VPseudoISegLoadMask<VReg RetClass, RISCVVPseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMemZeroOffset:$rs1, IdxClass:$offset, VMaskOp:$vm, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy)>, RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 1; let mayStore = 0; @@ -1735,7 +1735,7 @@ class VPseudoUSSegStoreMask<VReg ValClass, bits<4> NF> : RISCVVPseudo<(outs), (ins ValClass:$rd, GPRMemZeroOffset:$rs1, - VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew)>, RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -1750,7 +1750,7 @@ class VPseudoSSegStoreNoMask<VReg ValClass, bits<4> NF> : RISCVVPseudo<(outs), (ins ValClass:$rd, GPRMemZeroOffset:$rs1, GPR:$offset, - AVL:$vl, sew:$sew), []>, + AVL:$vl, sew:$sew)>, RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -1764,7 +1764,7 @@ class VPseudoSSegStoreMask<VReg ValClass, bits<4> NF> : RISCVVPseudo<(outs), (ins ValClass:$rd, GPRMemZeroOffset:$rs1, GPR: $offset, - VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew)>, RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> { let mayLoad = 0; let mayStore = 1; @@ -1782,7 +1782,7 @@ class VPseudoISegStoreNoMask<VReg ValClass, bit Ordered> : RISCVVPseudo<(outs), (ins ValClass:$rd, GPRMemZeroOffset:$rs1, IdxClass: $index, - AVL:$vl, sew:$sew), []>, + AVL:$vl, sew:$sew)>, RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; let mayStore = 1; @@ -1799,7 +1799,7 @@ class VPseudoISegStoreMask<VReg ValClass, bit Ordered> : RISCVVPseudo<(outs), (ins ValClass:$rd, GPRMemZeroOffset:$rs1, IdxClass: $index, - VMaskOp:$vm, AVL:$vl, sew:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew)>, RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> { let mayLoad = 0; let mayStore = 1; @@ -6703,7 +6703,7 @@ let Predicates = [HasVInstructions] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let HasSEWOp = 1, BaseInstr = VMV_X_S in def PseudoVMV_X_S: - RISCVVPseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew), []>, + RISCVVPseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew)>, Sched<[WriteVMovXS, ReadVMovXS]>; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1, Constraints = "$rd = $passthru" in @@ -6723,8 +6723,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach f = FPList in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in def "PseudoVFMV_" # f.FX # "_S" : - RISCVVPseudo<(outs f.fprclass:$rd), - (ins VR:$rs2, sew:$sew), []>, + RISCVVPseudo<(outs f.fprclass:$rd), (ins VR:$rs2, sew:$sew)>, Sched<[WriteVMovFS, ReadVMovFS]>; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1, Constraints = "$rd = $passthru" in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td index 1bb67f4..c75addd9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td @@ -11,6 +11,20 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// +// RISC-V specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDT_NDS_FMV_BF16_X + : SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, XLenVT>]>; +def SDT_NDS_FMV_X_ANYEXTBF16 + : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, bf16>]>; + +def riscv_nds_fmv_bf16_x + : SDNode<"RISCVISD::NDS_FMV_BF16_X", SDT_NDS_FMV_BF16_X>; +def riscv_nds_fmv_x_anyextbf16 + : SDNode<"RISCVISD::NDS_FMV_X_ANYEXTBF16", SDT_NDS_FMV_X_ANYEXTBF16>; + +//===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -773,6 +787,25 @@ def : Pat<(bf16 (fpround FPR32:$rs)), (NDS_FCVT_BF16_S FPR32:$rs)>; } // Predicates = [HasVendorXAndesBFHCvt] +let isCodeGenOnly = 1 in { +def NDS_FMV_BF16_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR16, GPR, "fmv.w.x">, + Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; +def NDS_FMV_X_BF16 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR16, "fmv.x.w">, + Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; +} + +let Predicates = [HasVendorXAndesBFHCvt] in { +def : Pat<(riscv_nds_fmv_bf16_x GPR:$src), (NDS_FMV_BF16_X GPR:$src)>; +def : Pat<(riscv_nds_fmv_x_anyextbf16 (bf16 FPR16:$src)), + (NDS_FMV_X_BF16 (bf16 FPR16:$src))>; +} // Predicates = [HasVendorXAndesBFHCvt] + +// Use flh/fsh to load/store bf16 if zfh is enabled. +let Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt] in { +def : LdPat<load, FLH, bf16>; +def : StPat<store, FSH, FPR16, bf16>; +} // Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt] + let Predicates = [HasVendorXAndesVBFHCvt] in { defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16; defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index f173440..ed1a60a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -291,31 +291,31 @@ def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), let Predicates = [HasStdExtZcb, HasStdExtZbb] in{ def : CompressPat<(SEXT_B GPRC:$rs1, GPRC:$rs1), - (C_SEXT_B GPRC:$rs1, GPRC:$rs1)>; + (C_SEXT_B GPRC:$rs1)>; def : CompressPat<(SEXT_H GPRC:$rs1, GPRC:$rs1), - (C_SEXT_H GPRC:$rs1, GPRC:$rs1)>; + (C_SEXT_H GPRC:$rs1)>; } // Predicates = [HasStdExtZcb, HasStdExtZbb] let Predicates = [HasStdExtZcb, HasStdExtZbb] in{ def : CompressPat<(ZEXT_H_RV32 GPRC:$rs1, GPRC:$rs1), - (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>; + (C_ZEXT_H GPRC:$rs1)>; def : CompressPat<(ZEXT_H_RV64 GPRC:$rs1, GPRC:$rs1), - (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>; + (C_ZEXT_H GPRC:$rs1)>; } // Predicates = [HasStdExtZcb, HasStdExtZbb] let Predicates = [HasStdExtZcb] in{ def : CompressPat<(ANDI GPRC:$rs1, GPRC:$rs1, 255), - (C_ZEXT_B GPRC:$rs1, GPRC:$rs1)>; + (C_ZEXT_B GPRC:$rs1)>; } // Predicates = [HasStdExtZcb] let Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] in{ def : CompressPat<(ADD_UW GPRC:$rs1, GPRC:$rs1, X0), - (C_ZEXT_W GPRC:$rs1, GPRC:$rs1)>; + (C_ZEXT_W GPRC:$rs1)>; } // Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] let Predicates = [HasStdExtZcb] in{ def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1), - (C_NOT GPRC:$rs1, GPRC:$rs1)>; + (C_NOT GPRC:$rs1)>; } let Predicates = [HasStdExtZcb] in{ diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index 0565fcd..30d8f85 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -224,10 +224,10 @@ bool RISCVTargetLowering::lowerInterleavedLoad( Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes); Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset); - // Note: Same VL as above, but i32 not xlen due to signature of - // vp.strided.load - VL = Builder.CreateElementCount(Builder.getInt32Ty(), - VTy->getElementCount()); + // For rv64, need to truncate i64 to i32 to match signature. As VL is at most + // the number of active lanes (which is bounded by i32) this is safe. + VL = Builder.CreateTrunc(VL, Builder.getInt32Ty()); + CallInst *CI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load, {VTy, BasePtr->getType(), Stride->getType()}, @@ -302,10 +302,9 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes); Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset); - // Note: Same VL as above, but i32 not xlen due to signature of - // vp.strided.store - VL = Builder.CreateElementCount(Builder.getInt32Ty(), - VTy->getElementCount()); + // For rv64, need to truncate i64 to i32 to match signature. As VL is at + // most the number of active lanes (which is bounded by i32) this is safe. + VL = Builder.CreateTrunc(VL, Builder.getInt32Ty()); CallInst *CI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index e87f452..ccb39e8 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -268,6 +268,11 @@ def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)> { let DiagnosticString = "register must be a GPR excluding zero (x0)"; } +def GPRNoX2 : GPRRegisterClass<(sub GPR, X2)> { + let DiagnosticType = "InvalidRegClassGPRNoX2"; + let DiagnosticString = "register must be a GPR excluding sp (x2)"; +} + def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)> { let DiagnosticType = "InvalidRegClassGPRNoX0X2"; let DiagnosticString = "register must be a GPR excluding zero (x0) and sp (x2)"; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 3e286a7..bf23812 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -24,6 +24,67 @@ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0 bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); } +defvar SMX60VLEN = 256; +defvar SMX60DLEN = !div(SMX60VLEN, 2); + +class Get1248Latency<string mx> { + int c = !cond( + !eq(mx, "M2") : 2, + !eq(mx, "M4") : 4, + !eq(mx, "M8") : 8, + true: 1 + ); +} + +// Used for: logical opsz, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides +class Get4816Latency<string mx> { + int c = !cond( + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 16, + true: 4 + ); +} + +// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max +class Get458Latency<string mx> { + int c = !cond( + !eq(mx, "M4") : 5, + !eq(mx, "M8") : 8, + true: 4 + ); +} + +// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs +// Used for: widening operations +class Get4588Latency<string mx> { + int c = !cond( + !eq(mx, "M2") : 5, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback + true: 4 + ); +} + +// Used for: mask-producing comparisons, carry ops with mask, FP comparisons +class Get461018Latency<string mx> { + int c = !cond( + !eq(mx, "M2") : 6, + !eq(mx, "M4") : 10, + !eq(mx, "M8") : 18, + true: 4 + ); +} + +// Used for: e64 multiply pattern, complex ops +class Get781632Latency<string mx> { + int c = !cond( + !eq(mx, "M2") : 8, + !eq(mx, "M4") : 16, + !eq(mx, "M8") : 32, + true: 7 + ); +} + def SpacemitX60Model : SchedMachineModel { let IssueWidth = 2; // dual-issue let MicroOpBufferSize = 0; // in-order @@ -322,58 +383,96 @@ foreach LMul = [1, 2, 4, 8] in { foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c; - defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>; - - defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>; - - defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in { + defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in { + // Pattern of vadd, vsub, vrsub: 4/4/5/8 + // Pattern of vand, vor, vxor: 4/4/8/16 + // They are grouped together, so we used the worst case 4/4/8/16 + // TODO: use InstRW to override individual instructions' scheduling data + defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [SMX60_VIEU], mx, IsWorstCase>; + + defm "" : LMULWriteResMX<"WriteVICALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>; + } + + let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in { + defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>; + } + + // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, + // e64 = 7,8,16,32. We use the worst-case until we can split the SEW. + // TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites + let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in { + defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + } } // Widening +// Pattern of vwmul, vwmacc, etc: e8/e16 = 4/4/5/8, e32 = 5,5,5,8 +// We use the worst-case for all. foreach mx = SchedMxListW in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c; - defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [4] in { + defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SMX60_VIEU], mx, IsWorstCase>; + } } -// Vector Integer Division and Remainder +// Division and remainder operations +// Pattern of vdivu: 11/11/11/20/40/80/160 +// Pattern of vdiv: 12/12/12/22/44/88/176 +// Pattern of vremu: 12/12/12/22/44/88/176 +// Pattern of vrem: 13/13/13/24/48/96/192 +// We use for all: 12/12/12/24/48/96/192 +// TODO: Create separate WriteVIRem to more closely match the latencies foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>; + // Slightly reduced for fractional LMULs + defvar Multiplier = !cond( + !eq(mx, "MF8") : 12, + !eq(mx, "MF4") : 12, + !eq(mx, "MF2") : 12, + true: 24 + ); + + let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>; + } } } @@ -381,12 +480,21 @@ foreach mx = SchedMxList in { foreach mx = SchedMxListW in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c; - defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>; + // Slightly increased for integer LMULs + defvar Multiplier = !cond( + !eq(mx, "M2") : 2, + !eq(mx, "M4") : 2, + true: 1 + ); + + let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [SMX60_VIEU], mx, IsWorstCase>; + } } // 12. Vector Fixed-Point Arithmetic Instructions diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index b43b915..da6ac2f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -104,11 +104,6 @@ static cl::opt<bool> EnablePostMISchedLoadStoreClustering( cl::desc("Enable PostRA load and store clustering in the machine scheduler"), cl::init(true)); -static cl::opt<bool> - EnableVLOptimizer("riscv-enable-vl-optimizer", - cl::desc("Enable the RISC-V VL Optimizer pass"), - cl::init(true), cl::Hidden); - static cl::opt<bool> DisableVectorMaskMutation( "riscv-disable-vector-mask-mutation", cl::desc("Disable the vector mask scheduling mutation"), cl::init(false), @@ -617,8 +612,7 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVPreRAExpandPseudoPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); - if (EnableVLOptimizer) - addPass(createRISCVVLOptimizerPass()); + addPass(createRISCVVLOptimizerPass()); } addPass(createRISCVInsertReadWriteCSRPass()); diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index b53d919..c946451 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -114,14 +114,6 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() { return new RISCVVLOptimizer(); } -/// Return true if R is a physical or virtual vector register, false otherwise. -static bool isVectorRegClass(Register R, const MachineRegisterInfo *MRI) { - if (R.isPhysical()) - return RISCV::VRRegClass.contains(R); - const TargetRegisterClass *RC = MRI->getRegClass(R); - return RISCVRI::isVRegClass(RC->TSFlags); -} - LLVM_ATTRIBUTE_UNUSED static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) { OI.print(OS); @@ -183,37 +175,28 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor, return Log2EEW; } -/// Check whether MO is a mask operand of MI. -static bool isMaskOperand(const MachineInstr &MI, const MachineOperand &MO, - const MachineRegisterInfo *MRI) { - - if (!MO.isReg() || !isVectorRegClass(MO.getReg(), MRI)) - return false; - - const MCInstrDesc &Desc = MI.getDesc(); - return Desc.operands()[MO.getOperandNo()].RegClass == RISCV::VMV0RegClassID; -} - static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { const MachineInstr &MI = *MO.getParent(); + const MCInstrDesc &Desc = MI.getDesc(); const RISCVVPseudosTable::PseudoInfo *RVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); assert(RVV && "Could not find MI in PseudoTable"); // MI has a SEW associated with it. The RVV specification defines // the EEW of each operand and definition in relation to MI.SEW. - unsigned MILog2SEW = - MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + unsigned MILog2SEW = MI.getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); - const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()); - const bool IsTied = RISCVII::isTiedPseudo(MI.getDesc().TSFlags); + const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc); + const bool IsTied = RISCVII::isTiedPseudo(Desc.TSFlags); bool IsMODef = MO.getOperandNo() == 0 || (HasPassthru && MO.getOperandNo() == MI.getNumExplicitDefs()); // All mask operands have EEW=1 - if (isMaskOperand(MI, MO, MRI)) + const MCOperandInfo &Info = Desc.operands()[MO.getOperandNo()]; + if (Info.OperandType == MCOI::OPERAND_REGISTER && + Info.RegClass == RISCV::VMV0RegClassID) return 0; // switch against BaseInstr to reduce number of cases that need to be @@ -1296,8 +1279,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) && "Instruction shouldn't be supported if elements depend on VL"); - assert(MI.getOperand(0).isReg() && - isVectorRegClass(MI.getOperand(0).getReg(), MRI) && + assert(RISCVRI::isVRegClass( + MRI->getRegClass(MI.getOperand(0).getReg())->TSFlags) && "All supported instructions produce a vector register result"); LLVM_DEBUG(dbgs() << "Found a candidate for VL reduction: " << MI << "\n"); |