Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp        |   2
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 148
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp   |  24
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td                   |   7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td                      | 108
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp              |   5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td                  |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td                 |  74
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td              |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZa.td                |  56
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td            |  16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td                | 118
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrPredicates.td            |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVProcessors.td                 |   3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td               |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td               | 178
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td                  |  71
17 files changed, 484 insertions, 338 deletions
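
Among the changes below, the GlobalISel selector gains direct G_SEXT/G_ZEXT handling: it prefers sext.w, add.uw, sext.h/zext.h, or pack/packw when the relevant extension is available, and otherwise falls back to a shift pair. As a rough illustration of the fallback identity only (not part of the patch; the function name and XLEN = 64 are assumptions):

#include <cassert>
#include <cstdint>

// Extend the low srcSize bits of x the way the selector's shift-pair
// fallback does: SLLI by XLEN-srcSize, then SRAI (signed) or SRLI (unsigned).
uint64_t extendViaShiftPair(uint64_t x, unsigned srcSize, bool isSigned) {
  const unsigned amount = 64 - srcSize;  // STI.getXLen() - SrcSize
  const uint64_t shifted = x << amount;  // SLLI
  return isSigned ? (uint64_t)((int64_t)shifted >> amount)  // SRAI
                  : shifted >> amount;                      // SRLI
}

int main() {
  assert(extendViaShiftPair(0x8001, 16, true) == 0xFFFFFFFFFFFF8001ULL);  // like sext.h
  assert(extendViaShiftPair(0xFFFF8001, 16, false) == 0x8001ULL);         // like zext.h
  return 0;
}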
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index ecfb5fe..eb41588 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -334,7 +334,7 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
   if (EltTy->isIntegerTy(64))
     return Subtarget.hasVInstructionsI64();
   if (EltTy->isHalfTy())
-    return Subtarget.hasVInstructionsF16();
+    return Subtarget.hasVInstructionsF16Minimal();
   if (EltTy->isBFloatTy())
     return Subtarget.hasVInstructionsBF16Minimal();
   if (EltTy->isFloatTy())
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 71c21e4..53633ea 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -675,6 +675,45 @@ static void getOperandsForBranch(Register CondReg, RISCVCC::CondCode &CC,
   CC = getRISCVCCFromICmp(Pred);
 }
 
+/// Select the RISC-V Zalasr opcode for the G_LOAD or G_STORE operation
+/// \p GenericOpc, appropriate for the GPR register bank and of memory access
+/// size \p OpSize.
+static unsigned selectZalasrLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
+  const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
+  switch (OpSize) {
+  default:
+    llvm_unreachable("Unexpected memory size");
+  case 8:
+    return IsStore ? RISCV::SB_RL : RISCV::LB_AQ;
+  case 16:
+    return IsStore ? RISCV::SH_RL : RISCV::LH_AQ;
+  case 32:
+    return IsStore ? RISCV::SW_RL : RISCV::LW_AQ;
+  case 64:
+    return IsStore ? RISCV::SD_RL : RISCV::LD_AQ;
+  }
+}
+
+/// Select the RISC-V regimm opcode for the G_LOAD or G_STORE operation
+/// \p GenericOpc, appropriate for the GPR register bank and of memory access
+/// size \p OpSize. \returns \p GenericOpc if the combination is unsupported.
+static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
+  const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
+  switch (OpSize) {
+  case 8:
+    // Prefer unsigned due to no c.lb in Zcb.
+    return IsStore ? RISCV::SB : RISCV::LBU;
+  case 16:
+    return IsStore ? RISCV::SH : RISCV::LH;
+  case 32:
+    return IsStore ? RISCV::SW : RISCV::LW;
+  case 64:
+    return IsStore ? RISCV::SD : RISCV::LD;
+  }
+
+  return GenericOpc;
+}
+
 bool RISCVInstructionSelector::select(MachineInstr &MI) {
   MachineIRBuilder MIB(MI);
 
@@ -736,6 +775,62 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
     MI.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_SEXT: {
+    bool IsSigned = Opc != TargetOpcode::G_ZEXT;
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI->getType(SrcReg);
+    unsigned SrcSize = SrcTy.getSizeInBits();
+
+    if (SrcTy.isVector())
+      return false; // Should be handled by imported patterns.
+
+    assert((*RBI.getRegBank(DstReg, *MRI, TRI)).getID() ==
+               RISCV::GPRBRegBankID &&
+           "Unexpected ext regbank");
+
+    // Use addiw SrcReg, 0 (sext.w) for i32.
+    if (IsSigned && SrcSize == 32) {
+      MI.setDesc(TII.get(RISCV::ADDIW));
+      MI.addOperand(MachineOperand::CreateImm(0));
+      return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+    }
+
+    // Use add.uw SrcReg, X0 (zext.w) for i32 with Zba.
+    if (!IsSigned && SrcSize == 32 && STI.hasStdExtZba()) {
+      MI.setDesc(TII.get(RISCV::ADD_UW));
+      MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false));
+      return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+    }
+
+    // Use sext.h/zext.h for i16 with Zbb.
+    if (SrcSize == 16 && STI.hasStdExtZbb()) {
+      MI.setDesc(TII.get(IsSigned          ? RISCV::SEXT_H
+                         : STI.isRV64() ? RISCV::ZEXT_H_RV64
+                                        : RISCV::ZEXT_H_RV32));
+      return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+    }
+
+    // Use pack(w) SrcReg, X0 for i16 zext with Zbkb.
+    if (!IsSigned && SrcSize == 16 && STI.hasStdExtZbkb()) {
+      MI.setDesc(TII.get(STI.is64Bit() ? RISCV::PACKW : RISCV::PACK));
+      MI.addOperand(MachineOperand::CreateReg(RISCV::X0, /*isDef=*/false));
+      return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+    }
+
+    // Fall back to shift pair.
+    auto ShiftLeft =
+        MIB.buildInstr(RISCV::SLLI, {&RISCV::GPRRegClass}, {SrcReg})
+            .addImm(STI.getXLen() - SrcSize);
+    constrainSelectedInstRegOperands(*ShiftLeft, TII, TRI, RBI);
+    auto ShiftRight = MIB.buildInstr(IsSigned ? RISCV::SRAI : RISCV::SRLI,
+                                     {DstReg}, {ShiftLeft})
+                          .addImm(STI.getXLen() - SrcSize);
+    constrainSelectedInstRegOperands(*ShiftRight, TII, TRI, RBI);
+    MI.eraseFromParent();
+    return true;
+  }
   case TargetOpcode::G_FCONSTANT: {
     // TODO: Use constant pool for complex constants.
     Register DstReg = MI.getOperand(0).getReg();
@@ -836,6 +931,59 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
     return selectImplicitDef(MI, MIB);
   case TargetOpcode::G_UNMERGE_VALUES:
     return selectUnmergeValues(MI, MIB);
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE: {
+    GLoadStore &LdSt = cast<GLoadStore>(MI);
+    const Register ValReg = LdSt.getReg(0);
+    const Register PtrReg = LdSt.getPointerReg();
+    LLT PtrTy = MRI->getType(PtrReg);
+
+    const RegisterBank &RB = *RBI.getRegBank(ValReg, *MRI, TRI);
+    if (RB.getID() != RISCV::GPRBRegBankID)
+      return false;
+
+#ifndef NDEBUG
+    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, *MRI, TRI);
+    // Check that the pointer register is valid.
+    assert(PtrRB.getID() == RISCV::GPRBRegBankID &&
+           "Load/Store pointer operand isn't a GPR");
+    assert(PtrTy.isPointer() && "Load/Store pointer operand isn't a pointer");
+#endif
+
+    // Can only handle AddressSpace 0.
+    if (PtrTy.getAddressSpace() != 0)
+      return false;
+
+    unsigned MemSize = LdSt.getMemSizeInBits().getValue();
+    AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
+
+    if (isStrongerThanMonotonic(Order)) {
+      MI.setDesc(TII.get(selectZalasrLoadStoreOp(Opc, MemSize)));
+      return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+    }
+
+    const unsigned NewOpc = selectRegImmLoadStoreOp(MI.getOpcode(), MemSize);
+    if (NewOpc == MI.getOpcode())
+      return false;
+
+    // Check if we can fold anything into the addressing mode.
+    auto AddrModeFns = selectAddrRegImm(MI.getOperand(1));
+    if (!AddrModeFns)
+      return false;
+
+    // Folded something. Create a new instruction and return it.
+    auto NewInst = MIB.buildInstr(NewOpc, {}, {}, MI.getFlags());
+    if (isa<GStore>(MI))
+      NewInst.addUse(ValReg);
+    else
+      NewInst.addDef(ValReg);
+    NewInst.cloneMemRefs(MI);
+    for (auto &Fn : *AddrModeFns)
+      Fn(NewInst);
+    MI.eraseFromParent();
+
+    return constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI);
+  }
   default:
     return false;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index a537904..5dd4bf4 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -166,7 +166,7 @@ static unsigned getLRForRMW32(AtomicOrdering Ordering,
       return RISCV::LR_W;
     return RISCV::LR_W_AQ;
   case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::LR_W_AQ_RL;
+    return RISCV::LR_W_AQRL;
   }
 }
 
@@ -210,7 +210,7 @@ static unsigned getLRForRMW64(AtomicOrdering Ordering,
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::LR_D_AQ_RL;
+    return RISCV::LR_D_AQRL;
  }
 }
 
@@ -287,8 +287,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
     break;
   }
   BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
-      .addReg(AddrReg)
-      .addReg(ScratchReg);
+      .addReg(ScratchReg)
+      .addReg(AddrReg);
   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
       .addReg(ScratchReg)
       .addReg(RISCV::X0)
@@ -375,8 +375,8 @@ static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                       ScratchReg);
 
   BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
-      .addReg(AddrReg)
-      .addReg(ScratchReg);
+      .addReg(ScratchReg)
+      .addReg(AddrReg);
   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
       .addReg(ScratchReg)
       .addReg(RISCV::X0)
@@ -535,8 +535,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
   //   sc.w scratch1, scratch1, (addr)
   //   bnez scratch1, loop
   BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
-      .addReg(AddrReg)
-      .addReg(Scratch1Reg);
+      .addReg(Scratch1Reg)
+      .addReg(AddrReg);
   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
       .addReg(Scratch1Reg)
       .addReg(RISCV::X0)
@@ -674,8 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
   //   bnez scratch, loophead
   BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
           ScratchReg)
-      .addReg(AddrReg)
-      .addReg(NewValReg);
+      .addReg(NewValReg)
+      .addReg(AddrReg);
   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
       .addReg(ScratchReg)
       .addReg(RISCV::X0)
@@ -707,8 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
             MaskReg, ScratchReg);
   BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
           ScratchReg)
-      .addReg(AddrReg)
-      .addReg(ScratchReg);
+      .addReg(ScratchReg)
+      .addReg(AddrReg);
   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
       .addReg(ScratchReg)
       .addReg(RISCV::X0)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 27cf057..333b693 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -265,7 +265,7 @@ def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
 def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">;
 
 def FeatureStdExtZalasr
-    : RISCVExperimentalExtension<0, 1, "Load-Acquire and Store-Release Instructions">;
+    : RISCVExperimentalExtension<0, 9, "Load-Acquire and Store-Release Instructions">;
 def HasStdExtZalasr : Predicate<"Subtarget->hasStdExtZalasr()">,
                       AssemblerPredicate<(all_of FeatureStdExtZalasr),
                                          "'Zalasr' (Load-Acquire and Store-Release "
Instructions)">; @@ -1823,6 +1823,11 @@ def TuneConditionalCompressedMoveFusion def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">; def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">; +def TuneHasSingleElementVecFP64 + : SubtargetFeature<"single-element-vec-fp64", "HasSingleElementVectorFP64", "true", + "Certain vector FP64 operations produce a single result " + "element per cycle">; + def TuneMIPSP8700 : SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700", "MIPS p8700 processor">; diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 6d01250..eba35ef 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -100,119 +100,11 @@ def : LdPat<load, LD, PtrVT>; def : StPat<store, SD, GPR, PtrVT>; } -// Load and store patterns for i16, needed because Zfh makes s16 load/store -// legal and regbank select may not constrain registers to FP. -def : LdPat<load, LH, i16>; -def : StPat<store, SH, GPR, i16>; - -def : LdPat<extloadi8, LBU, i16>; // Prefer unsigned due to no c.lb in Zcb. -def : StPat<truncstorei8, SB, GPR, i16>; - -let Predicates = [HasAtomicLdSt] in { - // Prefer unsigned due to no c.lb in Zcb. - def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>; - def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>; - - def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>; - def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>; -} - -let Predicates = [HasAtomicLdSt, IsRV64] in { - // Load pattern is in RISCVInstrInfoA.td and shared with RV32. - def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>; -} - //===----------------------------------------------------------------------===// // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// let Predicates = [IsRV64] in { -def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. -def : LdPat<extloadi16, LH, i32>; - -def : StPat<truncstorei8, SB, GPR, i32>; -def : StPat<truncstorei16, SH, GPR, i32>; - -def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>; - def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; } - -let Predicates = [IsRV64, NoStdExtZba] in -def : Pat<(zext (i32 GPR:$src)), (SRLI (i64 (SLLI GPR:$src, 32)), 32)>; - -let Predicates = [IsRV32, NoStdExtZbb, NoStdExtZbkb] in -def : Pat<(XLenVT (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 16)), 16)>; - -let Predicates = [IsRV64, NoStdExtZbb, NoStdExtZbkb] in { -def : Pat<(i64 (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>; -def : Pat<(i32 (zext (i16 GPR:$src))), - (SRLI (XLenVT (SLLI GPR:$src, 48)), 48)>; -} - -let Predicates = [IsRV32, NoStdExtZbb] in -def : Pat<(XLenVT (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 16)), 16)>; - -let Predicates = [IsRV64, NoStdExtZbb] in { -def : Pat<(i64 (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>; -def : Pat<(i32 (sext (i16 GPR:$src))), - (SRAI (XLenVT (SLLI GPR:$src, 48)), 48)>; -} - -//===----------------------------------------------------------------------===// -// Zb* RV64 patterns not used by SelectionDAG. 
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasStdExtZba, IsRV64] in {
-def : Pat<(zext (i32 GPR:$src)), (ADD_UW GPR:$src, (XLenVT X0))>;
-}
-
-let Predicates = [HasStdExtZbb] in
-def : Pat<(i32 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>;
-let Predicates = [HasStdExtZbb, IsRV64] in
-def : Pat<(i64 (sext (i16 GPR:$rs))), (SEXT_H GPR:$rs)>;
-
-let Predicates = [HasStdExtZbb, IsRV32] in
-def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV32 GPR:$rs)>;
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : Pat<(i64 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>;
-def : Pat<(i32 (zext (i16 GPR:$rs))), (ZEXT_H_RV64 GPR:$rs)>;
-}
-
-let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV32] in
-def : Pat<(i32 (zext (i16 GPR:$rs))), (PACK GPR:$rs, (XLenVT X0))>;
-let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV64] in {
-def : Pat<(i64 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
-def : Pat<(i32 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
-}
-
-//===----------------------------------------------------------------------===//
-// Zalasr patterns not used by SelectionDAG
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasStdExtZalasr] in {
-  // the sequentially consistent loads use
-  // .aq instead of .aqrl to match the psABI/A.7
-  def : PatLAQ<acquiring_load<atomic_load_aext_8>, LB_AQ, i16>;
-  def : PatLAQ<seq_cst_load<atomic_load_aext_8>, LB_AQ, i16>;
-
-  def : PatLAQ<acquiring_load<atomic_load_nonext_16>, LH_AQ, i16>;
-  def : PatLAQ<seq_cst_load<atomic_load_nonext_16>, LH_AQ, i16>;
-
-  def : PatSRL<releasing_store<atomic_store_8>, SB_RL, i16>;
-  def : PatSRL<seq_cst_store<atomic_store_8>, SB_RL, i16>;
-
-  def : PatSRL<releasing_store<atomic_store_16>, SH_RL, i16>;
-  def : PatSRL<seq_cst_store<atomic_store_16>, SH_RL, i16>;
-}
-
-let Predicates = [HasStdExtZalasr, IsRV64] in {
-  // Load pattern is in RISCVInstrInfoZalasr.td and shared with RV32.
-  def : PatSRL<releasing_store<atomic_store_32>, SW_RL, i32>;
-  def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL, i32>;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a3a4cf2..7123a2d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15721,8 +15721,7 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
 
   // Emit a negate of the setcc.
-  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
-                     N0.getOperand(0));
+  return DAG.getNegative(N0.getOperand(0), DL, VT);
 }
 
 static SDValue performADDCombine(SDNode *N,
@@ -16974,7 +16973,7 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
 
   // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
   if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
-    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+    return DAG.getNegative(Src, DL, VT);
 
   // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
   if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9855c47..7a14929 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1980,7 +1980,7 @@ def : LdPat<sextloadi8, LB>;
 def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
 def : LdPat<sextloadi16, LH>;
 def : LdPat<extloadi16, LH>;
-def : LdPat<load, LW, i32>;
+def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
 def : LdPat<zextloadi8, LBU>;
 def : LdPat<zextloadi16, LHU>;
 
@@ -1994,7 +1994,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
 
 def : StPat<truncstorei8, SB, GPR, XLenVT>;
 def : StPat<truncstorei16, SH, GPR, XLenVT>;
-def : StPat<store, SW, GPR, i32>;
+def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
 
 /// Fences
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 25accd9..571d72f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -24,36 +24,36 @@ class LR_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
 }
 
 multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> {
-  def ""     : LR_r<0, 0, funct3, opcodestr>;
-  def _AQ    : LR_r<1, 0, funct3, opcodestr # ".aq">;
-  def _RL    : LR_r<0, 1, funct3, opcodestr # ".rl">;
-  def _AQ_RL : LR_r<1, 1, funct3, opcodestr # ".aqrl">;
+  def ""    : LR_r<0, 0, funct3, opcodestr>;
+  def _AQ   : LR_r<1, 0, funct3, opcodestr # ".aq">;
+  def _RL   : LR_r<0, 1, funct3, opcodestr # ".rl">;
+  def _AQRL : LR_r<1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 class SC_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
     : RVInstRAtomic<0b00011, aq, rl, funct3, OPC_AMO,
-                    (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+                    (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
                     opcodestr, "$rd, $rs2, $rs1">;
 
 multiclass SC_r_aq_rl<bits<3> funct3, string opcodestr> {
-  def ""     : SC_r<0, 0, funct3, opcodestr>;
-  def _AQ    : SC_r<1, 0, funct3, opcodestr # ".aq">;
-  def _RL    : SC_r<0, 1, funct3, opcodestr # ".rl">;
-  def _AQ_RL : SC_r<1, 1, funct3, opcodestr # ".aqrl">;
+  def ""    : SC_r<0, 0, funct3, opcodestr>;
+  def _AQ   : SC_r<1, 0, funct3, opcodestr # ".aq">;
+  def _RL   : SC_r<0, 1, funct3, opcodestr # ".rl">;
+  def _AQRL : SC_r<1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
 class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr>
     : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
-                    (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+                    (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
                     opcodestr, "$rd, $rs2, $rs1">;
 
 multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
-  def ""     : AMO_rr<funct5, 0, 0, funct3, opcodestr>;
-  def _AQ    : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">;
-  def _RL    : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">;
-  def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
+  def ""    : AMO_rr<funct5, 0, 0, funct3, opcodestr>;
+  def _AQ   : AMO_rr<funct5, 1, 0, funct3, opcodestr # ".aq">;
+  def _RL   : AMO_rr<funct5, 0, 1, funct3, opcodestr # ".rl">;
+  def _AQRL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -174,8 +174,9 @@ let Predicates = [HasAtomicLdSt] in {
   def : StPat<relaxed_store<atomic_store_8>,  SB, GPR, XLenVT>;
   def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>;
   def : StPat<relaxed_store<atomic_store_32>, SW, GPR, XLenVT>;
+}
 
-  // Used by GISel for RV32 and RV64.
+let Predicates = [HasAtomicLdSt, IsRV32] in {
   def : LdPat<relaxed_load<atomic_load_nonext_32>, LW, i32>;
 }
 
@@ -188,31 +189,34 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
 
 /// AMOs
 
+class PatAMO<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+    : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs2, GPR:$rs1)>;
+
 multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
                   list<Predicate> ExtraPreds = []> {
 let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in {
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
-                  !cast<RVInst>(BaseInst), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
-                  !cast<RVInst>(BaseInst#"_AQ"), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
-                  !cast<RVInst>(BaseInst#"_RL"), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
-                  !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
-                  !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"),
+               !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"),
+               !cast<RVInst>(BaseInst#"_AQ"), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"),
+               !cast<RVInst>(BaseInst#"_RL"), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+               !cast<RVInst>(BaseInst#"_AQRL"), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+               !cast<RVInst>(BaseInst#"_AQRL"), vt>;
 }
 
 let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in {
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
-                  !cast<RVInst>(BaseInst), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
-                  !cast<RVInst>(BaseInst), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
-                  !cast<RVInst>(BaseInst), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
-                  !cast<RVInst>(BaseInst), vt>;
-  def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
-                  !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"),
+               !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"),
+               !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"),
+               !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+               !cast<RVInst>(BaseInst), vt>;
+  def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+               !cast<RVInst>(BaseInst), vt>;
 }
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 447f05c..f2724c41 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1636,7 +1636,7 @@ def : QCISELECTCCIPat<SETNE, QC_SELECTNEI>;
 }
 
 let Predicates = [HasVendorXqcilsm, IsRV32] in {
-def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
+def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7),
           (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
 } // Predicates = [HasVendorXqcilsm, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index 7cf6d5f..20e2142 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -9,8 +9,8 @@
 // This file describes the RISC-V instructions from the standard atomic 'Za*'
 // extensions:
 //  - Zawrs (v1.0) : Wait-on-Reservation-Set.
-//  - Zacas (v1.0-rc1) : Atomic Compare-and-Swap.
-//  - Zabha (v1.0-rc1) : Byte and Halfword Atomic Memory Operations.
+//  - Zacas (v1.0) : Atomic Compare-and-Swap.
+//  - Zabha (v1.0) : Byte and Halfword Atomic Memory Operations.
 //
 //===----------------------------------------------------------------------===//
 
@@ -44,15 +44,15 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb"
 class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr,
               DAGOperand RC>
     : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
-                    (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2),
+                    (outs RC:$rd_wb), (ins RC:$rd, RC:$rs2, GPRMemZeroOffset:$rs1),
                     opcodestr, "$rd, $rs2, $rs1">;
 
 multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr,
                          DAGOperand RC> {
-  def ""     : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
-  def _AQ    : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
-  def _RL    : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
-  def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
+  def ""    : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
+  def _AQ   : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
+  def _RL   : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
+  def _AQRL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
 }
 
 let Predicates = [HasStdExtZacas], IsSignExtendingOpW = 1 in {
@@ -71,48 +71,48 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>;
 multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
                      list<Predicate> ExtraPreds = []> {
 let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in {
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+            (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>;
   def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+            (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>;
 } // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds)
 
 let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in {
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr),
             (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr),
            (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr),
            (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr),
            (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
-  def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
+  def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (XLenVT GPR:$addr),
            (vt GPR:$cmp), (vt GPR:$new)),
-            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+            (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>;
 } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds)
}
 
@@ -140,7 +140,7 @@ def WRS_STO : WRSInst<0b000000011101, "wrs.sto">, Sched<[]>;
 // Zabha (Byte and Halfword Atomic Memory Operations)
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZabha] in {
+let Predicates = [HasStdExtZabha], IsSignExtendingOpW = 1 in {
 defm AMOSWAP_B : AMO_rr_aq_rl<0b00001, 0b000, "amoswap.b">,
                  Sched<[WriteAtomicB, ReadAtomicBA, ReadAtomicBD]>;
 defm AMOADD_B  : AMO_rr_aq_rl<0b00000, 0b000, "amoadd.b">,
@@ -181,7 +181,7 @@ defm AMOMAXU_H : AMO_rr_aq_rl<0b11100, 0b001, "amomaxu.h">,
 }
 
 // If Zacas extension is also implemented, Zabha further provides AMOCAS.[B|H].
-let Predicates = [HasStdExtZabha, HasStdExtZacas] in {
+let Predicates = [HasStdExtZabha, HasStdExtZacas], IsSignExtendingOpW = 1 in {
 defm AMOCAS_B : AMO_cas_aq_rl<0b00101, 0b000, "amocas.b", GPR>;
 defm AMOCAS_H : AMO_cas_aq_rl<0b00101, 0b001, "amocas.h", GPR>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1deecd2..5f944034 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -30,21 +30,22 @@ class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
                 opcodestr, "$rs2, $rs1"> {
   let rd = 0;
 }
+
 multiclass LAQ_r_aq_rl<bits<3> funct3, string opcodestr> {
-  def _AQ    : LAQ_r<1, 0, funct3, opcodestr # ".aq">;
-  def _AQ_RL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">;
+  def _AQ   : LAQ_r<1, 0, funct3, opcodestr # ".aq">;
+  def _AQRL : LAQ_r<1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 multiclass SRL_r_aq_rl<bits<3> funct3, string opcodestr> {
-  def _RL    : SRL_r<0, 1, funct3, opcodestr # ".rl">;
-  def _AQ_RL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">;
+  def _RL   : SRL_r<0, 1, funct3, opcodestr # ".rl">;
+  def _AQRL : SRL_r<1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZalasr] in {
+let Predicates = [HasStdExtZalasr], IsSignExtendingOpW = 1 in {
 defm LB : LAQ_r_aq_rl<0b000, "lb">;
 defm LH : LAQ_r_aq_rl<0b001, "lh">;
 defm LW : LAQ_r_aq_rl<0b010, "lw">;
@@ -93,11 +94,12 @@ let Predicates = [HasStdExtZalasr] in {
   def : PatSRL<releasing_store<atomic_store_32>, SW_RL>;
   def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL>;
+}
 
-  // Used by GISel for RV32 and RV64.
+let Predicates = [HasStdExtZalasr, IsRV32] in {
   def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ, i32>;
   def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ, i32>;
-} // Predicates = [HasStdExtZalasr]
+} // Predicates = [HasStdExtZalasr, IsRV32]
 
 let Predicates = [HasStdExtZalasr, IsRV64] in {
   def : PatLAQ<acquiring_load<atomic_load_asext_32>, LW_AQ, i64>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 8d9b777..57fbaa0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -634,56 +634,56 @@ def : PatGpr<bswap, REV8_RV64, i64>;
 
 let Predicates = [HasStdExtZbkb] in {
 
 def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
-              (zexti8 (XLenVT GPR:$rs1))),
-          (PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
-              (zexti8 (XLenVT GPR:$rs1))),
-          (PACKH GPR:$rs1, GPR:$rs2)>;
+              zexti8:$rs1),
+          (PACKH zexti8:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl zexti8:$rs2, (XLenVT 8)),
+              zexti8:$rs1),
+          (PACKH zexti8:$rs1, zexti8:$rs2)>;
 def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
-                   (zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
-          (PACKH GPR:$rs1, GPR:$rs2)>;
+                   zexti8:$rs1), 0xFFFF),
+          (PACKH zexti8:$rs1, GPR:$rs2)>;
 
 def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
-                               (zexti8 (XLenVT GPR:$rs1))),
-          (PACKH GPR:$rs1, GPR:$rs2)>;
+                               zexti8:$rs1),
+          (PACKH zexti8:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbkb]
 
 let Predicates = [HasStdExtZbkb, IsRV32] in {
-def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
-          (PACK GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (or zexti16:$rs1, (shl GPR:$rs2, (i32 16)))),
+          (PACK zexti16:$rs1, GPR:$rs2)>;
 
-def : Pat<(or (shl GPR:$rs2, (XLenVT 24)),
-              (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
-          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+def : Pat<(i32 (or (shl GPR:$rs2, (XLenVT 24)),
+                   (shl zexti8:$rs1, (XLenVT 16)))),
+          (SLLI (XLenVT (PACKH zexti8:$rs1, GPR:$rs2)), (XLenVT 16))>;
 
 // Match a pattern of 2 bytes being inserted into bits [31:16], with bits
 // bits [15:0] coming from a zero extended value. We can use pack with packh for
 // bits [31:16]. If bits [15:0] can also be a packh, it can be matched
 // separately.
-def : Pat<(or (or (shl GPR:$op1rs2, (XLenVT 24)),
-                  (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
-              (zexti16 (XLenVT GPR:$rs1))),
-          (PACK (XLenVT GPR:$rs1),
-                (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+def : Pat<(i32 (or (or (shl GPR:$op1rs2, (XLenVT 24)),
+                       (shl zexti8:$op1rs1, (XLenVT 16))),
+                   zexti16:$rs1)),
+          (PACK zexti16:$rs1,
+                (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>;
 }
 
 let Predicates = [HasStdExtZbkb, IsRV64] in {
-def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
-          (PACK GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or zexti32:$rs1, (shl GPR:$rs2, (i64 32)))),
+          (PACK zexti32:$rs1, GPR:$rs2)>;
 
-def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
-              (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
-          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+def : Pat<(i64 (or (shl zexti8:$rs2, (XLenVT 24)),
+                   (shl zexti8:$rs1, (XLenVT 16)))),
+          (SLLI (XLenVT (PACKH zexti8:$rs1, zexti8:$rs2)), (XLenVT 16))>;
 def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (XLenVT 24)),
-                               (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
-          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+                               (shl zexti8:$rs1, (XLenVT 16))),
+          (SLLI (XLenVT (PACKH zexti8:$rs1, GPR:$rs2)), (XLenVT 16))>;
 
 def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
-                               (zexti16 (i64 GPR:$rs1))),
-          (PACKW GPR:$rs1, GPR:$rs2)>;
+                               zexti16:$rs1),
+          (PACKW zexti16:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
-                   (zexti16 (i64 GPR:$rs1)))),
-          (PACKW GPR:$rs1, GPR:$rs2)>;
+                   zexti16:$rs1)),
+          (PACKW zexti16:$rs1, GPR:$rs2)>;
 
 // Match a pattern of 2 bytes being inserted into bits [31:16], with bits
 // bits [15:0] coming from a zero extended value, and bits [63:32] being
@@ -691,35 +691,35 @@ def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
 // also be a packh, it can be matched separately.
 def : Pat<(binop_allwusers<or>
               (or (shl GPR:$op1rs2, (XLenVT 24)),
-                  (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
-              (zexti16 (XLenVT GPR:$rs1))),
-          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+                  (shl zexti8:$op1rs1, (XLenVT 16))),
+              zexti16:$rs1),
+          (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>;
 // We need to manually reassociate the patterns because of the binop_allwusers.
 def : Pat<(binop_allwusers<or>
-              (or (zexti16 (XLenVT GPR:$rs1)),
-                  (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+              (or zexti16:$rs1,
+                  (shl zexti8:$op1rs1, (XLenVT 16))),
               (shl GPR:$op1rs2, (XLenVT 24))),
-          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+          (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>;
 def : Pat<(binop_allwusers<or>
-              (or (zexti16 (XLenVT GPR:$rs1)),
-                  (shl GPR:$op1rs1, (XLenVT 24))),
-              (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
-          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+              (or zexti16:$rs1,
+                  (shl GPR:$op1rs2, (XLenVT 24))),
+              (shl zexti8:$op1rs1, (XLenVT 16))),
+          (PACKW zexti16:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>;
 def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)),
-                       (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
-                   (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))),
-          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+                       (shl zexti8:$op1rs1, (XLenVT 16))),
+                   (sext_inreg (shl GPR:$op1rs2, (XLenVT 24)), i32))),
+          (PACKW GPR:$rs1, (XLenVT (PACKH zexti8:$op1rs1, GPR:$op1rs2)))>;
 
 // Match a pattern of 2 halfwords being inserted into bits [63:32], with bits
 // bits [31:0] coming from a zero extended value. We can use pack with packw for
 // bits [63:32]. If bits [63:31] can also be a packw, it can be matched
 // separately.
 def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
-                  (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
-              (zexti32 (i64 GPR:$rs1))),
-          (PACK (XLenVT GPR:$rs1),
-                (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
+                  (shl zexti16:$op1rs1, (i64 32))),
+              zexti32:$rs1),
+          (PACK zexti32:$rs1,
+                (XLenVT (PACKW zexti16:$op1rs1, GPR:$op1rs2)))>;
 } // Predicates = [HasStdExtZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbb, IsRV32] in
@@ -788,32 +788,32 @@ multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> {
 }
 
 multiclass Sh1Add_UWPat<Instruction sh1add_uw> {
-  def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), (i64 0x1FFFFFFFF)),
+                                (XLenVT GPR:$rs2)),
            (sh1add_uw GPR:$rs1, GPR:$rs2)>;
  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x1FFFFFFFE)),
+                                (XLenVT GPR:$rs2)),
            (sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>;
 }
 
 multiclass Sh2Add_UWPat<Instruction sh2add_uw> {
-  def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), (i64 0x3FFFFFFFF)),
+                                (XLenVT GPR:$rs2)),
            (sh2add_uw GPR:$rs1, GPR:$rs2)>;
  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x3FFFFFFFC)),
+                                (XLenVT GPR:$rs2)),
            (sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>;
 }
 
 multiclass Sh3Add_UWPat<Instruction sh3add_uw> {
-  def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), (i64 0x7FFFFFFFF)),
+                                (XLenVT GPR:$rs2)),
            (sh3add_uw GPR:$rs1, GPR:$rs2)>;
  // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
-  def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8),
-                                     (XLenVT GPR:$rs2))),
+  def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x7FFFFFFF8)),
+                                (XLenVT GPR:$rs2)),
            (sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 6d86aff..3658817 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -14,6 +14,10 @@
 // otherwise.
 def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>;
 
+// This scheduling predicate is true when subtarget feature TuneHasSingleElementVecFP64
+// is enabled.
+def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>;
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 def isSEXT_W : TIIPredicate<"isSEXT_W",
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 17a7948..e86431f 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -338,7 +338,8 @@ def SIFIVE_X390 : RISCVProcessorModel<"sifive-x390",
                                        FeatureStdExtZvl1024b,
                                        FeatureVendorXSiFivecdiscarddlone,
                                        FeatureVendorXSiFivecflushdlone],
-                                      SiFiveIntelligenceTuneFeatures>;
+                                      !listconcat(SiFiveIntelligenceTuneFeatures,
+                                                  [TuneHasSingleElementVecFP64])>;
 
 defvar SiFiveP400TuneFeatures = [TuneNoDefaultUnroll,
                                  TuneConditionalCompressedMoveFusion,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 82e768d..6605a5c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -238,7 +238,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
 }
 
 class GPRRegisterClass<dag regList>
-    : RISCVRegisterClass<[XLenVT, XLenFVT, i32, i16], 32, regList> {
+    : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> {
   let RegInfos = XLenRI;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3f2e7db..f863392a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -317,7 +317,6 @@ multiclass SiFive7WriteResBase<int VLEN,
                                ProcResourceKind VL, ProcResourceKind VS,
                                ProcResourceKind VCQ,
                                SiFive7FPLatencies fpLatencies,
-                               bit isFP64Throttled = false,
                                bit hasFastGather = false> {
 
   // Branching
@@ -567,9 +566,12 @@ multiclass SiFive7WriteResBase<int VLEN,
      defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
      defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8, VLEN>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
-      defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [VCQ, VL],
-                                   4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
-                                   [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+      defm : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred,
+                                   // Predicated
+                                   [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+                                   // Not Predicated
+                                   [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+                                   mx, IsWorstCase>;
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm : LMULWriteResMX<"WriteVLDUX8", [VCQ, VL], mx, IsWorstCase>;
        defm : LMULWriteResMX<"WriteVLDOX8", [VCQ, VL], mx, IsWorstCase>;
@@ -587,9 +589,12 @@ multiclass SiFive7WriteResBase<int VLEN,
      defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
      defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16, VLEN>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
-      defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [VCQ, VL],
-                                   4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
-                                   [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+      defm : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred,
+                                   // Predicated
+                                   [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+                                   // Not Predicated
+                                   [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+                                   mx, IsWorstCase>;
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm : LMULWriteResMX<"WriteVLDUX16", [VCQ, VL], mx, IsWorstCase>;
        defm : LMULWriteResMX<"WriteVLDOX16", [VCQ, VL], mx, IsWorstCase>;
@@ -604,9 +609,12 @@ multiclass SiFive7WriteResBase<int VLEN,
      defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
      defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32, VLEN>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
-      defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [VCQ, VL],
-                                   4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
-                                   [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+      defm : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred,
+                                   // Predicated
+                                   [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+                                   // Not Predicated
+                                   [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+                                   mx, IsWorstCase>;
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm : LMULWriteResMX<"WriteVLDUX32", [VCQ, VL], mx, IsWorstCase>;
        defm : LMULWriteResMX<"WriteVLDOX32", [VCQ, VL], mx, IsWorstCase>;
@@ -621,9 +629,12 @@ multiclass SiFive7WriteResBase<int VLEN,
      defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c;
      defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64, VLEN>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
-      defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [VCQ, VL],
-                                   4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles),
-                                   [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>;
+      defm : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred,
+                                   // Predicated
+                                   [VCQ, VL], 4, [0, 1], [1, !add(1, VLDSX0Cycles)],
+                                   // Not Predicated
+                                   [VCQ, VL], !add(3, Cycles), [0, 1], [1, !add(1, Cycles)],
+                                   mx, IsWorstCase>;
      let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
        defm : LMULWriteResMX<"WriteVLDUX64", [VCQ, VL], mx, IsWorstCase>;
        defm : LMULWriteResMX<"WriteVLDOX64", [VCQ, VL], mx, IsWorstCase>;
@@ -820,29 +831,56 @@ multiclass SiFive7WriteResBase<int VLEN,
  // 13. Vector Floating-Point Instructions
  foreach mx = SchedMxListF in {
    foreach sew = SchedSEWSet<mx, isF=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 64)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesDefault<mx>.c);
-      defvar Lat8 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 8);
-      defvar VA = !if(!and(isFP64Throttled, !eq(sew, 64)), VA1, VA1OrVA2);
      defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
-      let Latency = Lat8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
-      }
-      defvar Lat4 = !if(!and(isFP64Throttled, !eq(sew, 64)), Cycles, 4);
-      let Latency = Lat4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA], mx, sew, IsWorstCase>;
-        // min max require merge
-        defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+      if !eq(sew, 64) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        foreach SchedWriteName = ["WriteVFALUV", "WriteVFALUF", "WriteVFMulV", "WriteVFMulF",
+                                  "WriteVFMulAddV", "WriteVFMulAddF"] in
+          defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                             // Predicated
+                                             [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+                                             // Not Predicated
+                                             [VCQ, VA1OrVA2], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                             mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFRecpV", "WriteVFCvtIToFV"] in
+          defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                             // Predicated
+                                             [VCQ, VA1], !add(SingleElementCycles, 7), [0, 1], [1, !add(1, SingleElementCycles)],
+                                             // Not Predicated
+                                             [VCQ, VA1], 8, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                             mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFSgnjV", "WriteVFSgnjF"] in
+          defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                             // Predicated
+                                             [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+                                             // Not Predicated
+                                             [VCQ, VA1OrVA2], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                             mx, sew, IsWorstCase>;
+        foreach SchedWriteName = ["WriteVFMinMaxV", "WriteVFMinMaxF"] in
+          defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                             // Predicated
+                                             [VCQ, VA1], !add(SingleElementCycles, 3), [0, 1], [1, !add(1, SingleElementCycles)],
+                                             // Not Predicated
+                                             [VCQ, VA1], 4, [0, 1], [1, !add(1, SiFive7GetCyclesDefault<mx>.c)],
+                                             mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFRecpV", [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
+        let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
+          // min max require merge
+          defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
+      }
    }
  }
@@ -880,19 +918,28 @@ multiclass SiFive7WriteResBase<int VLEN,
  // Widening
  foreach mx = SchedMxListW in {
    foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesDefault<mx>.c);
      defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in
-        defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtIToFV", SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+          defm : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      }
    }
  }
  foreach mx = SchedMxListFW in {
    foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-      defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+      defvar DefaultCycles = SiFive7GetCyclesDefault<mx>.c;
      defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
        defm : LMULSEWWriteResMXSEW<"WriteVFWALUV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
        defm : LMULSEWWriteResMXSEW<"WriteVFWALUF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
        defm : LMULSEWWriteResMXSEW<"WriteVFWMulV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
@@ -900,11 +947,19 @@ multiclass SiFive7WriteResBase<int VLEN,
        defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
        defm : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [VCQ, VA1OrVA2], mx, sew, IsWorstCase>;
      }
-      defvar CvtCycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                             SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                             SiFive7GetCyclesDefault<mx>.c);
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, CvtCycles)] in
-        defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        defm : LMULSEWWriteResMXSEWVariant<"WriteVFWCvtFToFV", SingleElementVecFP64SchedPred,
+                                           // Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                           // Not Predicated
+                                           [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                           mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in
+          defm : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      }
    }
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
@@ -921,13 +976,23 @@ multiclass SiFive7WriteResBase<int VLEN,
  }
  foreach mx = SchedMxListFW in {
    foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-      defvar Cycles = !if(!and(isFP64Throttled, !eq(sew, 32)),
-                          SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c,
-                          SiFive7GetCyclesNarrowing<mx>.c);
      defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
-      let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
-        defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
-        defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+      defvar DefaultCycles = SiFive7GetCyclesNarrowing<mx>.c;
+      if !eq(sew, 32) then {
+        defvar SingleElementCycles = SiFive7GetCyclesOnePerElement<mx, sew, VLEN>.c;
+        foreach SchedWriteName = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
+          defm : LMULSEWWriteResMXSEWVariant<SchedWriteName, SingleElementVecFP64SchedPred,
+                                             // Predicated
+                                             [VCQ, VA1], 8, [0, 1], [1, !add(1, SingleElementCycles)],
+                                             // Not Predicated
+                                             [VCQ, VA1], 8, [0, 1], [1, !add(1, DefaultCycles)],
+                                             mx, sew, IsWorstCase>;
+      } else {
+        let Latency = 8,
+            AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DefaultCycles)] in {
+          defm : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+          defm : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [VCQ, VA1], mx, sew, IsWorstCase>;
+        }
+      }
    }
  }
@@ -1487,7 +1552,6 @@ multiclass SiFive7ReadAdvance {
 /// eventually be supplied by different SchedMachineModels.
 multiclass SiFive7SchedResources<int vlen, bit extraVALU,
                                  SiFive7FPLatencies fpLatencies,
-                                 bit isFP64Throttled,
                                  bit hasFastGather> {
  defm SiFive7 : SiFive7ProcResources<extraVALU>;
 
@@ -1515,8 +1579,7 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
      : SiFive7WriteResBase<vlen, SiFive7PipeA, SiFive7PipeB,
                            SiFive7PipeAB, SiFive7IDiv, SiFive7FDiv,
                            SiFive7VA1, SiFive7VA1OrVA2, SiFive7VL, SiFive7VS,
-                           SiFive7VCQ, fpLatencies, isFP64Throttled,
-                           hasFastGather>;
+                           SiFive7VCQ, fpLatencies, hasFastGather>;
 
 //===----------------------------------------------------------------------===//
 // Bypass and advance
@@ -1548,7 +1611,6 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
  bit HasExtraVALU = false;
 
  SiFive7FPLatencies FPLatencies;
-  bit IsFP64Throttled = false;
  bit HasFastGather = false;
 
  string Name = !subst("Model", "", !subst("SiFive7", "", NAME));
@@ -1575,7 +1637,6 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
 def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
  let HasExtraVALU = true;
  let FPLatencies = SiFive7LowFPLatencies;
-  let IsFP64Throttled = true;
  let HasFastGather = true;
 }
 
@@ -1584,7 +1645,6 @@ foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
  let SchedModel = model in
  defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
                                          model.FPLatencies,
-                                          model.IsFP64Throttled,
                                          model.HasFastGather>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 6c7658c..d11b446 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -67,42 +67,41 @@ multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
 // ReleaseAtCycles predCycles if the SchedPredicate Pred is true, otherwise has
 // Latency noPredLat and ReleaseAtCycles noPredCycles. The WorstCase SchedWrite
 // is created similarly if IsWorstCase is true.
-multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
-                                 list<ProcResourceKind> resources,
-                                 int predLat, list<int> predAcquireCycles,
-                                 list<int> predReleaseCycles, int noPredLat,
-                                 list<int> noPredAcquireCycles,
-                                 list<int> noPredReleaseCycles,
-                                 string mx, bit IsWorstCase> {
-  defvar nameMX = name # "_" # mx;
-
+multiclass LMULWriteResVariantImpl<string name, string writeResName, SchedPredicateBase Pred,
+                                   list<ProcResourceKind> predResources,
+                                   int predLat, list<int> predAcquireCycles,
+                                   list<int> predReleaseCycles,
+                                   list<ProcResourceKind> noPredResources,
+                                   int noPredLat, list<int> noPredAcquireCycles,
+                                   list<int> noPredReleaseCycles,
+                                   bit IsWorstCase> {
  // Define the different behaviors
-  def nameMX # "_Pred" : SchedWriteRes<resources>{
+  def writeResName # "_Pred" : SchedWriteRes<predResources>{
    let Latency = predLat;
    let AcquireAtCycles = predAcquireCycles;
    let ReleaseAtCycles = predReleaseCycles;
  }
-  def nameMX # "_NoPred" : SchedWriteRes<resources> {
+  def writeResName # "_NoPred" : SchedWriteRes<noPredResources> {
    let Latency = noPredLat;
    let AcquireAtCycles = noPredAcquireCycles;
    let ReleaseAtCycles = noPredReleaseCycles;
  }
 
  // Define SchedVars
-  def nameMX # PredSchedVar
-    : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # nameMX # "_Pred")]>;
-  def nameMX # NoPredSchedVar
-    : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # nameMX #"_NoPred")]>;
+  def writeResName # PredSchedVar
+    : SchedVar<Pred, [!cast<SchedWriteRes>(NAME # writeResName # "_Pred")]>;
+  def writeResName # NoPredSchedVar
+    : SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # writeResName #"_NoPred")]>;
  // Allow multiclass to refer to SchedVars -- need to have NAME prefix.
-  defvar PredSchedVar = !cast<SchedVar>(NAME # nameMX # PredSchedVar);
-  defvar NoPredSchedVar = !cast<SchedVar>(NAME # nameMX # NoPredSchedVar);
+  defvar PredSchedVar = !cast<SchedVar>(NAME # writeResName # PredSchedVar);
+  defvar NoPredSchedVar = !cast<SchedVar>(NAME # writeResName # NoPredSchedVar);
 
  // Tie behavior to predicate
-  def NAME # nameMX # "_Variant"
+  def NAME # writeResName # "_Variant"
    : SchedWriteVariant<[PredSchedVar, NoPredSchedVar]>;
  def : SchedAlias<
-    !cast<SchedReadWrite>(nameMX),
-    !cast<SchedReadWrite>(NAME # nameMX # "_Variant")>;
+    !cast<SchedReadWrite>(writeResName),
+    !cast<SchedReadWrite>(NAME # writeResName # "_Variant")>;
 
  if IsWorstCase then {
    def NAME # name # "_WorstCase_Variant"
@@ -113,6 +112,38 @@ multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
  }
}
 
+multiclass LMULWriteResMXVariant<string name, SchedPredicateBase Pred,
+                                 list<ProcResourceKind> predResources,
+                                 int predLat, list<int> predAcquireCycles,
+                                 list<int> predReleaseCycles,
+                                 list<ProcResourceKind> noPredResources,
+                                 int noPredLat, list<int> noPredAcquireCycles,
+                                 list<int> noPredReleaseCycles,
+                                 string mx, bit IsWorstCase> {
+  defm "" : LMULWriteResVariantImpl<name, name # "_" # mx, Pred, predResources,
+                                    predLat, predAcquireCycles,
+                                    predReleaseCycles, noPredResources,
+                                    noPredLat, noPredAcquireCycles,
+                                    noPredReleaseCycles,
+                                    IsWorstCase>;
+}
+
+multiclass LMULSEWWriteResMXSEWVariant<string name, SchedPredicateBase Pred,
+                                       list<ProcResourceKind> predResources,
+                                       int predLat, list<int> predAcquireCycles,
+                                       list<int> predReleaseCycles,
+                                       list<ProcResourceKind> noPredResources,
+                                       int noPredLat, list<int> noPredAcquireCycles,
+                                       list<int> noPredReleaseCycles,
+                                       string mx, int sew, bit IsWorstCase> {
+  defm "" : LMULWriteResVariantImpl<name, name # "_" # mx # "_E" # sew, Pred, predResources,
+                                    predLat, predAcquireCycles,
+                                    predReleaseCycles, noPredResources,
+                                    noPredLat, noPredAcquireCycles,
+                                    noPredReleaseCycles,
+                                    IsWorstCase>;
+}
+
 // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
 // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
 // SchedMxList variants above. Each multiclass is responsible for defining
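
The Zalasr selection added above keys off the memory operand's atomic ordering: any GPR load or store stronger than monotonic selects an .aq/.rl opcode, and relaxed accesses use the plain reg+imm forms. A minimal C++ sketch of source code that would exercise those paths (assuming a target with the experimental Zalasr extension enabled; without it, the same orderings lower to fences around plain loads and stores):

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> ready{0};
uint32_t payload;

void publish(uint32_t v) {
  payload = v;
  // Release store: with Zalasr this can select SW_RL (sw.rl).
  ready.store(1, std::memory_order_release);
}

uint32_t consume() {
  // Acquire load: with Zalasr this can select LW_AQ (lw.aq). Per the
  // removed RISCVGISel.td comment, seq_cst loads also use .aq to match
  // the psABI mapping.
  while (!ready.load(std::memory_order_acquire)) {
  }
  return payload;
}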