diff options
Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r-- | llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 160 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td | 200 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 3 |
6 files changed, 265 insertions, 141 deletions
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 67cc01e..e0ac591 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -674,6 +674,9 @@ static constexpr FeatureBitset XAndesGroup = { static constexpr DecoderListEntry DecoderList32[]{ // Vendor Extensions + {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, + {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, + {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, {DecoderTableXVentana32, {RISCV::FeatureVendorXVentanaCondOps}, "XVentanaCondOps"}, @@ -690,9 +693,6 @@ static constexpr DecoderListEntry DecoderList32[]{ "MIPS mips.pref"}, {DecoderTableXAndes32, XAndesGroup, "Andes extensions"}, // Standard Extensions - {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"}, - {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"}, - {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"}, {DecoderTable32, {}, "standard 32-bit instructions"}, {DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"}, {DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"}, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 95ec42f..8d956ce 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -32,6 +32,11 @@ static cl::opt<bool> ULEB128Reloc( "riscv-uleb128-reloc", cl::init(true), cl::Hidden, cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate")); +static cl::opt<bool> + AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden, + cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 " + "bytes of NOPs even in norvc code")); + RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, const MCTargetOptions &Options) : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI), @@ -306,12 +311,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, // If conditions are met, compute the padding size and create a fixup encoding // the padding size in the addend. bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { - // Use default handling unless linker relaxation is enabled and the alignment - // is larger than the nop size. - const MCSubtargetInfo *STI = F.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) + // Alignments before the first linker-relaxable instruction have fixed sizes + // and do not require relocations. Alignments after a linker-relaxable + // instruction require a relocation, even if the STI specifies norelax. + // + // firstLinkerRelaxable is the layout order within the subsection, which may + // be smaller than the section's order. Therefore, alignments in a + // lower-numbered subsection may be unnecessarily treated as linker-relaxable. + auto *Sec = F.getParent(); + if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable()) return false; - unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; + + // Use default handling unless the alignment is larger than the nop size. + const MCSubtargetInfo *STI = F.getSubtargetInfo(); + unsigned MinNopLen = + AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; if (F.getAlignment() <= MinNopLen) return false; @@ -321,7 +335,6 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); F.setVarFixups({Fixup}); F.setLinkerRelaxable(); - F.getParent()->setLinkerRelaxable(); return true; } @@ -471,9 +484,12 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, Count -= 1; } + // TODO: emit a mapping symbol right here + if (Count % 4 == 2) { - // The canonical nop with Zca is c.nop. - OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2); + // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte + // c.nop even in a norvc region. + OS.write("\x01\0", 2); Count -= 2; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e4aa8b8..e63b937 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1844,6 +1844,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, /*IsStore*/ true, /*IsUnitStrided*/ false, /*UsePtrVal*/ true); + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + // Operands are (vec, ..., vec, ptr, offset, mask, vl) + return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, + /*IsStore*/ true, + /*IsUnitStrided*/ false, /*UsePtrVal*/ true); case Intrinsic::riscv_vlm: return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, @@ -11084,69 +11095,118 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); } -SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, - SelectionDAG &DAG) const { - unsigned IntNo = Op.getConstantOperandVal(1); +static SDValue +lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, + const RISCVSubtarget &Subtarget, + SelectionDAG &DAG) { + bool IsStrided; switch (IntNo) { - default: - break; case Intrinsic::riscv_seg2_store_mask: case Intrinsic::riscv_seg3_store_mask: case Intrinsic::riscv_seg4_store_mask: case Intrinsic::riscv_seg5_store_mask: case Intrinsic::riscv_seg6_store_mask: case Intrinsic::riscv_seg7_store_mask: - case Intrinsic::riscv_seg8_store_mask: { - SDLoc DL(Op); - static const Intrinsic::ID VssegInts[] = { - Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, - Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, - Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, - Intrinsic::riscv_vsseg8_mask}; + case Intrinsic::riscv_seg8_store_mask: + IsStrided = false; + break; + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + IsStrided = true; + break; + default: + llvm_unreachable("unexpected intrinsic ID"); + } - // Operands: (chain, int_id, vec*, ptr, mask, vl) - unsigned NF = Op->getNumOperands() - 5; - assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); - MVT XLenVT = Subtarget.getXLenVT(); - MVT VT = Op->getOperand(2).getSimpleValueType(); - MVT ContainerVT = getContainerForFixedLengthVector(VT); - unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * - ContainerVT.getScalarSizeInBits(); - EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); + SDLoc DL(Op); + static const Intrinsic::ID VssegInts[] = { + Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, + Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, + Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, + Intrinsic::riscv_vsseg8_mask}; + static const Intrinsic::ID VsssegInts[] = { + Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask, + Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask, + Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask, + Intrinsic::riscv_vssseg8_mask}; + + // Operands: (chain, int_id, vec*, ptr, mask, vl) or + // (chain, int_id, vec*, ptr, stride, mask, vl) + unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5); + assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op->getOperand(2).getSimpleValueType(); + MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); + unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * + ContainerVT.getScalarSizeInBits(); + EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); - SDValue VL = Op.getOperand(Op.getNumOperands() - 1); - SDValue Mask = Op.getOperand(Op.getNumOperands() - 2); - MVT MaskVT = Mask.getSimpleValueType(); - MVT MaskContainerVT = - ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); - Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); + SDValue VL = Op.getOperand(Op.getNumOperands() - 1); + SDValue Mask = Op.getOperand(Op.getNumOperands() - 2); + MVT MaskVT = Mask.getSimpleValueType(); + MVT MaskContainerVT = + ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); + Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); - SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); - SDValue Ptr = Op->getOperand(NF + 2); + SDValue IntID = DAG.getTargetConstant( + IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT); + SDValue Ptr = Op->getOperand(NF + 2); - auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); + auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); - SDValue StoredVal = DAG.getUNDEF(VecTupTy); - for (unsigned i = 0; i < NF; i++) - StoredVal = DAG.getNode( - RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, - convertToScalableVector( - ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget), - DAG.getTargetConstant(i, DL, MVT::i32)); + SDValue StoredVal = DAG.getUNDEF(VecTupTy); + for (unsigned i = 0; i < NF; i++) + StoredVal = DAG.getNode( + RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, + convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i), + DAG, Subtarget), + DAG.getTargetConstant(i, DL, MVT::i32)); + + SmallVector<SDValue, 10> Ops = { + FixedIntrinsic->getChain(), + IntID, + StoredVal, + Ptr, + Mask, + VL, + DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; + // Insert the stride operand. + if (IsStrided) + Ops.insert(std::next(Ops.begin(), 4), + Op.getOperand(Op.getNumOperands() - 3)); + + return DAG.getMemIntrinsicNode( + ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, + FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); +} + +SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + default: + break; + case Intrinsic::riscv_seg2_store_mask: + case Intrinsic::riscv_seg3_store_mask: + case Intrinsic::riscv_seg4_store_mask: + case Intrinsic::riscv_seg5_store_mask: + case Intrinsic::riscv_seg6_store_mask: + case Intrinsic::riscv_seg7_store_mask: + case Intrinsic::riscv_seg8_store_mask: + case Intrinsic::riscv_sseg2_store_mask: + case Intrinsic::riscv_sseg3_store_mask: + case Intrinsic::riscv_sseg4_store_mask: + case Intrinsic::riscv_sseg5_store_mask: + case Intrinsic::riscv_sseg6_store_mask: + case Intrinsic::riscv_sseg7_store_mask: + case Intrinsic::riscv_sseg8_store_mask: + return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG); - SDValue Ops[] = { - FixedIntrinsic->getChain(), - IntID, - StoredVal, - Ptr, - Mask, - VL, - DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; - - return DAG.getMemIntrinsicNode( - ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, - FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); - } case Intrinsic::riscv_sf_vc_xv_se: return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE); case Intrinsic::riscv_sf_vc_iv_se: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 413ad8b..ee623d3a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -692,6 +692,11 @@ def : Pat<(binop_allwusers<or> (shl GPR:$op1rs1, (XLenVT 24))), (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; + +def : Pat<(i64 (or (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), + (sext_inreg (shl GPR:$op1rs1, (XLenVT 24)), i32))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; } // Predicates = [HasStdExtZbkb, IsRV64] let Predicates = [HasStdExtZbb, IsRV32] in diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index bf23812..24ebbc3 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -13,78 +13,113 @@ // //===----------------------------------------------------------------------===// -class SMX60IsWorstCaseMX<string mx, list<string> MxList> { - string LLMUL = LargestLMUL<MxList>.r; - bit c = !eq(mx, LLMUL); -} +//===----------------------------------------------------------------------===// +// Helpers + +// Maps LMUL string to corresponding value from the Values array +// LMUL values map to array indices as follows: +// MF8 -> Values[0], MF4 -> Values[1], MF2 -> Values[2], M1 -> Values[3], +// M2 -> Values[4], M4 -> Values[5], M8 -> Values[6] +// Shorter lists are allowed, e.g., widening instructions don't work on M8 +class GetLMULValue<list<int> Values, string LMUL> { + defvar Index = !cond( + !eq(LMUL, "MF8"): 0, + !eq(LMUL, "MF4"): 1, + !eq(LMUL, "MF2"): 2, + !eq(LMUL, "M1"): 3, + !eq(LMUL, "M2"): 4, + !eq(LMUL, "M4"): 5, + !eq(LMUL, "M8"): 6, + ); -class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> { - string LLMUL = LargestLMUL<MxList>.r; - int SSEW = SmallestSEW<mx, isF>.r; - bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); + assert !lt(Index, !size(Values)), + "Missing LMUL value for '" # LMUL # "'. " # + "Expected at least " # !add(Index, 1) # " elements, but got " # + !size(Values) # "."; + + int c = Values[Index]; } -defvar SMX60VLEN = 256; -defvar SMX60DLEN = !div(SMX60VLEN, 2); +// Returns BaseValue for LMUL values before startLMUL, Value for startLMUL, +// then doubles Value for each subsequent LMUL +// Example: ConstValueUntilLMULThenDoubleBase<"M1", 2, 4, "M8"> returns: +// MF8->2, MF4->2, MF2->2, M1->4, M2->8, M4->16, M8->32 +// This is useful for modeling scheduling parameters that scale with LMUL. +class ConstValueUntilLMULThenDoubleBase<string startLMUL, int BaseValue, int Value, string currentLMUL> { + assert !le(BaseValue, Value), "BaseValue must be less-equal to Value"; + defvar startPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], startLMUL>.c; + defvar currentPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], currentLMUL>.c; + + // Calculate the difference in positions + defvar posDiff = !sub(currentPos, startPos); -class Get1248Latency<string mx> { + // Calculate Value * (2^posDiff) int c = !cond( - !eq(mx, "M2") : 2, - !eq(mx, "M4") : 4, - !eq(mx, "M8") : 8, - true: 1 + !eq(posDiff, 0) : Value, + !eq(posDiff, 1) : !mul(Value, 2), + !eq(posDiff, 2) : !mul(Value, 4), + !eq(posDiff, 3) : !mul(Value, 8), + !eq(posDiff, 4) : !mul(Value, 16), + !eq(posDiff, 5) : !mul(Value, 32), + !eq(posDiff, 6) : !mul(Value, 64), + true : BaseValue ); } -// Used for: logical opsz, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides -class Get4816Latency<string mx> { - int c = !cond( - !eq(mx, "M4") : 8, - !eq(mx, "M8") : 16, - true: 4 - ); +// Same as the previous function but BaseValue == Value +class ConstValueUntilLMULThenDouble<string startLMUL, int Value, string currentLMUL> { + int c = ConstValueUntilLMULThenDoubleBase<startLMUL, Value, Value, currentLMUL>.c; +} + +// Returns MF8->1, MF4->1, MF2->2, M1->4, M2->8, M4->16, M8->32 +class ConstOneUntilMF4ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c; } +// Returns MF8->1, MF4->1, MF2->1, M1->2, M2->4, M4->8, M8->16 +class ConstOneUntilMF2ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c; +} + +// Returns MF8->1, MF4->1, MF2->1, M1->1, M2->2, M4->4, M8->8 +class ConstOneUntilM1ThenDouble<string mx> { + int c = ConstValueUntilLMULThenDouble<"M1", 1, mx>.c; +} + +//===----------------------------------------------------------------------===// +// Latency helper classes + // Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max -class Get458Latency<string mx> { - int c = !cond( - !eq(mx, "M4") : 5, - !eq(mx, "M8") : 8, - true: 4 - ); +class Get4458Latency<string mx> { + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/4, /*M4=*/5, /*M8=*/8], mx>.c; } -// Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs -// Used for: widening operations +// Used for: widening operations (no M8) class Get4588Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 5, - !eq(mx, "M4") : 8, - !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback - true: 4 - ); + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/5, /*M4=*/8], mx>.c; } // Used for: mask-producing comparisons, carry ops with mask, FP comparisons class Get461018Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 6, - !eq(mx, "M4") : 10, - !eq(mx, "M8") : 18, - true: 4 - ); + int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c; } -// Used for: e64 multiply pattern, complex ops -class Get781632Latency<string mx> { - int c = !cond( - !eq(mx, "M2") : 8, - !eq(mx, "M4") : 16, - !eq(mx, "M8") : 32, - true: 7 - ); +//===----------------------------------------------------------------------===// + +class SMX60IsWorstCaseMX<string mx, list<string> MxList> { + string LLMUL = LargestLMUL<MxList>.r; + bit c = !eq(mx, LLMUL); } +class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> { + string LLMUL = LargestLMUL<MxList>.r; + int SSEW = SmallestSEW<mx, isF>.r; + bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); +} + +defvar SMX60VLEN = 256; +defvar SMX60DLEN = !div(SMX60VLEN, 2); + def SpacemitX60Model : SchedMachineModel { let IssueWidth = 2; // dual-issue let MicroOpBufferSize = 0; // in-order @@ -383,12 +418,13 @@ foreach LMul = [1, 2, 4, 8] in { foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c; - let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in { + let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in { defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>; } - let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in { + defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + let Latency = VIALULat, ReleaseAtCycles = [4] in { // Pattern of vadd, vsub, vrsub: 4/4/5/8 // Pattern of vand, vor, vxor: 4/4/8/16 // They are grouped together, so we used the worst case 4/4/8/16 @@ -425,7 +461,7 @@ foreach mx = SchedMxList in { // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8, // e64 = 7,8,16,32. We use the worst-case until we can split the SEW. // TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites - let Latency = Get781632Latency<mx>.c, ReleaseAtCycles = [7] in { + let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in { defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>; @@ -461,15 +497,8 @@ foreach mx = SchedMxList in { foreach sew = SchedSEWSet<mx>.val in { defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; - // Slightly reduced for fractional LMULs - defvar Multiplier = !cond( - !eq(mx, "MF8") : 12, - !eq(mx, "MF4") : 12, - !eq(mx, "MF2") : 12, - true: 24 - ); - - let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in { + defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c; + let Latency = VIDivLat, ReleaseAtCycles = [12] in { defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>; defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>; } @@ -480,14 +509,8 @@ foreach mx = SchedMxList in { foreach mx = SchedMxListW in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c; - // Slightly increased for integer LMULs - defvar Multiplier = !cond( - !eq(mx, "M2") : 2, - !eq(mx, "M4") : 2, - true: 1 - ); - - let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in { + defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c; + let Latency = VNarrowingLat, ReleaseAtCycles = [4] in { defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>; @@ -501,16 +524,33 @@ foreach mx = SchedMxListW in { foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c; - defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>; + let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [SMX60_VIEU], mx, IsWorstCase>; + } + + // Latency of vsmul: e8/e16 = 4/4/5/8, e32 = 5/5/5/8, e64 = 7/8/16/32 + // We use the worst-case until we can split the SEW. + defvar VSMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c; + // Latency of vsmul: e8/e16/e32 = 1/2/4/8, e64 = 4/8/16/32 + // We use the worst-case until we can split the SEW. + defvar VSMulOcc = ConstValueUntilLMULThenDoubleBase<"M1", 1, 4, mx>.c; + // TODO: change WriteVSMulV/X to be defined with LMULSEWSchedWrites + let Latency = VSMulLat, ReleaseAtCycles = [VSMulOcc] in { + defm "" : LMULWriteResMX<"WriteVSMulV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [SMX60_VIEU], mx, IsWorstCase>; + } + + defvar VSShiftLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + defvar VSShiftOcc = ConstOneUntilMF2ThenDouble<mx>.c; + let Latency = VSShiftLat, ReleaseAtCycles = [VSShiftOcc] in { + defm "" : LMULWriteResMX<"WriteVSShiftV", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [SMX60_VIEU], mx, IsWorstCase>; + } } // 13. Vector Floating-Point Instructions diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504c..6a1f4b3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -114,6 +114,9 @@ public: bool enableScalableVectorization() const override { return ST->hasVInstructions(); } + bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override { + return ST->hasVInstructions(); + } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL |