Diffstat (limited to 'llvm/lib/Target/RISCV')
22 files changed, 411 insertions, 254 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 9538b20..95ec42f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -327,19 +327,19 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const { - MCContext &C = getContext(); - int64_t LineDelta = F.getDwarfLineDelta(); const MCExpr &AddrDelta = F.getDwarfAddrDelta(); - SmallVector<MCFixup, 1> Fixups; size_t OldSize = F.getVarSize(); int64_t Value; + // If the label difference can be resolved, use the default handling, which + // utilizes a shorter special opcode. + if (AddrDelta.evaluateAsAbsolute(Value, *Asm)) + return false; [[maybe_unused]] bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, *Asm); assert(IsAbsolute && "CFA with invalid expression"); - Fixups.clear(); SmallVector<char> Data; raw_svector_ostream OS(Data); @@ -349,33 +349,21 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, encodeSLEB128(LineDelta, OS); } - unsigned Offset; - std::pair<MCFixupKind, MCFixupKind> Fixup; - // According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode // takes a single unsigned half (unencoded) operand. The maximum encodable // value is therefore 65535. Set a conservative upper bound for relaxation. + unsigned PCBytes; if (Value > 60000) { - unsigned PtrSize = C.getAsmInfo()->getCodePointerSize(); - - OS << uint8_t(dwarf::DW_LNS_extended_op); - encodeULEB128(PtrSize + 1, OS); - - OS << uint8_t(dwarf::DW_LNE_set_address); - Offset = OS.tell(); - assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size"); - Fixup = RISCV::getRelocPairForSize(PtrSize); - OS.write_zeros(PtrSize); + PCBytes = getContext().getAsmInfo()->getCodePointerSize(); + OS << uint8_t(dwarf::DW_LNS_extended_op) << uint8_t(PCBytes + 1) + << uint8_t(dwarf::DW_LNE_set_address); + OS.write_zeros(PCBytes); } else { + PCBytes = 2; OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc); - Offset = OS.tell(); - Fixup = RISCV::getRelocPairForSize(2); support::endian::write<uint16_t>(OS, 0, llvm::endianness::little); } - - const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta); - Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(Fixup))); - Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(Fixup))); + auto Offset = OS.tell() - PCBytes; if (LineDelta == INT64_MAX) { OS << uint8_t(dwarf::DW_LNS_extended_op); @@ -386,7 +374,8 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, } F.setVarContents(Data); - F.setVarFixups(Fixups); + F.setVarFixups({MCFixup::create(Offset, &AddrDelta, + MCFixup::getDataKindForSize(PCBytes))}); WasRelaxed = OldSize != Data.size(); return true; } @@ -754,7 +743,7 @@ std::optional<bool> RISCVAsmBackend::evaluateFixup(const MCFragment &, if (!AUIPCTarget.getAddSym()) return false; - const MCSymbolELF &SA = cast<MCSymbolELF>(*AUIPCTarget.getAddSym()); + auto &SA = static_cast<const MCSymbolELF &>(*AUIPCTarget.getAddSym()); if (SA.isUndefined()) return false; @@ -881,9 +870,8 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup, } void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, - const MCValue &Target, - MutableArrayRef<char> Data, uint64_t Value, - bool IsResolved) { + const MCValue &Target, uint8_t *Data, + uint64_t Value, bool IsResolved) { IsResolved = addReloc(F, Fixup, Target, Value, 
IsResolved); MCFixupKind Kind = Fixup.getKind(); if (mc::isRelocation(Kind)) @@ -898,15 +886,14 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, // Shift the value into position. Value <<= Info.TargetOffset; - unsigned Offset = Fixup.getOffset(); unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; - - assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); + assert(Fixup.getOffset() + NumBytes <= F.getSize() && + "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + Data[i] |= uint8_t((Value >> (i * 8)) & 0xff); } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index d97d632..adec1ec 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -46,8 +46,7 @@ public: void maybeAddVendorReloc(const MCFragment &, const MCFixup &); void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target, - MutableArrayRef<char> Data, uint64_t Value, - bool IsResolved) override; + uint8_t *Data, uint64_t Value, bool IsResolved) override; std::unique_ptr<MCObjectTargetWriter> createObjectTargetWriter() const override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 9bf7896..2885e3c 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -55,8 +55,8 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup, case ELF::R_RISCV_TLS_GOT_HI20: case ELF::R_RISCV_TLS_GD_HI20: case ELF::R_RISCV_TLSDESC_HI20: - if (auto *SA = Target.getAddSym()) - cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS); + if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym())) + static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS); break; case ELF::R_RISCV_PLT32: case ELF::R_RISCV_GOT32_PCREL: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index c654fd2b..543c4c5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -117,7 +117,7 @@ void RISCVTargetELFStreamer::reset() { void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) { getStreamer().getAssembler().registerSymbol(Symbol); - cast<MCSymbolELF>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC); + static_cast<MCSymbolELF &>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC); } void RISCVELFStreamer::reset() { @@ -142,7 +142,8 @@ void RISCVELFStreamer::emitInstructionsMappingSymbol() { } void RISCVELFStreamer::emitMappingSymbol(StringRef Name) { - auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name)); + auto *Symbol = + static_cast<MCSymbolELF *>(getContext().createLocalSymbol(Name)); emitLabel(Symbol); Symbol->setType(ELF::STT_NOTYPE); Symbol->setBinding(ELF::STB_LOCAL); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index f816561c..98c8738 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -68,27 +68,6 @@ enum Fixups { fixup_riscv_invalid, NumTargetFixupKinds = fixup_riscv_invalid - 
FirstTargetFixupKind }; - -static inline std::pair<MCFixupKind, MCFixupKind> -getRelocPairForSize(unsigned Size) { - switch (Size) { - default: - llvm_unreachable("unsupported fixup size"); - case 1: - return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8, - FirstLiteralRelocationKind + ELF::R_RISCV_SUB8); - case 2: - return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16, - FirstLiteralRelocationKind + ELF::R_RISCV_SUB16); - case 4: - return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32, - FirstLiteralRelocationKind + ELF::R_RISCV_SUB32); - case 8: - return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64, - FirstLiteralRelocationKind + ELF::R_RISCV_SUB64); - } -} - } // end namespace llvm::RISCV #endif diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 3655861..f70837e 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -68,36 +68,30 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection( const Triple &Triple = Ctx.getTargetTriple(); Align NoteAlign; + uint64_t DescSize; if (Triple.isArch64Bit()) { NoteAlign = Align(8); + DescSize = 16; } else { assert(Triple.isArch32Bit()); NoteAlign = Align(4); + DescSize = 12; } assert(Ctx.getObjectFileType() == MCContext::Environment::IsELF); MCSection *const NoteSection = Ctx.getELFSection(".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC); - NoteSection->setAlignment(NoteAlign); OutStreamer.pushSection(); OutStreamer.switchSection(NoteSection); // Emit the note header - OutStreamer.emitIntValue(4, 4); // n_namsz - - MCSymbol *const NDescBeginSym = Ctx.createTempSymbol(); - MCSymbol *const NDescEndSym = Ctx.createTempSymbol(); - const MCExpr *const NDescSzExpr = - MCBinaryExpr::createSub(MCSymbolRefExpr::create(NDescEndSym, Ctx), - MCSymbolRefExpr::create(NDescBeginSym, Ctx), Ctx); - - OutStreamer.emitValue(NDescSzExpr, 4); // n_descsz + OutStreamer.emitValueToAlignment(NoteAlign); + OutStreamer.emitIntValue(4, 4); // n_namsz + OutStreamer.emitIntValue(DescSize, 4); // n_descsz OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); // n_type OutStreamer.emitBytes(StringRef("GNU", 4)); // n_name // Emit n_desc field - OutStreamer.emitLabel(NDescBeginSym); - OutStreamer.emitValueToAlignment(NoteAlign); // Emit the feature_1_and property OutStreamer.emitIntValue(ELF::GNU_PROPERTY_RISCV_FEATURE_1_AND, 4); // pr_type @@ -105,7 +99,6 @@ void RISCVTargetStreamer::emitNoteGnuPropertySection( OutStreamer.emitIntValue(Feature1And, 4); // pr_data OutStreamer.emitValueToAlignment(NoteAlign); // pr_padding - OutStreamer.emitLabel(NDescEndSym); OutStreamer.popSection(); } diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 171940e..a7329d2 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1700,6 +1700,18 @@ def TuneNLogNVRGather def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; +def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering", + "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">; + +def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering", + "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine 
scheduler">; + +def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering", + "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">; + +def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering", + "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">; + def TuneDisableLatencySchedHeuristic : SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp index 82c0d8d..80a48c5 100644 --- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp @@ -167,9 +167,8 @@ static std::pair<Value *, Value *> matchStridedStart(Value *Start, default: llvm_unreachable("Unexpected opcode"); case Instruction::Or: - // TODO: We'd be better off creating disjoint or here, but we don't yet - // have an IRBuilder API for that. - [[fallthrough]]; + Start = Builder.CreateOr(Start, Splat, "", /*IsDisjoint=*/true); + break; case Instruction::Add: Start = Builder.CreateAdd(Start, Splat); break; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index adbfbeb..03e54b3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -927,6 +927,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, @@ -1105,6 +1106,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); @@ -1181,6 +1183,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); @@ -1352,6 +1355,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, @@ -1442,6 +1446,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, ISD::EXPERIMENTAL_VP_STRIDED_STORE}, VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, @@ -7012,6 +7017,7 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(FDIV) OP_CASE(FNEG) OP_CASE(FABS) + OP_CASE(FCOPYSIGN) OP_CASE(FSQRT) OP_CASE(SMIN) OP_CASE(SMAX) @@ -7079,6 +7085,15 @@ static unsigned 
getRISCVVLOp(SDValue Op) { if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) return RISCVISD::VMXOR_VL; return RISCVISD::XOR_VL; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + return RISCVISD::VZEXT_VL; + case ISD::SIGN_EXTEND: + return RISCVISD::VSEXT_VL; + case ISD::SETCC: + return RISCVISD::SETCC_VL; + case ISD::VSELECT: + return RISCVISD::VMERGE_VL; case ISD::VP_SELECT: case ISD::VP_MERGE: return RISCVISD::VMERGE_VL; @@ -7419,12 +7434,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (Op.getOperand(0).getValueType().isVector() && Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); - return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); + if (Op.getValueType().isScalableVector()) + return Op; + return lowerToScalableOp(Op, DAG); case ISD::SIGN_EXTEND: if (Op.getOperand(0).getValueType().isVector() && Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); - return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); + if (Op.getValueType().isScalableVector()) + return Op; + return lowerToScalableOp(Op, DAG); case ISD::SPLAT_VECTOR_PARTS: return lowerSPLAT_VECTOR_PARTS(Op, DAG); case ISD::INSERT_VECTOR_ELT: @@ -8103,6 +8122,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::MLOAD: case ISD::VP_LOAD: return lowerMaskedLoad(Op, DAG); + case ISD::VP_LOAD_FF: + return lowerLoadFF(Op, DAG); case ISD::MSTORE: case ISD::VP_STORE: return lowerMaskedStore(Op, DAG); @@ -8166,7 +8187,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget)) return SplitVectorOp(Op, DAG); - return lowerFixedLengthVectorSetccToRVV(Op, DAG); + return lowerToScalableOp(Op, DAG); } case ISD::ADD: case ISD::SUB: @@ -8182,6 +8203,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::UREM: case ISD::BSWAP: case ISD::CTPOP: + case ISD::VSELECT: return lowerToScalableOp(Op, DAG); case ISD::SHL: case ISD::SRA: @@ -8250,14 +8272,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerToScalableOp(Op, DAG); assert(Op.getOpcode() != ISD::CTTZ); return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); - case ISD::VSELECT: - return lowerFixedLengthVectorSelectToRVV(Op, DAG); case ISD::FCOPYSIGN: if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) return lowerFCOPYSIGN(Op, DAG, Subtarget); if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); - return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); + return lowerToScalableOp(Op, DAG); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -9694,33 +9714,6 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VecVT, Select, DAG, Subtarget); } -SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( - SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { - MVT ExtVT = Op.getSimpleValueType(); - // Only custom-lower extensions from fixed-length vector types. - if (!ExtVT.isFixedLengthVector()) - return Op; - MVT VT = Op.getOperand(0).getSimpleValueType(); - // Grab the canonical container type for the extended type. Infer the smaller - // type from that to ensure the same number of vector elements, as we know - // the LMUL will be sufficient to hold the smaller type. 
- MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); - // Get the extended container type manually to ensure the same number of - // vector elements between source and dest. - MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), - ContainerExtVT.getVectorElementCount()); - - SDValue Op1 = - convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); - - SDLoc DL(Op); - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); - - return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); -} - // Custom-lower truncations from vectors to mask vectors by using a mask and a // setcc operation: // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) @@ -12739,6 +12732,51 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, return DAG.getMergeValues({Result, Chain}, DL); } +SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op->getSimpleValueType(0); + + const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op); + EVT MemVT = VPLoadFF->getMemoryVT(); + MachineMemOperand *MMO = VPLoadFF->getMemOperand(); + SDValue Chain = VPLoadFF->getChain(); + SDValue BasePtr = VPLoadFF->getBasePtr(); + + SDValue Mask = VPLoadFF->getMask(); + SDValue VL = VPLoadFF->getVectorLength(); + + MVT XLenVT = Subtarget.getXLenVT(); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + unsigned IntID = Intrinsic::riscv_vleff_mask; + SDValue Ops[] = { + Chain, + DAG.getTargetConstant(IntID, DL, XLenVT), + DAG.getUNDEF(ContainerVT), + BasePtr, + Mask, + VL, + DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)}; + + SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other}); + + SDValue Result = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); + SDValue OutVL = Result.getValue(1); + Chain = Result.getValue(2); + + if (VT.isFixedLengthVector()) + Result = convertFromScalableVector(VT, Result, DAG, Subtarget); + + return DAG.getMergeValues({Result, OutVL, Chain}, DL); +} + SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -12834,31 +12872,6 @@ SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op, return Res; } -SDValue -RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, - SelectionDAG &DAG) const { - MVT InVT = Op.getOperand(0).getSimpleValueType(); - MVT ContainerVT = getContainerForFixedLengthVector(InVT); - - MVT VT = Op.getSimpleValueType(); - - SDValue Op1 = - convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); - SDValue Op2 = - convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); - - SDLoc DL(Op); - auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, - DAG, Subtarget); - MVT MaskVT = getMaskTypeFor(ContainerVT); - - SDValue Cmp = - DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, - {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); - - return convertFromScalableVector(VT, Cmp, DAG, Subtarget); -} - SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); @@ -12985,51 +12998,6 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { return Max; } -SDValue 
RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( - SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - SDValue Mag = Op.getOperand(0); - SDValue Sign = Op.getOperand(1); - assert(Mag.getValueType() == Sign.getValueType() && - "Can only handle COPYSIGN with matching types."); - - MVT ContainerVT = getContainerForFixedLengthVector(VT); - Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); - Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); - - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, - Sign, DAG.getUNDEF(ContainerVT), Mask, VL); - - return convertFromScalableVector(VT, CopySign, DAG, Subtarget); -} - -SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( - SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getSimpleValueType(); - MVT ContainerVT = getContainerForFixedLengthVector(VT); - - MVT I1ContainerVT = - MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); - - SDValue CC = - convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); - SDValue Op1 = - convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); - SDValue Op2 = - convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); - - SDLoc DL(Op); - SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - - SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, - Op2, DAG.getUNDEF(ContainerVT), VL); - - return convertFromScalableVector(VT, Select, DAG, Subtarget); -} - SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const { const auto &TSInfo = @@ -13056,7 +13024,9 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, // "cast" fixed length vector to a scalable vector. assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && "Only fixed length vectors are supported!"); - Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); + MVT VContainerVT = ContainerVT.changeVectorElementType( + V.getSimpleValueType().getVectorElementType()); + Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget)); } SDLoc DL(Op); @@ -21478,11 +21448,10 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( // TODO: Add more target nodes. switch (Op.getOpcode()) { case RISCVISD::SELECT_CC: - // Integer select_cc cannot create poison. - // TODO: What are the FP poison semantics? - // TODO: This instruction blocks poison from the unselected operand, can - // we do anything with that? - return !Op.getValueType().isInteger(); + // Integer comparisons cannot create poison. 
+ assert(Op.getOperand(0).getValueType().isInteger() && + "RISCVISD::SELECT_CC only compares integers"); + return false; } return TargetLowering::canCreateUndefOrPoisonForTargetNode( Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth); @@ -22550,6 +22519,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( constexpr StringLiteral SupportedInterruptKinds[] = { "machine", "supervisor", + "rnmi", "qci-nest", "qci-nonest", "SiFive-CLIC-preemptible", @@ -22567,6 +22537,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments( reportFatalUsageError( "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension"); + if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi()) + reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension"); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF)) reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot " @@ -23212,7 +23184,11 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (Kind == "supervisor") RetOpc = RISCVISD::SRET_GLUE; - else if (Kind == "qci-nest" || Kind == "qci-nonest") { + else if (Kind == "rnmi") { + assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) && + "Need Smrnmi extension for rnmi"); + RetOpc = RISCVISD::MNRET_GLUE; + } else if (Kind == "qci-nest" || Kind == "qci-nonest") { assert(STI.hasFeature(RISCV::FeatureVendorXqciint) && "Need Xqciint for qci-(no)nest"); RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE; @@ -24715,7 +24691,7 @@ SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size); if (Align) SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT)); + DAG.getSignedConstant(-Align->value(), dl, VT)); // Set the real SP to the new value with a probing loop. 
Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ca70c46..433b8be 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -526,6 +526,7 @@ private: SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op, @@ -534,9 +535,6 @@ private: SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op, - SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const; @@ -551,8 +549,6 @@ private: SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, - unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6536078..8bd3830 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -75,6 +75,8 @@ def riscv_sret_glue : RVSDNode<"SRET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; def riscv_mret_glue : RVSDNode<"MRET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; +def riscv_mnret_glue : RVSDNode<"MNRET_GLUE", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; def riscv_mileaveret_glue : RVSDNode<"QC_C_MILEAVERET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; @@ -935,7 +937,6 @@ def MRET : Priv<"mret", 0b0011000>, Sched<[]> { let rs1 = 0; let rs2 = 0b00010; } -} // isBarrier = 1, isReturn = 1, isTerminator = 1 let Predicates = [HasStdExtSmrnmi] in { def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> { @@ -944,6 +945,8 @@ def MNRET : Priv<"mnret", 0b0111000>, Sched<[]> { let rs2 = 0b00010; } }// Predicates = [HasStdExtSmrnmi] +} // isBarrier = 1, isReturn = 1, isTerminator = 1 + def WFI : Priv<"wfi", 0b0001000>, Sched<[]> { let rd = 0; @@ -1801,6 +1804,8 @@ def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; def : Pat<(riscv_sret_glue), (SRET)>; def : Pat<(riscv_mret_glue), (MRET)>; +let Predicates = [HasStdExtSmrnmi] in +def : Pat<(riscv_mnret_glue), (MNRET)>; let isCall = 1, Defs = [X1] in { let Predicates = [NoStdExtZicfilp] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 5265613..2c64b0c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -14,6 +14,14 @@ // Operand and SDNode 
transformation definitions. //===----------------------------------------------------------------------===// +def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 3>, + SDTCisPtrTy<2>, + SDTCisVT<3, XLenVT>]>; + +def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + def uimm5nonzero : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "NonZero">; @@ -27,6 +35,8 @@ def uimm5nonzero : RISCVOp<XLenVT>, }]; } +def tuimm5nonzero : TImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]>; + def uimm5gt3 : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm > 3) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "GT3">; @@ -92,6 +102,8 @@ def uimm5slist : RISCVOp<XLenVT>, ImmLeaf<XLenVT, }]; } +def tuimm7_lsb00 : TImmLeaf<XLenVT,[{return isShiftedUInt<5, 2>(Imm);}]>; + def uimm10 : RISCVUImmLeafOp<10>; def uimm11 : RISCVUImmLeafOp<11>; @@ -457,6 +469,13 @@ class QCIRVInstRR<bits<5> funct5, DAGOperand InTyRs1, string opcodestr> : RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd), (ins InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr, "$rd, $rs1, $rs2">; +class QCIRVInstRRTied<bits<5> funct5, DAGOperand InTyRs1, string opcodestr> + : RVInstR<{0b00, funct5}, 0b011, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, InTyRs1:$rs1, GPRNoX0:$rs2), opcodestr, + "$rd, $rs1, $rs2"> { + let Constraints = "$rd = $rd_wb"; +} + class QCIBitManipRII<bits<3> funct3, bits<2> funct2, DAGOperand InTyRs1, string opcodestr> : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd), @@ -470,11 +489,26 @@ class QCIBitManipRII<bits<3> funct3, bits<2> funct2, let Inst{24-20} = shamt; } +class QCIBitManipRIITied<bits<3> funct3, bits<2> funct2, + DAGOperand InTyRs1, string opcodestr> + : RVInstIBase<funct3, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, + InTyRs1:$rs1, uimm5_plus1:$width, uimm5:$shamt), + opcodestr, "$rd, $rs1, $width, $shamt"> { + let Constraints = "$rd = $rd_wb"; + bits<5> shamt; + bits<5> width; + + let Inst{31-30} = funct2; + let Inst{29-25} = width; + let Inst{24-20} = shamt; +} + class QCIRVInstRI<bits<1> funct1, DAGOperand InTyImm11, string opcodestr> - : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd), - (ins GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr, + : RVInstIBase<0b000, OPC_CUSTOM_0, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm11:$imm11), opcodestr, "$rd, $rs1, $imm11"> { + let Constraints = "$rd = $rd_wb"; bits<11> imm11; let Inst{31-31} = funct1; @@ -858,12 +892,12 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let Inst{29-25} = width; let Inst{24-20} = shamt; } - def QC_INSB : QCIBitManipRII<0b001, 0b01, GPR, "qc.insb">; - def QC_INSBH : QCIBitManipRII<0b001, 0b10, GPR, "qc.insbh">; - def QC_INSBR : QCIRVInstRR<0b00000, GPR, "qc.insbr">; - def QC_INSBHR : QCIRVInstRR<0b00001, GPR, "qc.insbhr">; - def QC_INSBPR : QCIRVInstRR<0b00010, GPR, "qc.insbpr">; - def QC_INSBPRH : QCIRVInstRR<0b00011, GPR, "qc.insbprh">; + def QC_INSB : QCIBitManipRIITied<0b001, 0b01, GPR, "qc.insb">; + def QC_INSBH : QCIBitManipRIITied<0b001, 0b10, GPR, "qc.insbh">; + def QC_INSBR : QCIRVInstRRTied<0b00000, GPR, "qc.insbr">; + def QC_INSBHR : QCIRVInstRRTied<0b00001, GPR, "qc.insbhr">; + def QC_INSBPR : QCIRVInstRRTied<0b00010, GPR, "qc.insbpr">; + def QC_INSBPRH : QCIRVInstRRTied<0b00011, GPR, "qc.insbprh">; def QC_EXTU : QCIBitManipRII<0b010, 0b00, GPRNoX0, "qc.extu">; def QC_EXTDU : QCIBitManipRII<0b010, 0b10, 
GPRNoX31, "qc.extdu">; def QC_EXTDUR : QCIRVInstRR<0b00100, GPRNoX31, "qc.extdur">; @@ -1566,6 +1600,11 @@ def : QCISELECTIICCPat <SETEQ, QC_SELECTIIEQ>; def : QCISELECTIICCPat <SETNE, QC_SELECTIINE>; } // Predicates = [HasVendorXqcics, IsRV32] +let Predicates = [HasVendorXqcilsm, IsRV32] in { +def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7), + (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; +} // Predicates = [HasVendorXqcilsm, IsRV32] + //===----------------------------------------------------------------------===/i // Compress Instruction tablegen backend. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index d2a6514..27ad10a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)), let Predicates = [HasStdExtZbkb, IsRV32] in { def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))), (PACK GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (or - (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), + +// Match a pattern of 2 bytes being inserted into bits [31:16], with bits +// bits [15:0] coming from a zero extended value. We can use pack with packh for +// bits [31:16]. If bits [15:0] can also be a packh, it can be matched +// separately. +def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), - (or - (shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)), - (zexti8 (XLenVT GPR:$op0rs1)))), - (PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)), + (zexti16 (XLenVT GPR:$rs1))), + (PACK (XLenVT GPR:$rs1), (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; } @@ -661,6 +663,27 @@ def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)), def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), (zexti16 (i64 GPR:$rs1)))), (PACKW GPR:$rs1, GPR:$rs2)>; + +// Match a pattern of 2 bytes being inserted into bits [31:16], with bits +// bits [15:0] coming from a zero extended value, and bits [63:32] being +// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can +// also be a packh, it can be matched separately. +def : Pat<(binop_allwusers<or> + (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (zexti16 (XLenVT GPR:$rs1))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; +// We need to manually reassociate the patterns because of the binop_allwusers. 
+def : Pat<(binop_allwusers<or> + (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; +def : Pat<(binop_allwusers<or> + (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; } // Predicates = [HasStdExtZbkb, IsRV64] let Predicates = [HasStdExtZbb, IsRV32] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index a250ac8..5a5a9ed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -206,8 +206,6 @@ let Predicates = [HasStdExtZvksh], RVVConstraint = VS2Constraint in { //===----------------------------------------------------------------------===// defvar I32IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 32)); -defvar I32I64IntegerVectors = !filter(vti, AllIntegerVectors, - !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64))); class ZvkI32IntegerVectors<string vd_lmul> { list<VTypeInfo> vs2_types = !cond(!eq(vd_lmul, "M8") : !filter(vti, I32IntegerVectors, !le(vti.LMul.octuple, 32)), @@ -1126,16 +1124,16 @@ let Predicates = [HasStdExtZvkned] in { defm : VPatUnaryV_S_NoMaskVectorCrypto<"int_riscv_vaesz", "PseudoVAESZ", I32IntegerVectors>; } // Predicates = [HasStdExtZvkned] -let Predicates = [HasStdExtZvknha] in { +let Predicates = [HasStdExtZvknhaOrZvknhb] in { defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>; - defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I32IntegerVectors>; defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>; } // Predicates = [HasStdExtZvknha] let Predicates = [HasStdExtZvknhb] in { - defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>; - defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>; - defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I64IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I64IntegerVectors>; + defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I64IntegerVectors, isSEWAware=true>; } // Predicates = [HasStdExtZvknhb] let Predicates = [HasStdExtZvksed] in { diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td index 875a93d..39e099b 100644 --- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td +++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td @@ -91,3 +91,59 @@ def TuneLDADDFusion CheckIsImmOperand<2>, CheckImmOperand<2, 0> ]>>; + +defvar Load = [LB, LH, LW, LD, LBU, LHU, LWU]; + +// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu): +// add(.uw) rd, rs1, rs2 +// load rd, imm12(rd) +def TuneADDLoadFusion + : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion", + CheckOpcode<[ADD, ADD_UW]>, + CheckOpcode<Load>>; + +// Fuse AUIPC followed by by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// auipc rd, imm20 +// load rd, imm12(rd) +def TuneAUIPCLoadFusion + : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion", + "Enable AUIPC + 
load macrofusion", + CheckOpcode<[AUIPC]>, + CheckOpcode<Load>>; + +// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// lui rd, imm[31:12] +// load rd, imm12(rd) +def TuneLUILoadFusion + : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion", + "Enable LUI + load macrofusion", + CheckOpcode<[LUI]>, + CheckOpcode<Load>>; + +// Bitfield extract fusion: similar to TuneShiftedZExtWFusion +// but without the immediate restriction +// slli rd, rs1, imm12 +// srli rd, rd, imm12 +def TuneBFExtFusion + : SimpleFusion<"bfext-fusion", "HasBFExtFusion", + "Enable SLLI+SRLI (bitfield extract) macrofusion", + CheckOpcode<[SLLI]>, + CheckOpcode<[SRLI]>>; + +// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// addi rd, rs1, imm12 +// load rd, imm12(rd) +def TuneADDILoadFusion + : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion", + "Enable ADDI + load macrofusion", + CheckOpcode<[ADDI]>, + CheckOpcode<Load>>; + +// Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// shXadd(.uw) rd, rs1, rs2 +// load rd, imm12(rd) +def TuneSHXADDLoadFusion + : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion", + "Enable SH(1|2|3)ADD(.UW) + load macrofusion", + CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>, + CheckOpcode<Load>>; diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 838edf6..31d2b3a 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -590,12 +590,17 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", FeatureStdExtZicboz, FeatureVendorXVentanaCondOps], [TuneVentanaVeyron, + TuneDisableMISchedLoadClustering, + TuneDisablePostMISchedLoadClustering, + TuneDisablePostMISchedStoreClustering, TuneLUIADDIFusion, TuneAUIPCADDIFusion, TuneZExtHFusion, TuneZExtWFusion, TuneShiftedZExtWFusion, - TuneLDADDFusion]> { + TuneADDLoadFusion, + TuneAUIPCLoadFusion, + TuneLUILoadFusion]> { let MVendorID = 0x61f; let MArchID = 0x8000000000010000; let MImpID = 0x111; diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 6ecddad..041dd07 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RISCVSelectionDAGInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" #define GET_SDNODE_DESC #include "RISCVGenSDNodeInfo.inc" @@ -62,3 +64,94 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, } #endif } + +SDValue RISCVSelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const { + const auto &Subtarget = DAG.getSubtarget<RISCVSubtarget>(); + // We currently do this only for Xqcilsm + if (!Subtarget.hasVendorXqcilsm()) + return SDValue(); + + // Do this only if we know the size at compile time. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + + uint64_t NumberOfBytesToWrite = ConstantSize->getZExtValue(); + + // Do this only if it is word aligned and we write a multiple of 4 bytes. 
+ if (!(Alignment >= 4) || !((NumberOfBytesToWrite & 3) == 0)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + SDValue SrcValueReplicated = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + int NumberOfWords = NumberOfBytesToWrite / 4; + MachineFunction &MF = DAG.getMachineFunction(); + auto Volatile = + isVolatile ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + + // Helper for constructing the QC_SETWMI instruction + auto getSetwmiNode = [&](uint8_t SizeWords, uint8_t OffsetSetwmi) -> SDValue { + SDValue Ops[] = {Chain, SrcValueReplicated, Dst, + DAG.getTargetConstant(SizeWords, dl, MVT::i32), + DAG.getTargetConstant(OffsetSetwmi, dl, MVT::i32)}; + MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand( + DstPtrInfo.getWithOffset(OffsetSetwmi), + MachineMemOperand::MOStore | Volatile, SizeWords * 4, Align(4)); + return DAG.getMemIntrinsicNode(RISCVISD::QC_SETWMI, dl, + DAG.getVTList(MVT::Other), Ops, MVT::i32, + BaseMemOperand); + }; + + // If i8 type and constant non-zero value. + if ((Src.getValueType() == MVT::i8) && !isNullConstant(Src)) + // Replicate byte to word by multiplication with 0x01010101. + SrcValueReplicated = + DAG.getNode(ISD::MUL, dl, MVT::i32, SrcValueReplicated, + DAG.getConstant(0x01010101ul, dl, MVT::i32)); + + // We limit a QC_SETWMI to 16 words or less to improve interruptibility. + // So for 1-16 words we use a single QC_SETWMI: + // + // QC_SETWMI reg1, N, 0(reg2) + // + // For 17-32 words we use two QC_SETWMI's with the first as 16 words and the + // second for the remainder: + // + // QC_SETWMI reg1, 16, 0(reg2) + // QC_SETWMI reg1, N, 64(reg2) + // + // For 33-48 words, we would like to use (16, 16, n), but that means the last + // QC_SETWMI needs an offset of 128 which the instruction doesn't support. + // So in this case we use a length of 15 for the second instruction and we do + // the rest with the third instruction. 
+ // This means the maximum inlined number of words is 47 (for now): + // + // QC_SETWMI R2, R0, 16, 0 + // QC_SETWMI R2, R0, 15, 64 + // QC_SETWMI R2, R0, N, 124 + // + // For 48 words or more, call the target independent memset + if (NumberOfWords >= 48) + return SDValue(); + + if (NumberOfWords <= 16) { + // 1 - 16 words + return getSetwmiNode(NumberOfWords, 0); + } + + if (NumberOfWords <= 32) { + // 17 - 32 words + OutChains.push_back(getSetwmiNode(NumberOfWords - 16, 64)); + OutChains.push_back(getSetwmiNode(16, 0)); + } else { + // 33 - 47 words + OutChains.push_back(getSetwmiNode(NumberOfWords - 31, 124)); + OutChains.push_back(getSetwmiNode(15, 64)); + OutChains.push_back(getSetwmiNode(16, 0)); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h index 641189f..08c8d11 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.h @@ -34,6 +34,12 @@ public: void verifyTargetNode(const SelectionDAG &DAG, const SDNode *N) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo) const override; + bool hasPassthruOp(unsigned Opcode) const { return GenNodeInfo.getDesc(Opcode).TSFlags & RISCVISD::HasPassthruOpMask; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index da6ac2f..66ce134 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -94,16 +94,6 @@ static cl::opt<bool> cl::desc("Enable the loop data prefetch pass"), cl::init(true)); -static cl::opt<bool> EnableMISchedLoadStoreClustering( - "riscv-misched-load-store-clustering", cl::Hidden, - cl::desc("Enable load and store clustering in the machine scheduler"), - cl::init(true)); - -static cl::opt<bool> EnablePostMISchedLoadStoreClustering( - "riscv-postmisched-load-store-clustering", cl::Hidden, - cl::desc("Enable PostRA load and store clustering in the machine scheduler"), - cl::init(true)); - static cl::opt<bool> DisableVectorMaskMutation( "riscv-disable-vector-mask-mutation", cl::desc("Disable the vector mask scheduling mutation"), cl::init(false), @@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, ScheduleDAGInstrs * RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); ScheduleDAGMILive *DAG = createSchedLive(C); - if (EnableMISchedLoadStoreClustering) { + + if (ST.enableMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enableMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } - const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); if (!DisableVectorMaskMutation && ST.hasVInstructions()) DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI)); @@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { ScheduleDAGInstrs * RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); ScheduleDAGMI *DAG = createSchedPostRA(C); - if 
(EnablePostMISchedLoadStoreClustering) { + + if (ST.enablePostMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enablePostMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } return DAG; } @@ -642,12 +637,6 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { OptimizationLevel Level) { LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated)); }); - - PB.registerVectorizerEndEPCallback( - [](FunctionPassManager &FPM, OptimizationLevel Level) { - if (Level.isOptimizingForSpeed()) - FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass())); - }); } yaml::MachineFunctionInfo * diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 0d5eb86..67f924a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -979,11 +979,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const { - // The interleaved memory access pass will lower interleaved memory ops (i.e - // a load and store followed by a specific shuffle) to vlseg/vsseg - // intrinsics. - if (!UseMaskForCond && !UseMaskForGaps && - Factor <= TLI->getMaxSupportedInterleaveFactor()) { + // The interleaved memory access pass will lower (de)interleave ops combined + // with an adjacent appropriate memory to vlseg/vsseg intrinsics. vlseg/vsseg + // only support masking per-iteration (i.e. condition), not per-segment (i.e. + // gap). + if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { auto *VTy = cast<VectorType>(VecTy); std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy); // Need to make sure type has't been scalarized diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index d62d99c..05d504c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -398,6 +398,10 @@ public: bool enableInterleavedAccessVectorization() const override { return true; } + bool enableMaskedInterleavedAccessVectorization() const override { + return ST->hasVInstructions(); + } + unsigned getMinTripCountTailFoldingThreshold() const override; enum RISCVRegisterClass { GPRRC, FPRRC, VRRC }; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index c946451..37a71e8 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -69,6 +69,7 @@ struct OperandInfo { // Represent as 1,2,4,8, ... and fractional indicator. This is because // EMUL can take on values that don't map to RISCVVType::VLMUL values exactly. // For example, a mask operand can have an EMUL less than MF8. + // If nullopt, then EMUL isn't used (i.e. only a single scalar is read). 
std::optional<std::pair<unsigned, bool>> EMUL; unsigned Log2EEW; @@ -83,12 +84,14 @@ struct OperandInfo { OperandInfo() = delete; - static bool EMULAndEEWAreEqual(const OperandInfo &A, const OperandInfo &B) { - return A.Log2EEW == B.Log2EEW && A.EMUL == B.EMUL; - } - - static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) { - return A.Log2EEW == B.Log2EEW; + /// Return true if the EMUL and EEW produced by \p Def is compatible with the + /// EMUL and EEW used by \p User. + static bool areCompatible(const OperandInfo &Def, const OperandInfo &User) { + if (Def.Log2EEW != User.Log2EEW) + return false; + if (User.EMUL && Def.EMUL != User.EMUL) + return false; + return true; } void print(raw_ostream &OS) const { @@ -98,7 +101,7 @@ struct OperandInfo { OS << "f"; OS << EMUL->first; } else - OS << "EMUL: unknown\n"; + OS << "EMUL: none\n"; OS << ", EEW: " << (1 << Log2EEW); } }; @@ -1399,13 +1402,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { return std::nullopt; } - // If the operand is used as a scalar operand, then the EEW must be - // compatible. Otherwise, the EMUL *and* EEW must be compatible. - bool IsVectorOpUsedAsScalarOp = isVectorOpUsedAsScalarOp(UserOp); - if ((IsVectorOpUsedAsScalarOp && - !OperandInfo::EEWAreEqual(*ConsumerInfo, *ProducerInfo)) || - (!IsVectorOpUsedAsScalarOp && - !OperandInfo::EMULAndEEWAreEqual(*ConsumerInfo, *ProducerInfo))) { + if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) { LLVM_DEBUG( dbgs() << " Abort due to incompatible information for EMUL or EEW.\n"); |
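As an aside to the RISCVAsmBackend::relaxDwarfLineAddr hunk above: the patch replaces the old ADD/SUB relocation pair with a single data fixup whose width depends on which line-program opcode is chosen. Below is a minimal standalone sketch of just that decision (the conservative 60000 bound for DW_LNS_fixed_advance_pc versus DW_LNE_set_address, and the width of the PC field the fixup must cover). The names `AdvancePlan` and `planAddrAdvance` are illustrative only and do not exist in LLVM; the real code writes into an MCFragment and creates an MCFixup at `OS.tell() - PCBytes`.

```cpp
// Standalone model of the opcode/fixup-width choice in relaxDwarfLineAddr.
// Illustrative names; compile as plain C++ with no LLVM dependencies.
#include <cstdint>
#include <iostream>

struct AdvancePlan {
  const char *Opcode; // which DWARF line-program opcode gets emitted
  unsigned PCBytes;   // width of the zero-filled PC field the fixup patches
};

// DW_LNS_fixed_advance_pc takes an unencoded uhalf operand, so 65535 is the
// architectural limit; 60000 is the conservative bound used for relaxation.
AdvancePlan planAddrAdvance(int64_t AddrDelta, unsigned CodePointerSize) {
  if (AddrDelta > 60000)
    return {"DW_LNE_set_address", CodePointerSize}; // 4- or 8-byte address
  return {"DW_LNS_fixed_advance_pc", 2};            // 2-byte uhalf operand
}

int main() {
  for (int64_t Delta : {100, 59999, 70000}) {
    AdvancePlan P = planAddrAdvance(Delta, 8);
    std::cout << Delta << " -> " << P.Opcode << ", fixup covers " << P.PCBytes
              << " bytes\n";
  }
}
```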
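The RISCVSelectionDAGInfo.cpp hunk documents, in comments, how the Xqcilsm memset lowering splits a constant-size fill into at most three QC_SETWMI stores (16 words or fewer per instruction for interruptibility, a 124-byte cap on the offset field, and a libcall fallback at 48 words). The sketch below models only that chunking arithmetic plus the byte-to-word replication; `planSetwmi`, `SetwmiChunk`, and `replicateByte` are hypothetical helpers for illustration, whereas the real code emits RISCVISD::QC_SETWMI memory-intrinsic nodes chained together.

```cpp
// Self-contained model of the QC_SETWMI chunking used by the Xqcilsm memset
// lowering. Compiles as plain C++; no LLVM types involved.
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

struct SetwmiChunk {
  unsigned Words;      // number of 32-bit words stored by this QC_SETWMI
  unsigned ByteOffset; // destination offset; uimm7 with the low two bits zero
};

// Returns std::nullopt when the generic memset expansion should be used.
std::optional<std::vector<SetwmiChunk>> planSetwmi(uint64_t Bytes,
                                                   unsigned Alignment) {
  if (Alignment < 4 || Bytes % 4 != 0)
    return std::nullopt;           // need word alignment and whole-word size
  uint64_t Words = Bytes / 4;
  if (Words == 0 || Words >= 48)
    return std::nullopt;           // 48+ words: leave it to the generic path
  if (Words <= 16)                 // 1-16 words: a single instruction
    return std::vector<SetwmiChunk>{{unsigned(Words), 0}};
  if (Words <= 32)                 // 17-32 words: 16 at offset 0, rest at 64
    return std::vector<SetwmiChunk>{{16, 0}, {unsigned(Words - 16), 64}};
  // 33-47 words: the offset field tops out at 124, so the middle chunk is
  // limited to 15 words (bytes 64..123) and the tail starts at 124.
  return std::vector<SetwmiChunk>{
      {16, 0}, {15, 64}, {unsigned(Words - 31), 124}};
}

// Byte value replicated into a word, as done for non-zero constant i8 fills.
uint32_t replicateByte(uint8_t V) { return uint32_t(V) * 0x01010101u; }

int main() {
  for (uint64_t Bytes : {64, 100, 160, 200}) {
    auto Plan = planSetwmi(Bytes, /*Alignment=*/4);
    std::cout << Bytes << " bytes: ";
    if (!Plan) {
      std::cout << "generic memset\n";
      continue;
    }
    for (const auto &C : *Plan)
      std::cout << "QC_SETWMI " << C.Words << " words @" << C.ByteOffset << "  ";
    std::cout << "(fill word 0x" << std::hex << replicateByte(0xAB) << std::dec
              << ")\n";
  }
}
```

For example, 160 bytes (40 words) splits into 16 + 15 + 9 words at offsets 0, 64, and 124, matching the 33-47-word case described in the patch comments.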