Diffstat (limited to 'llvm/lib/Target/RISCV')
22 files changed, 429 insertions, 339 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index f76f8b3..2c37c3b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -302,6 +302,28 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, Inst = std::move(Res); } +// Check if an R_RISCV_ALIGN relocation is needed for an alignment directive. +// If conditions are met, compute the padding size and create a fixup encoding +// the padding size in the addend. +bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { + // Use default handling unless linker relaxation is enabled and the alignment + // is larger than the nop size. + const MCSubtargetInfo *STI = F.getSubtargetInfo(); + if (!STI->hasFeature(RISCV::FeatureRelax)) + return false; + unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; + if (F.getAlignment() <= MinNopLen) + return false; + + Size = F.getAlignment().value() - MinNopLen; + auto *Expr = MCConstantExpr::create(Size, getContext()); + MCFixup Fixup = + MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); + F.setVarFixups({Fixup}); + F.getParent()->setLinkerRelaxable(); + return true; +} + bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const { MCContext &C = getContext(); @@ -637,7 +659,7 @@ bool RISCVAsmBackend::isPCRelFixupResolved(const MCSymbol *SymA, // Otherwise, check if the offset between the symbol and fragment is fully // resolved, unaffected by linker-relaxable fragments (e.g. instructions or - // offset-affected MCAlignFragment). Complements the generic + // offset-affected FT_Align fragments). Complements the generic // isSymbolRefDifferenceFullyResolvedImpl. if (!PCRelTemp) PCRelTemp = getContext().createTempSymbol(); @@ -887,55 +909,6 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, } } -// Linker relaxation may change code size. We have to insert Nops -// for .align directive when linker relaxation enabled. So then Linker -// could satisfy alignment by removing Nops. -// The function return the total Nops Size we need to insert. -bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign( - const MCAlignFragment &AF, unsigned &Size) { - // Calculate Nops Size only when linker relaxation enabled. - const MCSubtargetInfo *STI = AF.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) - return false; - - unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; - - if (AF.getAlignment() <= MinNopLen) { - return false; - } else { - Size = AF.getAlignment().value() - MinNopLen; - return true; - } -} - -// We need to insert R_RISCV_ALIGN relocation type to indicate the -// position of Nops and the total bytes of the Nops have been inserted -// when linker relaxation enabled. -// The function insert fixup_riscv_align fixup which eventually will -// transfer to R_RISCV_ALIGN relocation type. -bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) { - // Insert the fixup only when linker relaxation enabled. - const MCSubtargetInfo *STI = AF.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) - return false; - - // Calculate total Nops we need to insert. If there are none to insert - // then simply return. 
- unsigned Count; - if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0)) - return false; - - MCContext &Ctx = getContext(); - const MCExpr *Dummy = MCConstantExpr::create(0, Ctx); - MCFixup Fixup = MCFixup::create(0, Dummy, ELF::R_RISCV_ALIGN); - - uint64_t FixedValue = 0; - MCValue NopBytes = MCValue::get(Count); - Asm.getWriter().recordRelocation(AF, Fixup, NopBytes, FixedValue); - return true; -} - std::unique_ptr<MCObjectTargetWriter> RISCVAsmBackend::createObjectTargetWriter() const { return createRISCVELFObjectWriter(OSABI, Is64Bit); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 8c10fbe..d97d632 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -38,14 +38,6 @@ public: const MCTargetOptions &Options); ~RISCVAsmBackend() override = default; - // Return Size with extra Nop Bytes for alignment directive in code section. - bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, - unsigned &Size) override; - - // Insert target specific fixup type for alignment directive in code section. - bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) override; - std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &, uint64_t &) override; bool addReloc(const MCFragment &, const MCFixup &, const MCValue &, @@ -73,6 +65,7 @@ public: void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override; + bool relaxAlign(MCFragment &F, unsigned &Size) override; bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override; bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override; std::pair<bool, bool> relaxLEB128(MCFragment &LF, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index aeda5ac..5abb546 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -52,15 +52,6 @@ namespace RISCV { #include "RISCVGenSearchableTables.inc" } // namespace RISCV -// Report an error but don't ask the user to report a bug. -// TODO: Remove these wrappers. -[[noreturn]] static void reportError(const char *Reason) { - reportFatalUsageError(Reason); -} -[[noreturn]] static void reportError(Error Err) { - reportFatalUsageError(std::move(Err)); -} - namespace RISCVABI { ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, StringRef ABIName) { @@ -97,7 +88,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, if ((TargetABI == RISCVABI::ABI::ABI_ILP32E || (TargetABI == ABI_Unknown && IsRVE && !IsRV64)) && FeatureBits[RISCV::FeatureStdExtD]) - reportError("ILP32E cannot be used with the D ISA extension"); + reportFatalUsageError("ILP32E cannot be used with the D ISA extension"); if (TargetABI != ABI_Unknown) return TargetABI; @@ -105,7 +96,7 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, // If no explicit ABI is given, try to compute the default ABI. 
auto ISAInfo = RISCVFeatures::parseFeatureBits(IsRV64, FeatureBits); if (!ISAInfo) - reportError(ISAInfo.takeError()); + reportFatalUsageError(ISAInfo.takeError()); return getTargetABI((*ISAInfo)->computeDefaultABI()); } @@ -137,12 +128,12 @@ namespace RISCVFeatures { void validate(const Triple &TT, const FeatureBitset &FeatureBits) { if (TT.isArch64Bit() && !FeatureBits[RISCV::Feature64Bit]) - reportError("RV64 target requires an RV64 CPU"); + reportFatalUsageError("RV64 target requires an RV64 CPU"); if (!TT.isArch64Bit() && !FeatureBits[RISCV::Feature32Bit]) - reportError("RV32 target requires an RV32 CPU"); + reportFatalUsageError("RV32 target requires an RV32 CPU"); if (FeatureBits[RISCV::Feature32Bit] && FeatureBits[RISCV::Feature64Bit]) - reportError("RV32 and RV64 can't be combined"); + reportFatalUsageError("RV32 and RV64 can't be combined"); } llvm::Expected<std::unique_ptr<RISCVISAInfo>> diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td index cbf039e..4c303a9 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -56,19 +56,21 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, (sequence "F%u_D", 0, 31))>; +defvar VREGS = (add (sequence "V%u", 0, 31), + (sequence "V%uM2", 0, 31, 2), + (sequence "V%uM4", 0, 31, 4), + (sequence "V%uM8", 0, 31, 8)); + // Same as CSR_Interrupt, but including all vector registers. -def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, VREGS)>; // Same as CSR_Interrupt, but including all 32-bit FP registers and all vector // registers. -def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, VREGS)>; // Same as CSR_Interrupt, but including all 64-bit FP registers and all vector // registers. -def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, VREGS)>; // Same as CSR_Interrupt, but excluding X16-X31. def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt, diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 23b4554..b1ab76a 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1544,10 +1544,53 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset; } +static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, + const Register &Reg) { + MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); + // If it's not a grouped vector register, it doesn't have subregister, so + // the base register is just itself. + if (BaseReg == RISCV::NoRegister) + BaseReg = Reg; + return BaseReg; +} + void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + // In TargetFrameLowering::determineCalleeSaves, any vector register is marked + // as saved if any of its subregister is clobbered, this is not correct in + // vector registers. We only want the vector register to be marked as saved + // if all of its subregisters are clobbered. 
+ // For example: + // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked. + // Correct behavior: v24m2 is marked only if v24 and v25 are marked. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + const RISCVRegisterInfo &TRI = *STI.getRegisterInfo(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned CSReg = CSRegs[i]; + // Only vector registers need special care. + if (!RISCV::VRRegClass.contains(getRVVBaseRegister(TRI, CSReg))) + continue; + + SavedRegs.reset(CSReg); + + auto SubRegs = TRI.subregs(CSReg); + // Set the register and all its subregisters. + if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) { + SavedRegs.set(CSReg); + llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); }); + } + + // Combine to super register if all of its subregisters are marked. + if (!SubRegs.empty() && llvm::all_of(SubRegs, [&](unsigned Reg) { + return SavedRegs.test(Reg); + })) + SavedRegs.set(CSReg); + } + // Unconditionally spill RA and FP only if the function uses a frame // pointer. if (hasFP(MF)) { @@ -2137,16 +2180,6 @@ static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) { : 8; } -static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, - const Register &Reg) { - MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); - // If it's not a grouped vector register, it doesn't have subregister, so - // the base register is just itself. - if (BaseReg == RISCV::NoRegister) - BaseReg = Reg; - return BaseReg; -} - void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const { MachineFunction *MF = MBB.getParent(); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cfec46d2..34910b7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3032,6 +3032,63 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, return true; } +/// Return true if this a load/store that we have a RegRegScale instruction for. +static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, + const RISCVSubtarget &Subtarget) { + if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE) + return false; + EVT VT = cast<MemSDNode>(User)->getMemoryVT(); + if (!(VT.isScalarInteger() && + (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) && + !((VT == MVT::f32 || VT == MVT::f64) && + Subtarget.hasVendorXTHeadFMemIdx())) + return false; + // Don't allow stores of the value. It must be used as the address. + if (User->getOpcode() == ISD::STORE && + cast<StoreSDNode>(User)->getValue() == Add) + return false; + + return true; +} + +/// Is it profitable to fold this Add into RegRegScale load/store. If \p +/// Shift is non-null, then we have matched a shl+add. We allow reassociating +/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a +/// single addi and we don't have a SHXADD instruction we could use. +/// FIXME: May still need to check how many and what kind of users the SHL has. +static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, + SDValue Add, + SDValue Shift = SDValue()) { + bool FoundADDI = false; + for (auto *User : Add->users()) { + if (isRegRegScaleLoadOrStore(User, Add, Subtarget)) + continue; + + // Allow a single ADDI that is used by loads/stores if we matched a shift. 
+ if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(User->getOperand(1)) || + !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue())) + return false; + + FoundADDI = true; + + // If we have a SHXADD instruction, prefer that over reassociating an ADDI. + assert(Shift.getOpcode() == ISD::SHL); + unsigned ShiftAmt = Shift.getConstantOperandVal(1); + if ((ShiftAmt <= 3 && + (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) || + (ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac())) + return false; + + // All users of the ADDI should be load/store. + for (auto *ADDIUser : User->users()) + if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget)) + return false; + } + + return true; +} + bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, @@ -3062,7 +3119,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, if (LHS.getOpcode() == ISD::ADD && !isa<ConstantSDNode>(LHS.getOperand(1)) && isInt<12>(C1->getSExtValue())) { - if (SelectShl(LHS.getOperand(1), Index, Scale)) { + if (SelectShl(LHS.getOperand(1), Index, Scale) && + isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) { SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(), SDLoc(Addr), VT); Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, @@ -3072,7 +3130,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, } // Add is commutative so we need to check both operands. - if (SelectShl(LHS.getOperand(0), Index, Scale)) { + if (SelectShl(LHS.getOperand(0), Index, Scale) && + isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) { SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(), SDLoc(Addr), VT); Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, @@ -3090,22 +3149,48 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, // Try to match a shift on the RHS. if (SelectShl(RHS, Index, Scale)) { + if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS)) + return false; Base = LHS; return true; } // Try to match a shift on the LHS. 
if (SelectShl(LHS, Index, Scale)) { + if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS)) + return false; Base = RHS; return true; } + if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr)) + return false; + Base = LHS; Index = RHS; Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT); return true; } +bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr, + unsigned MaxShiftAmount, + unsigned Bits, SDValue &Base, + SDValue &Index, + SDValue &Scale) { + if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale)) + return false; + + if (Index.getOpcode() == ISD::AND) { + auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1)); + if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { + Index = Index.getOperand(0); + return true; + } + } + + return false; +} + bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() != ISD::ADD) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 72e2f96..ee3a86e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -59,19 +59,14 @@ public: return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale); } + bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, + unsigned Bits, SDValue &Base, SDValue &Index, + SDValue &Scale); + template <unsigned MaxShift, unsigned Bits> bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index, SDValue &Scale) { - if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) { - if (Index.getOpcode() == ISD::AND) { - auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1)); - if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { - Index = Index.getOperand(0); - return true; - } - } - } - return false; + return SelectAddrRegZextRegScale(Addr, MaxShift, Bits, Base, Index, Scale); } bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4845a9c..3918dd2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2319,6 +2319,10 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, if (getLegalZfaFPImm(Imm, VT) >= 0) return true; + // Some constants can be produced by fli+fneg. + if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0) + return true; + // Cannot create a 64 bit floating-point immediate value for rv32. if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { // td can handle +0.0 or -0.0 already. @@ -7936,7 +7940,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8)); OutChains.push_back(LoadVal.getValue(1)); Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal, - DAG.getVectorIdxConstant(i, DL)); + DAG.getTargetConstant(i, DL, MVT::i32)); BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag); } return DAG.getMergeValues( @@ -8015,9 +8019,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, // Extract subregisters in a vector tuple and store them individually. 
for (unsigned i = 0; i < NF; ++i) { - auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, - MVT::getScalableVectorVT(MVT::i8, NumElts), - StoredVal, DAG.getVectorIdxConstant(i, DL)); + auto Extract = + DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, + MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal, + DAG.getTargetConstant(i, DL, MVT::i32)); Ret = DAG.getStore(Chain, DL, Extract, BasePtr, MachinePointerInfo(Store->getAddressSpace()), Store->getBaseAlign(), @@ -10934,9 +10939,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Load->getMemoryVT(), Load->getMemOperand()); SmallVector<SDValue, 9> Results; for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) { - SDValue SubVec = - DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT, - Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL)); + SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT, + Result.getValue(0), + DAG.getTargetConstant(RetIdx, DL, MVT::i32)); Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget)); } Results.push_back(Result.getValue(1)); @@ -11023,7 +11028,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, convertToScalableVector( ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget), - DAG.getVectorIdxConstant(i, DL)); + DAG.getTargetConstant(i, DL, MVT::i32)); SDValue Ops[] = { FixedIntrinsic->getChain(), @@ -12027,7 +12032,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, for (unsigned i = 0U; i < Factor; ++i) Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load, - DAG.getVectorIdxConstant(i, DL)); + DAG.getTargetConstant(i, DL, MVT::i32)); return DAG.getMergeValues(Res, DL); } @@ -12124,8 +12129,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, SDValue StoredVal = DAG.getUNDEF(VecTupTy); for (unsigned i = 0; i < Factor; i++) - StoredVal = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, - Op.getOperand(i), DAG.getConstant(i, DL, XLenVT)); + StoredVal = + DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal, + Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32)); SDValue Ops[] = {DAG.getEntryNode(), DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT), @@ -20690,7 +20696,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDValue Result = DAG.getUNDEF(VT); for (unsigned i = 0; i < NF; ++i) Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat, - DAG.getVectorIdxConstant(i, DL)); + DAG.getTargetConstant(i, DL, MVT::i32)); return Result; } // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer @@ -24014,7 +24020,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( #endif Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT), - Val, DAG.getVectorIdxConstant(0, DL)); + Val, DAG.getTargetConstant(0, DL, MVT::i32)); Parts[0] = Val; return true; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index e0a8c07..f0447e0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -434,7 +434,8 @@ public: ArrayRef<unsigned> Indices, unsigned Factor) const override; - bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + bool lowerInterleavedStore(Instruction *Store, Value *Mask, + ShuffleVectorInst *SVI, unsigned Factor) const override; bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, @@ -444,9 +445,6 @@ public: Instruction 
*Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const override; - bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask, - ArrayRef<Value *> InterleaveOps) const override; - bool supportKCFIBundles() const override { return true; } SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index e23001a..d9c6101 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -174,6 +174,7 @@ class EltDeps<bit vl, bit mask> { def EltDepsNone : EltDeps<vl=0, mask=0>; def EltDepsVL : EltDeps<vl=1, mask=0>; +def EltDepsMask : EltDeps<vl=0, mask=1>; def EltDepsVLMask : EltDeps<vl=1, mask=1>; class EEW <bits<2> val> { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index aef410f..7f1077a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -44,45 +44,62 @@ def simm10_unsigned : RISCVOp { //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnaryImm10<bits<7> funct7, string opcodestr, - DAGOperand TyImm10 = simm10> - : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins TyImm10:$imm10), - opcodestr, "$rd, $imm10"> { +class PLI_i<bits<7> funct7, string opcodestr> + : RVInst<(outs GPR:$rd), (ins simm10:$imm10), opcodestr, "$rd, $imm10", [], + InstFormatOther> { bits<10> imm10; + bits<5> rd; let Inst{31-25} = funct7; let Inst{24-16} = imm10{8-0}; let Inst{15} = imm10{9}; + let Inst{14-12} = 0b010; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnaryImm8<bits<8> funct8, string opcodestr> - : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins uimm8:$uimm8), - opcodestr, "$rd, $uimm8"> { +class PLUI_i<bits<7> funct7, string opcodestr> + : RVInst<(outs GPR:$rd), (ins simm10_unsigned:$imm10), opcodestr, + "$rd, $imm10", [], InstFormatOther> { + bits<10> imm10; + bits<5> rd; + + let Inst{31-25} = funct7; + let Inst{24} = imm10{0}; + let Inst{23-15} = imm10{9-1}; + let Inst{14-12} = 0b010; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class PLI_B_i<bits<8> funct8, string opcodestr> + : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [], + InstFormatOther> { bits<8> uimm8; + bits<5> rd; let Inst{31-24} = funct8; let Inst{23-16} = uimm8; let Inst{15} = 0b0; + let Inst{14-12} = 0b010; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPUnary<bits<3> f, string opcodestr, dag operands, string argstr> : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), operands, opcodestr, argstr> { - bits<5> imm; - bits<5> rs1; - let Inst{31} = 0b1; let Inst{30-28} = f; let Inst{27} = 0b0; - let Inst{19-15} = rs1; } class RVPUnaryImm5<bits<3> f, string opcodestr> : RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm5:$uimm5), "$rd, $rs1, $uimm5"> { bits<5> uimm5; - let imm = uimm5; let Inst{26-25} = 0b01; let Inst{24-20} = uimm5; } @@ -145,11 +162,11 @@ def PSSLAI_W : RVPUnaryImm5<0b101, "psslai.w">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in -def PLI_H : RVPUnaryImm10<0b1011000, "pli.h">; +def PLI_H : PLI_i<0b1011000, "pli.h">; let Predicates = [HasStdExtP, IsRV64] in -def PLI_W : 
RVPUnaryImm10<0b1011001, "pli.w">; +def PLI_W : PLI_i<0b1011001, "pli.w">; let Predicates = [HasStdExtP] in -def PLI_B : RVPUnaryImm8<0b10110100, "pli.b">; +def PLI_B : PLI_B_i<0b10110100, "pli.b">; let Predicates = [HasStdExtP] in { def PSEXT_H_B : RVPUnaryWUF<0b00, 0b00100, "psext.h.b">; @@ -157,11 +174,11 @@ def PSABS_H : RVPUnaryWUF<0b00, 0b00111, "psabs.h">; def PSABS_B : RVPUnaryWUF<0b10, 0b00111, "psabs.b">; } // Predicates = [HasStdExtP] let Predicates = [HasStdExtP, IsRV64] in { -def PSEXT_W_B : RVPUnaryWUF<0b01, 0b00100, "psext.w.b">; -def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">; +def PSEXT_W_B : RVPUnaryWUF<0b01, 0b00100, "psext.w.b">; +def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in -def PLUI_H : RVPUnaryImm10<0b1111000, "plui.h", simm10_unsigned>; +def PLUI_H : PLUI_i<0b1111000, "plui.h">; let Predicates = [HasStdExtP, IsRV64] in -def PLUI_W : RVPUnaryImm10<0b1111001, "plui.w", simm10_unsigned>; +def PLUI_W : PLUI_i<0b1111001, "plui.w">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 5d13a87..33c7138 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1642,7 +1642,7 @@ def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), def : MnemonicAlias<"vpopc.m", "vcpop.m">; -let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask in { +let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask in { let DestEEW = EEW1 in { // vmsbf.m set-before-first mask bit @@ -1655,7 +1655,7 @@ defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; // Vector Iota Instruction defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask +} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index c7cb6e2..f391300 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1377,9 +1377,9 @@ let Predicates = [HasVendorXqciac, IsRV32] in { def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))), (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>; def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)), - (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>; + (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)), - (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>; + (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; } // Predicates = [HasVendorXqciac, IsRV32] /// Simple arithmetic operations diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index dd68a55..0565fcd 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -131,25 +131,56 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy, : Constant::getAllOnesValue(XLenTy); return true; } - auto *VPLdSt = cast<VPIntrinsic>(I); - assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load || - VPLdSt->getIntrinsicID() == 
Intrinsic::vp_store) && - "Unexpected intrinsic"); - Ptr = VPLdSt->getMemoryPointerParam(); - Alignment = VPLdSt->getPointerAlignment().value_or( - DL.getABITypeAlign(VTy->getElementType())); - - assert(Mask && "vp.load and vp.store needs a mask!"); - - Value *WideEVL = VPLdSt->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. - if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor)) - return false; - auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); - VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - return true; + auto *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Unsupported intrinsic type"); + case Intrinsic::vp_load: + case Intrinsic::vp_store: { + auto *VPLdSt = cast<VPIntrinsic>(I); + Ptr = VPLdSt->getMemoryPointerParam(); + Alignment = VPLdSt->getPointerAlignment().value_or( + DL.getABITypeAlign(VTy->getElementType())); + + assert(Mask && "vp.load and vp.store needs a mask!"); + + Value *WideEVL = VPLdSt->getVectorLengthParam(); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. + if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor)) + return false; + + auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); + VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); + return true; + } + case Intrinsic::masked_load: { + Ptr = II->getOperand(0); + Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue(); + + if (!isa<UndefValue>(II->getOperand(3))) + return false; + + assert(Mask && "masked.load needs a mask!"); + + VL = isa<FixedVectorType>(VTy) + ? Builder.CreateElementCount(XLenTy, VTy->getElementCount()) + : Constant::getAllOnesValue(XLenTy); + return true; + } + case Intrinsic::masked_store: { + Ptr = II->getOperand(1); + Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue(); + + assert(Mask && "masked.store needs a mask!"); + + VL = isa<FixedVectorType>(VTy) + ? Builder.CreateElementCount(XLenTy, VTy->getElementCount()) + : Constant::getAllOnesValue(XLenTy); + return true; + } + } } /// Lower an interleaved load into a vlsegN intrinsic. @@ -173,7 +204,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad( const DataLayout &DL = Load->getDataLayout(); auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); - auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); + auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen()); Value *Ptr, *VL; Align Alignment; @@ -201,6 +232,7 @@ bool RISCVTargetLowering::lowerInterleavedLoad( Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load, {VTy, BasePtr->getType(), Stride->getType()}, {BasePtr, Stride, Mask, VL}); + Alignment = commonAlignment(Alignment, Indices[0] * ScalarSizeInBytes); CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), Alignment)); Shuffles[0]->replaceAllUsesWith(CI); @@ -234,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad( /// /// Note that the new shufflevectors will be removed and we'll only generate one /// vsseg3 instruction in CodeGen. 
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, +bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, + Value *LaneMask, ShuffleVectorInst *SVI, unsigned Factor) const { - IRBuilder<> Builder(SI); - const DataLayout &DL = SI->getDataLayout(); + IRBuilder<> Builder(Store); + const DataLayout &DL = Store->getDataLayout(); auto Mask = SVI->getShuffleMask(); auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); // Given SVI : <n*factor x ty>, then VTy : <n x ty> auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), ShuffleVTy->getNumElements() / Factor); - if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), - SI->getPointerAddressSpace(), DL)) + auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen()); + + Value *Ptr, *VL; + Align Alignment; + if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment)) return false; - auto *PtrTy = SI->getPointerOperandType(); - auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); + Type *PtrTy = Ptr->getType(); + unsigned AS = PtrTy->getPointerAddressSpace(); + if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL)) + return false; unsigned Index; // If the segment store only has one active lane (i.e. the interleave is @@ -260,26 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, unsigned ScalarSizeInBytes = DL.getTypeStoreSize(ShuffleVTy->getElementType()); Value *Data = SVI->getOperand(0); - auto *DataVTy = cast<FixedVectorType>(Data->getType()); + Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0)); Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes); - Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset); - Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount()); - Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(), - VTy->getElementCount()); - - CallInst *CI = Builder.CreateIntrinsic( - Intrinsic::experimental_vp_strided_store, - {Data->getType(), BasePtr->getType(), Stride->getType()}, - {Data, BasePtr, Stride, Mask, VL}); - CI->addParamAttr( - 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign())); + Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset); + // Note: Same VL as above, but i32 not xlen due to signature of + // vp.strided.store + VL = Builder.CreateElementCount(Builder.getInt32Ty(), + VTy->getElementCount()); + CallInst *CI = + Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store, + {VTy, BasePtr->getType(), Stride->getType()}, + {Data, BasePtr, Stride, LaneMask, VL}); + Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes); + CI->addParamAttr(1, + Attribute::getWithAlignment(CI->getContext(), Alignment)); return true; } Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); + Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); SmallVector<Value *, 10> Ops; SmallVector<int, 16> NewShuffleMask; @@ -295,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, NewShuffleMask.clear(); } - // This VL should be OK (should be executable in one vsseg instruction, - // potentially under larger LMULs) because we checked that the fixed vector - // type fits in isLegalInterleavedAccessType - Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount()); - Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount()); - 
Ops.append({SI->getPointerOperand(), StoreMask, VL}); - + Ops.append({Ptr, LaneMask, VL}); Builder.CreateCall(VssegNFunc, Ops); return true; @@ -318,7 +351,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( VectorType *ResVTy = getDeinterleavedVectorType(DI); const DataLayout &DL = Load->getDataLayout(); - auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); + auto *XLenTy = Builder.getIntNTy(Subtarget.getXLen()); Value *Ptr, *VL; Align Alignment; @@ -339,8 +372,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( unsigned NumElts = ResVTy->getElementCount().getKnownMinValue(); Type *VecTupTy = TargetExtType::get( Load->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), - NumElts * SEW / 8), + ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8), Factor); Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( Load->getModule(), ScalableVlsegIntrIds[Factor - 2], @@ -381,7 +413,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType()); const DataLayout &DL = Store->getDataLayout(); - Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); + Type *XLenTy = Builder.getIntNTy(Subtarget.getXLen()); Value *Ptr, *VL; Align Alignment; @@ -405,9 +437,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); Type *VecTupTy = TargetExtType::get( Store->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), - NumElts * SEW / 8), - Factor); + ScalableVectorType::get(Builder.getInt8Ty(), NumElts * SEW / 8), Factor); Value *StoredVal = PoisonValue::get(VecTupTy); for (unsigned i = 0; i < Factor; ++i) @@ -424,91 +454,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( Builder.CreateCall(VssegNFunc, Operands); return true; } - -/// Lower an interleaved vp.store into a vssegN intrinsic. -/// -/// E.g. Lower an interleaved vp.store (Factor = 2): -/// -/// %is = tail call <vscale x 64 x i8> -/// @llvm.vector.interleave2.nxv64i8( -/// <vscale x 32 x i8> %load0, -/// <vscale x 32 x i8> %load1 -/// %wide.rvl = shl nuw nsw i32 %rvl, 1 -/// tail call void @llvm.vp.store.nxv64i8.p0( -/// <vscale x 64 x i8> %is, ptr %ptr, -/// %mask, -/// i32 %wide.rvl) -/// -/// Into: -/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64( -/// <vscale x 32 x i8> %load1, -/// <vscale x 32 x i8> %load2, ptr %ptr, -/// %mask, -/// i64 %rvl) -bool RISCVTargetLowering::lowerInterleavedVPStore( - VPIntrinsic *Store, Value *Mask, - ArrayRef<Value *> InterleaveOperands) const { - assert(Mask && "Expect a valid mask"); - assert(Store->getIntrinsicID() == Intrinsic::vp_store && - "Unexpected intrinsic"); - - const unsigned Factor = InterleaveOperands.size(); - - auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType()); - if (!VTy) - return false; - - const DataLayout &DL = Store->getDataLayout(); - Align Alignment = Store->getParamAlign(1).value_or( - DL.getABITypeAlign(VTy->getElementType())); - if (!isLegalInterleavedAccessType( - VTy, Factor, Alignment, - Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL)) - return false; - - IRBuilder<> Builder(Store); - Value *WideEVL = Store->getArgOperand(3); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. 
- if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor)) - return false; - - auto *PtrTy = Store->getArgOperand(1)->getType(); - auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); - auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); - Value *EVL = - Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - - if (isa<FixedVectorType>(VTy)) { - SmallVector<Value *, 8> Operands(InterleaveOperands); - Operands.append({Store->getArgOperand(1), Mask, EVL}); - Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2], - {VTy, PtrTy, XLenTy}, Operands); - return true; - } - - unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); - unsigned NumElts = VTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - Store->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), - NumElts * SEW / 8), - Factor); - - Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy}); - Value *StoredVal = PoisonValue::get(VecTupTy); - for (unsigned i = 0; i < Factor; ++i) - StoredVal = Builder.CreateCall( - VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)}); - - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), ScalableVssegIntrIds[Factor - 2], - {VecTupTy, PtrTy, Mask->getType(), EVL->getType()}); - - Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL, - ConstantInt::get(XLenTy, Log2_64(SEW))}; - - Builder.CreateCall(VssegNFunc, Operands); - return true; -} diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 28d6403..3b19c34 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -48,6 +48,8 @@ using namespace llvm; STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions"); STATISTIC(NumTransformedToWInstrs, "Number of instructions transformed to W-ops"); +STATISTIC(NumTransformedToNonWInstrs, + "Number of instructions transformed to non-W-ops"); static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal", cl::desc("Disable removal of sext.w"), @@ -67,10 +69,9 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI); - bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, MachineRegisterInfo &MRI); - bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, MachineRegisterInfo &MRI); + bool canonicalizeWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -721,45 +722,39 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF, return MadeChange; } -bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, - const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, - MachineRegisterInfo &MRI) { +// Strips or adds W suffixes to eligible instructions depending on the +// subtarget preferences. 
+bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF, + const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI) { + bool ShouldStripW = !(DisableStripWSuffix || ST.preferWInst()); + bool ShouldPreferW = ST.preferWInst(); bool MadeChange = false; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - unsigned Opc; - switch (MI.getOpcode()) { - default: - continue; - case RISCV::ADDW: Opc = RISCV::ADD; break; - case RISCV::ADDIW: Opc = RISCV::ADDI; break; - case RISCV::MULW: Opc = RISCV::MUL; break; - case RISCV::SLLIW: Opc = RISCV::SLLI; break; - } - if (hasAllWUsers(MI, ST, MRI)) { - MI.setDesc(TII.get(Opc)); - MadeChange = true; - } - } - } - - return MadeChange; -} - -bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, - const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, - MachineRegisterInfo &MRI) { - bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - unsigned WOpc; - // TODO: Add more? - switch (MI.getOpcode()) { + std::optional<unsigned> WOpc; + std::optional<unsigned> NonWOpc; + unsigned OrigOpc = MI.getOpcode(); + switch (OrigOpc) { default: continue; + case RISCV::ADDW: + NonWOpc = RISCV::ADD; + break; + case RISCV::ADDIW: + NonWOpc = RISCV::ADDI; + break; + case RISCV::MULW: + NonWOpc = RISCV::MUL; + break; + case RISCV::SLLIW: + NonWOpc = RISCV::SLLI; + break; + case RISCV::SUBW: + NonWOpc = RISCV::SUB; + break; case RISCV::ADD: WOpc = RISCV::ADDW; break; @@ -773,7 +768,7 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, WOpc = RISCV::MULW; break; case RISCV::SLLI: - // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits + // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits. if (MI.getOperand(2).getImm() >= 32) continue; WOpc = RISCV::SLLIW; @@ -784,19 +779,30 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, break; } - if (hasAllWUsers(MI, ST, MRI)) { + if (ShouldStripW && NonWOpc.has_value() && hasAllWUsers(MI, ST, MRI)) { + LLVM_DEBUG(dbgs() << "Replacing " << MI); + MI.setDesc(TII.get(NonWOpc.value())); + LLVM_DEBUG(dbgs() << " with " << MI); + ++NumTransformedToNonWInstrs; + MadeChange = true; + continue; + } + // LWU is always converted to LW when possible as 1) LW is compressible + // and 2) it helps minimise differences vs RV32. 
+ if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() && + hasAllWUsers(MI, ST, MRI)) { LLVM_DEBUG(dbgs() << "Replacing " << MI); - MI.setDesc(TII.get(WOpc)); + MI.setDesc(TII.get(WOpc.value())); MI.clearFlag(MachineInstr::MIFlag::NoSWrap); MI.clearFlag(MachineInstr::MIFlag::NoUWrap); MI.clearFlag(MachineInstr::MIFlag::IsExact); LLVM_DEBUG(dbgs() << " with " << MI); ++NumTransformedToWInstrs; MadeChange = true; + continue; } } } - return MadeChange; } @@ -813,12 +819,6 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); - - if (!(DisableStripWSuffix || ST.preferWInst())) - MadeChange |= stripWSuffixes(MF, TII, ST, MRI); - - if (ST.preferWInst()) - MadeChange |= appendWSuffixes(MF, TII, ST, MRI); - + MadeChange |= canonicalizeWSuffixes(MF, TII, ST, MRI); return MadeChange; } diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp index 668e596..6ecddad 100644 --- a/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGInfo.cpp @@ -24,6 +24,18 @@ void RISCVSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, switch (N->getOpcode()) { default: return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); + case RISCVISD::TUPLE_EXTRACT: + assert(N->getNumOperands() == 2 && "Expected three operands!"); + assert(N->getOperand(1).getOpcode() == ISD::TargetConstant && + N->getOperand(1).getValueType() == MVT::i32 && + "Expected index to be an i32 target constant!"); + break; + case RISCVISD::TUPLE_INSERT: + assert(N->getNumOperands() == 3 && "Expected three operands!"); + assert(N->getOperand(2).getOpcode() == ISD::TargetConstant && + N->getOperand(2).getValueType() == MVT::i32 && + "Expected index to be an i32 target constant!"); + break; case RISCVISD::VQDOT_VL: case RISCVISD::VQDOTU_VL: case RISCVISD::VQDOTSU_VL: { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index c754de4..e35ffaf 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -216,7 +216,7 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const { } void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const { + const SchedRegion &Region) const { // Do bidirectional scheduling since it provides a more balanced scheduling // leading to better performance. This will increase compile time. 
Policy.OnlyTopDown = false; @@ -231,8 +231,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.ShouldTrackPressure = true; } -void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const { +void RISCVSubtarget::overridePostRASchedPolicy( + MachineSchedPolicy &Policy, const SchedRegion &Region) const { MISched::Direction PostRASchedDirection = getPostRASchedDirection(); if (PostRASchedDirection == MISched::TopDown) { Policy.OnlyTopDown = true; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 4f560cc..fd57e02 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -395,11 +395,11 @@ public: } void overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; + const SchedRegion &Region) const override; void overridePostRASchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; + const SchedRegion &Region) const override; }; -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 56ead92..fd634b5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1489,6 +1489,34 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind, 0, cast<VectorType>(ICA.getReturnType())); } + case Intrinsic::fptoui_sat: + case Intrinsic::fptosi_sat: { + InstructionCost Cost = 0; + bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat; + Type *SrcTy = ICA.getArgTypes()[0]; + + auto SrcLT = getTypeLegalizationCost(SrcTy); + auto DstLT = getTypeLegalizationCost(RetTy); + if (!SrcTy->isVectorTy()) + break; + + if (!SrcLT.first.isValid() || !DstLT.first.isValid()) + return InstructionCost::getInvalid(); + + Cost += + getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI, + RetTy, SrcTy, TTI::CastContextHint::None, CostKind); + + // Handle NaN. + // vmfne v0, v8, v8 # If v8[i] is NaN set v0[i] to 1. + // vmerge.vim v8, v8, 0, v0 # Convert NaN to 0. + Type *CondTy = RetTy->getWithNewBitWidth(1); + Cost += getCmpSelInstrCost(BinaryOperator::FCmp, SrcTy, CondTy, + CmpInst::FCMP_UNO, CostKind); + Cost += getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::FCMP_UNO, CostKind); + return Cost; + } } if (ST->hasVInstructions() && RetTy->isVectorTy()) { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 12bf8c1..d62d99c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -116,8 +116,8 @@ public: } TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { - return ST->hasVInstructions() ? TailFoldingStyle::Data - : TailFoldingStyle::DataWithoutLaneMask; + return ST->hasVInstructions() ? 
TailFoldingStyle::DataWithEVL + : TailFoldingStyle::None; } std::optional<unsigned> getMaxVScale() const override; std::optional<unsigned> getVScaleForTuning() const override; diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index e656e8b..b53d919 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -33,6 +33,7 @@ namespace { class RISCVVLOptimizer : public MachineFunctionPass { const MachineRegisterInfo *MRI; const MachineDominatorTree *MDT; + const TargetInstrInfo *TII; public: static char ID; @@ -1291,7 +1292,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { return false; } - assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) && + assert(!RISCVII::elementsDependOnVL( + TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) && "Instruction shouldn't be supported if elements depend on VL"); assert(MI.getOperand(0).isReg() && @@ -1484,7 +1486,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { } bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { - assert(DemandedVLs.size() == 0); if (skipFunction(MF.getFunction())) return false; @@ -1495,6 +1496,10 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { if (!ST.hasVInstructions()) return false; + TII = ST.getInstrInfo(); + + assert(DemandedVLs.empty()); + // For each instruction that defines a vector, compute what VL its // downstream users demand. for (MachineBasicBlock *MBB : post_order(&MF)) { diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 84ef539..c1cc19b 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg())) return false; + // Masked off lanes past TrueVL will come from False, and converting to vmv + // will lose these lanes unless MIVL <= TrueVL. + // TODO: We could relax this for False == Passthru and True policy == TU + const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); + const MachineOperand &TrueVL = + True->getOperand(RISCVII::getVLOpNum(True->getDesc())); + if (!RISCV::isVLKnownLE(MIVL, TrueVL)) + return false; + // True's passthru needs to be equivalent to False Register TruePassthruReg = True->getOperand(1).getReg(); Register FalseReg = MI.getOperand(2).getReg(); |
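The new RISCVAsmBackend::relaxAlign hook above encodes the worst-case padding for an alignment directive as the addend of an R_RISCV_ALIGN fixup. Below is a minimal standalone sketch of just that padding rule (alignment minus the minimum nop length: 2 bytes with the Zca compressed extension, 4 otherwise). The names computeAlignPadding, hasRelax and hasZca are illustrative stand-ins, not LLVM APIs.

#include <cstdint>
#include <cstdio>

// Standalone sketch of the padding rule in RISCVAsmBackend::relaxAlign: with
// linker relaxation enabled, an alignment directive reserves
// (alignment - minimum nop length) bytes of nops and records that size as the
// R_RISCV_ALIGN addend. hasRelax / hasZca stand in for the FeatureRelax and
// FeatureStdExtZca subtarget checks.
static bool computeAlignPadding(uint64_t alignment, bool hasRelax, bool hasZca,
                                uint64_t &paddingBytes) {
  if (!hasRelax)
    return false; // no relocation: the assembler pads as usual
  const uint64_t minNopLen = hasZca ? 2 : 4;
  if (alignment <= minNopLen)
    return false; // a single (possibly compressed) nop already suffices
  paddingBytes = alignment - minNopLen;
  return true;
}

int main() {
  uint64_t pad = 0;
  if (computeAlignPadding(16, /*hasRelax=*/true, /*hasZca=*/true, pad))
    std::printf(".balign 16 with Zca: %llu nop bytes, R_RISCV_ALIGN addend %llu\n",
                (unsigned long long)pad, (unsigned long long)pad);
  if (!computeAlignPadding(4, /*hasRelax=*/true, /*hasZca=*/true, pad))
    std::printf(".balign 4 with Zca: no R_RISCV_ALIGN needed\n");
  return 0;
}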
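The RISCVFrameLowering::determineCalleeSaves change marks an LMUL>1 vector register group as callee-saved only when every member register is actually clobbered, rather than whenever any member is. The toy below models that policy on plain integers instead of MachineRegisterInfo; the register numbers and the clobbered set are hypothetical.

#include <cstdio>
#include <set>
#include <vector>

// Toy model of the vector callee-save marking policy: a grouped register such
// as V24M2 (covering V24 and V25) is saved only when all of its members are
// clobbered. These are illustrative types, not LLVM register classes.
struct VRegGroup {
  const char *name;
  std::vector<int> members; // base V register numbers covered by the group
};

static bool shouldSaveGroup(const VRegGroup &group,
                            const std::set<int> &clobbered) {
  for (int reg : group.members)
    if (!clobbered.count(reg))
      return false; // an untouched member means the group need not be saved
  return true;
}

int main() {
  VRegGroup v24m2{"V24M2", {24, 25}};
  std::set<int> onlyV24{24};
  std::set<int> both{24, 25};
  std::printf("%s saved when only v24 is clobbered: %s\n", v24m2.name,
              shouldSaveGroup(v24m2, onlyV24) ? "yes" : "no");
  std::printf("%s saved when v24 and v25 are clobbered: %s\n", v24m2.name,
              shouldSaveGroup(v24m2, both) ? "yes" : "no");
  return 0;
}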
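The refactored SelectAddrRegZextRegScale only strips an explicit (and index, mask) when the mask is exactly Bits trailing ones, i.e. when the AND is a plain zero-extension of the index. A standalone sketch of that check follows; maskTrailingOnes here is a local reimplementation written for the sketch, not the llvm::maskTrailingOnes helper itself.

#include <cstdint>
#include <cstdio>

// Sketch of the index test in SelectAddrRegZextRegScale: the AND can be
// peeled off the index only if its constant operand has exactly `bits`
// trailing ones set.
static uint64_t maskTrailingOnes(unsigned bits) {
  return bits >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << bits) - 1);
}

static bool andIsZeroExtend(uint64_t andMask, unsigned bits) {
  return andMask == maskTrailingOnes(bits);
}

int main() {
  std::printf("and x, 0xffffffff treated as zext i32: %s\n",
              andIsZeroExtend(0xffffffffULL, 32) ? "yes" : "no");
  std::printf("and x, 0x7fffffff treated as zext i32: %s\n",
              andIsZeroExtend(0x7fffffffULL, 32) ? "yes" : "no");
  return 0;
}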
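canonicalizeWSuffixes in RISCVOptWInstrs.cpp folds the previous strip/append passes into a single walk: strip the W suffix when the subtarget does not prefer W instructions, append it when it does, and rewrite LWU to LW regardless of preference, in every case only when all users read just the low 32 bits. The sketch below expresses that decision over mnemonic strings rather than MachineInstr opcodes; allUsersReadLow32 stands in for hasAllWUsers(), and only the opcode pairs visible in the patch are modelled (guards such as the SLLI shift-amount check are omitted).

#include <cstdio>
#include <cstring>

// Sketch of the single-pass strip/append decision. Returns the replacement
// mnemonic, or nullptr if the instruction is kept as-is.
static const char *canonicalize(const char *opc, bool preferW,
                                bool allUsersReadLow32) {
  if (!allUsersReadLow32)
    return nullptr;
  // Strip the W suffix when the subtarget does not prefer W-form instructions.
  if (!preferW) {
    if (!strcmp(opc, "addw"))  return "add";
    if (!strcmp(opc, "addiw")) return "addi";
    if (!strcmp(opc, "mulw"))  return "mul";
    if (!strcmp(opc, "slliw")) return "slli";
    if (!strcmp(opc, "subw"))  return "sub";
  }
  // Append a W suffix when preferred; lwu becomes lw regardless of preference
  // because lw is compressible and matches RV32 behaviour.
  if (preferW || !strcmp(opc, "lwu")) {
    if (!strcmp(opc, "add"))  return "addw";
    if (!strcmp(opc, "addi")) return "addiw";
    if (!strcmp(opc, "mul"))  return "mulw";
    if (!strcmp(opc, "lwu"))  return "lw";
  }
  return nullptr;
}

int main() {
  const char *r = canonicalize("lwu", /*preferW=*/false, /*allUsersReadLow32=*/true);
  std::printf("lwu  -> %s\n", r ? r : "(kept)");
  r = canonicalize("addw", /*preferW=*/false, /*allUsersReadLow32=*/true);
  std::printf("addw -> %s\n", r ? r : "(kept)");
  return 0;
}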