Diffstat (limited to 'llvm/lib/Target/RISCV')
llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 73
llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h | 9
llvm/lib/Target/RISCV/RISCVCallingConv.td | 14
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 175
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 19
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 15
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 1
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 37
llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 4
llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 4
llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp | 169
llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 96
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 6
llvm/lib/Target/RISCV/RISCVSubtarget.h | 6
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 9
16 files changed, 357 insertions, 289 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index f76f8b3..2c37c3b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -302,6 +302,28 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, Inst = std::move(Res); } +// Check if an R_RISCV_ALIGN relocation is needed for an alignment directive. +// If conditions are met, compute the padding size and create a fixup encoding +// the padding size in the addend. +bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) { + // Use default handling unless linker relaxation is enabled and the alignment + // is larger than the nop size. + const MCSubtargetInfo *STI = F.getSubtargetInfo(); + if (!STI->hasFeature(RISCV::FeatureRelax)) + return false; + unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; + if (F.getAlignment() <= MinNopLen) + return false; + + Size = F.getAlignment().value() - MinNopLen; + auto *Expr = MCConstantExpr::create(Size, getContext()); + MCFixup Fixup = + MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN); + F.setVarFixups({Fixup}); + F.getParent()->setLinkerRelaxable(); + return true; +} + bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const { MCContext &C = getContext(); @@ -637,7 +659,7 @@ bool RISCVAsmBackend::isPCRelFixupResolved(const MCSymbol *SymA, // Otherwise, check if the offset between the symbol and fragment is fully // resolved, unaffected by linker-relaxable fragments (e.g. instructions or - // offset-affected MCAlignFragment). Complements the generic + // offset-affected FT_Align fragments). Complements the generic // isSymbolRefDifferenceFullyResolvedImpl. if (!PCRelTemp) PCRelTemp = getContext().createTempSymbol(); @@ -887,55 +909,6 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, } } -// Linker relaxation may change code size. We have to insert Nops -// for .align directive when linker relaxation enabled. So then Linker -// could satisfy alignment by removing Nops. -// The function return the total Nops Size we need to insert. -bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign( - const MCAlignFragment &AF, unsigned &Size) { - // Calculate Nops Size only when linker relaxation enabled. - const MCSubtargetInfo *STI = AF.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) - return false; - - unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4; - - if (AF.getAlignment() <= MinNopLen) { - return false; - } else { - Size = AF.getAlignment().value() - MinNopLen; - return true; - } -} - -// We need to insert R_RISCV_ALIGN relocation type to indicate the -// position of Nops and the total bytes of the Nops have been inserted -// when linker relaxation enabled. -// The function insert fixup_riscv_align fixup which eventually will -// transfer to R_RISCV_ALIGN relocation type. -bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) { - // Insert the fixup only when linker relaxation enabled. - const MCSubtargetInfo *STI = AF.getSubtargetInfo(); - if (!STI->hasFeature(RISCV::FeatureRelax)) - return false; - - // Calculate total Nops we need to insert. If there are none to insert - // then simply return. 
- unsigned Count; - if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0)) - return false; - - MCContext &Ctx = getContext(); - const MCExpr *Dummy = MCConstantExpr::create(0, Ctx); - MCFixup Fixup = MCFixup::create(0, Dummy, ELF::R_RISCV_ALIGN); - - uint64_t FixedValue = 0; - MCValue NopBytes = MCValue::get(Count); - Asm.getWriter().recordRelocation(AF, Fixup, NopBytes, FixedValue); - return true; -} - std::unique_ptr<MCObjectTargetWriter> RISCVAsmBackend::createObjectTargetWriter() const { return createRISCVELFObjectWriter(OSABI, Is64Bit); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 8c10fbe..d97d632 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -38,14 +38,6 @@ public: const MCTargetOptions &Options); ~RISCVAsmBackend() override = default; - // Return Size with extra Nop Bytes for alignment directive in code section. - bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, - unsigned &Size) override; - - // Insert target specific fixup type for alignment directive in code section. - bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, - MCAlignFragment &AF) override; - std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &, uint64_t &) override; bool addReloc(const MCFragment &, const MCFixup &, const MCValue &, @@ -73,6 +65,7 @@ public: void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override; + bool relaxAlign(MCFragment &F, unsigned &Size) override; bool relaxDwarfLineAddr(MCFragment &F, bool &WasRelaxed) const override; bool relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const override; std::pair<bool, bool> relaxLEB128(MCFragment &LF, diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td index cbf039e..4c303a9 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -56,19 +56,21 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, (sequence "F%u_D", 0, 31))>; +defvar VREGS = (add (sequence "V%u", 0, 31), + (sequence "V%uM2", 0, 31, 2), + (sequence "V%uM4", 0, 31, 4), + (sequence "V%uM8", 0, 31, 8)); + // Same as CSR_Interrupt, but including all vector registers. -def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, VREGS)>; // Same as CSR_Interrupt, but including all 32-bit FP registers and all vector // registers. -def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, VREGS)>; // Same as CSR_Interrupt, but including all 64-bit FP registers and all vector // registers. -def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, - (sequence "V%u", 0, 31))>; +def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, VREGS)>; // Same as CSR_Interrupt, but excluding X16-X31. 
def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt, diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 6c8e3da..b1ab76a 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -95,6 +95,11 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = { /* -21, -22, -23, -24 are reserved */ }; +/// Returns true if DWARF CFI instructions ("frame moves") should be emitted. +static bool needsDwarfCFI(const MachineFunction &MF) { + return MF.needsFrameMoves(); +} + // For now we use x3, a.k.a gp, as pointer to shadow call stack. // User should not use x3 in their asm. static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, @@ -141,6 +146,9 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, .addImm(-SlotSize) .setMIFlag(MachineInstr::FrameSetup); + if (!needsDwarfCFI(MF)) + return; + // Emit a CFI instruction that causes SlotSize to be subtracted from the value // of the shadow stack pointer when unwinding past this frame. char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true); @@ -199,8 +207,10 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, .addReg(SCSPReg) .addImm(-SlotSize) .setMIFlag(MachineInstr::FrameDestroy); - // Restore the SCS pointer - CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg); + if (needsDwarfCFI(MF)) { + // Restore the SCS pointer + CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg); + } } // Insert instruction to swap mscratchsw with sp @@ -935,6 +945,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() + getUnmanagedCSI(MF, CSI).size()); CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); + bool NeedsDwarfCFI = needsDwarfCFI(MF); // If libcalls are used to spill and restore callee-saved registers, the frame // has two sections; the opaque section managed by the libcalls, and the @@ -962,10 +973,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign()); RVFI->setLibCallStackSize(LibCallFrameSize); - CFIBuilder.buildDefCFAOffset(LibCallFrameSize); - for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) - CFIBuilder.buildOffset(CS.getReg(), - MFI.getObjectOffset(CS.getFrameIdx())); + if (NeedsDwarfCFI) { + CFIBuilder.buildDefCFAOffset(LibCallFrameSize); + for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) + CFIBuilder.buildOffset(CS.getReg(), + MFI.getObjectOffset(CS.getFrameIdx())); + } } // FIXME (note copied from Lanai): This appears to be overallocating. Needs @@ -996,14 +1009,17 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // could only be the next instruction. ++PossiblePush; - // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)` - // could be. The PUSH will also get its own CFI metadata for its own - // modifications, which should come after the PUSH. - CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup); - PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount); - for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI)) - PushCFIBuilder.buildOffset(CS.getReg(), - MFI.getObjectOffset(CS.getFrameIdx())); + if (NeedsDwarfCFI) { + // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)` + // could be. 
The PUSH will also get its own CFI metadata for its own + // modifications, which should come after the PUSH. + CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, + MachineInstr::FrameSetup); + PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount); + for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI)) + PushCFIBuilder.buildOffset(CS.getReg(), + MFI.getObjectOffset(CS.getFrameIdx())); + } } if (RVFI->isPushable(MF) && PossiblePush != MBB.end() && @@ -1017,10 +1033,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, PossiblePush->getOperand(1).setImm(StackAdj); StackSize -= StackAdj; - CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize); - for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) - CFIBuilder.buildOffset(CS.getReg(), - MFI.getObjectOffset(CS.getFrameIdx())); + if (NeedsDwarfCFI) { + CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize); + for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) + CFIBuilder.buildOffset(CS.getReg(), + MFI.getObjectOffset(CS.getFrameIdx())); + } } // Allocate space on the stack if necessary. @@ -1031,7 +1049,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, bool DynAllocation = MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation(); if (StackSize != 0) - allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true, + allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, NeedsDwarfCFI, NeedProbe, ProbeSize, DynAllocation, MachineInstr::FrameSetup); @@ -1049,8 +1067,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // Iterate over list of callee-saved registers and emit .cfi_offset // directives. - for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) - CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx())); + if (NeedsDwarfCFI) + for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) + CFIBuilder.buildOffset(CS.getReg(), + MFI.getObjectOffset(CS.getFrameIdx())); // Generate new FP. if (hasFP(MF)) { @@ -1069,7 +1089,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup, getStackAlign()); } - CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize()); + if (NeedsDwarfCFI) + CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize()); } uint64_t SecondSPAdjustAmount = 0; @@ -1080,15 +1101,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, "SecondSPAdjustAmount should be greater than zero"); allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount, - getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe, - ProbeSize, DynAllocation, MachineInstr::FrameSetup); + getStackSizeWithRVVPadding(MF), NeedsDwarfCFI && !hasFP(MF), + NeedProbe, ProbeSize, DynAllocation, + MachineInstr::FrameSetup); } if (RVVStackSize) { if (NeedProbe) { allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize, - MachineInstr::FrameSetup, !hasFP(MF), - DynAllocation); + MachineInstr::FrameSetup, + NeedsDwarfCFI && !hasFP(MF), DynAllocation); } else { // We must keep the stack pointer aligned through any intermediate // updates. @@ -1097,14 +1119,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup, getStackAlign()); } - if (!hasFP(MF)) { + if (NeedsDwarfCFI && !hasFP(MF)) { // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb". 
CFIBuilder.insertCFIInst(createDefCFAExpression( *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8)); } std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); - emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF)); + if (NeedsDwarfCFI) + emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF)); } if (hasFP(MF)) { @@ -1171,8 +1194,9 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF, MachineInstr::FrameDestroy, getStackAlign()); StackSize = 0; - CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy) - .buildDefCFAOffset(CFAOffset); + if (needsDwarfCFI(MF)) + CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy) + .buildDefCFAOffset(CFAOffset); } void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, @@ -1212,6 +1236,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size()); CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn, MachineInstr::FrameDestroy); + bool NeedsDwarfCFI = needsDwarfCFI(MF); uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount @@ -1232,10 +1257,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, StackOffset::getScalable(RVVStackSize), MachineInstr::FrameDestroy, getStackAlign()); - if (!hasFP(MF)) - CFIBuilder.buildDefCFA(SPReg, RealStackSize); - - emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn); + if (NeedsDwarfCFI) { + if (!hasFP(MF)) + CFIBuilder.buildDefCFA(SPReg, RealStackSize); + emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn); + } } if (FirstSPAdjustAmount) { @@ -1251,7 +1277,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, StackOffset::getFixed(SecondSPAdjustAmount), MachineInstr::FrameDestroy, getStackAlign()); - if (!hasFP(MF)) + if (NeedsDwarfCFI && !hasFP(MF)) CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount); } @@ -1272,7 +1298,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, getStackAlign()); } - if (hasFP(MF)) + if (NeedsDwarfCFI && hasFP(MF)) CFIBuilder.buildDefCFA(SPReg, RealStackSize); // Skip to after the restores of scalar callee-saved registers @@ -1295,8 +1321,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, } // Recover callee-saved registers. - for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) - CFIBuilder.buildRestore(CS.getReg()); + if (NeedsDwarfCFI) + for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI)) + CFIBuilder.buildRestore(CS.getReg()); if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) { // Use available stack adjustment in pop instruction to deallocate stack @@ -1315,15 +1342,17 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, auto NextI = next_nodbg(MBBI, MBB.end()); if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) { ++MBBI; - CFIBuilder.setInsertPoint(MBBI); + if (NeedsDwarfCFI) { + CFIBuilder.setInsertPoint(MBBI); - for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) - CFIBuilder.buildRestore(CS.getReg()); + for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI)) + CFIBuilder.buildRestore(CS.getReg()); - // Update CFA Offset. If this is a QCI interrupt function, there will be a - // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise - // getQCIInterruptStackSize() will be 0. - CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize()); + // Update CFA Offset. 
If this is a QCI interrupt function, there will + // be a leftover offset which is deallocated by `QC.C.MILEAVERET`, + // otherwise getQCIInterruptStackSize() will be 0. + CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize()); + } } } @@ -1515,10 +1544,53 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset; } +static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, + const Register &Reg) { + MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); + // If it's not a grouped vector register, it doesn't have a subregister, so + // the base register is just itself. + if (BaseReg == RISCV::NoRegister) + BaseReg = Reg; + return BaseReg; +} + void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + // In TargetFrameLowering::determineCalleeSaves, any vector register is marked + // as saved if any of its subregisters is clobbered, which is not correct for + // vector registers. We only want the vector register to be marked as saved + // if all of its subregisters are clobbered. + // For example: + // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked. + // Correct behavior: v24m2 is marked only if v24 and v25 are marked. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + const RISCVRegisterInfo &TRI = *STI.getRegisterInfo(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned CSReg = CSRegs[i]; + // Only vector registers need special care. + if (!RISCV::VRRegClass.contains(getRVVBaseRegister(TRI, CSReg))) + continue; + + SavedRegs.reset(CSReg); + + auto SubRegs = TRI.subregs(CSReg); + // Set the register and all its subregisters. + if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) { + SavedRegs.set(CSReg); + llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); }); + } + + // Combine to super register if all of its subregisters are marked. + if (!SubRegs.empty() && llvm::all_of(SubRegs, [&](unsigned Reg) { + return SavedRegs.test(Reg); + })) + SavedRegs.set(CSReg); + } + // Unconditionally spill RA and FP only if the function uses a frame // pointer. if (hasFP(MF)) { @@ -1812,7 +1884,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( // allocateStack. bool DynAllocation = MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation(); - allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF), + allocateStack(MBB, MI, MF, -Amount, -Amount, + needsDwarfCFI(MF) && !hasFP(MF), /*NeedProbe=*/true, ProbeSize, DynAllocation, MachineInstr::NoFlags); } else { @@ -2107,16 +2180,6 @@ static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) { : 8; } -static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI, - const Register &Reg) { - MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0); - // If it's not a grouped vector register, it doesn't have subregister, so - // the base register is just itself. 
- if (BaseReg == RISCV::NoRegister) - BaseReg = Reg; - return BaseReg; -} - void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const { MachineFunction *MF = MBB.getParent(); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cfec46d2..a541c2f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3106,6 +3106,25 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, return true; } +bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr, + unsigned MaxShiftAmount, + unsigned Bits, SDValue &Base, + SDValue &Index, + SDValue &Scale) { + if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale)) + return false; + + if (Index.getOpcode() == ISD::AND) { + auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1)); + if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { + Index = Index.getOperand(0); + return true; + } + } + + return false; +} + bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() != ISD::ADD) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 72e2f96..ee3a86e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -59,19 +59,14 @@ public: return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale); } + bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, + unsigned Bits, SDValue &Base, SDValue &Index, + SDValue &Scale); + template <unsigned MaxShift, unsigned Bits> bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index, SDValue &Scale) { - if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) { - if (Index.getOpcode() == ISD::AND) { - auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1)); - if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { - Index = Index.getOperand(0); - return true; - } - } - } - return false; + return SelectAddrRegZextRegScale(Addr, MaxShift, Bits, Base, Index, Scale); } bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index e23001a..d9c6101 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -174,6 +174,7 @@ class EltDeps<bit vl, bit mask> { def EltDepsNone : EltDeps<vl=0, mask=0>; def EltDepsVL : EltDeps<vl=1, mask=0>; +def EltDepsMask : EltDeps<vl=0, mask=1>; def EltDepsVLMask : EltDeps<vl=1, mask=1>; class EEW <bits<2> val> { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index aef410f..17067220 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -44,45 +44,48 @@ def simm10_unsigned : RISCVOp { //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnaryImm10<bits<7> funct7, string opcodestr, - DAGOperand TyImm10 = simm10> - : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins TyImm10:$imm10), - opcodestr, "$rd, $imm10"> { +class RVPLoadImm10<bits<7> funct7, string opcodestr, + DAGOperand TyImm10 = simm10> + : RVInst<(outs GPR:$rd), (ins TyImm10:$imm10), opcodestr, "$rd, $imm10", [], + InstFormatOther> { bits<10> imm10; + bits<5> rd; let 
Inst{31-25} = funct7; let Inst{24-16} = imm10{8-0}; let Inst{15} = imm10{9}; + let Inst{14-12} = 0b010; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class RVPUnaryImm8<bits<8> funct8, string opcodestr> - : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins uimm8:$uimm8), - opcodestr, "$rd, $uimm8"> { +class RVPLoadImm8<bits<8> funct8, string opcodestr> + : RVInst<(outs GPR:$rd), (ins uimm8:$uimm8), opcodestr, "$rd, $uimm8", [], + InstFormatOther> { bits<8> uimm8; + bits<5> rd; let Inst{31-24} = funct8; let Inst{23-16} = uimm8; let Inst{15} = 0b0; + let Inst{14-12} = 0b010; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPUnary<bits<3> f, string opcodestr, dag operands, string argstr> : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), operands, opcodestr, argstr> { - bits<5> imm; - bits<5> rs1; - let Inst{31} = 0b1; let Inst{30-28} = f; let Inst{27} = 0b0; - let Inst{19-15} = rs1; } class RVPUnaryImm5<bits<3> f, string opcodestr> : RVPUnary<f, opcodestr, (ins GPR:$rs1, uimm5:$uimm5), "$rd, $rs1, $uimm5"> { bits<5> uimm5; - let imm = uimm5; let Inst{26-25} = 0b01; let Inst{24-20} = uimm5; } @@ -145,11 +148,11 @@ def PSSLAI_W : RVPUnaryImm5<0b101, "psslai.w">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in -def PLI_H : RVPUnaryImm10<0b1011000, "pli.h">; +def PLI_H : RVPLoadImm10<0b1011000, "pli.h">; let Predicates = [HasStdExtP, IsRV64] in -def PLI_W : RVPUnaryImm10<0b1011001, "pli.w">; +def PLI_W : RVPLoadImm10<0b1011001, "pli.w">; let Predicates = [HasStdExtP] in -def PLI_B : RVPUnaryImm8<0b10110100, "pli.b">; +def PLI_B : RVPLoadImm8<0b10110100, "pli.b">; let Predicates = [HasStdExtP] in { def PSEXT_H_B : RVPUnaryWUF<0b00, 0b00100, "psext.h.b">; @@ -162,6 +165,6 @@ def PSEXT_W_H : RVPUnaryWUF<0b01, 0b00101, "psext.w.h">; } // Predicates = [HasStdExtP, IsRV64] let Predicates = [HasStdExtP] in -def PLUI_H : RVPUnaryImm10<0b1111000, "plui.h", simm10_unsigned>; +def PLUI_H : RVPLoadImm10<0b1111000, "plui.h", simm10_unsigned>; let Predicates = [HasStdExtP, IsRV64] in -def PLUI_W : RVPUnaryImm10<0b1111001, "plui.w", simm10_unsigned>; +def PLUI_W : RVPLoadImm10<0b1111001, "plui.w", simm10_unsigned>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 5d13a87..33c7138 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1642,7 +1642,7 @@ def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), def : MnemonicAlias<"vpopc.m", "vcpop.m">; -let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask in { +let Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask in { let DestEEW = EEW1 in { // vmsbf.m set-before-first mask bit @@ -1655,7 +1655,7 @@ defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; // Vector Iota Instruction defm VIOTA_M : VIOTA_MV_V<"viota.m", 0b010100, 0b10000>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsVLMask +} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota, ElementsDependOn = EltDepsMask // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index c7cb6e2..f391300 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1377,9 +1377,9 @@ let Predicates = [HasVendorXqciac, IsRV32] in { def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12:$imm12))), (QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12:$imm12)>; def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)), - (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>; + (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)), - (QC_SHLADD GPRNoX0:$rs2, GPRNoX0:$rs1, uimm5gt3:$imm)>; + (QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>; } // Predicates = [HasVendorXqciac, IsRV32] /// Simple arithmetic operations diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index 38cc0ce..25817b6 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -102,6 +102,87 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { return false; } +/// Do the common operand retrieval and validation required by the +/// routines below. +static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy, + Instruction *I, Value *&Ptr, Value *&Mask, + Value *&VL, Align &Alignment) { + + IRBuilder<> Builder(I); + const DataLayout &DL = I->getDataLayout(); + ElementCount EC = VTy->getElementCount(); + if (auto *LI = dyn_cast<LoadInst>(I)) { + assert(LI->isSimple()); + Ptr = LI->getPointerOperand(); + Alignment = LI->getAlign(); + assert(!Mask && "Unexpected mask on a load"); + Mask = Builder.getAllOnesMask(EC); + VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC) + : Constant::getAllOnesValue(XLenTy); + return true; + } + if (auto *SI = dyn_cast<StoreInst>(I)) { + assert(SI->isSimple()); + Ptr = SI->getPointerOperand(); + Alignment = SI->getAlign(); + assert(!Mask && "Unexpected mask on a store"); + Mask = Builder.getAllOnesMask(EC); + VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC) + : Constant::getAllOnesValue(XLenTy); + return true; + } + + auto *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Unsupported intrinsic type"); + case Intrinsic::vp_load: + case Intrinsic::vp_store: { + auto *VPLdSt = cast<VPIntrinsic>(I); + Ptr = VPLdSt->getMemoryPointerParam(); + Alignment = VPLdSt->getPointerAlignment().value_or( + DL.getABITypeAlign(VTy->getElementType())); + + assert(Mask && "vp.load and vp.store need a mask!"); + + Value *WideEVL = VPLdSt->getVectorLengthParam(); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. + if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor)) + return false; + + auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); + VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); + return true; + } + case Intrinsic::masked_load: { + Ptr = II->getOperand(0); + Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue(); + + if (!isa<UndefValue>(II->getOperand(3))) + return false; + + assert(Mask && "masked.load needs a mask!"); + + VL = isa<FixedVectorType>(VTy) + ? 
Builder.CreateElementCount(XLenTy, VTy->getElementCount()) + : Constant::getAllOnesValue(XLenTy); + return true; + } + case Intrinsic::masked_store: { + Ptr = II->getOperand(1); + Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue(); + + assert(Mask && "masked.store needs a mask!"); + + VL = isa<FixedVectorType>(VTy) + ? Builder.CreateElementCount(XLenTy, VTy->getElementCount()) + : Constant::getAllOnesValue(XLenTy); + return true; + } + } +} + /// Lower an interleaved load into a vlsegN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -127,32 +208,8 @@ bool RISCVTargetLowering::lowerInterleavedLoad( Value *Ptr, *VL; Align Alignment; - if (auto *LI = dyn_cast<LoadInst>(Load)) { - assert(LI->isSimple()); - Ptr = LI->getPointerOperand(); - Alignment = LI->getAlign(); - assert(!Mask && "Unexpected mask on a load\n"); - Mask = Builder.getAllOnesMask(VTy->getElementCount()); - VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount()); - } else { - auto *VPLoad = cast<VPIntrinsic>(Load); - assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load && - "Unexpected intrinsic"); - Ptr = VPLoad->getMemoryPointerParam(); - Alignment = VPLoad->getPointerAlignment().value_or( - DL.getABITypeAlign(VTy->getElementType())); - - assert(Mask && "vp.load needs a mask!"); - - Value *WideEVL = VPLoad->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. - if (!isMultipleOfN(WideEVL, DL, Factor)) - return false; - - auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); - VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - } + if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) + return false; Type *PtrTy = Ptr->getType(); unsigned AS = PtrTy->getPointerAddressSpace(); @@ -296,34 +353,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( Value *Ptr, *VL; Align Alignment; - if (auto *LI = dyn_cast<LoadInst>(Load)) { - assert(LI->isSimple()); - Ptr = LI->getPointerOperand(); - Alignment = LI->getAlign(); - assert(!Mask && "Unexpected mask on a load\n"); - Mask = Builder.getAllOnesMask(ResVTy->getElementCount()); - VL = isa<FixedVectorType>(ResVTy) - ? Builder.CreateElementCount(XLenTy, ResVTy->getElementCount()) - : Constant::getAllOnesValue(XLenTy); - } else { - auto *VPLoad = cast<VPIntrinsic>(Load); - assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load && - "Unexpected intrinsic"); - Ptr = VPLoad->getMemoryPointerParam(); - Alignment = VPLoad->getPointerAlignment().value_or( - DL.getABITypeAlign(ResVTy->getElementType())); - - assert(Mask && "vp.load needs a mask!"); - - Value *WideEVL = VPLoad->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. 
- if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor)) - return false; - - auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); - VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - } + if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) + return false; Type *PtrTy = Ptr->getType(); unsigned AS = PtrTy->getPointerAddressSpace(); @@ -385,34 +416,8 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( Value *Ptr, *VL; Align Alignment; - if (auto *SI = dyn_cast<StoreInst>(Store)) { - assert(SI->isSimple()); - Ptr = SI->getPointerOperand(); - Alignment = SI->getAlign(); - assert(!Mask && "Unexpected mask on a store"); - Mask = Builder.getAllOnesMask(InVTy->getElementCount()); - VL = isa<FixedVectorType>(InVTy) - ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount()) - : Constant::getAllOnesValue(XLenTy); - } else { - auto *VPStore = cast<VPIntrinsic>(Store); - assert(VPStore->getIntrinsicID() == Intrinsic::vp_store && - "Unexpected intrinsic"); - Ptr = VPStore->getMemoryPointerParam(); - Alignment = VPStore->getPointerAlignment().value_or( - DL.getABITypeAlign(InVTy->getElementType())); - - assert(Mask && "vp.store needs a mask!"); - - Value *WideEVL = VPStore->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. - if (!isMultipleOfN(WideEVL, DL, Factor)) - return false; - - auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); - VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - } + if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment)) + return false; Type *PtrTy = Ptr->getType(); unsigned AS = Ptr->getType()->getPointerAddressSpace(); if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL)) diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 28d6403..3b19c34 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -48,6 +48,8 @@ using namespace llvm; STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions"); STATISTIC(NumTransformedToWInstrs, "Number of instructions transformed to W-ops"); +STATISTIC(NumTransformedToNonWInstrs, + "Number of instructions transformed to non-W-ops"); static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal", cl::desc("Disable removal of sext.w"), @@ -67,10 +69,9 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII, const RISCVSubtarget &ST, MachineRegisterInfo &MRI); - bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, MachineRegisterInfo &MRI); - bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, MachineRegisterInfo &MRI); + bool canonicalizeWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -721,45 +722,39 @@ bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF, return MadeChange; } -bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF, - const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, - MachineRegisterInfo &MRI) { +// Strips or adds W suffixes to eligible instructions depending on the +// subtarget preferences. 
+bool RISCVOptWInstrs::canonicalizeWSuffixes(MachineFunction &MF, + const RISCVInstrInfo &TII, + const RISCVSubtarget &ST, + MachineRegisterInfo &MRI) { + bool ShouldStripW = !(DisableStripWSuffix || ST.preferWInst()); + bool ShouldPreferW = ST.preferWInst(); bool MadeChange = false; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - unsigned Opc; - switch (MI.getOpcode()) { - default: - continue; - case RISCV::ADDW: Opc = RISCV::ADD; break; - case RISCV::ADDIW: Opc = RISCV::ADDI; break; - case RISCV::MULW: Opc = RISCV::MUL; break; - case RISCV::SLLIW: Opc = RISCV::SLLI; break; - } - if (hasAllWUsers(MI, ST, MRI)) { - MI.setDesc(TII.get(Opc)); - MadeChange = true; - } - } - } - - return MadeChange; -} - -bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, - const RISCVInstrInfo &TII, - const RISCVSubtarget &ST, - MachineRegisterInfo &MRI) { - bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - unsigned WOpc; - // TODO: Add more? - switch (MI.getOpcode()) { + std::optional<unsigned> WOpc; + std::optional<unsigned> NonWOpc; + unsigned OrigOpc = MI.getOpcode(); + switch (OrigOpc) { default: continue; + case RISCV::ADDW: + NonWOpc = RISCV::ADD; + break; + case RISCV::ADDIW: + NonWOpc = RISCV::ADDI; + break; + case RISCV::MULW: + NonWOpc = RISCV::MUL; + break; + case RISCV::SLLIW: + NonWOpc = RISCV::SLLI; + break; + case RISCV::SUBW: + NonWOpc = RISCV::SUB; + break; case RISCV::ADD: WOpc = RISCV::ADDW; break; @@ -773,7 +768,7 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, WOpc = RISCV::MULW; break; case RISCV::SLLI: - // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits + // SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits. if (MI.getOperand(2).getImm() >= 32) continue; WOpc = RISCV::SLLIW; @@ -784,19 +779,30 @@ bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF, break; } - if (hasAllWUsers(MI, ST, MRI)) { + if (ShouldStripW && NonWOpc.has_value() && hasAllWUsers(MI, ST, MRI)) { + LLVM_DEBUG(dbgs() << "Replacing " << MI); + MI.setDesc(TII.get(NonWOpc.value())); + LLVM_DEBUG(dbgs() << " with " << MI); + ++NumTransformedToNonWInstrs; + MadeChange = true; + continue; + } + // LWU is always converted to LW when possible as 1) LW is compressible + // and 2) it helps minimise differences vs RV32. 
+ if ((ShouldPreferW || OrigOpc == RISCV::LWU) && WOpc.has_value() && + hasAllWUsers(MI, ST, MRI)) { LLVM_DEBUG(dbgs() << "Replacing " << MI); - MI.setDesc(TII.get(WOpc)); + MI.setDesc(TII.get(WOpc.value())); MI.clearFlag(MachineInstr::MIFlag::NoSWrap); MI.clearFlag(MachineInstr::MIFlag::NoUWrap); MI.clearFlag(MachineInstr::MIFlag::IsExact); LLVM_DEBUG(dbgs() << " with " << MI); ++NumTransformedToWInstrs; MadeChange = true; + continue; } } } - return MadeChange; } @@ -813,12 +819,6 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); - - if (!(DisableStripWSuffix || ST.preferWInst())) - MadeChange |= stripWSuffixes(MF, TII, ST, MRI); - - if (ST.preferWInst()) - MadeChange |= appendWSuffixes(MF, TII, ST, MRI); - + MadeChange |= canonicalizeWSuffixes(MF, TII, ST, MRI); return MadeChange; } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index c754de4..e35ffaf 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -216,7 +216,7 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const { } void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const { + const SchedRegion &Region) const { // Do bidirectional scheduling since it provides a more balanced scheduling // leading to better performance. This will increase compile time. Policy.OnlyTopDown = false; @@ -231,8 +231,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.ShouldTrackPressure = true; } -void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const { +void RISCVSubtarget::overridePostRASchedPolicy( + MachineSchedPolicy &Policy, const SchedRegion &Region) const { MISched::Direction PostRASchedDirection = getPostRASchedDirection(); if (PostRASchedDirection == MISched::TopDown) { Policy.OnlyTopDown = true; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 4f560cc..fd57e02 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -395,11 +395,11 @@ public: } void overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; + const SchedRegion &Region) const override; void overridePostRASchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; + const SchedRegion &Region) const override; }; -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index e656e8b..b53d919 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -33,6 +33,7 @@ namespace { class RISCVVLOptimizer : public MachineFunctionPass { const MachineRegisterInfo *MRI; const MachineDominatorTree *MDT; + const TargetInstrInfo *TII; public: static char ID; @@ -1291,7 +1292,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { return false; } - assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) && + assert(!RISCVII::elementsDependOnVL( + TII->get(RISCV::getRVVMCOpcode(MI.getOpcode())).TSFlags) && "Instruction shouldn't be supported if elements depend on VL"); assert(MI.getOperand(0).isReg() && @@ -1484,7 +1486,6 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { } bool 
RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { - assert(DemandedVLs.size() == 0); if (skipFunction(MF.getFunction())) return false; @@ -1495,6 +1496,10 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) { if (!ST.hasVInstructions()) return false; + TII = ST.getInstrInfo(); + + assert(DemandedVLs.empty()); + // For each instruction that defines a vector, compute what VL its // downstream users demand. for (MachineBasicBlock *MBB : post_order(&MF)) { diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 84ef539..c1cc19b 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) { if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg())) return false; + // Masked off lanes past TrueVL will come from False, and converting to vmv + // will lose these lanes unless MIVL <= TrueVL. + // TODO: We could relax this for False == Passthru and True policy == TU + const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); + const MachineOperand &TrueVL = + True->getOperand(RISCVII::getVLOpNum(True->getDesc())); + if (!RISCV::isVLKnownLE(MIVL, TrueVL)) + return false; + // True's passthru needs to be equivalent to False Register TruePassthruReg = True->getOperand(1).getReg(); Register FalseReg = MI.getOperand(2).getReg();
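
The guard added to convertSameMaskVMergeToVMv above depends on RISCV::isVLKnownLE being able to prove MIVL <= TrueVL. The following is a minimal self-contained C++ sketch of that style of conservative VL comparison; the VLOperand type and the helper name are illustrative assumptions for this note, not LLVM's MachineOperand-based implementation. The comparison may only answer true when the ordering is provable, and must answer false in every other case so the peephole stays sound.

#include <cstdint>

// Illustrative stand-in for a VL operand (an assumption of this sketch,
// not LLVM's MachineOperand): an immediate element count, the VLMAX
// sentinel, or a register whose runtime value is unknown.
struct VLOperand {
  enum class Kind { Imm, VLMax, Reg };
  Kind K;
  uint64_t Imm = 0; // meaningful only when K == Kind::Imm
  unsigned Reg = 0; // meaningful only when K == Kind::Reg
};

// Answers true only when "LHS <= RHS" is provable from the operands alone.
static bool isVLKnownLEIllustration(const VLOperand &LHS,
                                    const VLOperand &RHS) {
  if (RHS.K == VLOperand::Kind::VLMax)
    return true; // every VL is <= VLMAX
  if (LHS.K == VLOperand::Kind::VLMax)
    return false; // VLMAX <= a concrete VL is not provable in general
  if (LHS.K == VLOperand::Kind::Reg && RHS.K == VLOperand::Kind::Reg)
    return LHS.Reg == RHS.Reg; // same register implies equal values
  if (LHS.K == VLOperand::Kind::Imm && RHS.K == VLOperand::Kind::Imm)
    return LHS.Imm <= RHS.Imm;
  return false; // mixed register/immediate: no provable relationship
}

int main() {
  VLOperand Four{VLOperand::Kind::Imm, 4, 0};
  VLOperand Max{VLOperand::Kind::VLMax, 0, 0};
  return isVLKnownLEIllustration(Four, Max) ? 0 : 1; // 4 <= VLMAX holds
}

Under a check of this shape, the vmerge-to-vmv rewrite is rejected whenever the vmerge's VL might exceed True's VL, since lanes past TrueVL must keep coming from False and a vmv.v.v of True's result could not reproduce them.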