//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Load/Store Pairing: It identifies pairs of load or store instructions // operating on consecutive memory locations and merges them into a single // paired instruction, leveraging hardware support for paired memory accesses. // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. // // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as // merging zero store instructions, promoting loads that read directly from a // preceding store, and merging base register updates with load/store // instructions (via pre-/post-indexed addressing). These advanced // transformations are not yet implemented in the RISC-V pass but represent // potential future enhancements for further optimizing RISC-V memory // operations. // //===----------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVTargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #define DEBUG_TYPE "riscv-load-store-opt" #define RISCV_LOAD_STORE_OPT_NAME "RISC-V Load / Store Optimizer" // The LdStLimit limits number of instructions how far we search for load/store // pairs. static cl::opt LdStLimit("riscv-load-store-scan-limit", cl::init(128), cl::Hidden); namespace { struct RISCVLoadStoreOpt : public MachineFunctionPass { static char ID; bool runOnMachineFunction(MachineFunction &Fn) override; RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().setNoVRegs(); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } // Find and pair load/store instructions. bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); // Convert load/store pairs to single instructions. bool tryConvertToLdStPair(MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second); // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, bool &MergeForward); MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, bool MergeForward); private: AliasAnalysis *AA; MachineRegisterInfo *MRI; const RISCVInstrInfo *TII; const RISCVRegisterInfo *TRI; LiveRegUnits ModifiedRegUnits, UsedRegUnits; }; } // end anonymous namespace char RISCVLoadStoreOpt::ID = 0; INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, false) bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; const RISCVSubtarget &Subtarget = Fn.getSubtarget(); if (!Subtarget.useLoadStorePairs()) return false; bool MadeChange = false; TII = Subtarget.getInstrInfo(); TRI = Subtarget.getRegisterInfo(); MRI = &Fn.getRegInfo(); AA = &getAnalysis().getAAResults(); ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); for (MachineBasicBlock &MBB : Fn) { LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && tryToPairLdStInst(MBBI)) MadeChange = true; else ++MBBI; } } return MadeChange; } // Find loads and stores that can be merged into a single load or store pair // instruction. bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; // If this is volatile, it is not a candidate. if (MI.hasOrderedMemoryRef()) return false; if (!TII->isLdStSafeToPair(MI, TRI)) return false; // Look ahead for a pairable instruction. MachineBasicBlock::iterator E = MI.getParent()->end(); bool MergeForward; MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward); if (Paired != E) { MBBI = mergePairedInsns(MBBI, Paired, MergeForward); return true; } return false; } // Merge two adjacent load/store instructions into a paired instruction // (LDP/SDP/SWP/LWP) if the effective address is 8-byte aligned in case of // SWP/LWP 16-byte aligned in case of LDP/SDP. This function selects the // appropriate paired opcode, verifies that the memory operand is properly // aligned, and checks that the offset is valid. If all conditions are met, it // builds and inserts the paired instruction. bool RISCVLoadStoreOpt::tryConvertToLdStPair( MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { unsigned PairOpc; Align RequiredAlignment; switch (First->getOpcode()) { default: llvm_unreachable("Unsupported load/store instruction for pairing"); case RISCV::SW: PairOpc = RISCV::MIPS_SWP; RequiredAlignment = Align(8); break; case RISCV::LW: PairOpc = RISCV::MIPS_LWP; RequiredAlignment = Align(8); break; case RISCV::SD: PairOpc = RISCV::MIPS_SDP; RequiredAlignment = Align(16); break; case RISCV::LD: PairOpc = RISCV::MIPS_LDP; RequiredAlignment = Align(16); break; } MachineFunction *MF = First->getMF(); const MachineMemOperand *MMO = *First->memoperands_begin(); Align MMOAlign = MMO->getAlign(); if (MMOAlign < RequiredAlignment) return false; int64_t Offset = First->getOperand(2).getImm(); if (!isUInt<7>(Offset)) return false; MachineInstrBuilder MIB = BuildMI( *MF, First->getDebugLoc() ? First->getDebugLoc() : Second->getDebugLoc(), TII->get(PairOpc)); MIB.add(First->getOperand(0)) .add(Second->getOperand(0)) .add(First->getOperand(1)) .add(First->getOperand(2)) .cloneMergedMemRefs({&*First, &*Second}); First->getParent()->insert(First, MIB); First->removeFromParent(); Second->removeFromParent(); return true; } static bool mayAlias(MachineInstr &MIa, SmallVectorImpl &MemInsns, AliasAnalysis *AA) { for (MachineInstr *MIb : MemInsns) if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) return true; return false; } // Scan the instructions looking for a load/store that can be combined with the // current instruction into a wider equivalent or a load/store pair. // TODO: Extend pairing logic to consider reordering both instructions // to a safe "middle" position rather than only merging forward/backward. // This requires more sophisticated checks for aliasing, register // liveness, and potential scheduling hazards. MachineBasicBlock::iterator RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool &MergeForward) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr &FirstMI = *I; MBBI = next_nodbg(MBBI, E); bool MayLoad = FirstMI.mayLoad(); Register Reg = FirstMI.getOperand(0).getReg(); Register BaseReg = FirstMI.getOperand(1).getReg(); int64_t Offset = FirstMI.getOperand(2).getImm(); int64_t OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); MergeForward = false; // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); UsedRegUnits.clear(); // Remember any instructions that read/write memory between FirstMI and MI. SmallVector MemInsns; for (unsigned Count = 0; MBBI != E && Count < LdStLimit; MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) ++Count; if (MI.getOpcode() == FirstMI.getOpcode() && TII->isLdStSafeToPair(MI, TRI)) { Register MIBaseReg = MI.getOperand(1).getReg(); int64_t MIOffset = MI.getOperand(2).getImm(); if (BaseReg == MIBaseReg) { if ((Offset != MIOffset + OffsetStride) && (Offset + OffsetStride != MIOffset)) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } // If the destination register of one load is the same register or a // sub/super register of the other load, bail and keep looking. if (MayLoad && TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } // If the BaseReg has been modified, then we cannot do the optimization. if (!ModifiedRegUnits.available(BaseReg)) return E; // If the Rt of the second instruction was not modified or used between // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) && !(MI.mayLoad() && !UsedRegUnits.available(MI.getOperand(0).getReg())) && !mayAlias(MI, MemInsns, AA)) { MergeForward = false; return MBBI; } // Likewise, if the Rt of the first instruction is not modified or used // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. if (!(MayLoad && !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) && !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) { MergeForward = true; return MBBI; } } // Unable to combine these instructions due to interference in between. // Keep looking. } } // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. if (MI.isCall()) return E; // Update modified / uses register units. LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. if (!ModifiedRegUnits.available(BaseReg)) return E; // Update list of instructions that read/write memory. if (MI.mayLoadOrStore()) MemInsns.push_back(&MI); } return E; } MachineBasicBlock::iterator RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, bool MergeForward) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator NextI = next_nodbg(I, E); // If NextI is the second of the two instructions to be merged, we need // to skip one further. Either way we merge will invalidate the iterator, // and we don't need to scan the new instruction, as it's a pairwise // instruction, which we're not considering for further action anyway. if (NextI == Paired) NextI = next_nodbg(NextI, E); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; int Offset = I->getOperand(2).getImm(); int PairedOffset = Paired->getOperand(2).getImm(); bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; if (!MergeForward) Paired->getOperand(1).setIsKill(false); // Kill flags may become invalid when moving stores for pairing. if (I->getOperand(0).isUse()) { if (!MergeForward) { // Check if the Paired store's source register has a kill flag and clear // it only if there are intermediate uses between I and Paired. MachineOperand &PairedRegOp = Paired->getOperand(0); if (PairedRegOp.isKill()) { for (auto It = std::next(I); It != Paired; ++It) { if (It->readsRegister(PairedRegOp.getReg(), TRI)) { PairedRegOp.setIsKill(false); break; } } } } else { // Clear kill flags of the first store's register in the forward // direction. Register Reg = I->getOperand(0).getReg(); for (MachineInstr &MI : make_range(std::next(I), std::next(Paired))) MI.clearRegisterKills(Reg, TRI); } } MachineInstr *ToInsert = DeletionPoint->removeFromParent(); MachineBasicBlock &MBB = *InsertionPoint->getParent(); MachineBasicBlock::iterator First, Second; if (!InsertAfter) { First = MBB.insert(InsertionPoint, ToInsert); Second = InsertionPoint; } else { Second = MBB.insertAfter(InsertionPoint, ToInsert); First = InsertionPoint; } if (tryConvertToLdStPair(First, Second)) { LLVM_DEBUG(dbgs() << "Pairing load/store:\n "); LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); } return NextI; } // Returns an instance of the Load / Store Optimization pass. FunctionPass *llvm::createRISCVLoadStoreOptPass() { return new RISCVLoadStoreOpt(); }