diff options
author | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-09-05 12:15:28 +0000 |
---|---|---|
committer | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-09-12 11:21:41 +0000 |
commit | b666a28a0581e3a1f7838eca5f241d2550c05e4b (patch) | |
tree | 41473d9785a63148661490cb95234d4d6f304258 /llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | |
parent | 77596b78e5664fff8d272599c0420fc9b87e2c2d (diff) | |
download | llvm-users/MacDue/sme-loops.zip llvm-users/MacDue/sme-loops.tar.gz llvm-users/MacDue/sme-loops.tar.bz2 |
[AArch64][SME] Avoid ZA save state changes in loops in MachineSMEABIPassusers/MacDue/sme-loops
This patch uses the MachineLoopInfo to give blocks within loops a higher
weight when choosing the bundle ZA state. MachineLoopInfo does not find
loop trip counts, so this uses an arbitrary weight (default 10), which
can be configured with the `-aarch64-sme-abi-loop-edge-weight` flag.
This makes the MachineSMEABIPass pass more likely to pick a bundle state
that matches the loop's entry/exit state, which avoids state changes in
the loop (which we assume will happen more than once).
This does require some extra analysis, so this is only enabled at -O1
and above.
Change-Id: If318c809d2f7cc1fca144fbe424ba2a2ca7fb19f
Diffstat (limited to 'llvm/lib/Target/AArch64/MachineSMEABIPass.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index cced0fa..6e0cebb 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -63,6 +63,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -70,6 +71,12 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-machine-sme-abi" +static cl::opt<int> + LoopEdgeWeight("aarch64-sme-abi-loop-edge-weight", cl::ReallyHidden, + cl::init(10), + cl::desc("Edge weight for basic blocks witin loops (used " + "for placing ZA saves/restores)")); + namespace { enum ZAState { @@ -238,7 +245,8 @@ getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI, struct MachineSMEABI : public MachineFunctionPass { inline static char ID = 0; - MachineSMEABI() : MachineFunctionPass(ID) {} + MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default) + : MachineFunctionPass(ID), OptLevel(OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -247,6 +255,9 @@ struct MachineSMEABI : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<EdgeBundlesWrapperLegacy>(); + // Only analyse loops at -01 and above. + if (OptLevel != CodeGenOptLevel::None) + AU.addRequired<MachineLoopInfoWrapperPass>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -329,12 +340,15 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI, DebugLoc DL); private: + CodeGenOptLevel OptLevel = CodeGenOptLevel::Default; + MachineFunction *MF = nullptr; const AArch64Subtarget *Subtarget = nullptr; const AArch64RegisterInfo *TRI = nullptr; const AArch64FunctionInfo *AFI = nullptr; const TargetInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; + MachineLoopInfo *MLI = nullptr; }; FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { @@ -436,18 +450,23 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, LLVM_DEBUG(dbgs() << " (no state preference)\n"); continue; } + bool IsLoop = MLI && MLI->getLoopFor(MF->getBlockNumbered(BlockID)); bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I; bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I; + int EdgeWeight = IsLoop ? LoopEdgeWeight : 1; + if (IsLoop) + LLVM_DEBUG(dbgs() << " IsLoop"); + LLVM_DEBUG(dbgs() << " (EdgeWeight: " << EdgeWeight << ')'); ZAState DesiredIncomingState = Block.Insts.front().NeededState; if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) { - EdgeStateCounts[DesiredIncomingState]++; + EdgeStateCounts[DesiredIncomingState] += EdgeWeight; LLVM_DEBUG(dbgs() << " DesiredIncomingState: " << getZAStateString(DesiredIncomingState)); } ZAState DesiredOutgoingState = Block.Insts.back().NeededState; if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) { - EdgeStateCounts[DesiredOutgoingState]++; + EdgeStateCounts[DesiredOutgoingState] += EdgeWeight; LLVM_DEBUG(dbgs() << " DesiredOutgoingState: " << getZAStateString(DesiredOutgoingState)); } @@ -849,6 +868,8 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { TII = Subtarget->getInstrInfo(); TRI = Subtarget->getRegisterInfo(); MRI = &MF.getRegInfo(); + if (OptLevel != CodeGenOptLevel::None) + MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); const EdgeBundles &Bundles = getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles(); @@ -877,4 +898,6 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) { return true; } -FunctionPass *llvm::createMachineSMEABIPass() { return new MachineSMEABI(); } +FunctionPass *llvm::createMachineSMEABIPass(CodeGenOptLevel OptLevel) { + return new MachineSMEABI(OptLevel); +} |