diff options
author | Yuta Mukai <mukai.yuta@fujitsu.com> | 2025-02-05 21:08:20 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-05 21:08:20 +0900 |
commit | e3abe940d8fc356bf46a6b71da44df0f4652df1c (patch) | |
tree | ad3fc804c9759bd2129357625c7a10d74ff50ea8 /llvm/lib/CodeGen/MachinePipeliner.cpp | |
parent | a61ca99de219ff358e613822706a7a40941a8229 (diff) | |
download | llvm-e3abe940d8fc356bf46a6b71da44df0f4652df1c.zip llvm-e3abe940d8fc356bf46a6b71da44df0f4652df1c.tar.gz llvm-e3abe940d8fc356bf46a6b71da44df0f4652df1c.tar.bz2 |
[MachinePipeliner] Improve loop carried dependence analysis (#94185)
The previous implementation had false positive/negative cases in the
analysis of the loop carried dependency.
A missed dependency case is caused by incorrect analysis of address
increments. This is fixed by strict analysis of recursive definitions.
See added test swp-carried-dep4.mir.
Excessive dependency detection is fixed by improving the formula
for determining the overlap of address ranges to be accessed. See added test
swp-carried-dep5.mir.
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 266 |
1 files changed, 192 insertions, 74 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index dbf320f..064b0a0 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -2521,9 +2521,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { return scheduleFound && Schedule.getMaxStageCount() > 0; } +static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + Register Result; + for (const MachineOperand &Use : MI.all_uses()) { + Register Reg = Use.getReg(); + if (!Reg.isVirtual()) + return Register(); + if (MRI.getVRegDef(Reg)->getParent() != MI.getParent()) + continue; + if (Result) + return Register(); + Result = Reg; + } + return Result; +} + +/// When Op is a value that is incremented recursively in a loop and there is a +/// unique instruction that increments it, returns true and sets Value. +static bool findLoopIncrementValue(const MachineOperand &Op, int &Value) { + if (!Op.isReg() || !Op.getReg().isVirtual()) + return false; + + Register OrgReg = Op.getReg(); + Register CurReg = OrgReg; + const MachineBasicBlock *LoopBB = Op.getParent()->getParent(); + const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); + + const TargetInstrInfo *TII = + LoopBB->getParent()->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = + LoopBB->getParent()->getSubtarget().getRegisterInfo(); + + MachineInstr *Phi = nullptr; + MachineInstr *Increment = nullptr; + + // Traverse definitions until it reaches Op or an instruction that does not + // satisfy the condition. + // Acceptable example: + // bb.0: + // %0 = PHI %3, %bb.0, ... + // %2 = ADD %0, Value + // ... = LOAD %2(Op) + // %3 = COPY %2 + while (true) { + if (!CurReg.isValid() || !CurReg.isVirtual()) + return false; + MachineInstr *Def = MRI.getVRegDef(CurReg); + if (Def->getParent() != LoopBB) + return false; + + if (Def->isCopy()) { + // Ignore copy instructions unless they contain subregisters + if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg()) + return false; + CurReg = Def->getOperand(1).getReg(); + } else if (Def->isPHI()) { + // There must be just one Phi + if (Phi) + return false; + Phi = Def; + CurReg = getLoopPhiReg(*Def, LoopBB); + } else if (TII->getIncrementValue(*Def, Value)) { + // Potentially a unique increment + if (Increment) + // Multiple increments exist + return false; + + const MachineOperand *BaseOp; + int64_t Offset; + bool OffsetIsScalable; + if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable, + TRI)) { + // Pre/post increment instruction + CurReg = BaseOp->getReg(); + } else { + // If only one of the operands is defined within the loop, it is assumed + // to be an incremented value. + CurReg = findUniqueOperandDefinedInLoop(*Def); + if (!CurReg.isValid()) + return false; + } + Increment = Def; + } else { + return false; + } + if (CurReg == OrgReg) + break; + } + + if (!Phi || !Increment) + return false; + + return true; +} + /// Return true if we can compute the amount the instruction changes /// during each iteration. Set Delta to the amount of the change. -bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const { +bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineOperand *BaseOp; int64_t Offset; @@ -2538,24 +2633,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const { if (!BaseOp->isReg()) return false; - Register BaseReg = BaseOp->getReg(); - - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Check if there is a Phi. If so, get the definition in the loop. - MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); - if (BaseDef && BaseDef->isPHI()) { - BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); - BaseDef = MRI.getVRegDef(BaseReg); - } - if (!BaseDef) - return false; - - int D = 0; - if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) - return false; - - Delta = D; - return true; + return findLoopIncrementValue(*BaseOp, Delta); } /// Check if we can change the instruction to use an offset value from the @@ -2673,6 +2751,100 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { return Def; } +/// Return false if there is no overlap between the region accessed by BaseMI in +/// an iteration and the region accessed by OtherMI in subsequent iterations. +bool SwingSchedulerDAG::mayOverlapInLaterIter( + const MachineInstr *BaseMI, const MachineInstr *OtherMI) const { + int DeltaB, DeltaO, Delta; + if (!computeDelta(*BaseMI, DeltaB) || !computeDelta(*OtherMI, DeltaO) || + DeltaB != DeltaO) + return true; + Delta = DeltaB; + + const MachineOperand *BaseOpB, *BaseOpO; + int64_t OffsetB, OffsetO; + bool OffsetBIsScalable, OffsetOIsScalable; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB, + OffsetBIsScalable, TRI) || + !TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO, + OffsetOIsScalable, TRI)) + return true; + + if (OffsetBIsScalable || OffsetOIsScalable) + return true; + + if (!BaseOpB->isIdenticalTo(*BaseOpO)) { + // Pass cases with different base operands but same initial values. + // Typically for when pre/post increment is used. + + if (!BaseOpB->isReg() || !BaseOpO->isReg()) + return true; + Register RegB = BaseOpB->getReg(), RegO = BaseOpO->getReg(); + if (!RegB.isVirtual() || !RegO.isVirtual()) + return true; + + MachineInstr *DefB = MRI.getVRegDef(BaseOpB->getReg()); + MachineInstr *DefO = MRI.getVRegDef(BaseOpO->getReg()); + if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI()) + return true; + + unsigned InitValB = 0; + unsigned LoopValB = 0; + unsigned InitValO = 0; + unsigned LoopValO = 0; + getPhiRegs(*DefB, BB, InitValB, LoopValB); + getPhiRegs(*DefO, BB, InitValO, LoopValO); + MachineInstr *InitDefB = MRI.getVRegDef(InitValB); + MachineInstr *InitDefO = MRI.getVRegDef(InitValO); + + if (!InitDefB->isIdenticalTo(*InitDefO)) + return true; + } + + LocationSize AccessSizeB = (*BaseMI->memoperands_begin())->getSize(); + LocationSize AccessSizeO = (*OtherMI->memoperands_begin())->getSize(); + + // This is the main test, which checks the offset values and the loop + // increment value to determine if the accesses may be loop carried. + if (!AccessSizeB.hasValue() || !AccessSizeO.hasValue()) + return true; + + LLVM_DEBUG({ + dbgs() << "Overlap check:\n"; + dbgs() << " BaseMI: "; + BaseMI->dump(); + dbgs() << " Base + " << OffsetB << " + I * " << Delta + << ", Len: " << AccessSizeB.getValue() << "\n"; + dbgs() << " OtherMI: "; + OtherMI->dump(); + dbgs() << " Base + " << OffsetO << " + I * " << Delta + << ", Len: " << AccessSizeO.getValue() << "\n"; + }); + + // Excessive overlap may be detected in strided patterns. + // For example, the memory addresses of the store and the load in + // for (i=0; i<n; i+=2) a[i+1] = a[i]; + // are assumed to overlap. + if (Delta < 0) { + int64_t BaseMinAddr = OffsetB; + int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1; + if (BaseMinAddr > OhterNextIterMaxAddr) { + LLVM_DEBUG(dbgs() << " Result: No overlap\n"); + return false; + } + } else { + int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1; + int64_t OtherNextIterMinAddr = OffsetO + Delta; + if (BaseMaxAddr < OtherNextIterMinAddr) { + LLVM_DEBUG(dbgs() << " Result: No overlap\n"); + return false; + } + } + LLVM_DEBUG(dbgs() << " Result: Overlap\n"); + return true; +} + /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a value /// that may be used or defined by the source in a subsequent iteration. @@ -2704,61 +2876,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep( // The conservative assumption is that a dependence between memory operations // may be loop carried. The following code checks when it can be proved that // there is no loop carried dependence. - unsigned DeltaS, DeltaD; - if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD)) - return true; - - const MachineOperand *BaseOpS, *BaseOpD; - int64_t OffsetS, OffsetD; - bool OffsetSIsScalable, OffsetDIsScalable; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable, - TRI) || - !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable, - TRI)) - return true; - - assert(!OffsetSIsScalable && !OffsetDIsScalable && - "Expected offsets to be byte offsets"); - - MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg()); - MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg()); - if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI()) - return true; - - unsigned InitValS = 0; - unsigned LoopValS = 0; - unsigned InitValD = 0; - unsigned LoopValD = 0; - getPhiRegs(*DefS, BB, InitValS, LoopValS); - getPhiRegs(*DefD, BB, InitValD, LoopValD); - MachineInstr *InitDefS = MRI.getVRegDef(InitValS); - MachineInstr *InitDefD = MRI.getVRegDef(InitValD); - - if (!InitDefS->isIdenticalTo(*InitDefD)) - return true; - - // Check that the base register is incremented by a constant value for each - // iteration. - MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS); - int D = 0; - if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) - return true; - - LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize(); - LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize(); - - // This is the main test, which checks the offset values and the loop - // increment value to determine if the accesses may be loop carried. - if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue()) - return true; - - if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() || - DeltaD < AccessSizeD.getValue()) - return true; - - return (OffsetS + (int64_t)AccessSizeS.getValue() < - OffsetD + (int64_t)AccessSizeD.getValue()); + return mayOverlapInLaterIter(DI, SI); } void SwingSchedulerDAG::postProcessDAG() { |