diff options
| author | Vasileios Porpodas <vasileios.porpodas@amd.com> | 2026-04-10 23:28:07 +0000 |
|---|---|---|
| committer | Vasileios Porpodas <vasileios.porpodas@amd.com> | 2026-04-21 18:59:18 +0000 |
| commit | 07e939bfad6cd90f1be9048791254c568975311c (patch) | |
| tree | 5c238ef78f994263b302d9b12b1a655740eba570 | |
| parent | 64d56184a36f919d0d0f29f924a8d9c831c9b139 (diff) | |
| download | llvm-users/vporpo/waitcnt10-6.tar.gz llvm-users/vporpo/waitcnt10-6.tar.bz2 llvm-users/vporpo/waitcnt10-6.zip | |
[AMDGPU][SIInsertWaitcnts] Introduce Counter::dropOldest() and Counter::clear()
users/vporpo/waitcnt10-6
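This change might not be 100% NFC: the old `setScoreLB(T, getScoreUB(T) - 1);` would decrement LB if UB == LB, whereas the new `dropOldest(/*Remaining=*/1)` clamps with `std::max` and therefore never lowers LB. (It is not clear whether that statement can logically be reached when UB == LB.)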
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 23 |
1 file changed, 10 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index baa7f61f471e..4dc0f8fb67e8 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -794,8 +794,6 @@ private: /// \p Score to complete, assuming in-order completion. unsigned getWait(unsigned Score) const { return UB - Score; } // TODO: Make private: we should not provide raw access to the internals. - void setLB(unsigned NewLB) { LB = NewLB; } - // TODO: Make private: we should not provide raw access to the internals. unsigned getUB() const { return UB; } // TODO: Make private: we should not provide raw access to the internals. unsigned getLB() const { return LB; } @@ -828,6 +826,11 @@ private: unsigned Max = getWaitCountMax(*Limits, CntT); setUB(UB + Max); } + /// Drop all oldest scores except \p Remaining. + void dropOldest(unsigned Remaining = 0) { + LB = std::max(LB, UB - Remaining); + } + void clear() { LB = UB; } }; std::array<Counter, AMDGPU::NUM_INST_CNTS> Counters; @@ -1002,11 +1005,6 @@ private: return Context->TRI.regunits(Reg); } - void setScoreLB(AMDGPU::InstCounterType T, unsigned Val) { - assert(T < AMDGPU::NUM_INST_CNTS); - Counters[T].setLB(Val); - } - void setRegScore(MCPhysReg Reg, AMDGPU::InstCounterType T, unsigned Val) { const SIRegisterInfo &TRI = Context->TRI; if (Reg == AMDGPU::SCC) { @@ -1261,7 +1259,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) { // SMEM and VMEM operations. So there will never be // outstanding address translations for both SMEM and // VMEM at the same time. - setScoreLB(T, getScoreUB(T) - 1); + Counters[T].dropOldest(/*Remaining=*/1); PendingEvents.remove(OtherEvent); } for (const MachineOperand &Op : Inst.all_uses()) @@ -1714,9 +1712,8 @@ void WaitcntBrackets::tryClearSCCWriteEvent(MachineInstr *Inst) { WaitEventSet SCC_WRITE_PendingEvent(SCC_WRITE); // If this SCC_WRITE is the only pending KM_CNT event, clear counter. 
if ((PendingEvents & Context->getWaitEvents(AMDGPU::KM_CNT)) == - SCC_WRITE_PendingEvent) { - setScoreLB(AMDGPU::KM_CNT, getScoreUB(AMDGPU::KM_CNT)); - } + SCC_WRITE_PendingEvent) + Counters[AMDGPU::KM_CNT].clear(); PendingEvents.remove(SCC_WRITE_PendingEvent); PendingSCCWrite = nullptr; @@ -1735,9 +1732,9 @@ void WaitcntBrackets::applyWaitcnt(AMDGPU::InstCounterType T, unsigned Count) { if (Count != 0) { if (counterOutOfOrder(T)) return; - setScoreLB(T, std::max(getScoreLB(T), UB - Count)); + Counters[T].dropOldest(Count); } else { - setScoreLB(T, UB); + Counters[T].clear(); PendingEvents.remove(Context->getWaitEvents(T)); } |
