[AMDGPU][SIInsertWaitcnts] Introduce Counter::dropOldest() and Counter::clear() (branch: users/vporpo/waitcnt10-6)

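This might not be 100% NFC: the old `setScoreLB(T, getScoreUB(T) - 1);` would lower LB to UB - 1 when UB == LB, whereas the new `dropOldest(/*Remaining=*/1)` takes `std::max(LB, UB - Remaining)` and therefore never lowers LB. (It is not clear whether that statement can logically be reached when UB == LB.)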
author: Vasileios Porpodas <vasileios.porpodas@amd.com> 2026-04-10 23:28:07 +0000
committer: Vasileios Porpodas <vasileios.porpodas@amd.com> 2026-04-21 18:59:18 +0000
commit: 07e939bfad6cd90f1be9048791254c568975311c (patch)
tree: 5c238ef78f994263b302d9b12b1a655740eba570
parent: 64d56184a36f919d0d0f29f924a8d9c831c9b139 (diff)
download: llvm-users/vporpo/waitcnt10-6.tar.gz
llvm-users/vporpo/waitcnt10-6.tar.bz2
llvm-users/vporpo/waitcnt10-6.zip
1 files changed, 10 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index baa7f61f471e..4dc0f8fb67e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -794,8 +794,6 @@ private:
       /// \p Score to complete, assuming in-order completion.
       unsigned getWait(unsigned Score) const { return UB - Score; }
       // TODO: Make private: we should not provide raw access to the internals.
-      void setLB(unsigned NewLB) { LB = NewLB; }
-      // TODO: Make private: we should not provide raw access to the internals.
       unsigned getUB() const { return UB; }
       // TODO: Make private: we should not provide raw access to the internals.
       unsigned getLB() const { return LB; }
@@ -828,6 +826,11 @@ private:
         unsigned Max = getWaitCountMax(*Limits, CntT);
         setUB(UB + Max);
       }
+      /// Drop the oldest scores, keeping at most the \p Remaining newest.
+      void dropOldest(unsigned Remaining = 0) {
+        LB = std::max(LB, UB - Remaining);
+      }
+      void clear() { LB = UB; }
     };
 
     std::array<Counter, AMDGPU::NUM_INST_CNTS> Counters;
@@ -1002,11 +1005,6 @@ private:
     return Context->TRI.regunits(Reg);
   }
 
-  void setScoreLB(AMDGPU::InstCounterType T, unsigned Val) {
-    assert(T < AMDGPU::NUM_INST_CNTS);
-    Counters[T].setLB(Val);
-  }
-
   void setRegScore(MCPhysReg Reg, AMDGPU::InstCounterType T, unsigned Val) {
     const SIRegisterInfo &TRI = Context->TRI;
     if (Reg == AMDGPU::SCC) {
@@ -1261,7 +1259,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
       // SMEM and VMEM operations. So there will never be
       // outstanding address translations for both SMEM and
       // VMEM at the same time.
-      setScoreLB(T, getScoreUB(T) - 1);
+      Counters[T].dropOldest(/*Remaining=*/1);
       PendingEvents.remove(OtherEvent);
     }
     for (const MachineOperand &Op : Inst.all_uses())
@@ -1714,9 +1712,8 @@ void WaitcntBrackets::tryClearSCCWriteEvent(MachineInstr *Inst) {
     WaitEventSet SCC_WRITE_PendingEvent(SCC_WRITE);
     // If this SCC_WRITE is the only pending KM_CNT event, clear counter.
     if ((PendingEvents & Context->getWaitEvents(AMDGPU::KM_CNT)) ==
-        SCC_WRITE_PendingEvent) {
-      setScoreLB(AMDGPU::KM_CNT, getScoreUB(AMDGPU::KM_CNT));
-    }
+        SCC_WRITE_PendingEvent)
+      Counters[AMDGPU::KM_CNT].clear();
 
     PendingEvents.remove(SCC_WRITE_PendingEvent);
     PendingSCCWrite = nullptr;
@@ -1735,9 +1732,9 @@ void WaitcntBrackets::applyWaitcnt(AMDGPU::InstCounterType T, unsigned Count) {
   if (Count != 0) {
     if (counterOutOfOrder(T))
       return;
-    setScoreLB(T, std::max(getScoreLB(T), UB - Count));
+    Counters[T].dropOldest(Count);
   } else {
-    setScoreLB(T, UB);
+    Counters[T].clear();
     PendingEvents.remove(Context->getWaitEvents(T));
   }
author	Vasileios Porpodas <vasileios.porpodas@amd.com>	2026-04-10 23:28:07 +0000
committer	Vasileios Porpodas <vasileios.porpodas@amd.com>	2026-04-21 18:59:18 +0000
commit	07e939bfad6cd90f1be9048791254c568975311c (patch)
tree	5c238ef78f994263b302d9b12b1a655740eba570
parent	64d56184a36f919d0d0f29f924a8d9c831c9b139 (diff)
download	llvm-users/vporpo/waitcnt10-6.tar.gz llvm-users/vporpo/waitcnt10-6.tar.bz2 llvm-users/vporpo/waitcnt10-6.zip