aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vasileios Porpodas <vasileios.porpodas@amd.com> 2026-04-10 23:28:07 +0000
committer Vasileios Porpodas <vasileios.porpodas@amd.com> 2026-04-21 18:59:18 +0000
commit 07e939bfad6cd90f1be9048791254c568975311c (patch)
tree 5c238ef78f994263b302d9b12b1a655740eba570
parent 64d56184a36f919d0d0f29f924a8d9c831c9b139 (diff)
download llvm-users/vporpo/waitcnt10-6.tar.gz
llvm-users/vporpo/waitcnt10-6.tar.bz2
llvm-users/vporpo/waitcnt10-6.zip
[AMDGPU][SIInsertWaitcnts] Introduce Counter::dropOldest() and Counter::clear()
(branch: users/vporpo/waitcnt10-6)
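This change might not be 100% NFC: the old `setScoreLB(T, getScoreUB(T) - 1);` could decrement LB when UB == LB, whereas the new `dropOldest(/*Remaining=*/1)` computes `std::max(LB, UB - Remaining)` and therefore never lowers LB. (It is unclear whether this statement can logically be reached when UB == LB.)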
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 23
1 files changed, 10 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index baa7f61f471e..4dc0f8fb67e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -794,8 +794,6 @@ private:
/// \p Score to complete, assuming in-order completion.
unsigned getWait(unsigned Score) const { return UB - Score; }
// TODO: Make private: we should not provide raw access to the internals.
- void setLB(unsigned NewLB) { LB = NewLB; }
- // TODO: Make private: we should not provide raw access to the internals.
unsigned getUB() const { return UB; }
// TODO: Make private: we should not provide raw access to the internals.
unsigned getLB() const { return LB; }
@@ -828,6 +826,11 @@ private:
unsigned Max = getWaitCountMax(*Limits, CntT);
setUB(UB + Max);
}
+ /// Drop all oldest scores except \p Remaining.
+ void dropOldest(unsigned Remaining = 0) {
+ LB = std::max(LB, UB - Remaining);
+ }
+ void clear() { LB = UB; }
};
std::array<Counter, AMDGPU::NUM_INST_CNTS> Counters;
@@ -1002,11 +1005,6 @@ private:
return Context->TRI.regunits(Reg);
}
- void setScoreLB(AMDGPU::InstCounterType T, unsigned Val) {
- assert(T < AMDGPU::NUM_INST_CNTS);
- Counters[T].setLB(Val);
- }
-
void setRegScore(MCPhysReg Reg, AMDGPU::InstCounterType T, unsigned Val) {
const SIRegisterInfo &TRI = Context->TRI;
if (Reg == AMDGPU::SCC) {
@@ -1261,7 +1259,7 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
// SMEM and VMEM operations. So there will never be
// outstanding address translations for both SMEM and
// VMEM at the same time.
- setScoreLB(T, getScoreUB(T) - 1);
+ Counters[T].dropOldest(/*Remaining=*/1);
PendingEvents.remove(OtherEvent);
}
for (const MachineOperand &Op : Inst.all_uses())
@@ -1714,9 +1712,8 @@ void WaitcntBrackets::tryClearSCCWriteEvent(MachineInstr *Inst) {
WaitEventSet SCC_WRITE_PendingEvent(SCC_WRITE);
// If this SCC_WRITE is the only pending KM_CNT event, clear counter.
if ((PendingEvents & Context->getWaitEvents(AMDGPU::KM_CNT)) ==
- SCC_WRITE_PendingEvent) {
- setScoreLB(AMDGPU::KM_CNT, getScoreUB(AMDGPU::KM_CNT));
- }
+ SCC_WRITE_PendingEvent)
+ Counters[AMDGPU::KM_CNT].clear();
PendingEvents.remove(SCC_WRITE_PendingEvent);
PendingSCCWrite = nullptr;
@@ -1735,9 +1732,9 @@ void WaitcntBrackets::applyWaitcnt(AMDGPU::InstCounterType T, unsigned Count) {
if (Count != 0) {
if (counterOutOfOrder(T))
return;
- setScoreLB(T, std::max(getScoreLB(T), UB - Count));
+ Counters[T].dropOldest(Count);
} else {
- setScoreLB(T, UB);
+ Counters[T].clear();
PendingEvents.remove(Context->getWaitEvents(T));
}