aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeffrey Byrnes <Jeffrey.Byrnes@amd.com>2026-04-24 09:29:10 -0700
committerJeffrey Byrnes <Jeffrey.Byrnes@amd.com>2026-04-24 09:52:30 -0700
commit9e4ef195dc3017fa3e4a86092e60b018900c5b53 (patch)
treec9a55f3f66b41c11431902e1995a32c6676f4a9b
parenta18260b1a0d1f7ccb05e64a2a42f550270c0ca0e (diff)
downloadllvm-users/jebyrnes/DS-FIFO-stalls-Rebase3.tar.gz
llvm-users/jebyrnes/DS-FIFO-stalls-Rebase3.tar.bz2
llvm-users/jebyrnes/DS-FIFO-stalls-Rebase3.zip
Change-Id: I6972e887edd5db44ee9bcaed1f79e0c9933f611e
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.h20
2 files changed, 23 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.cpp
index 093afb08629e..8d550f5078b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.cpp
@@ -141,6 +141,8 @@ void HardwareUnitInfo::markScheduled(SUnit *SU, unsigned BlockingCycles) {
AllSUs.remove(SU);
PrioritySUs.remove(SU);
+ // BufferSize of 0 or 1 implies that each SU uses the HardwareUnit for
+ // BlockingCycles
if (BufferSize <= 1 || (ScheduledSUs.size() % BufferSize == 0))
TotalCycles -= BlockingCycles;
@@ -170,6 +172,8 @@ void HardwareUnitInfo::markScheduled(SUnit *SU, unsigned BlockingCycles) {
}
void HardwareUnitInfo::finalizeCycles() {
+ // BufferSize of 0 or 1 implies that each SU uses the HardwareUnit for
+ // BlockingCycles
if (BufferSize <= 1 || AllSUs.empty())
return;
@@ -701,7 +705,8 @@ bool AMDGPUCoExecSchedStrategy::tryEffectiveStall(SchedCandidate &Cand,
*SU->getInstr(), *static_cast<const SIInstrInfo *>(DAG->TII));
HardwareUnitInfo *HWUI = Heurs.getHWUIFromFlavor(Flavor);
- if (HWUI->getBufferSize() <= 1)
+ // A BufferSize of 0 means "unlimited" buffer, thus we will never fill it.
+ if (HWUI->getBufferSize() == 0)
return 0;
// getBufferAvailableCycle assumes top-down scheduling.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.h b/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.h
index 198e9b007fa1..fd637e9f8efc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCoExecSchedStrategy.h
@@ -178,9 +178,23 @@ private:
/// instructions on other HardwareUnits.
bool ProducesCoexecWindow = false;
/// How many instructions can be held simultaneously for this HardwareUnit.
- /// A value of 0 or 1 means that there is no buffer.
+ /// A value of 0 means there is no limit.
+ ///
+ /// This may approximate the hardware. For example, for LDS instructions
+ /// it is a well-known phenomenon that oversubscribing the LDS unit results in
+ /// longer latency for the LDS instructions. While it is true that there is a
+ /// hard limit to the amount of simultaneous in-flight LDS instructions, good
+ /// scheduling would also cool off the LDS to avoid other forms of hardware
+ /// contention and increasing LDS latency. Thus, we limit the amount of LDS
+ /// instructions we are willing to schedule close together, though this does
+ /// not correspond 1:1 with a hardware mechanism.
unsigned BufferSize = 0;
/// How many cycles it takes for an instruction to clear the buffer.
+ ///
+ /// Again, this may be an approximation. For example, for memory FIFOs, the
+ /// actual amount of cycles it will take to clear it is dependent on how
+ /// quickly prior instructions evacuate the FIFO, which is based on runtime
+ /// behavior which is not modelled in the compiler.
unsigned BufferCycles = 0;
public:
@@ -210,7 +224,7 @@ public:
/// \returns the next cycle where there is space in the buffer.
unsigned getBufferAvailableCycle(unsigned CurrCycle) {
// There is no buffer.
- if (BufferSize <= 1)
+ if (BufferSize == 0)
return CurrCycle;
// Buffer is available now.
@@ -265,7 +279,7 @@ public:
/// the list of PrioritySUs.
void markScheduled(SUnit *SU, unsigned BlockingCycles);
/// After we've collected all the region pressure for this HWUI, correct for
- /// any specifics of the behavior of this resource. For example, if we the
+ /// any specifics of the behavior of this resource. For example, if the
/// HardwareUnit can hold N instructions simultaneously, then there is no
/// penalty for scheduling N instructions back to back.
void finalizeCycles();