aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNSubtarget.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.cpp57
1 files changed, 57 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 9a2bab1..0a0a107 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -537,6 +537,63 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return getMaxNumVGPRs(MF.getFunction());
}
+std::pair<unsigned, unsigned>
+GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
+ const unsigned MaxVectorRegs = getMaxNumVGPRs(F);
+
+ unsigned MaxNumVGPRs = MaxVectorRegs;
+ unsigned MaxNumAGPRs = 0;
+
+ // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+ // a wave may have up to 512 total vector registers combining together both
+ // VGPRs and AGPRs. Hence, in an entry function without calls and without
+ // AGPRs used within it, it is possible to use the whole vector register
+ // budget for VGPRs.
+ //
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+ // register file accordingly.
+ if (hasGFX90AInsts()) {
+ unsigned MinNumAGPRs = 0;
+ const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
+ const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
+
+ // TODO: The lower bound should probably force the number of required
+ // registers up, overriding amdgpu-waves-per-eu.
+ std::tie(MinNumAGPRs, MaxNumAGPRs) =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", DefaultNumAGPR,
+ /*OnlyFirstRequired=*/true);
+
+ if (MinNumAGPRs == DefaultNumAGPR.first) {
+ // Default to splitting half the registers if AGPRs are required.
+ MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
+ } else {
+ // Align to accum_offset's allocation granularity.
+ MinNumAGPRs = alignTo(MinNumAGPRs, 4);
+
+ MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
+ }
+
+ // Clamp values to be inbounds of our limits, and ensure min <= max.
+
+ MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
+ MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
+
+ MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
+ MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
+
+ assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
+ MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
+ "invalid register counts");
+ } else if (hasMAIInsts()) {
+ // On gfx908 the number of AGPRs always equals the number of VGPRs.
+ MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
+ }
+
+ return std::pair(MaxNumVGPRs, MaxNumAGPRs);
+}
+
void GCNSubtarget::adjustSchedDependency(
SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
const TargetSchedModel *SchedModel) const {