about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Diana Picus <Diana-Magda.Picus@amd.com> 2024-03-07 12:46:42 +0100
committer: GitHub <noreply@github.com> 2024-03-07 12:46:42 +0100
commit: 0086cc95b3b3ac4088d3d782cd490d0c08108b59 (patch)
tree: 6f34ea3e7471555ded3b733e99244f67e9c274f4
parent: 937a5396cf3e524ae40106a943a5c1f2c565fa00 (diff)
download: llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.zip
llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.tar.gz
llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.tar.bz2
[AMDGPU] Rename getNumVGPRBlocks. NFC (#84161)
Rename getNumVGPRBlocks to getEncodedNumVGPRBlocks, to clarify that it is computed with the encoding granule. The encoded value is what is used to program the hardware. In practice, the hardware will allocate registers using the alloc granule instead, so this patch also adds a new helper, getAllocatedNumVGPRBlocks, which can be useful when driving heuristics.
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 4
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 4
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 26
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h 15
4 files changed, 33 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 37a36b2..d9970a20 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -868,8 +868,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
&STM, ProgInfo.NumSGPRsForWavesPerEU);
- ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- &STM, ProgInfo.NumVGPRsForWavesPerEU);
+ ProgInfo.VGPRBlocks =
+ IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU);
const SIModeRegisterDefaults Mode = MFI->getMode();
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cb4eddf..d5efd44 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5344,8 +5344,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
- VGPRBlocks =
- IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
+ VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
+ EnableWavefrontSize32);
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0eab7ac..62903a2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1060,10 +1060,15 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
+static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
+ unsigned Granule) {
+ return divideCeil(std::max(1u, NumRegs), Granule);
+}
+
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(STI) - 1;
+ return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
+ 1;
}
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
@@ -1158,14 +1163,19 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
- std::optional<bool> EnableWavefrontSize32) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs),
- getVGPREncodingGranule(STI, EnableWavefrontSize32));
- // VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
+unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ std::optional<bool> EnableWavefrontSize32) {
+ return getGranulatedNumRegisterBlocks(
+ NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
+ 1;
}
+unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
+ unsigned NumVGPRs,
+ std::optional<bool> EnableWavefrontSize32) {
+ return getGranulatedNumRegisterBlocks(
+ NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
+}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 6edf01d..bb307cb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -316,13 +316,20 @@ unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
unsigned NumVGPRs);
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
-/// \p NumVGPRs are used.
+/// \p NumVGPRs are used. We actually return the number of blocks -1, since
+/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
-unsigned
-getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
- std::optional<bool> EnableWavefrontSize32 = std::nullopt);
+unsigned getEncodedNumVGPRBlocks(
+ const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ std::optional<bool> EnableWavefrontSize32 = std::nullopt);
+
+/// \returns Number of VGPR blocks that need to be allocated for the given
+/// subtarget \p STI when \p NumVGPRs are used.
+unsigned getAllocatedNumVGPRBlocks(
+ const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ std::optional<bool> EnableWavefrontSize32 = std::nullopt);
} // end namespace IsaInfo