diff options
author | Diana Picus <Diana-Magda.Picus@amd.com> | 2024-03-07 12:46:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-07 12:46:42 +0100 |
commit | 0086cc95b3b3ac4088d3d782cd490d0c08108b59 (patch) | |
tree | 6f34ea3e7471555ded3b733e99244f67e9c274f4 | |
parent | 937a5396cf3e524ae40106a943a5c1f2c565fa00 (diff) | |
download | llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.zip llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.tar.gz llvm-0086cc95b3b3ac4088d3d782cd490d0c08108b59.tar.bz2 |
[AMDGPU] Rename getNumVGPRBlocks. NFC (#84161)
Rename getNumVGPRBlocks to getEncodedNumVGPRBlocks, to clarify that it
uses the encoding granule. The encoded value is what is used to program
the hardware. In practice, the hardware will use the alloc granule when
allocating registers, so this patch also adds a new helper,
getAllocatedNumVGPRBlocks, which can be useful when driving heuristics.
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 15 |
4 files changed, 33 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 37a36b2..d9970a20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -868,8 +868,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( &STM, ProgInfo.NumSGPRsForWavesPerEU); - ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - &STM, ProgInfo.NumVGPRsForWavesPerEU); + ProgInfo.VGPRBlocks = + IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU); const SIModeRegisterDefaults Mode = MFI->getMode(); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cb4eddf..d5efd44 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5344,8 +5344,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks( NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } - VGPRBlocks = - IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); + VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs, + EnableWavefrontSize32); SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); return false; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0eab7ac..62903a2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1060,10 +1060,15 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } +static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, + unsigned Granule) { + return divideCeil(std::max(1u, NumRegs), Granule); +} + unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is 
actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(STI) - 1; + return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) - + 1; } unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, @@ -1158,14 +1163,19 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, - std::optional<bool> EnableWavefrontSize32) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), - getVGPREncodingGranule(STI, EnableWavefrontSize32)); - // VGPRBlocks is actual number of VGPR blocks minus 1. - return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; +unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional<bool> EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) - + 1; } +unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, + unsigned NumVGPRs, + std::optional<bool> EnableWavefrontSize32) { + return getGranulatedNumRegisterBlocks( + NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32)); +} } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 6edf01d..bb307cb 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -316,13 +316,20 @@ unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs); /// \returns Number of VGPR blocks needed for given subtarget \p STI when -/// \p NumVGPRs are used. +/// \p NumVGPRs are used. We actually return the number of blocks -1, since +/// that's what we encode. /// /// For subtargets which support it, \p EnableWavefrontSize32 should match the /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 
-unsigned -getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, - std::optional<bool> EnableWavefrontSize32 = std::nullopt); +unsigned getEncodedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional<bool> EnableWavefrontSize32 = std::nullopt); + +/// \returns Number of VGPR blocks that need to be allocated for the given +/// subtarget \p STI when \p NumVGPRs are used. +unsigned getAllocatedNumVGPRBlocks( + const MCSubtargetInfo *STI, unsigned NumVGPRs, + std::optional<bool> EnableWavefrontSize32 = std::nullopt); } // end namespace IsaInfo |