diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 15 |
4 files changed, 33 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 37a36b2..d9970a20 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -868,8 +868,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
   ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
       &STM, ProgInfo.NumSGPRsForWavesPerEU);
-  ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
-      &STM, ProgInfo.NumVGPRsForWavesPerEU);
+  ProgInfo.VGPRBlocks =
+      IsaInfo::getEncodedNumVGPRBlocks(&STM, ProgInfo.NumVGPRsForWavesPerEU);
 
   const SIModeRegisterDefaults Mode = MFI->getMode();
 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cb4eddf..d5efd44 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5344,8 +5344,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
     NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
   }
 
-  VGPRBlocks =
-      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
+  VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
+                                                EnableWavefrontSize32);
   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
 
   return false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0eab7ac..62903a2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1060,10 +1060,15 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
 }
 
+static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
+                                               unsigned Granule) {
+  return divideCeil(std::max(1u, NumRegs), Granule);
+}
+
 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
-  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
   // SGPRBlocks is actual number of SGPR blocks minus 1.
-  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
+  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
+         1;
 }
 
 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
@@ -1158,14 +1163,19 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 }
 
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
-                          std::optional<bool> EnableWavefrontSize32) {
-  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
-                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
-  // VGPRBlocks is actual number of VGPR blocks minus 1.
-  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
+unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+                                 std::optional<bool> EnableWavefrontSize32) {
+  return getGranulatedNumRegisterBlocks(
+             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
+         1;
 }
 
+unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
+                                   unsigned NumVGPRs,
+                                   std::optional<bool> EnableWavefrontSize32) {
+  return getGranulatedNumRegisterBlocks(
+      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
+}
 } // end namespace IsaInfo
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 6edf01d..bb307cb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -316,13 +316,20 @@ unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                       unsigned NumVGPRs);
 
 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
-/// \p NumVGPRs are used.
+/// \p NumVGPRs are used. We actually return the number of blocks -1, since
+/// that's what we encode.
 ///
 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
-unsigned
-getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
-                 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
+unsigned getEncodedNumVGPRBlocks(
+    const MCSubtargetInfo *STI, unsigned NumVGPRs,
+    std::optional<bool> EnableWavefrontSize32 = std::nullopt);
+
+/// \returns Number of VGPR blocks that need to be allocated for the given
+/// subtarget \p STI when \p NumVGPRs are used.
+unsigned getAllocatedNumVGPRBlocks(
+    const MCSubtargetInfo *STI, unsigned NumVGPRs,
+    std::optional<bool> EnableWavefrontSize32 = std::nullopt);
 
 } // end namespace IsaInfo
 