diff options
author | Diana Picus <Diana-Magda.Picus@amd.com> | 2025-06-24 11:09:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-24 11:09:36 +0200 |
commit | a201f8872a63aa336e4f79a40e196b6c20c9001e (patch) | |
tree | 98d0ca031c01c13cb0a958b0dd36c46bbff0aa26 /llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | |
parent | 6cfa03f1f1d9351950b6d12fb923ce7e2cb3405b (diff) | |
download | llvm-a201f8872a63aa336e4f79a40e196b6c20c9001e.zip llvm-a201f8872a63aa336e4f79a40e196b6c20c9001e.tar.gz llvm-a201f8872a63aa336e4f79a40e196b6c20c9001e.tar.bz2 |
[AMDGPU] Replace dynamic VGPR feature with attribute (#133444)
Use a function attribute (amdgpu-dynamic-vgpr) instead of a subtarget
feature, as requested in #130030.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 62 |
1 files changed, 46 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 47d213d..0e54932 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1158,10 +1158,16 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { } unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, + unsigned DynamicVGPRBlockSize, std::optional<bool> EnableWavefrontSize32) { if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 8; + if (DynamicVGPRBlockSize != 0) + return DynamicVGPRBlockSize; + + // Temporarily check the subtarget feature, until we fully switch to using + // attributes. if (STI->getFeatureBits().test(FeatureDynamicVGPR)) return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16; @@ -1205,20 +1211,26 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; } -unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, + unsigned DynamicVGPRBlockSize) { if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 512; - if (STI->getFeatureBits().test(FeatureDynamicVGPR)) + + // Temporarily check the subtarget feature, until we fully switch to using + // attributes. + if (DynamicVGPRBlockSize != 0 || + STI->getFeatureBits().test(FeatureDynamicVGPR)) // On GFX12 we can allocate at most 8 blocks of VGPRs. - return 8 * getVGPRAllocGranule(STI); + return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize); return getAddressableNumArchVGPRs(STI); } unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, - unsigned NumVGPRs) { - return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI), - getMaxWavesPerEU(STI), - getTotalNumVGPRs(STI)); + unsigned NumVGPRs, + unsigned DynamicVGPRBlockSize) { + return getNumWavesPerEUWithNumVGPRs( + NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize), + getMaxWavesPerEU(STI), getTotalNumVGPRs(STI)); } unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule, @@ -1257,7 +1269,8 @@ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, return 5; } -unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, + unsigned DynamicVGPRBlockSize) { assert(WavesPerEU != 0); unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); @@ -1265,28 +1278,33 @@ unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { return 0; unsigned TotNumVGPRs = getTotalNumVGPRs(STI); - unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI); - unsigned Granule = getVGPRAllocGranule(STI); + unsigned AddrsableNumVGPRs = + getAddressableNumVGPRs(STI, DynamicVGPRBlockSize); + unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize); unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) return 0; - unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs); + unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs, + DynamicVGPRBlockSize); if (WavesPerEU < MinWavesPerEU) - return getMinNumVGPRs(STI, MinWavesPerEU); + return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize); unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); return std::min(MinNumVGPRs, AddrsableNumVGPRs); } -unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, + unsigned DynamicVGPRBlockSize) { assert(WavesPerEU != 0); unsigned MaxNumVGPRs = - alignDown(getTotalNumVGPRs(STI) / WavesPerEU, getVGPRAllocGranule(STI)); - unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); + alignDown(getTotalNumVGPRs(STI) / WavesPerEU, + getVGPRAllocGranule(STI, DynamicVGPRBlockSize)); + unsigned AddressableNumVGPRs = + getAddressableNumVGPRs(STI, DynamicVGPRBlockSize); return std::min(MaxNumVGPRs, AddressableNumVGPRs); } @@ -1299,9 +1317,11 @@ unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, + unsigned DynamicVGPRBlockSize, std::optional<bool> EnableWavefrontSize32) { return getGranulatedNumRegisterBlocks( - NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32)); + NumVGPRs, + getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32)); } } // end namespace IsaInfo @@ -2124,6 +2144,16 @@ bool getHasDepthExport(const Function &F) { return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; } +unsigned getDynamicVGPRBlockSize(const Function &F) { + unsigned BlockSize = + F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0); + + if (BlockSize == 16 || BlockSize == 32) + return BlockSize; + + return 0; +} + bool hasXNACK(const MCSubtargetInfo &STI) { return STI.hasFeature(AMDGPU::FeatureXNACK); } |