diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2022-09-28 14:55:14 +0100 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2022-09-28 14:55:16 +0100 |
commit | 80ba432821206ee3ba4275d48ed6b50aadfbb9d8 (patch) | |
tree | 9386d8cd1fb6ba2dbc3e439c9d65cfb47e09e3e4 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | |
parent | dcc00482797124d97aab1cfc254b1563e38de76b (diff) | |
download | llvm-80ba432821206ee3ba4275d48ed6b50aadfbb9d8.zip llvm-80ba432821206ee3ba4275d48ed6b50aadfbb9d8.tar.gz llvm-80ba432821206ee3ba4275d48ed6b50aadfbb9d8.tar.bz2 |
[amdgpu][nfc] Allocate kernel-specific LDS struct deterministically
A kernel may have an associated struct for laying out LDS variables.
This patch puts that instance, if present, at a deterministic address by
allocating it at the same time as the module scope instance.
This is relatively likely to be where the instance was allocated anyway (~NFC)
but will allow later patches to calculate where a given field can be found,
which means a function which is only reachable from a single kernel will be
able to access an LDS variable with zero overhead. That will be particularly
helpful for applications that instantiate a function template containing LDS
variables once per kernel.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D127052
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 53 |
1 files changed, 47 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index f5e12fd..dacf873 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -49,7 +49,8 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) } unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, - const GlobalVariable &GV) { + const GlobalVariable &GV, + Align Trailing) { auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0)); if (!Entry.second) return Entry.first->second; @@ -66,9 +67,8 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); - // Update the LDS size considering the padding to align the dynamic shared - // memory. - LDSSize = alignTo(StaticLDSSize, DynLDSAlign); + // Align LDS size to trailing, e.g. for aligning dynamic shared memory + LDSSize = alignTo(StaticLDSSize, Trailing); } else { assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && "expected region address space"); @@ -84,21 +84,62 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, return Offset; } +const GlobalVariable * +AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { + const Module *M = F.getParent(); + std::string KernelLDSName = "llvm.amdgcn.kernel."; + KernelLDSName += F.getName(); + KernelLDSName += ".lds"; + return M->getNamedGlobal(KernelLDSName); +} + // This kernel calls no functions that require the module lds struct static bool canElideModuleLDS(const Function &F) { return F.hasFnAttribute("amdgpu-elide-module-lds"); } -void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { +void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { const Module *M = F.getParent(); + + // This function is called before allocating any other LDS so that it can + // reliably put values at known 
addresses. Consequently, dynamic LDS, if + // present, will not yet have been allocated + + assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); + if (isModuleEntryFunction()) { + + // Pointer values start from zero, memory allocated per-kernel-launch + // Variables can be grouped into a module level struct and a struct per + // kernel function by AMDGPULowerModuleLDSPass. If that is done, they + // are allocated at statically computable addresses here. + // + // Address 0 + // { + // llvm.amdgcn.module.lds + // } + // alignment padding + // { + // llvm.amdgcn.kernel.some-name.lds + // } + // other variables, e.g. dynamic lds, allocated after this call + const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); + const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); + if (GV && !canElideModuleLDS(F)) { - unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); + unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); (void)Offset; assert(Offset == 0 && "Module LDS expected to be allocated before other LDS"); } + + if (KV) { + // The per-kernel offset is deterministic because it is allocated + // before any other non-module LDS variables. + unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); + (void)Offset; + } } } |