diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2022-12-07 22:02:53 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2022-12-07 22:02:54 +0000 |
commit | d77ae7f2513504655e555cd326208598093d66e2 (patch) | |
tree | 36d35417bde880a09b647a764c0719a1aa74ab86 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | |
parent | f6fb0a4f35d152d154aeb8a8e3d47ff1392c1bad (diff) | |
download | llvm-d77ae7f2513504655e555cd326208598093d66e2.zip llvm-d77ae7f2513504655e555cd326208598093d66e2.tar.gz llvm-d77ae7f2513504655e555cd326208598093d66e2.tar.bz2 |
[amdgpu] Reimplement LDS lowering
Renames the current lowering scheme to "module" and introduces two new
ones, "kernel" and "table", plus a "hybrid" that chooses between those three
on a per-variable basis.
Unit tests are set up to pass with the default lowering set to either "module"
or "hybrid". This patch defaults to "module", which is a less dramatic codegen
change relative to the current behaviour and reflects the sparsity of test
coverage for the table lowering method. Hybrid is better than module in every
respect and will become the default in a subsequent patch.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D139433
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 57 |
1 files changed, 55 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 488b3be..d8133a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -84,6 +84,24 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
   return Offset;
 }
 
+static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
+
+bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
+  auto name = GV.getName();
+  return (name == ModuleLDSName) ||
+         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
+}
+
+const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
+    const GlobalVariable &GV) {
+  const Module &M = *GV.getParent();
+  StringRef N(GV.getName());
+  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
+    return M.getFunction(N);
+  }
+  return nullptr;
+}
+
 const GlobalVariable *
 AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
   const Module *M = F.getParent();
@@ -98,6 +116,37 @@ static bool canElideModuleLDS(const Function &F) {
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
+unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
+    const GlobalVariable &GV) {
+  // module.lds, then alignment padding, then kernel.lds, then other variables
+  // if any
+
+  assert(isKnownAddressLDSGlobal(GV));
+  unsigned Offset = 0;
+
+  if (GV.getName() == ModuleLDSName) {
+    return 0;
+  }
+
+  const Module *M = GV.getParent();
+  const DataLayout &DL = M->getDataLayout();
+
+  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
+  const Function *f = getKernelLDSFunctionFromGlobal(GV);
+
+  // Account for module.lds if allocated for this function
+  if (GVM && f && !canElideModuleLDS(*f)) {
+    // allocator aligns this to var align, but it's zero to begin with
+    Offset += DL.getTypeAllocSize(GVM->getValueType());
+  }
+
+  // No dynamic LDS alignment done by allocateModuleLDSGlobal
+  Offset = alignTo(
+      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
+
+  return Offset;
+}
+
 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
   const Module *M = F.getParent();
 
@@ -124,21 +173,25 @@ void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
     // }
     // other variables, e.g. dynamic lds, allocated after this call
 
-    const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
+    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
 
     if (GV && !canElideModuleLDS(F)) {
+      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
       (void)Offset;
-      assert(Offset == 0 &&
+      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
              "Module LDS expected to be allocated before other LDS");
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
       // before any other non-module LDS variables.
+      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
       (void)Offset;
+      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
+             "Kernel LDS expected to be immediately after module LDS");
     }
   }
 }