aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
diff options
context:
space:
mode:
authorJon Chesterfield <jonathanchesterfield@gmail.com>2022-12-07 22:02:53 +0000
committerJon Chesterfield <jonathanchesterfield@gmail.com>2022-12-07 22:02:54 +0000
commitd77ae7f2513504655e555cd326208598093d66e2 (patch)
tree36d35417bde880a09b647a764c0719a1aa74ab86 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
parentf6fb0a4f35d152d154aeb8a8e3d47ff1392c1bad (diff)
downloadllvm-d77ae7f2513504655e555cd326208598093d66e2.zip
llvm-d77ae7f2513504655e555cd326208598093d66e2.tar.gz
llvm-d77ae7f2513504655e555cd326208598093d66e2.tar.bz2
[amdgpu] Reimplement LDS lowering
Renames the current lowering scheme to "module" and introduces two new ones, "kernel" and "table", plus a "hybrid" that chooses between those three on a per-variable basis. Unit tests are set up to pass with the default lowering set to either "module" or "hybrid"; this patch defaults to "module", which is a less dramatic codegen change relative to the current behaviour. This reflects the sparsity of test coverage for the table lowering method. Hybrid is better than module in every respect and will become the default in a subsequent patch. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D139433
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp57
1 files changed, 55 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 488b3be..d8133a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -84,6 +84,24 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
return Offset;
}
+static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
+
+/// Returns true if \p GV is named like one of the LDS globals handled by
+/// calculateKnownAddressOfLDSGlobal: either the module LDS variable
+/// (ModuleLDSName) or a per-kernel variable of the form
+/// "llvm.amdgcn.kernel.<kernel>.lds".
+bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
+  StringRef Name = GV.getName();
+  if (Name == ModuleLDSName)
+    return true;
+
+  // Strip the prefix before testing the suffix so the two cannot share the
+  // '.' separator: "llvm.amdgcn.kernel.lds" is not a kernel LDS name. This
+  // is the same parse that getKernelLDSFunctionFromGlobal performs, so the
+  // two functions agree on which names are per-kernel variables.
+  return Name.consume_front("llvm.amdgcn.kernel.") &&
+         Name.consume_back(".lds");
+}
+
+/// Maps a per-kernel LDS global back to a function in the same module.
+///
+/// If \p GV is named "llvm.amdgcn.kernel.<name>.lds", returns the result of
+/// looking up <name> in the module that contains \p GV (null if no such
+/// function exists). Any other name yields nullptr.
+const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
+    const GlobalVariable &GV) {
+  StringRef KernelName = GV.getName();
+
+  // Peel off the fixed prefix first, then the fixed suffix; whatever is left
+  // in between is the name to look up.
+  const bool HasPrefix = KernelName.consume_front("llvm.amdgcn.kernel.");
+  if (!HasPrefix || !KernelName.consume_back(".lds"))
+    return nullptr;
+
+  return GV.getParent()->getFunction(KernelName);
+}
+
const GlobalVariable *
AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
const Module *M = F.getParent();
@@ -98,6 +116,37 @@ static bool canElideModuleLDS(const Function &F) {
return F.hasFnAttribute("amdgpu-elide-module-lds");
}
+/// Computes the LDS offset of a known-address global.
+///
+/// Layout assumed here: module.lds, then alignment padding, then kernel.lds,
+/// then other variables if any.
+///
+/// \p GV must satisfy isKnownAddressLDSGlobal (asserted).
+unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
+    const GlobalVariable &GV) {
+  assert(isKnownAddressLDSGlobal(GV));
+
+  // The module variable comes first in the layout.
+  if (GV.getName() == ModuleLDSName)
+    return 0;
+
+  const Module *Mod = GV.getParent();
+  const DataLayout &Layout = Mod->getDataLayout();
+
+  const GlobalVariable *ModuleVar = Mod->getNamedGlobal(ModuleLDSName);
+  const Function *Kernel = getKernelLDSFunctionFromGlobal(GV);
+
+  // The kernel variable is preceded by module.lds only when that variable
+  // exists and is allocated for the owning kernel.
+  const bool ModuleLDSAllocated =
+      ModuleVar && Kernel && !canElideModuleLDS(*Kernel);
+
+  unsigned Start = 0;
+  if (ModuleLDSAllocated) {
+    // The allocator aligns module.lds to the variable's alignment, but it
+    // starts from zero, so only its size contributes.
+    Start += Layout.getTypeAllocSize(ModuleVar->getValueType());
+  }
+
+  // No dynamic LDS alignment done by allocateModuleLDSGlobal; only the
+  // variable's own (explicit or ABI) alignment is applied.
+  return alignTo(Start, Layout.getValueOrABITypeAlignment(GV.getAlign(),
+                                                          GV.getValueType()));
+}
+
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
const Module *M = F.getParent();
@@ -124,21 +173,25 @@ void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
// }
// other variables, e.g. dynamic lds, allocated after this call
- const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
+ const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
if (GV && !canElideModuleLDS(F)) {
+ assert(isKnownAddressLDSGlobal(*GV));
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
(void)Offset;
- assert(Offset == 0 &&
+ assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
"Module LDS expected to be allocated before other LDS");
}
if (KV) {
// The per-kernel offset is deterministic because it is allocated
// before any other non-module LDS variables.
+ assert(isKnownAddressLDSGlobal(*KV));
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
(void)Offset;
+ assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
+ "Kernel LDS expected to be immediately after module LDS");
}
}
}