aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
diff options
context:
space:
mode:
authorJon Chesterfield <jonathanchesterfield@gmail.com>2022-05-04 22:42:05 +0100
committerJon Chesterfield <jonathanchesterfield@gmail.com>2022-05-04 22:42:07 +0100
commitbc78c099524283b5de44517ee5fbb805d09a7cdc (patch)
treef2eb6bbb2b5adbcc014c1d3265788e59ab9ffad5 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
parent411bb42eed723ba8e8ae29a59cbc7aacc6bab774 (diff)
downloadllvm-bc78c099524283b5de44517ee5fbb805d09a7cdc.zip
llvm-bc78c099524283b5de44517ee5fbb805d09a7cdc.tar.gz
llvm-bc78c099524283b5de44517ee5fbb805d09a7cdc.tar.bz2
[amdgpu] Elide module lds allocation in kernels with no callees
Introduces a string attribute, amdgpu-requires-module-lds, to allow eliding the module.lds block from kernels. Will allocate the block as before if the attribute is missing or has its default value of true. Patch uses the new attribute to detect the simplest possible instance of this, where a kernel makes no calls and thus cannot call any functions that use LDS. Tests updated to match, coverage was already good. The interesting case is in lower-module-lds-offsets, where annotating the kernel allows the backend to pick a different (in this case better) variable ordering than previously. A later patch will avoid moving kernel variables into module.lds when the kernel can have this attribute, allowing optimal ordering and locally unused variable elimination. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D122091
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp10
1 files changed, 8 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 4fb4485..b461c3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -83,10 +83,16 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
return Offset;
}
-void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Module *M) {
+// Returns true iff F has the "amdgpu-elide-module-lds" function attribute, i.e. this kernel calls no functions that require the module lds struct
+static bool canElideModuleLDS(const Function &F) {
+ return F.hasFnAttribute("amdgpu-elide-module-lds");
+}
+
+void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
+ const Module *M = F.getParent();
if (isModuleEntryFunction()) {
const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
- if (GV) {
+ if (GV && !canElideModuleLDS(F)) {
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
(void)Offset;
assert(Offset == 0 &&