aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
diff options
context:
space:
mode:
authorJon Chesterfield <jonathanchesterfield@gmail.com>2021-03-15 15:24:00 +0000
committerJon Chesterfield <jonathanchesterfield@gmail.com>2021-03-15 15:24:01 +0000
commit13e49dcee48f7bffec17df48b87e3237aebd5b1d (patch)
tree068b422233ed27cec09d5925ef0dd9ac7177b9c6 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
parent752f477d677b73039e9073d700c6def99c153445 (diff)
downloadllvm-13e49dcee48f7bffec17df48b87e3237aebd5b1d.zip
llvm-13e49dcee48f7bffec17df48b87e3237aebd5b1d.tar.gz
llvm-13e49dcee48f7bffec17df48b87e3237aebd5b1d.tar.bz2
[amdgpu] Implement lower function LDS pass
[amdgpu] Implement lower function LDS pass Local variables are allocated at kernel launch. This pass collects global variables that are used from non-kernel functions, moves them into a new struct type, and allocates an instance of that type in every kernel. Uses are then replaced with a constantexpr offset. Prior to this pass, accesses from a function are compiled to trap. With this pass, most such accesses are removed before reaching codegen. The trap logic is left unchanged by this pass. It is still reachable for the cases this pass misses, notably the extern shared construct from hip and variables marked constant which survive the optimizer. This is of interest to the openmp project because the deviceRTL runtime library uses cuda shared variables from functions that cannot be inlined. Trunk llvm therefore cannot compile some openmp kernels for amdgpu. In addition to the unit tests attached, this patch applied to ROCm llvm with fixed-abi enabled and the function pointer hashing scheme deleted passes the openmp suite. This lowering will use more LDS than strictly necessary. It is intended to be a functionally correct fallback for cases that are difficult to target from future optimisation passes. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94648
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp12
1 files changed, 12 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 717145b..5134497 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -64,6 +64,18 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
return Offset;
}
+void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Module *M) {
+ if (isModuleEntryFunction()) {
+ GlobalVariable *GV = M->getGlobalVariable("llvm.amdgcn.module.lds");
+ if (GV) {
+ unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
+ (void)Offset;
+ assert(Offset == 0 &&
+ "Module LDS expected to be allocated before other LDS");
+ }
+ }
+}
+
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
const GlobalVariable &GV) {
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());