aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Joseph Huber <huberjn@outlook.com> 2024-06-06 15:19:55 -0500
committer: GitHub <noreply@github.com> 2024-06-06 15:19:55 -0500
commit: 9e209a4a3728a599c434bfed4fa37fd8b5907e89 (patch)
tree: 48bbdb49de9fac61dc7c2776f711e946b9bc3c6b
parent: 9293fc7981526eaca0a28012f2e5963fff1b830b (diff)
download: llvm-9e209a4a3728a599c434bfed4fa37fd8b5907e89.zip
llvm-9e209a4a3728a599c434bfed4fa37fd8b5907e89.tar.gz
llvm-9e209a4a3728a599c434bfed4fa37fd8b5907e89.tar.bz2
[Offload] Use the kernel argument size directly in AMDGPU offloading (#94667)
Summary: The old COV3 implementation of HSA used to omit the implicit arguments from the kernel argument size. For COV4 and COV5 this is no longer the case, so we can simply use the size reported from the symbol information. See: https://github.com/ROCm/ROCR-Runtime/issues/117#issuecomment-812758161
-rw-r--r-- offload/plugins-nextgen/amdgpu/src/rtl.cpp 8
1 files changed, 1 insertions, 7 deletions
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index f088d5d..663cfdc 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3272,19 +3272,13 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (ArgsSize < KernelArgsSize)
return Plugin::error("Mismatch of kernel arguments size");
- // The args size reported by HSA may or may not contain the implicit args.
- // For now, assume that HSA does not consider the implicit arguments when
- // reporting the arguments of a kernel. In the worst case, we can waste
- // 56 bytes per allocation.
- uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
-
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
- if (auto Err = ArgsMemoryManager.allocate(AllArgsSize, &AllArgs))
+ if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
// Account for user requested dynamic shared memory.