diff options
author | Joseph Huber <huberjn@outlook.com> | 2024-03-26 07:13:59 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-26 07:13:59 -0500 |
commit | 4dc322524871df02578a05b260dacf28ff130c02 (patch) | |
tree | 93247463fa8b4a72f8a6300f1d61b35cc066297b | |
parent | bfb12ef33a90bd20c3a76094ef4393a8defbb712 (diff) | |
download | llvm-4dc322524871df02578a05b260dacf28ff130c02.zip llvm-4dc322524871df02578a05b260dacf28ff130c02.tar.gz llvm-4dc322524871df02578a05b260dacf28ff130c02.tar.bz2 |
[Libomptarget] Replace global `PluginTy::get` interface with references (#86595)
Summary:
We have a plugin singleton that implements the Plugin interface. This
then spawns separate device and kernels. Previously when these needed to
reach into the global singleton they would use the `PluginTy::get`
routine to get access to it. In the future we will move away from this
as the lifetime of the plugin will be handled by `libomptarget`
directly. This patch removes uses of this inside of the plugin
implementaion themselves by simply keeping a reference to the plugin
inside of the device.
The external `__tgt_rtl` functions still use the global method, but will
be removed later.
5 files changed, 53 insertions, 39 deletions
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 14461f1..2dd08dd 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -371,7 +371,8 @@ private: struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy { /// Create an empty memory manager. - AMDGPUMemoryManagerTy() : MemoryPool(nullptr), MemoryManager(nullptr) {} + AMDGPUMemoryManagerTy(AMDGPUPluginTy &Plugin) + : Plugin(Plugin), MemoryPool(nullptr), MemoryManager(nullptr) {} /// Initialize the memory manager from a memory pool. Error init(AMDGPUMemoryPoolTy &MemoryPool) { @@ -429,6 +430,9 @@ private: return OFFLOAD_SUCCESS; } + /// The underlying plugin that owns this memory manager. + AMDGPUPluginTy &Plugin; + /// The memory pool used to allocate memory. AMDGPUMemoryPoolTy *MemoryPool; @@ -1744,9 +1748,10 @@ protected: /// HSA host agent. We aggregate all its resources into the same instance. struct AMDHostDeviceTy : public AMDGenericDeviceTy { /// Create a host device from an array of host agents. - AMDHostDeviceTy(const llvm::SmallVector<hsa_agent_t> &HostAgents) - : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(), - PinnedMemoryManager() { + AMDHostDeviceTy(AMDGPUPluginTy &Plugin, + const llvm::SmallVector<hsa_agent_t> &HostAgents) + : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(Plugin), + PinnedMemoryManager(Plugin) { assert(HostAgents.size() && "No host agent found"); } @@ -1840,9 +1845,10 @@ private: /// generic device class. struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { // Create an AMDGPU device with a device id and default AMDGPU grid values. - AMDGPUDeviceTy(int32_t DeviceId, int32_t NumDevices, + AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices, AMDHostDeviceTy &HostDevice, hsa_agent_t Agent) - : GenericDeviceTy(DeviceId, NumDevices, {0}), AMDGenericDeviceTy(), + : GenericDeviceTy(Plugin, DeviceId, NumDevices, {0}), + AMDGenericDeviceTy(), OMPX_NumQueues("LIBOMPTARGET_AMDGPU_NUM_HSA_QUEUES", 4), OMPX_QueueSize("LIBOMPTARGET_AMDGPU_HSA_QUEUE_SIZE", 512), OMPX_DefaultTeamsPerCU("LIBOMPTARGET_AMDGPU_TEAMS_PER_CU", 4), @@ -2088,7 +2094,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /// Allocate and construct an AMDGPU kernel. Expected<GenericKernelTy &> constructKernel(const char *Name) override { // Allocate and construct the AMDGPU kernel. - AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate<AMDGPUKernelTy>(); + AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>(); if (!AMDGPUKernel) return Plugin::error("Failed to allocate memory for AMDGPU kernel"); @@ -2138,8 +2144,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) override { // Allocate and initialize the image object. - AMDGPUDeviceImageTy *AMDImage = - PluginTy::get().allocate<AMDGPUDeviceImageTy>(); + AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>(); new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage); // Load the HSA executable. @@ -2697,7 +2702,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } Error setDeviceHeapSize(uint64_t Value) override { for (DeviceImageTy *Image : LoadedImages) - if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value)) + if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value)) return Err; DeviceMemoryPoolSize = Value; return Plugin::success(); @@ -2737,7 +2742,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return utils::iterateAgentMemoryPools( Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) { AMDGPUMemoryPoolTy *MemoryPool = - PluginTy::get().allocate<AMDGPUMemoryPoolTy>(); + Plugin.allocate<AMDGPUMemoryPoolTy>(); new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool); AllMemoryPools.push_back(MemoryPool); return HSA_STATUS_SUCCESS; @@ -3090,7 +3095,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy { // Initialize the host device using host agents. HostDevice = allocate<AMDHostDeviceTy>(); - new (HostDevice) AMDHostDeviceTy(HostAgents); + new (HostDevice) AMDHostDeviceTy(*this, HostAgents); // Setup the memory pools of available for the host. if (auto Err = HostDevice->init()) @@ -3116,8 +3121,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy { } /// Creates an AMDGPU device. - GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { - return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(), + GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId, + int32_t NumDevices) override { + return new AMDGPUDeviceTy(Plugin, DeviceId, NumDevices, getHostDevice(), getKernelAgent(DeviceId)); } @@ -3248,7 +3254,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice, // 56 bytes per allocation. uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize; - AMDHostDeviceTy &HostDevice = PluginTy::get<AMDGPUPluginTy>().getHostDevice(); + AMDGPUPluginTy &AMDGPUPlugin = + static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin); + AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice(); AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager(); void *AllArgs = nullptr; @@ -3385,7 +3393,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr, } assert(Ptr && "Invalid pointer"); - auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents(); + auto &KernelAgents = Plugin.getKernelAgents(); // Allow all kernel agents to access the allocation. if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) { @@ -3428,7 +3436,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) { } if (Alloc) { - auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents(); + auto &KernelAgents = + static_cast<AMDGPUPluginTy &>(Plugin).getKernelAgents(); // Inherently necessary for host or shared allocations // Also enabled for device memory to allow device to device memcpy diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h index 1440f8f..b16b081 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h @@ -610,7 +610,7 @@ public: struct GenericDeviceTy : public DeviceAllocatorTy { /// Construct a device with its device id within the plugin, the number of /// devices in the plugin and the grid values for that kind of device. - GenericDeviceTy(int32_t DeviceId, int32_t NumDevices, + GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices, const llvm::omp::GV &GridValues); /// Get the device identifier within the corresponding plugin. Notice that @@ -860,6 +860,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Allocate and construct a kernel object. virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0; + /// Reference to the underlying plugin that created this device. + GenericPluginTy &Plugin; + private: /// Get and set the stack size and heap size for the device. If not used, the /// plugin can implement the setters as no-op and setting the output @@ -977,7 +980,8 @@ struct GenericPluginTy { virtual Error deinitImpl() = 0; /// Create a new device for the underlying plugin. - virtual GenericDeviceTy *createDevice(int32_t DeviceID, + virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin, + int32_t DeviceID, int32_t NumDevices) = 0; /// Create a new global handler for the underlying plugin. diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index 4db787a..c32bd08 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice, // Retrieve kernel environment object for the kernel. GlobalTy KernelEnv(std::string(Name) + "_kernel_environment", sizeof(KernelEnvironment), &KernelEnvironment); - GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler(); + GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler(); if (auto Err = GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) { [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); @@ -710,9 +710,10 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice, return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit()); } -GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices, +GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, + int32_t NumDevices, const llvm::omp::GV &OMPGridValues) - : MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"), + : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"), OMP_NumTeams("OMP_NUM_TEAMS"), OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"), OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"), @@ -1522,7 +1523,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) { assert(!Devices[DeviceId] && "Device already initialized"); // Create the device and save the reference. - GenericDeviceTy *Device = createDevice(DeviceId, NumDevices); + GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices); assert(Device && "Invalid device"); // Save the device reference into the list. diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index d43c723..fc74c6aa 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -255,8 +255,8 @@ private: /// generic device class. struct CUDADeviceTy : public GenericDeviceTy { // Create a CUDA device with a device id and the default CUDA grid values. - CUDADeviceTy(int32_t DeviceId, int32_t NumDevices) - : GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues), + CUDADeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices) + : GenericDeviceTy(Plugin, DeviceId, NumDevices, NVPTXGridValues), CUDAStreamManager(*this), CUDAEventManager(*this) {} ~CUDADeviceTy() {} @@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy { /// Allocate and construct a CUDA kernel. Expected<GenericKernelTy &> constructKernel(const char *Name) override { // Allocate and construct the CUDA kernel. - CUDAKernelTy *CUDAKernel = PluginTy::get().allocate<CUDAKernelTy>(); + CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>(); if (!CUDAKernel) return Plugin::error("Failed to allocate memory for CUDA kernel"); @@ -529,8 +529,7 @@ struct CUDADeviceTy : public GenericDeviceTy { return std::move(Err); // Allocate and initialize the image object. - CUDADeviceImageTy *CUDAImage = - PluginTy::get().allocate<CUDADeviceImageTy>(); + CUDADeviceImageTy *CUDAImage = Plugin.allocate<CUDADeviceImageTy>(); new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage); // Load the CUDA module. @@ -1373,8 +1372,9 @@ struct CUDAPluginTy final : public GenericPluginTy { Error deinitImpl() override { return Plugin::success(); } /// Creates a CUDA device to use for offloading. - GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { - return new CUDADeviceTy(DeviceId, NumDevices); + GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId, + int32_t NumDevices) override { + return new CUDADeviceTy(Plugin, DeviceId, NumDevices); } /// Creates a CUDA global handler. diff --git a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp index ea8ed8d..f0ce242 100644 --- a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp @@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy { GlobalTy Global(getName(), 0); // Get the metadata (address) of the kernel function. - GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler(); + GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler(); if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global)) return Err; @@ -132,8 +132,9 @@ private: /// Class implementing the device functionalities for GenELF64. struct GenELF64DeviceTy : public GenericDeviceTy { /// Create the device with a specific id. - GenELF64DeviceTy(int32_t DeviceId, int32_t NumDevices) - : GenericDeviceTy(DeviceId, NumDevices, GenELF64GridValues) {} + GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, + int32_t NumDevices) + : GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {} ~GenELF64DeviceTy() {} @@ -149,8 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy { /// Construct the kernel for a specific image on the device. Expected<GenericKernelTy &> constructKernel(const char *Name) override { // Allocate and construct the kernel. - GenELF64KernelTy *GenELF64Kernel = - PluginTy::get().allocate<GenELF64KernelTy>(); + GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>(); if (!GenELF64Kernel) return Plugin::error("Failed to allocate memory for GenELF64 kernel"); @@ -166,8 +166,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy { Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) override { // Allocate and initialize the image object. - GenELF64DeviceImageTy *Image = - PluginTy::get().allocate<GenELF64DeviceImageTy>(); + GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>(); new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage); // Create a temporary file. @@ -400,8 +399,9 @@ struct GenELF64PluginTy final : public GenericPluginTy { Error deinitImpl() override { return Plugin::success(); } /// Creates a generic ELF device. - GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override { - return new GenELF64DeviceTy(DeviceId, NumDevices); + GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId, + int32_t NumDevices) override { + return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices); } /// Creates a generic global handler. |