aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoseph Huber <huberjn@outlook.com>2025-05-09 08:21:40 -0500
committerGitHub <noreply@github.com>2025-05-09 08:21:40 -0500
commitd60eeda2e5179cbdb2af70a7531dd437082724aa (patch)
tree5352ea96ba91945526d7054012b98534a883b195
parent806b49140142dea46d31646c438d936522bb2be7 (diff)
downloadllvm-d60eeda2e5179cbdb2af70a7531dd437082724aa.zip
llvm-d60eeda2e5179cbdb2af70a7531dd437082724aa.tar.gz
llvm-d60eeda2e5179cbdb2af70a7531dd437082724aa.tar.bz2
[Offload] Do not load images from the same descriptor on the same device (#139147)
Summary: Right now we generally assume that we have one image per device. The binary descriptor represents a single 'compilation'. This means that each image is going to contain the same code built for different architectures when used through the OpenMP interface. This is problematic when we have cases where the same code will then be loaded multiple times on the same device (like with sm_80 and sm_89, or the generic GFX ISAs). This patch is the quick and dirty solution: we just prevent this from happening at all. This means we use the first compatible image we find, which might not be optimal, but it should be better than the alternative. Note that this does not affect shared library loads, as the check is per binary descriptor, not per device.
-rw-r--r-- offload/libomptarget/PluginManager.cpp 17
1 files changed, 15 insertions, 2 deletions
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index d6d529a..d99d6ad 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -202,9 +202,10 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
// Register the images with the RTLs that understand them, if any.
- for (DeviceImageTy &DI : PM->deviceImages()) {
+ llvm::DenseMap<GenericPluginTy *, llvm::DenseSet<int32_t>> UsedDevices;
+ for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) {
// Obtain the image and information that was previously extracted.
- __tgt_device_image *Img = &DI.getExecutableImage();
+ __tgt_device_image *Img = &Desc->DeviceImages[i];
GenericPluginTy *FoundRTL = nullptr;
@@ -223,6 +224,17 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
}
for (int32_t DeviceId = 0; DeviceId < R.number_of_devices(); ++DeviceId) {
+ // We only want a single matching image to be registered for each binary
+ // descriptor. This prevents multiple of the same image from being
+ // registered for the same device in the case that they are mutually
+ // compatible, such as sm_80 and sm_89.
+ if (UsedDevices[&R].contains(DeviceId)) {
+ DP("Image " DPxMOD
+ " is a duplicate, not loaded on RTL %s device %d!\n",
+ DPxPTR(Img->ImageStart), R.getName(), DeviceId);
+ continue;
+ }
+
if (!R.is_device_compatible(DeviceId, Img))
continue;
@@ -262,6 +274,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
TT.TargetsImages[UserId] = Img;
TT.TargetsTable[UserId] = nullptr;
+ UsedDevices[&R].insert(DeviceId);
PM->UsedImages.insert(Img);
FoundRTL = &R;