diff options
author | Joseph Huber <huberjn@outlook.com> | 2024-06-06 08:10:56 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-06 08:10:56 -0500 |
commit | 435aa7663d56e7216ad148ede3a422675b5f2be1 (patch) | |
tree | d1626f1fb0579123ba70829bb4be02081fa0470f /offload | |
parent | e5c93ed3482d483bb5d2876a296cbe603d23d0e8 (diff) | |
download | llvm-435aa7663d56e7216ad148ede3a422675b5f2be1.zip llvm-435aa7663d56e7216ad148ede3a422675b5f2be1.tar.gz llvm-435aa7663d56e7216ad148ede3a422675b5f2be1.tar.bz2 |
[Libomptarget] Rework device initialization and image registration (#93844)
Summary:
Currently, we register images into a linear table according to the
logical OpenMP device identifier. We then initialize all of these images
as one block. This logic requires that images are compatible with *all*
devices instead of just the ones they can actually run on. This prevents
us from running on systems with heterogeneous devices (e.g. image 1 runs
on device 0 and image 0 runs on device 1).
This patch reworks the logic by making the compatibility check a
per-device query. We then scan every device to see whether it is
compatible with the image, and initialize each compatible device and
register the image for it as it is encountered.
Diffstat (limited to 'offload')
-rw-r--r-- | offload/include/PluginManager.h | 27 | ||||
-rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 21 | ||||
-rw-r--r-- | offload/plugins-nextgen/common/include/PluginInterface.h | 34 | ||||
-rw-r--r-- | offload/plugins-nextgen/common/src/PluginInterface.cpp | 72 | ||||
-rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 55 | ||||
-rw-r--r-- | offload/plugins-nextgen/host/src/rtl.cpp | 4 | ||||
-rw-r--r-- | offload/src/PluginManager.cpp | 235 | ||||
-rw-r--r-- | offload/src/omptarget.cpp | 2 |
8 files changed, 232 insertions, 218 deletions
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 1d6804d..fce2adc 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -64,10 +64,6 @@ struct PluginManager { std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage)); } - /// Initialize as many devices as possible for this plugin. Devices that fail - /// to initialize are ignored. - void initDevices(GenericPluginTy &RTL); - /// Return the device presented to the user as device \p DeviceNo if it is /// initialized and ready. Otherwise return an error explaining the problem. llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo); @@ -117,20 +113,31 @@ struct PluginManager { return Devices.getExclusiveAccessor(); } - int getNumUsedPlugins() const { return DeviceOffsets.size(); } - // Initialize all plugins. void initAllPlugins(); /// Iterator range for all plugins (in use or not, but always valid). auto plugins() { return llvm::make_pointee_range(Plugins); } + /// Iterator range for all plugins (in use or not, but always valid). + auto plugins() const { return llvm::make_pointee_range(Plugins); } + /// Return the user provided requirements. int64_t getRequirements() const { return Requirements.getRequirements(); } /// Add \p Flags to the user provided requirements. void addRequirements(int64_t Flags) { Requirements.addRequirements(Flags); } + /// Returns the number of plugins that are active. + int getNumActivePlugins() const { + int count = 0; + for (auto &R : plugins()) + if (R.is_initialized()) + ++count; + + return count; + } + private: bool RTLsLoaded = false; llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc; @@ -138,11 +145,9 @@ private: // List of all plugins, in use or not. llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins; - // Mapping of plugins to offsets in the device table. - llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets; - - // Mapping of plugins to the number of used devices. 
- llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed; + // Mapping of plugins to the OpenMP device identifier. + llvm::DenseMap<std::pair<const GenericPluginTy *, int32_t>, int32_t> + DeviceIds; // Set of all device images currently in use. llvm::DenseSet<const __tgt_device_image *> UsedImages; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index c6dd954..f088d5d 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -3163,25 +3163,24 @@ struct AMDGPUPluginTy final : public GenericPluginTy { uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; } /// Check whether the image is compatible with an AMDGPU device. - Expected<bool> isELFCompatible(StringRef Image) const override { + Expected<bool> isELFCompatible(uint32_t DeviceId, + StringRef Image) const override { // Get the associated architecture and flags from the ELF. auto ElfOrErr = ELF64LEObjectFile::create( MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false); if (!ElfOrErr) return ElfOrErr.takeError(); std::optional<StringRef> Processor = ElfOrErr->tryGetCPUName(); + if (!Processor) + return false; - for (hsa_agent_t Agent : KernelAgents) { - auto TargeTripleAndFeaturesOrError = - utils::getTargetTripleAndFeatures(Agent); - if (!TargeTripleAndFeaturesOrError) - return TargeTripleAndFeaturesOrError.takeError(); - if (!utils::isImageCompatibleWithEnv(Processor ? *Processor : "", + auto TargeTripleAndFeaturesOrError = + utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId)); + if (!TargeTripleAndFeaturesOrError) + return TargeTripleAndFeaturesOrError.takeError(); + return utils::isImageCompatibleWithEnv(Processor ? 
*Processor : "", ElfOrErr->getPlatformFlags(), - *TargeTripleAndFeaturesOrError)) - return false; - } - return true; + *TargeTripleAndFeaturesOrError); } bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index eda6a4f..88423be 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -993,11 +993,11 @@ struct GenericPluginTy { /// Get the number of active devices. int32_t getNumDevices() const { return NumDevices; } - /// Get the plugin-specific device identifier offset. - int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex; } - - /// Set the plugin-specific device identifier offset. - void setDeviceIdStartIndex(int32_t Offset) { DeviceIdStartIndex = Offset; } + /// Get the plugin-specific device identifier. + int32_t getUserId(int32_t DeviceId) const { + assert(UserDeviceIds.contains(DeviceId) && "No user-id registered"); + return UserDeviceIds.at(DeviceId); + } /// Get the ELF code to recognize the binary image of this plugin. virtual uint16_t getMagicElfBits() const = 0; @@ -1059,7 +1059,8 @@ struct GenericPluginTy { /// Indicate if an image is compatible with the plugin devices. Notice that /// this function may be called before actually initializing the devices. So /// we could not move this function into GenericDeviceTy. - virtual Expected<bool> isELFCompatible(StringRef Image) const = 0; + virtual Expected<bool> isELFCompatible(uint32_t DeviceID, + StringRef Image) const = 0; protected: /// Indicate whether a device id is valid. @@ -1070,11 +1071,18 @@ protected: public: // TODO: This plugin interface needs to be cleaned up. - /// Returns true if the plugin has been initialized. + /// Returns non-zero if the plugin runtime has been initialized. 
int32_t is_initialized() const; - /// Returns non-zero if the provided \p Image can be executed by the runtime. - int32_t is_valid_binary(__tgt_device_image *Image, bool Initialized = true); + /// Returns non-zero if the \p Image is compatible with the plugin. This + /// function does not require the plugin to be initialized before use. + int32_t is_plugin_compatible(__tgt_device_image *Image); + + /// Returns non-zero if the \p Image is compatible with the device. + int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image); + + /// Returns non-zero if the plugin device has been initialized. + int32_t is_device_initialized(int32_t DeviceId) const; /// Initialize the device inside of the plugin. int32_t init_device(int32_t DeviceId); @@ -1180,7 +1188,7 @@ public: const char **ErrStr); /// Sets the offset into the devices for use by OMPT. - int32_t set_device_offset(int32_t DeviceIdOffset); + int32_t set_device_identifier(int32_t UserId, int32_t DeviceId); /// Returns if the plugin can support auotmatic copy. int32_t use_auto_zero_copy(int32_t DeviceId); @@ -1200,10 +1208,8 @@ private: /// Number of devices available for the plugin. int32_t NumDevices = 0; - /// Index offset, which when added to a DeviceId, will yield a unique - /// user-observable device identifier. This is especially important when - /// DeviceIds of multiple plugins / RTLs need to be distinguishable. - int32_t DeviceIdStartIndex = 0; + /// Map of plugin device identifiers to the user device identifier. + llvm::DenseMap<int32_t, int32_t> UserDeviceIds; /// Array of pointers to the devices. Initially, they are all set to nullptr. 
/// Once a device is initialized, the pointer is stored in the position given diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 913721a..5a53c47 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -748,8 +748,7 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) { if (ompt::Initialized) { bool ExpectedStatus = false; if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true)) - performOmptCallback(device_initialize, /*device_num=*/DeviceId + - Plugin.getDeviceIdStartIndex(), + performOmptCallback(device_initialize, Plugin.getUserId(DeviceId), /*type=*/getComputeUnitKind().c_str(), /*device=*/reinterpret_cast<ompt_device_t *>(this), /*lookup=*/ompt::lookupCallbackByName, @@ -847,9 +846,7 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { if (ompt::Initialized) { bool ExpectedStatus = true; if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false)) - performOmptCallback(device_finalize, - /*device_num=*/DeviceId + - Plugin.getDeviceIdStartIndex()); + performOmptCallback(device_finalize, Plugin.getUserId(DeviceId)); } #endif @@ -908,7 +905,7 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, size_t Bytes = getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart); performOmptCallback( - device_load, /*device_num=*/DeviceId + Plugin.getDeviceIdStartIndex(), + device_load, Plugin.getUserId(DeviceId), /*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr, /*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart, /*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0); @@ -1492,11 +1489,14 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) { bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); } Error GenericPluginTy::init() { + if (Initialized) + return Plugin::success(); + auto NumDevicesOrErr = initImpl(); if (!NumDevicesOrErr) return 
NumDevicesOrErr.takeError(); - Initialized = true; + NumDevices = *NumDevicesOrErr; if (NumDevices == 0) return Plugin::success(); @@ -1517,6 +1517,8 @@ Error GenericPluginTy::init() { } Error GenericPluginTy::deinit() { + assert(Initialized && "Plugin was not initialized!"); + // Deinitialize all active devices. for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) { if (Devices[DeviceId]) { @@ -1537,7 +1539,11 @@ Error GenericPluginTy::deinit() { delete RecordReplay; // Perform last deinitializations on the plugin. - return deinitImpl(); + if (Error Err = deinitImpl()) + return Err; + Initialized = false; + + return Plugin::success(); } Error GenericPluginTy::initDevice(int32_t DeviceId) { @@ -1599,8 +1605,7 @@ Expected<bool> GenericPluginTy::checkBitcodeImage(StringRef Image) const { int32_t GenericPluginTy::is_initialized() const { return Initialized; } -int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image, - bool Initialized) { +int32_t GenericPluginTy::is_plugin_compatible(__tgt_device_image *Image) { StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart), target::getPtrDiff(Image->ImageEnd, Image->ImageStart)); @@ -1618,11 +1623,43 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image, auto MatchOrErr = checkELFImage(Buffer); if (Error Err = MatchOrErr.takeError()) return HandleError(std::move(Err)); - if (!Initialized || !*MatchOrErr) - return *MatchOrErr; + return *MatchOrErr; + } + case file_magic::bitcode: { + auto MatchOrErr = checkBitcodeImage(Buffer); + if (Error Err = MatchOrErr.takeError()) + return HandleError(std::move(Err)); + return *MatchOrErr; + } + default: + return false; + } +} + +int32_t GenericPluginTy::is_device_compatible(int32_t DeviceId, + __tgt_device_image *Image) { + StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart), + target::getPtrDiff(Image->ImageEnd, Image->ImageStart)); + + auto HandleError = [&](Error Err) -> bool { + [[maybe_unused]] std::string ErrStr = 
toString(std::move(Err)); + DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str()); + return false; + }; + switch (identify_magic(Buffer)) { + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: { + auto MatchOrErr = checkELFImage(Buffer); + if (Error Err = MatchOrErr.takeError()) + return HandleError(std::move(Err)); + if (!*MatchOrErr) + return false; // Perform plugin-dependent checks for the specific architecture if needed. - auto CompatibleOrErr = isELFCompatible(Buffer); + auto CompatibleOrErr = isELFCompatible(DeviceId, Buffer); if (Error Err = CompatibleOrErr.takeError()) return HandleError(std::move(Err)); return *CompatibleOrErr; @@ -1638,6 +1675,10 @@ int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image, } } +int32_t GenericPluginTy::is_device_initialized(int32_t DeviceId) const { + return isValidDeviceId(DeviceId) && Devices[DeviceId] != nullptr; +} + int32_t GenericPluginTy::init_device(int32_t DeviceId) { auto Err = initDevice(DeviceId); if (Err) { @@ -1985,8 +2026,9 @@ int32_t GenericPluginTy::init_device_info(int32_t DeviceId, return OFFLOAD_SUCCESS; } -int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) { - setDeviceIdStartIndex(DeviceIdOffset); +int32_t GenericPluginTy::set_device_identifier(int32_t UserId, + int32_t DeviceId) { + UserDeviceIds[DeviceId] = UserId; return OFFLOAD_SUCCESS; } diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index b260334..62460c0 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1388,8 +1388,9 @@ struct CUDAPluginTy final : public GenericPluginTy { const char *getName() const override { return GETNAME(TARGET_NAME); } - /// Check whether the image is compatible with the available CUDA devices. 
- Expected<bool> isELFCompatible(StringRef Image) const override { + /// Check whether the image is compatible with a CUDA device. + Expected<bool> isELFCompatible(uint32_t DeviceId, + StringRef Image) const override { auto ElfOrErr = ELF64LEObjectFile::create(MemoryBufferRef(Image, /*Identifier=*/""), /*InitContent=*/false); @@ -1399,33 +1400,29 @@ struct CUDAPluginTy final : public GenericPluginTy { // Get the numeric value for the image's `sm_` value. auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM; - for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) { - CUdevice Device; - CUresult Res = cuDeviceGet(&Device, DevId); - if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s")) - return std::move(Err); - - int32_t Major, Minor; - Res = cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device); - if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s")) - return std::move(Err); - - Res = cuDeviceGetAttribute( - &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device); - if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s")) - return std::move(Err); - - int32_t ImageMajor = SM / 10; - int32_t ImageMinor = SM % 10; - - // A cubin generated for a certain compute capability is supported to - // run on any GPU with the same major revision and same or higher minor - // revision. 
- if (Major != ImageMajor || Minor < ImageMinor) - return false; - } - return true; + CUdevice Device; + CUresult Res = cuDeviceGet(&Device, DeviceId); + if (auto Err = Plugin::check(Res, "Error in cuDeviceGet: %s")) + return std::move(Err); + + int32_t Major, Minor; + Res = cuDeviceGetAttribute( + &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device); + if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s")) + return std::move(Err); + + Res = cuDeviceGetAttribute( + &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device); + if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s")) + return std::move(Err); + + int32_t ImageMajor = SM / 10; + int32_t ImageMinor = SM % 10; + + // A cubin generated for a certain compute capability is supported to + // run on any GPU with the same major revision and same or higher minor + // revision. + return Major == ImageMajor && Minor >= ImageMinor; } }; diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index ef84cba..aa59ea6 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -418,7 +418,9 @@ struct GenELF64PluginTy final : public GenericPluginTy { } /// All images (ELF-compatible) should be compatible with this plugin. 
- Expected<bool> isELFCompatible(StringRef) const override { return true; } + Expected<bool> isELFCompatible(uint32_t, StringRef) const override { + return true; + } Triple::ArchType getTripleArch() const override { #if defined(__x86_64__) diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp index 13f08b1..5e8f917 100644 --- a/offload/src/PluginManager.cpp +++ b/offload/src/PluginManager.cpp @@ -47,6 +47,9 @@ void PluginManager::deinit() { DP("Unloading RTLs...\n"); for (auto &Plugin : Plugins) { + if (!Plugin->is_initialized()) + continue; + if (auto Err = Plugin->deinit()) { [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); DP("Failed to deinit plugin: %s\n", InfoMsg.c_str()); @@ -57,90 +60,15 @@ void PluginManager::deinit() { DP("RTLs unloaded!\n"); } -void PluginManager::initDevices(GenericPluginTy &RTL) { - // If this RTL has already been initialized. - if (PM->DeviceOffsets.contains(&RTL)) - return; - TIMESCOPE(); - - // If this RTL is not already in use, initialize it. - assert(RTL.number_of_devices() > 0 && "Tried to initialize useless plugin!"); - - // Initialize the device information for the RTL we are about to use. - auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor(); - - // Initialize the index of this RTL and save it in the used RTLs. - int32_t DeviceOffset = ExclusiveDevicesAccessor->size(); - - // Set the device identifier offset in the plugin. - RTL.set_device_offset(DeviceOffset); - - int32_t NumberOfUserDevices = 0; - int32_t NumPD = RTL.number_of_devices(); - ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD); - // Auto zero-copy is a per-device property. We need to ensure - // that all devices are suggesting to use it. 
- bool UseAutoZeroCopy = !(NumPD == 0); - for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) { - auto Device = std::make_unique<DeviceTy>(&RTL, UserDevId, PDevI); - if (auto Err = Device->init()) { - DP("Skip plugin known device %d: %s\n", PDevI, - toString(std::move(Err)).c_str()); - continue; - } - UseAutoZeroCopy = UseAutoZeroCopy && Device->useAutoZeroCopy(); - - ExclusiveDevicesAccessor->push_back(std::move(Device)); - ++NumberOfUserDevices; - ++UserDevId; - } - - // Auto Zero-Copy can only be currently triggered when the system is an - // homogeneous APU architecture without attached discrete GPUs. - // If all devices suggest to use it, change requirment flags to trigger - // zero-copy behavior when mapping memory. - if (UseAutoZeroCopy) - addRequirements(OMPX_REQ_AUTO_ZERO_COPY); - - DeviceOffsets[&RTL] = DeviceOffset; - DeviceUsed[&RTL] = NumberOfUserDevices; - DP("Plugin has index %d, exposes %d out of %d devices!\n", DeviceOffset, - NumberOfUserDevices, RTL.number_of_devices()); -} - void PluginManager::initAllPlugins() { - for (auto &R : Plugins) - initDevices(*R); -} - -static void registerImageIntoTranslationTable(TranslationTable &TT, - int32_t DeviceOffset, - int32_t NumberOfUserDevices, - __tgt_device_image *Image) { - - // same size, as when we increase one, we also increase the other. - assert(TT.TargetsTable.size() == TT.TargetsImages.size() && - "We should have as many images as we have tables!"); - - // Resize the Targets Table and Images to accommodate the new targets if - // required - unsigned TargetsTableMinimumSize = DeviceOffset + NumberOfUserDevices; - - if (TT.TargetsTable.size() < TargetsTableMinimumSize) { - TT.DeviceTables.resize(TargetsTableMinimumSize, {}); - TT.TargetsImages.resize(TargetsTableMinimumSize, 0); - TT.TargetsEntries.resize(TargetsTableMinimumSize, {}); - TT.TargetsTable.resize(TargetsTableMinimumSize, 0); - } - - // Register the image in all devices for this target type. 
- for (int32_t I = 0; I < NumberOfUserDevices; ++I) { - // If we are changing the image we are also invalidating the target table. - if (TT.TargetsImages[DeviceOffset + I] != Image) { - TT.TargetsImages[DeviceOffset + I] = Image; - TT.TargetsTable[DeviceOffset + I] = - 0; // lazy initialization of target table. + for (auto &R : plugins()) { + if (auto Err = R.init()) { + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + DP("Failed to init plugin: %s\n", InfoMsg.c_str()); + continue; } + DP("Registered plugin %s with %d visible device(s)\n", R.getName(), + R.number_of_devices()); } } @@ -153,27 +81,6 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { if (Entry.flags == OMP_REGISTER_REQUIRES) PM->addRequirements(Entry.data); - // Initialize all the plugins that have associated images. - for (auto &Plugin : Plugins) { - // Extract the exectuable image and extra information if availible. - for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) { - if (Plugin->is_initialized()) - continue; - - if (!Plugin->is_valid_binary(&Desc->DeviceImages[i], - /*Initialized=*/false)) - continue; - - if (auto Err = Plugin->init()) { - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); - DP("Failed to init plugin: %s\n", InfoMsg.c_str()); - } else { - DP("Registered plugin %s with %d visible device(s)\n", - Plugin->getName(), Plugin->number_of_devices()); - } - } - } - // Extract the exectuable image and extra information if availible. for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) PM->addDeviceImage(*Desc, Desc->DeviceImages[i]); @@ -188,54 +95,110 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { // Scan the RTLs that have associated images until we find one that supports // the current image. 
for (auto &R : PM->plugins()) { - if (!R.number_of_devices()) + if (!R.is_plugin_compatible(Img)) continue; - if (!R.is_valid_binary(Img, /*Initialized=*/true)) { - DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", - DPxPTR(Img->ImageStart), R.getName()); - continue; + if (!R.is_initialized()) { + if (auto Err = R.init()) { + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + DP("Failed to init plugin: %s\n", InfoMsg.c_str()); + continue; + } + DP("Registered plugin %s with %d visible device(s)\n", R.getName(), + R.number_of_devices()); } - DP("Image " DPxMOD " is compatible with RTL %s!\n", - DPxPTR(Img->ImageStart), R.getName()); - - PM->initDevices(R); + if (!R.number_of_devices()) { + DP("Skipping plugin %s with no visible devices\n", R.getName()); + continue; + } - // Initialize (if necessary) translation table for this library. - PM->TrlTblMtx.lock(); - if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) { - PM->HostEntriesBeginRegistrationOrder.push_back(Desc->HostEntriesBegin); - TranslationTable &TransTable = + for (int32_t DeviceId = 0; DeviceId < R.number_of_devices(); ++DeviceId) { + if (!R.is_device_compatible(DeviceId, Img)) + continue; + + DP("Image " DPxMOD " is compatible with RTL %s device %d!\n", + DPxPTR(Img->ImageStart), R.getName(), DeviceId); + + if (!R.is_device_initialized(DeviceId)) { + // Initialize the device information for the RTL we are about to use. + auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor(); + + int32_t UserId = ExclusiveDevicesAccessor->size(); + + // Set the device identifier offset in the plugin. 
+#ifdef OMPT_SUPPORT + R.set_device_identifier(UserId, DeviceId); +#endif + + auto Device = std::make_unique<DeviceTy>(&R, UserId, DeviceId); + if (auto Err = Device->init()) { + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + DP("Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str()); + continue; + } + + ExclusiveDevicesAccessor->push_back(std::move(Device)); + + // We need to map between the plugin's device identifier and the one + // that OpenMP will use. + PM->DeviceIds[std::make_pair(&R, DeviceId)] = UserId; + } + + // Initialize (if necessary) translation table for this library. + PM->TrlTblMtx.lock(); + if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) { + PM->HostEntriesBeginRegistrationOrder.push_back( + Desc->HostEntriesBegin); + TranslationTable &TT = + (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; + TT.HostTable.EntriesBegin = Desc->HostEntriesBegin; + TT.HostTable.EntriesEnd = Desc->HostEntriesEnd; + } + + // Retrieve translation table for this library. + TranslationTable &TT = (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; - TransTable.HostTable.EntriesBegin = Desc->HostEntriesBegin; - TransTable.HostTable.EntriesEnd = Desc->HostEntriesEnd; - } - // Retrieve translation table for this library. 
- TranslationTable &TransTable = - (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; + DP("Registering image " DPxMOD " with RTL %s!\n", + DPxPTR(Img->ImageStart), R.getName()); - DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart), - R.getName()); + auto UserId = PM->DeviceIds[std::make_pair(&R, DeviceId)]; + if (TT.TargetsTable.size() < static_cast<size_t>(UserId + 1)) { + TT.DeviceTables.resize(UserId + 1, {}); + TT.TargetsImages.resize(UserId + 1, nullptr); + TT.TargetsEntries.resize(UserId + 1, {}); + TT.TargetsTable.resize(UserId + 1, nullptr); + } - registerImageIntoTranslationTable(TransTable, PM->DeviceOffsets[&R], - PM->DeviceUsed[&R], Img); - PM->UsedImages.insert(Img); + // Register the image for this target type and invalidate the table. + TT.TargetsImages[UserId] = Img; + TT.TargetsTable[UserId] = nullptr; - PM->TrlTblMtx.unlock(); - FoundRTL = &R; + PM->UsedImages.insert(Img); + FoundRTL = &R; - // if an RTL was found we are done - proceed to register the next image - break; + PM->TrlTblMtx.unlock(); + } } - - if (!FoundRTL) { + if (!FoundRTL) DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart)); - } } PM->RTLsMtx.unlock(); + bool UseAutoZeroCopy = Plugins.size() > 0; + + auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor(); + for (const auto &Device : *ExclusiveDevicesAccessor) + UseAutoZeroCopy &= Device->useAutoZeroCopy(); + + // Auto Zero-Copy can only be currently triggered when the system is an + // homogeneous APU architecture without attached discrete GPUs. + // If all devices suggest to use it, change requirment flags to trigger + // zero-copy behavior when mapping memory. + if (UseAutoZeroCopy) + addRequirements(OMPX_REQ_AUTO_ZERO_COPY); + DP("Done registering entries!\n"); } @@ -257,7 +220,7 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { // Scan the RTLs that have associated images until we find one that supports // the current image. 
We only need to scan RTLs that are already being used. for (auto &R : PM->plugins()) { - if (!DeviceOffsets.contains(&R)) + if (R.is_initialized()) continue; // Ensure that we do not use any unused images associated with this RTL. diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp index 91e1213..9bca852 100644 --- a/offload/src/omptarget.cpp +++ b/offload/src/omptarget.cpp @@ -315,7 +315,7 @@ void handleTargetOutcome(bool Success, ident_t *Loc) { FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"); - if (!PM->getNumUsedPlugins()) { + if (!PM->getNumActivePlugins()) { FAILURE_MESSAGE( "No images found compatible with the installed hardware. "); |