aboutsummaryrefslogtreecommitdiff
path: root/hw/vfio/cpr-legacy.c
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-06-11 11:39:53 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2025-06-11 11:39:53 -0400
commitd9ce74873a6a5a7c504379857461e4ae64fcf0cd (patch)
tree6267a2cf8b04d62247800322183711aae78c0111 /hw/vfio/cpr-legacy.c
parentd82bb3f5dd5647e0f470b4189096aced1447b09f (diff)
parent079e7216debd767e78a77aefc88e2e7335f49b26 (diff)
downloadqemu-master.zip
qemu-master.tar.gz
qemu-master.tar.bz2
Merge tag 'pull-vfio-20250611' of https://github.com/legoater/qemu into stagingHEADmaster
vfio queue: * Fixed newly added potential issues in vfio-pci * Added support to report vfio-ap configuration changes * Added prerequisite support for vfio-user * Added first part for VFIO live update support # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmhJm00ACgkQUaNDx8/7 # 7KHBehAAlbSt+QCPwdNJ/5QPGGPWIQ86acIHaI/sE/lpcJx9FideQhtKTtt0gTOE # ZNGbzfeCnewCM+VLMgkrYZC9DWd9OpEO68tDy6ev577F6ijSR8wzXRtDl2j5Revm # R9gBuOm/cQ6Mafiv8SNPNSGW2tQ0M9Bd4GJRa5K3VBf8kFwPpWEZC/yDWbvSVvwc # 99TFXziIbWJEYGRzG4h7hoEEd/GapZOwTRIPRoRGHznbOPMsxShjIhExn8ZGTlU9 # woaNBPZXS5xjjy5tKyURexu+eyxbR6WsZFyeAA03+HzWEfRzhFc/rhAC6mBbpq7v # 03a/4ewkKZ0fYUf9G2H5YpXTXl6io+qk+irKi99/4GT0oSBMrm+/NcY7u9Hv2MwA # 50h3iXUhLQYzL2G2bSSoBTKOGxV84Xtto9j7dM7fy8e0nYv9rucvKl+V3Ox1Qwv4 # 8+bQsxP5tjmHlXE/n6ckfcrWtSHuWmb3JJ8yxdBttdo3Cz/+KxJ3UjtP9U81RXxY # gepxCRXZmcTfnv1dV6FyjOE6QOhB3WIT5rHmgoQIvHGhtBsLpT2mDlSsMVEQIXvm # ixQnRguwQv9fgEZeYB/ck/ezluOxewBlOv5Q3CPpHQBd2Ykh4N/8xsWpXlKI1KWr # Tj7Nh/2ObqNXbKdmb9nNiuo6eQDkPOm4mr1cs2ncMr7/cRGeKeA= # =KOf3 # -----END PGP SIGNATURE----- # gpg: Signature made Wed 11 Jun 2025 11:05:49 EDT # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full] # gpg: aka "Cédric Le Goater <clg@kaod.org>" [full] # Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1 * tag 'pull-vfio-20250611' of https://github.com/legoater/qemu: (27 commits) vfio: improve VFIODeviceIOOps docs vfio/pci: export MSI functions vfio/pci: vfio_notifier_cleanup vfio/pci: vfio_notifier_init cpr parameters vfio/pci: pass vector to virq functions vfio/pci: vfio_notifier_init vfio/pci: vfio_pci_vector_init vfio-pci: skip reset during cpr pci: skip reset during cpr pci: export msix_is_pending vfio/container: recover from unmap-all-vaddr failure vfio/container: mdev cpr blocker vfio/container: restore DMA vaddr vfio/container: discard old DMA vaddr vfio/container: preserve descriptors 
vfio/container: register container for cpr migration: lower handler priority migration: cpr helpers vfio: mark posted writes in region write callbacks vfio: add per-region fd support ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw/vfio/cpr-legacy.c')
-rw-r--r--hw/vfio/cpr-legacy.c287
1 files changed, 287 insertions, 0 deletions
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
new file mode 100644
index 0000000..a84c324
--- /dev/null
+++ b/hw/vfio/cpr-legacy.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2021-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+/*
+ * Issue a single VFIO_IOMMU_UNMAP_DMA request on @container with both the
+ * VFIO_DMA_UNMAP_FLAG_VADDR and VFIO_DMA_UNMAP_FLAG_ALL flags set (iova and
+ * size are zero, as the request is not range based).
+ *
+ * On success, container->cpr.vaddr_unmapped is set so that a later failure
+ * handler knows the vaddrs must be restored, and true is returned.
+ * On failure, @errp is set from errno and false is returned.
+ */
+static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
+{
+    struct vfio_iommu_type1_dma_unmap req = {
+        .argsz = sizeof(req),
+        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
+        .iova = 0,
+        .size = 0,
+    };
+    int rc = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &req);
+
+    if (rc == 0) {
+        container->cpr.vaddr_unmapped = true;
+        return true;
+    }
+
+    error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
+    return false;
+}
+
+/*
+ * CPR replacement for the container class dma_map callback: supply a new
+ * @vaddr for a mapping instead of creating one.
+ *
+ * Issues VFIO_IOMMU_MAP_DMA with VFIO_DMA_MAP_FLAG_VADDR for the range that
+ * starts at @iova and spans @size bytes.  @readonly and @mr exist only to
+ * match the dma_map signature and are not used.
+ *
+ * This callback is installed in two places: by
+ * vfio_legacy_cpr_register_container() while CPR state is incoming, and by
+ * vfio_cpr_fail_notifier() to restore vaddr after a failed transfer.  The
+ * second case runs in the QEMU that attempted the save, so this function
+ * must not assert that CPR is incoming.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
+                                   hwaddr iova, ram_addr_t size, void *vaddr,
+                                   bool readonly, MemoryRegion *mr)
+{
+    const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+                                                  bcontainer);
+    struct vfio_iommu_type1_dma_map map = {
+        .argsz = sizeof(map),
+        .flags = VFIO_DMA_MAP_FLAG_VADDR,
+        .vaddr = (__u64)(uintptr_t)vaddr,
+        .iova = iova,
+        .size = size,
+    };
+
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+/*
+ * region_add callback of the temporary cpr.remap_listener.
+ *
+ * Recovers the owning container from @listener and forwards @section to
+ * vfio_container_region_add() with its last argument set to true.
+ * NOTE(review): that flag presumably selects the "CPR remap" path of
+ * vfio_container_region_add(); confirm against its definition.
+ */
+static void vfio_region_remap(MemoryListener *listener,
+                              MemoryRegionSection *section)
+{
+    VFIOContainer *owner = container_of(listener, VFIOContainer,
+                                        cpr.remap_listener);
+    VFIOContainerBase *base = &owner->bcontainer;
+
+    vfio_container_region_add(base, section, true);
+}
+
+/*
+ * Check that @container's kernel driver offers the two extensions this file
+ * relies on: VFIO_UPDATE_VADDR and VFIO_UNMAP_ALL.
+ *
+ * Returns true when both VFIO_CHECK_EXTENSION queries return non-zero.
+ * Otherwise sets @errp to name the first extension reported as missing and
+ * returns false.
+ */
+static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
+{
+    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
+        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
+        return false;
+    }
+
+    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
+        error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * pre_save handler of vfio_container_vmstate.
+ *
+ * @opaque is the VFIOContainer that was passed to vmstate_register().
+ * Unmaps the vaddr of all its DMA mappings.  Returns 0 on success; on
+ * failure the error is reported and -1 is returned.
+ */
+static int vfio_container_pre_save(void *opaque)
+{
+    VFIOContainer *c = opaque;
+    Error *err = NULL;
+
+    if (vfio_dma_unmap_vaddr_all(c, &err)) {
+        return 0;
+    }
+
+    error_report_err(err);
+    return -1;
+}
+
+/*
+ * post_load handler of vfio_container_vmstate.
+ *
+ * @opaque is the VFIOContainer that was passed to vmstate_register();
+ * @version_id is unused.
+ *
+ * Registers the container's memory listener, then puts back the dma_map
+ * callback that vfio_legacy_cpr_register_container() saved in
+ * container->cpr.saved_dma_map before diverting it.
+ *
+ * Returns 0 on success.  If listener registration fails the error is
+ * reported, dma_map is left untouched, and -1 is returned.
+ */
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+    VFIOContainer *container = opaque;
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    Error *local_err = NULL;
+
+    if (!vfio_listener_register(bcontainer, &local_err)) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    /*
+     * Restore original dma_map function.  This is container state and does
+     * not depend on any group, so do it exactly once rather than once per
+     * group (which would also skip it for an empty group list).
+     */
+    vioc->dma_map = container->cpr.saved_dma_map;
+    return 0;
+}
+
+/*
+ * vmstate description registered for each legacy VFIO container.
+ *
+ * It carries no fields; it exists only for its pre_save and post_load
+ * hooks, and is included only when cpr_incoming_needed() says so.
+ */
+static const VMStateDescription vfio_container_vmstate = {
+    .name = "vfio-container",
+    .minimum_version_id = 0,
+    .version_id = 0,
+    .needed = cpr_incoming_needed,
+    .pre_save = vfio_container_pre_save,
+    .post_load = vfio_container_post_load,
+    /* Must happen after devices and groups */
+    .priority = MIG_PRI_LOW,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Migration notifier registered for MIG_MODE_CPR_TRANSFER.
+ *
+ * Only MIG_EVENT_PRECOPY_FAILED is acted upon; every other event is
+ * ignored.  If vfio_dma_unmap_vaddr_all() previously succeeded for this
+ * container (cpr.vaddr_unmapped is set), the vaddr of each mapped section
+ * is restored and the flag is cleared.
+ *
+ * Always returns 0; @errp is not used.
+ */
+static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
+                                  MigrationEvent *e, Error **errp)
+{
+    VFIOContainer *container =
+        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+
+    if (e->type != MIG_EVENT_PRECOPY_FAILED) {
+        return 0;
+    }
+
+    if (container->cpr.vaddr_unmapped) {
+        /*
+         * Force a call to vfio_region_remap for each mapped section by
+         * temporarily registering a listener, and temporarily diverting
+         * dma_map to vfio_legacy_cpr_dma_map.  The latter restores vaddr.
+         *
+         * The callback being displaced is remembered locally.  It cannot be
+         * taken from cpr.saved_dma_map, because registration only fills
+         * that field when CPR is incoming, so here it may still be NULL.
+         */
+        VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+        __typeof__(vioc->dma_map) prev_dma_map = vioc->dma_map;
+
+        vioc->dma_map = vfio_legacy_cpr_dma_map;
+
+        container->cpr.remap_listener = (MemoryListener) {
+            .name = "vfio cpr recover",
+            .region_add = vfio_region_remap
+        };
+        /* Registering replays region_add for the existing sections. */
+        memory_listener_register(&container->cpr.remap_listener,
+                                 bcontainer->space->as);
+        memory_listener_unregister(&container->cpr.remap_listener);
+        container->cpr.vaddr_unmapped = false;
+        vioc->dma_map = prev_dma_map;
+    }
+    return 0;
+}
+
+/*
+ * Hook @container into CPR.
+ *
+ * The cpr-reboot notifier is always added.  If the container lacks the
+ * required VFIO extensions, the reason is stored in container->cpr.blocker
+ * and installed as a blocker for MIG_MODE_CPR_TRANSFER; the result of that
+ * installation is the return value and nothing else is registered.
+ *
+ * Otherwise the container vmstate and the cpr-transfer failure notifier are
+ * registered, and, when CPR is incoming, the class dma_map callback is
+ * saved in cpr.saved_dma_map and replaced by vfio_legacy_cpr_dma_map.
+ *
+ * Returns true on success, false with @errp set if the blocker could not
+ * be added.
+ */
+bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
+{
+    VFIOContainerBase *base = &container->bcontainer;
+
+    migration_add_notifier_mode(&base->cpr_reboot_notifier,
+                                vfio_cpr_reboot_notifier,
+                                MIG_MODE_CPR_REBOOT);
+
+    if (!vfio_cpr_supported(container, &container->cpr.blocker)) {
+        int rc = migrate_add_blocker_modes(&container->cpr.blocker, errp,
+                                           MIG_MODE_CPR_TRANSFER, -1);
+
+        return rc == 0;
+    }
+
+    vmstate_register(NULL, -1, &vfio_container_vmstate, container);
+
+    /* During incoming CPR, divert calls to dma_map. */
+    if (cpr_is_incoming()) {
+        VFIOIOMMUClass *klass = VFIO_IOMMU_GET_CLASS(base);
+
+        container->cpr.saved_dma_map = klass->dma_map;
+        klass->dma_map = vfio_legacy_cpr_dma_map;
+    }
+
+    migration_add_notifier_mode(&container->cpr.transfer_notifier,
+                                vfio_cpr_fail_notifier,
+                                MIG_MODE_CPR_TRANSFER);
+    return true;
+}
+
+/*
+ * Undo vfio_legacy_cpr_register_container() for @container: remove the
+ * cpr-reboot notifier, delete the CPR blocker, unregister the container
+ * vmstate and remove the cpr-transfer notifier, in that order.
+ */
+void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
+{
+    migration_remove_notifier(&container->bcontainer.cpr_reboot_notifier);
+    migrate_del_blocker(&container->cpr.blocker);
+    vmstate_unregister(NULL, &vfio_container_vmstate, container);
+    migration_remove_notifier(&container->cpr.transfer_notifier);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr.  This
+ * restores vaddr for a @section that is backed by a guest IOMMU.
+ *
+ * The VFIOGuestIOMMU for the section must already be on
+ * @bcontainer->giommu_list (asserted).  It is matched on IOMMU memory
+ * region and on offset, then replayed through its notifier; the replay is
+ * what ends up calling vfio_legacy_cpr_dma_map further down the stack.
+ */
+void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
+                           MemoryRegionSection *section)
+{
+    VFIOGuestIOMMU *iter;
+    VFIOGuestIOMMU *found = NULL;
+    hwaddr iommu_offset = section->offset_within_address_space -
+                          section->offset_within_region;
+
+    QLIST_FOREACH(iter, &bcontainer->giommu_list, giommu_next) {
+        if (iter->iommu_mr != IOMMU_MEMORY_REGION(section->mr)) {
+            continue;
+        }
+        if (iter->iommu_offset != iommu_offset) {
+            continue;
+        }
+        found = iter;
+        break;
+    }
+
+    g_assert(found);
+    memory_region_iommu_replay(found->iommu_mr, &found->n);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr.  This
+ * restores vaddr for a @section managed by a RamDiscardManager.
+ *
+ * No new listener is created: the existing VFIORamDiscardListener for the
+ * section is looked up (it must exist, asserted) and its notify_populate
+ * hook is invoked directly, which calls vfio_legacy_cpr_dma_map.
+ *
+ * Returns true if notify_populate returned 0, false otherwise.
+ */
+bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
+                                            MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl;
+    int ret;
+
+    vrdl = vfio_find_ram_discard_listener(bcontainer, section);
+    g_assert(vrdl);
+
+    ret = vrdl->listener.notify_populate(&vrdl->listener, section);
+    return ret == 0;
+}
+
+/*
+ * Return the device fd for @name within the VFIO group open on fd @d.
+ *
+ * An fd already recorded in CPR state under (@name, 0) is returned as is.
+ * Otherwise VFIO_GROUP_GET_DEVICE_FD is issued on @d; a successfully
+ * obtained fd is recorded in CPR state before being returned.
+ *
+ * Returns the fd, or the negative ioctl result on failure.
+ */
+int vfio_cpr_group_get_device_fd(int d, const char *name)
+{
+    const int id = 0;
+    int fd = cpr_find_fd(name, id);
+
+    if (fd >= 0) {
+        return fd;
+    }
+
+    fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
+    if (fd < 0) {
+        return fd;
+    }
+
+    cpr_save_fd(name, id, fd);
+    return fd;
+}
+
+/*
+ * Return true if fstat() succeeds on both @fd1 and @fd2 and reports the
+ * same st_dev for them; false if either fstat() fails or st_dev differs.
+ */
+static bool same_device(int fd1, int fd2)
+{
+    struct stat sb1, sb2;
+
+    if (fstat(fd1, &sb1) != 0) {
+        return false;
+    }
+    if (fstat(fd2, &sb2) != 0) {
+        return false;
+    }
+    return sb1.st_dev == sb2.st_dev;
+}
+
+/*
+ * Decide whether @fd refers to @container.
+ *
+ * Returns true immediately when @fd is the container's own fd, and false
+ * when same_device() says the two fds differ.
+ *
+ * In the remaining case @fd is a duplicate of the container fd.  The CPR
+ * entry "vfio_container_for_group" for @group's id is replaced by one that
+ * holds container->fd, @fd is closed, and true is returned; the caller
+ * must not use @fd afterwards.
+ */
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+                              int fd)
+{
+    if (fd == container->fd) {
+        return true;
+    }
+
+    if (same_device(container->fd, fd)) {
+        /*
+         * Same device, different fd.  This occurs when the container fd is
+         * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+         * produces duplicates.  De-dup it.
+         */
+        cpr_delete_fd("vfio_container_for_group", group->groupid);
+        close(fd);
+        cpr_save_fd("vfio_container_for_group", group->groupid,
+                    container->fd);
+        return true;
+    }
+
+    return false;
+}