From 924c3ccb310e615bd350d4c77b269b19d95bf5e4 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 27 Jun 2025 14:33:32 +0800 Subject: vfio/container: Fix vfio_container_post_load() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are multiple VFIO containers, vioc->dma_map is restored multiple times. As a result, only the first container works, and the remaining containers use the vioc->dma_map restored by the first container. Fix it by saving and restoring vioc->dma_map locally. saved_dma_map in VFIOContainerCPR becomes useless and is removed. Fixes: 7e9f21411302 ("vfio/container: restore DMA vaddr") Signed-off-by: Zhenzhong Duan Reviewed-by: Steve Sistare Link: https://lore.kernel.org/qemu-devel/20250627063332.5173-3-zhenzhong.duan@intel.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 8bf85b9..dbb2a16 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -16,14 +16,15 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, void *vaddr, + bool readonly, MemoryRegion *mr); + typedef struct VFIOContainerCPR { Error *blocker; bool vaddr_unmapped; NotifierWithReturn transfer_notifier; MemoryListener remap_listener; - int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly, MemoryRegion *mr); } VFIOContainerCPR; typedef struct VFIODeviceCPR { -- cgit v1.1 From 30edcb4d4e7a265c2912ca6978b150c7c75b654f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:38 -0700 Subject: vfio-pci: preserve MSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save the MSI message area as part of vfio-pci vmstate, 
and preserve the interrupt and notifier eventfds. migrate_incoming loads the MSI data, then the vfio-pci post_load handler finds the eventfds in CPR state, rebuilds vector data structures, and attaches the interrupts to the new KVM instance. Signed-off-by: Steve Sistare Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/1751493538-202042-2-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index dbb2a16..f21578d 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -15,6 +15,7 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +struct VFIOPCIDevice; typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, @@ -53,6 +54,13 @@ void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, bool vfio_cpr_ram_discard_register_listener( struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); +void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr, int fd); +int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); +void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); + extern const VMStateDescription vfio_cpr_pci_vmstate; #endif /* HW_VFIO_VFIO_CPR_H */ -- cgit v1.1 From 7ed0919119b0e7a6b7db1dcaca3a2cb30c771dd1 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:40 -0700 Subject: migration: close kvm after cpr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpr-transfer breaks vfio network connectivity to and from the guest, and the host system log shows: irq bypass consumer (token 00000000a03c32e5) registration fails: -16 which is EBUSY. 
This occurs because KVM descriptors are still open in the old QEMU process. Close them. Cc: Paolo Bonzini Signed-off-by: Steve Sistare Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/qemu-devel/1751493538-202042-4-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 2 ++ include/hw/vfio/vfio-device.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index f21578d..d37acc4 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -63,4 +63,6 @@ void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, extern const VMStateDescription vfio_cpr_pci_vmstate; +void vfio_cpr_add_kvm_notifier(void); + #endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index c616652..f503837 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -283,4 +283,6 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); int vfio_device_get_aw_bits(VFIODevice *vdev); + +void vfio_kvm_device_close(void); #endif /* HW_VFIO_VFIO_COMMON_H */ -- cgit v1.1 From fb32965b6dd8a001815593642a5146fbd2e85651 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:45 -0700 Subject: vfio/iommufd: use IOMMU_IOAS_MAP_FILE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use IOMMU_IOAS_MAP_FILE when the mapped region is backed by a file. Such a mapping can be preserved without modification during CPR, because it depends on the file's address space, which does not change, rather than on the process's address space, which does change. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-9-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-container-base.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3cd86ec..bded6e9 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -168,6 +168,21 @@ struct VFIOIOMMUClass { hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr); /** + * @dma_map_file + * + * Map a file range for the container. + * + * @bcontainer: #VFIOContainerBase to use for map + * @iova: start address to map + * @size: size of the range to map + * @fd: descriptor of the file to map + * @start: starting file offset of the range to map + * @readonly: map read only if true + */ + int (*dma_map_file)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly); + /** * @dma_unmap * * Unmap an address range from the container. -- cgit v1.1 From 184053f04f6ad6b2950d4712063ffed43bb2720f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:47 -0700 Subject: vfio/iommufd: add vfio_device_free_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define vfio_device_free_name to free the name created by vfio_device_get_name. A subsequent patch will do more there. No functional change. 
Signed-off-by: Steve Sistare Reviewed-by: Cédric Le Goater Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-11-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index f503837..1901a35 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -279,6 +279,7 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, /* Returns 0 on success, or a negative errno. */ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +void vfio_device_free_name(VFIODevice *vbasedev); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); -- cgit v1.1 From a434fd8f6462c1541927d22e07c58425d6cbd84b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:48 -0700 Subject: vfio/iommufd: device name blocker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an invariant device name cannot be created, block CPR. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-12-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index d37acc4..fa7d43d 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -30,6 +30,7 @@ typedef struct VFIOContainerCPR { typedef struct VFIODeviceCPR { Error *mdev_blocker; + Error *id_blocker; } VFIODeviceCPR; bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, -- cgit v1.1 From 06c6a65852af0b7648cdb6ff6cf2e66929a7b5f5 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:49 -0700 Subject: vfio/iommufd: register container for cpr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register a vfio iommufd container and device for CPR, replacing the generic CPR register call with a more specific iommufd register call. Add a blocker if the kernel does not support IOMMU_IOAS_CHANGE_PROCESS. This is mostly boilerplate. The fields to be saved and restored are added in subsequent patches. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-13-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index fa7d43d..87b4206 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -15,7 +15,10 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +struct VFIODevice; struct VFIOPCIDevice; +struct VFIOIOMMUFDContainer; +struct IOMMUFDBackend; typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, @@ -44,6 +47,15 @@ bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, Error **errp); void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); +bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container, + Error **errp); +void vfio_iommufd_cpr_unregister_container( + struct VFIOIOMMUFDContainer *container); +bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp); +void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be); +void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev); +void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev); + int vfio_cpr_group_get_device_fd(int d, const char *name); bool vfio_cpr_container_match(struct VFIOContainer *container, -- cgit v1.1 From a6f2f9c42f3a5418fc7000b1fd331b086b6133d9 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:50 -0700 Subject: migration: vfio cpr state hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a list of vfio devices in CPR state, in a subsection so that older QEMU can be live updated to this version. However, new QEMU will not be live updateable to old QEMU. 
This is acceptable because CPR is not yet commonly used, and updates to older versions are unusual. The contents of each device object will be defined by the vfio subsystem in a subsequent patch. Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-14-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 87b4206..286e3d4 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -75,6 +75,7 @@ void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, int nr); extern const VMStateDescription vfio_cpr_pci_vmstate; +extern const VMStateDescription vmstate_cpr_vfio_devices; void vfio_cpr_add_kvm_notifier(void); -- cgit v1.1 From f2f3e4667e4d6026f39ab17f355f79b2f8431e19 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:51 -0700 Subject: vfio/iommufd: cpr state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VFIO iommufd devices will need access to ioas_id, devid, and hwpt_id in new QEMU at realize time, so add them to CPR state. Define CprVFIODevice as the object which holds the state and is serialized to the vmstate file. Define accessors to copy state between VFIODevice and CprVFIODevice. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-15-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 286e3d4..2878372 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -34,6 +34,8 @@ typedef struct VFIOContainerCPR { typedef struct VFIODeviceCPR { Error *mdev_blocker; Error *id_blocker; + uint32_t hwpt_id; + uint32_t ioas_id; } VFIODeviceCPR; bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, @@ -55,6 +57,7 @@ bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp); void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be); void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev); void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev); +void vfio_cpr_load_device(struct VFIODevice *vbasedev); int vfio_cpr_group_get_device_fd(int d, const char *name); -- cgit v1.1 From 99cedd5d552130b9b27743c40ca9012e1f4f0371 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:57 -0700 Subject: vfio/container: delete old cpr register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vfio_cpr_[un]register_container is no longer used since they were subsumed by container type-specific registration. Delete them. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/1751493538-202042-21-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-cpr.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/hw') diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 2878372..80ad20d 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -45,10 +45,6 @@ void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, Error **errp); -bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, - Error **errp); -void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); - bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container, Error **errp); void vfio_iommufd_cpr_unregister_container( -- cgit v1.1