aboutsummaryrefslogtreecommitdiff
path: root/include/hw/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'include/hw/vfio')
-rw-r--r--include/hw/vfio/vfio-common.h286
-rw-r--r--include/hw/vfio/vfio-container-base.h143
-rw-r--r--include/hw/vfio/vfio-container.h38
-rw-r--r--include/hw/vfio/vfio-cpr.h57
-rw-r--r--include/hw/vfio/vfio-device.h286
-rw-r--r--include/hw/vfio/vfio-migration.h16
-rw-r--r--include/hw/vfio/vfio-platform.h4
-rw-r--r--include/hw/vfio/vfio-region.h48
8 files changed, 568 insertions, 310 deletions
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
deleted file mode 100644
index 4cb1ab8..0000000
--- a/include/hw/vfio/vfio-common.h
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * common header for vfio based device assignment support
- *
- * Copyright Red Hat, Inc. 2012
- *
- * Authors:
- * Alex Williamson <alex.williamson@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Based on qemu-kvm device-assignment:
- * Adapted for KVM by Qumranet.
- * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
- * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
- * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
- * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
- * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
- */
-
-#ifndef HW_VFIO_VFIO_COMMON_H
-#define HW_VFIO_VFIO_COMMON_H
-
-#include "exec/memory.h"
-#include "qemu/queue.h"
-#include "qemu/notify.h"
-#include "ui/console.h"
-#include "hw/display/ramfb.h"
-#ifdef CONFIG_LINUX
-#include <linux/vfio.h>
-#endif
-#include "sysemu/sysemu.h"
-#include "hw/vfio/vfio-container-base.h"
-
-#define VFIO_MSG_PREFIX "vfio %s: "
-
-enum {
- VFIO_DEVICE_TYPE_PCI = 0,
- VFIO_DEVICE_TYPE_PLATFORM = 1,
- VFIO_DEVICE_TYPE_CCW = 2,
- VFIO_DEVICE_TYPE_AP = 3,
-};
-
-typedef struct VFIOMmap {
- MemoryRegion mem;
- void *mmap;
- off_t offset;
- size_t size;
-} VFIOMmap;
-
-typedef struct VFIORegion {
- struct VFIODevice *vbasedev;
- off_t fd_offset; /* offset of region within device fd */
- MemoryRegion *mem; /* slow, read/write access */
- size_t size;
- uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
- uint32_t nr_mmaps;
- VFIOMmap *mmaps;
- uint8_t nr; /* cache the region number for debug */
-} VFIORegion;
-
-typedef struct VFIOMigration {
- struct VFIODevice *vbasedev;
- VMChangeStateEntry *vm_state;
- NotifierWithReturn migration_state;
- uint32_t device_state;
- int data_fd;
- void *data_buffer;
- size_t data_buffer_size;
- uint64_t mig_flags;
- uint64_t precopy_init_size;
- uint64_t precopy_dirty_size;
- bool initial_data_sent;
-} VFIOMigration;
-
-struct VFIOGroup;
-
-typedef struct VFIOContainer {
- VFIOContainerBase bcontainer;
- int fd; /* /dev/vfio/vfio, empowered by the attached groups */
- unsigned iommu_type;
- QLIST_HEAD(, VFIOGroup) group_list;
-} VFIOContainer;
-
-typedef struct VFIOHostDMAWindow {
- hwaddr min_iova;
- hwaddr max_iova;
- uint64_t iova_pgsizes;
- QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
-} VFIOHostDMAWindow;
-
-typedef struct IOMMUFDBackend IOMMUFDBackend;
-
-typedef struct VFIOIOMMUFDContainer {
- VFIOContainerBase bcontainer;
- IOMMUFDBackend *be;
- uint32_t ioas_id;
-} VFIOIOMMUFDContainer;
-
-typedef struct VFIODeviceOps VFIODeviceOps;
-
-typedef struct VFIODevice {
- QLIST_ENTRY(VFIODevice) next;
- QLIST_ENTRY(VFIODevice) container_next;
- QLIST_ENTRY(VFIODevice) global_next;
- struct VFIOGroup *group;
- VFIOContainerBase *bcontainer;
- char *sysfsdev;
- char *name;
- DeviceState *dev;
- int fd;
- int type;
- bool reset_works;
- bool needs_reset;
- bool no_mmap;
- bool ram_block_discard_allowed;
- OnOffAuto enable_migration;
- bool migration_events;
- VFIODeviceOps *ops;
- unsigned int num_irqs;
- unsigned int num_regions;
- unsigned int flags;
- VFIOMigration *migration;
- Error *migration_blocker;
- OnOffAuto pre_copy_dirty_page_tracking;
- bool dirty_pages_supported;
- bool dirty_tracking;
- int devid;
- IOMMUFDBackend *iommufd;
-} VFIODevice;
-
-struct VFIODeviceOps {
- void (*vfio_compute_needs_reset)(VFIODevice *vdev);
- int (*vfio_hot_reset_multi)(VFIODevice *vdev);
- void (*vfio_eoi)(VFIODevice *vdev);
- Object *(*vfio_get_object)(VFIODevice *vdev);
-
- /**
- * @vfio_save_config
- *
- * Save device config state
- *
- * @vdev: #VFIODevice for which to save the config
- * @f: #QEMUFile where to send the data
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns zero to indicate success and negative for error
- */
- int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);
-
- /**
- * @vfio_load_config
- *
- * Load device config state
- *
- * @vdev: #VFIODevice for which to load the config
- * @f: #QEMUFile where to get the data
- *
- * Returns zero to indicate success and negative for error
- */
- int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
-};
-
-typedef struct VFIOGroup {
- int fd;
- int groupid;
- VFIOContainer *container;
- QLIST_HEAD(, VFIODevice) device_list;
- QLIST_ENTRY(VFIOGroup) next;
- QLIST_ENTRY(VFIOGroup) container_next;
- bool ram_block_discard_allowed;
-} VFIOGroup;
-
-typedef struct VFIODMABuf {
- QemuDmaBuf *buf;
- uint32_t pos_x, pos_y, pos_updates;
- uint32_t hot_x, hot_y, hot_updates;
- int dmabuf_id;
- QTAILQ_ENTRY(VFIODMABuf) next;
-} VFIODMABuf;
-
-typedef struct VFIODisplay {
- QemuConsole *con;
- RAMFBState *ramfb;
- struct vfio_region_info *edid_info;
- struct vfio_region_gfx_edid *edid_regs;
- uint8_t *edid_blob;
- QEMUTimer *edid_link_timer;
- struct {
- VFIORegion buffer;
- DisplaySurface *surface;
- } region;
- struct {
- QTAILQ_HEAD(, VFIODMABuf) bufs;
- VFIODMABuf *primary;
- VFIODMABuf *cursor;
- } dmabuf;
-} VFIODisplay;
-
-VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
-void vfio_put_address_space(VFIOAddressSpace *space);
-
-/* SPAPR specific */
-int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
-void vfio_spapr_container_deinit(VFIOContainer *container);
-
-void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
-void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
-void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
-bool vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
- int action, int fd, Error **errp);
-void vfio_region_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size);
-uint64_t vfio_region_read(void *opaque,
- hwaddr addr, unsigned size);
-int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
- int index, const char *name);
-int vfio_region_mmap(VFIORegion *region);
-void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
-void vfio_region_unmap(VFIORegion *region);
-void vfio_region_exit(VFIORegion *region);
-void vfio_region_finalize(VFIORegion *region);
-void vfio_reset_handler(void *opaque);
-struct vfio_device_info *vfio_get_device_info(int fd);
-bool vfio_attach_device(char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp);
-void vfio_detach_device(VFIODevice *vbasedev);
-
-int vfio_kvm_device_add_fd(int fd, Error **errp);
-int vfio_kvm_device_del_fd(int fd, Error **errp);
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-extern const MemoryRegionOps vfio_region_ops;
-typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
-typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
-extern VFIOGroupList vfio_group_list;
-extern VFIODeviceList vfio_device_list;
-extern const MemoryListener vfio_memory_listener;
-extern int vfio_kvm_device_fd;
-
-bool vfio_mig_active(void);
-int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
-void vfio_unblock_multiple_devices_migration(void);
-bool vfio_viommu_preset(VFIODevice *vbasedev);
-int64_t vfio_mig_bytes_transferred(void);
-void vfio_reset_bytes_transferred(void);
-bool vfio_device_state_is_running(VFIODevice *vbasedev);
-bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
-
-#ifdef CONFIG_LINUX
-int vfio_get_region_info(VFIODevice *vbasedev, int index,
- struct vfio_region_info **info);
-int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
- uint32_t subtype, struct vfio_region_info **info);
-bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
-struct vfio_info_cap_header *
-vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
-bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
- unsigned int *avail);
-struct vfio_info_cap_header *
-vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
-struct vfio_info_cap_header *
-vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
-#endif
-
-bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
-void vfio_migration_exit(VFIODevice *vbasedev);
-
-int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size);
-bool
-vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer);
-bool
-vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer);
-int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
-int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
- uint64_t size, ram_addr_t ram_addr, Error **errp);
-
-/* Returns 0 on success, or a negative errno. */
-bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
-void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
-void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
- DeviceState *dev, bool ram_discard);
-#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 2776481..3cd86ec 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -13,7 +13,7 @@
#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H
#define HW_VFIO_VFIO_CONTAINER_BASE_H
-#include "exec/memory.h"
+#include "system/memory.h"
typedef struct VFIODevice VFIODevice;
typedef struct VFIOIOMMUClass VFIOIOMMUClass;
@@ -34,7 +34,7 @@ typedef struct VFIOAddressSpace {
* This is the base object for vfio container backends
*/
typedef struct VFIOContainerBase {
- const VFIOIOMMUClass *ops;
+ Object parent;
VFIOAddressSpace *space;
MemoryListener listener;
Error *error;
@@ -44,6 +44,7 @@ typedef struct VFIOContainerBase {
unsigned long pgsizes;
unsigned int dma_max_mappings;
bool dirty_pages_supported;
+ bool dirty_pages_started; /* Protected by BQL */
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_ENTRY(VFIOContainerBase) next;
@@ -70,12 +71,17 @@ typedef struct VFIORamDiscardListener {
QLIST_ENTRY(VFIORamDiscardListener) next;
} VFIORamDiscardListener;
+VFIOAddressSpace *vfio_address_space_get(AddressSpace *as);
+void vfio_address_space_put(VFIOAddressSpace *space);
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainerBase *bcontainer);
+
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb);
+ IOMMUTLBEntry *iotlb, bool unmap_all);
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,
Error **errp);
@@ -83,42 +89,126 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section);
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
bool start, Error **errp);
+bool vfio_container_dirty_tracking_is_started(
+ const VFIOContainerBase *bcontainer);
+bool vfio_container_devices_dirty_tracking_is_supported(
+ const VFIOContainerBase *bcontainer);
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
+ uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp);
-void vfio_container_init(VFIOContainerBase *bcontainer,
- VFIOAddressSpace *space,
- const VFIOIOMMUClass *ops);
-void vfio_container_destroy(VFIOContainerBase *bcontainer);
+GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer);
+static inline uint64_t
+vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
+{
+ assert(bcontainer);
+ return bcontainer->pgsizes;
+}
#define TYPE_VFIO_IOMMU "vfio-iommu"
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
-/*
- * VFIOContainerBase is not an abstract QOM object because it felt
- * unnecessary to expose all the IOMMU backends to the QEMU machine
- * and human interface. However, we can still abstract the IOMMU
- * backend handlers using a QOM interface class. This provides more
- * flexibility when referencing the various implementations.
- */
-DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU)
+OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
- InterfaceClass parent_class;
+ ObjectClass parent_class;
- /* basic feature */
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainerBase
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
+ void (*listener_begin)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction,
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
+ void (*listener_commit)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainerBase to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
+ /**
+ * @dma_unmap
+ *
+ * Unmap an address range from the container.
+ *
+ * @bcontainer: #VFIOContainerBase to use for unmap
+ * @iova: start address to unmap
+ * @size: size of the range to unmap
+ * @iotlb: The IOMMU TLB mapping entry (or NULL)
+ * @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb);
+ IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
+
+ /*
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
void (*detach_device)(VFIODevice *vbasedev);
/* migration feature */
@@ -133,7 +223,7 @@ struct VFIOIOMMUClass {
* @start: indicates whether to start or stop dirty pages tracking
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
@@ -148,7 +238,7 @@ struct VFIOIOMMUClass {
* @size: size of iova range
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
@@ -163,4 +253,11 @@ struct VFIOIOMMUClass {
MemoryRegionSection *section);
void (*release)(VFIOContainerBase *bcontainer);
};
+
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
+void vfio_container_region_add(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section, bool cpr_remap);
+
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h
new file mode 100644
index 0000000..21e5807
--- /dev/null
+++ b/include/hw/vfio/vfio-container.h
@@ -0,0 +1,38 @@
+/*
+ * VFIO container
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_CONTAINER_H
+#define HW_VFIO_CONTAINER_H
+
+#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-cpr.h"
+
+typedef struct VFIOContainer VFIOContainer;
+typedef struct VFIODevice VFIODevice;
+
+typedef struct VFIOGroup {
+ int fd;
+ int groupid;
+ VFIOContainer *container;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOGroup) next;
+ QLIST_ENTRY(VFIOGroup) container_next;
+ bool ram_block_discard_allowed;
+} VFIOGroup;
+
+typedef struct VFIOContainer {
+ VFIOContainerBase bcontainer;
+ int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+ unsigned iommu_type;
+ QLIST_HEAD(, VFIOGroup) group_list;
+ VFIOContainerCPR cpr;
+} VFIOContainer;
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
+
+#endif /* HW_VFIO_CONTAINER_H */
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000..8bf85b9
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,57 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+#include "migration/misc.h"
+#include "system/memory.h"
+
+struct VFIOContainer;
+struct VFIOContainerBase;
+struct VFIOGroup;
+
+typedef struct VFIOContainerCPR {
+ Error *blocker;
+ bool vaddr_unmapped;
+ NotifierWithReturn transfer_notifier;
+ MemoryListener remap_listener;
+ int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+} VFIOContainerCPR;
+
+typedef struct VFIODeviceCPR {
+ Error *mdev_blocker;
+} VFIODeviceCPR;
+
+bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
+ Error **errp);
+void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
+
+int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
+ Error **errp);
+
+bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
+ Error **errp);
+void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+ struct VFIOGroup *group, int fd);
+
+void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section);
+
+bool vfio_cpr_ram_discard_register_listener(
+ struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
+extern const VMStateDescription vfio_cpr_pci_vmstate;
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
new file mode 100644
index 0000000..c616652
--- /dev/null
+++ b/include/hw/vfio/vfio-device.h
@@ -0,0 +1,286 @@
+/*
+ * VFIO Device interface
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#ifndef HW_VFIO_VFIO_COMMON_H
+#define HW_VFIO_VFIO_COMMON_H
+
+#include "system/memory.h"
+#include "qemu/queue.h"
+#ifdef CONFIG_LINUX
+#include <linux/vfio.h>
+#endif
+#include "system/system.h"
+#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "system/host_iommu_device.h"
+#include "system/iommufd.h"
+
+#define VFIO_MSG_PREFIX "vfio %s: "
+
+enum {
+ VFIO_DEVICE_TYPE_PCI = 0,
+ VFIO_DEVICE_TYPE_PLATFORM = 1,
+ VFIO_DEVICE_TYPE_CCW = 2,
+ VFIO_DEVICE_TYPE_AP = 3,
+};
+
+typedef struct VFIODeviceOps VFIODeviceOps;
+typedef struct VFIODeviceIOOps VFIODeviceIOOps;
+typedef struct VFIOMigration VFIOMigration;
+
+typedef struct IOMMUFDBackend IOMMUFDBackend;
+typedef struct VFIOIOASHwpt VFIOIOASHwpt;
+typedef struct VFIOUserProxy VFIOUserProxy;
+
+typedef struct VFIODevice {
+ QLIST_ENTRY(VFIODevice) next;
+ QLIST_ENTRY(VFIODevice) container_next;
+ QLIST_ENTRY(VFIODevice) global_next;
+ struct VFIOGroup *group;
+ VFIOContainerBase *bcontainer;
+ char *sysfsdev;
+ char *name;
+ DeviceState *dev;
+ int fd;
+ int type;
+ bool mdev;
+ bool reset_works;
+ bool needs_reset;
+ bool no_mmap;
+ bool ram_block_discard_allowed;
+ OnOffAuto enable_migration;
+ OnOffAuto migration_multifd_transfer;
+ bool migration_events;
+ bool use_region_fds;
+ VFIODeviceOps *ops;
+ VFIODeviceIOOps *io_ops;
+ unsigned int num_irqs;
+ unsigned int num_regions;
+ unsigned int flags;
+ VFIOMigration *migration;
+ Error *migration_blocker;
+ OnOffAuto pre_copy_dirty_page_tracking;
+ OnOffAuto device_dirty_page_tracking;
+ bool dirty_pages_supported;
+ bool dirty_tracking; /* Protected by BQL */
+ bool iommu_dirty_tracking;
+ HostIOMMUDevice *hiod;
+ int devid;
+ IOMMUFDBackend *iommufd;
+ VFIOIOASHwpt *hwpt;
+ QLIST_ENTRY(VFIODevice) hwpt_next;
+ struct vfio_region_info **reginfo;
+ int *region_fds;
+ VFIODeviceCPR cpr;
+ VFIOUserProxy *proxy;
+} VFIODevice;
+
+struct VFIODeviceOps {
+ void (*vfio_compute_needs_reset)(VFIODevice *vdev);
+ int (*vfio_hot_reset_multi)(VFIODevice *vdev);
+ void (*vfio_eoi)(VFIODevice *vdev);
+ Object *(*vfio_get_object)(VFIODevice *vdev);
+
+ /**
+ * @vfio_save_config
+ *
+ * Save device config state
+ *
+ * @vdev: #VFIODevice for which to save the config
+ * @f: #QEMUFile where to send the data
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
+ */
+ int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);
+
+ /**
+ * @vfio_load_config
+ *
+ * Load device config state
+ *
+ * @vdev: #VFIODevice for which to load the config
+ * @f: #QEMUFile where to get the data
+ *
+ * Returns zero to indicate success and negative for error
+ */
+ int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
+};
+
+/*
+ * Given a return value of either a short number of bytes read or -errno,
+ * construct a meaningful error message.
+ */
+#define strreaderror(ret) \
+ (ret < 0 ? strerror(-ret) : "short read")
+
+/*
+ * Given a return value of either a short number of bytes written or -errno,
+ * construct a meaningful error message.
+ */
+#define strwriteerror(ret) \
+ (ret < 0 ? strerror(-ret) : "short write")
+
+void vfio_device_irq_disable(VFIODevice *vbasedev, int index);
+void vfio_device_irq_unmask(VFIODevice *vbasedev, int index);
+void vfio_device_irq_mask(VFIODevice *vbasedev, int index);
+bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex,
+ int action, int fd, Error **errp);
+
+void vfio_device_reset_handler(void *opaque);
+bool vfio_device_is_mdev(VFIODevice *vbasedev);
+bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev,
+ const char *typename, Error **errp);
+bool vfio_device_attach(char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp);
+bool vfio_device_attach_by_iommu_type(const char *iommu_type, char *name,
+ VFIODevice *vbasedev, AddressSpace *as,
+ Error **errp);
+void vfio_device_detach(VFIODevice *vbasedev);
+VFIODevice *vfio_get_vfio_device(Object *obj);
+
+typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
+extern VFIODeviceList vfio_device_list;
+
+#ifdef CONFIG_LINUX
+/*
+ * How devices communicate with the server. The default option is through
+ * ioctl() to the kernel VFIO driver, but vfio-user can use a socket to a remote
+ * process.
+ */
+struct VFIODeviceIOOps {
+ /**
+ * @device_feature
+ *
+ * Fill in feature info for the given device.
+ *
+ * @vdev: #VFIODevice to use
+ * @feat: feature information to fill in
+ *
+ * Returns 0 on success or -errno.
+ */
+ int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat);
+
+ /**
+ * @get_region_info
+ *
+ * Get the information for a given region on the device.
+ *
+ * @vdev: #VFIODevice to use
+ * @info: set @info->index to the region index to look up; the rest of the
+ * struct will be filled in on success
+ * @fd: pointer to the fd for the region; will be -1 if not found
+ *
+ * Returns 0 on success or -errno.
+ */
+ int (*get_region_info)(VFIODevice *vdev,
+ struct vfio_region_info *info, int *fd);
+
+ /**
+ * @get_irq_info
+ *
+ * @vdev: #VFIODevice to use
+ * @irq: set @irq->index to the IRQ index to look up; the rest of the struct
+ * will be filled in on success
+ *
+ * Returns 0 on success or -errno.
+ */
+ int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
+
+ /**
+ * @set_irqs
+ *
+ * Configure IRQs.
+ *
+ * @vdev: #VFIODevice to use
+ * @irqs: IRQ configuration as defined by VFIO docs.
+ *
+ * Returns 0 on success or -errno.
+ */
+ int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
+
+ /**
+ * @region_read
+ *
+ * Read part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to read
+ * @data: buffer to read into
+ *
+ * Returns number of bytes read on success or -errno.
+ */
+ int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+ void *data);
+
+ /**
+ * @region_write
+ *
+ * Write part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to write
+ * @data: buffer to write from
+ * @post: true if this is a posted write
+ *
+ * Returns number of bytes write on success or -errno.
+ */
+ int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+ void *data, bool post);
+};
+
+void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
+ struct vfio_device_info *info);
+
+void vfio_device_unprepare(VFIODevice *vbasedev);
+
+int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
+ struct vfio_region_info **info);
+int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
+ uint32_t subtype, struct vfio_region_info **info);
+
+/**
+ * Return the fd for mapping this region. This is either the device's fd (for
+ * e.g. kernel vfio), or a per-region fd (for vfio-user).
+ *
+ * @vbasedev: #VFIODevice to use
+ * @index: region index
+ *
+ * Returns the fd.
+ */
+int vfio_device_get_region_fd(VFIODevice *vbasedev, int index);
+
+bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
+
+int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
+ struct vfio_irq_info *info);
+#endif
+
+/* Returns 0 on success, or a negative errno. */
+bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
+ DeviceState *dev, bool ram_discard);
+int vfio_device_get_aw_bits(VFIODevice *vdev);
+#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/hw/vfio/vfio-migration.h b/include/hw/vfio/vfio-migration.h
new file mode 100644
index 0000000..0d4ecd3
--- /dev/null
+++ b/include/hw/vfio/vfio-migration.h
@@ -0,0 +1,16 @@
+/*
+ * VFIO migration interface
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_MIGRATION_H
+#define HW_VFIO_VFIO_MIGRATION_H
+
+bool vfio_migration_active(void);
+int64_t vfio_migration_bytes_transferred(void);
+void vfio_migration_reset_bytes_transferred(void);
+
+#endif /* HW_VFIO_VFIO_MIGRATION_H */
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
index c414c3d..256d850 100644
--- a/include/hw/vfio/vfio-platform.h
+++ b/include/hw/vfio/vfio-platform.h
@@ -17,7 +17,7 @@
#define HW_VFIO_VFIO_PLATFORM_H
#include "hw/sysbus.h"
-#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio-device.h"
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
#include "qom/object.h"
@@ -47,6 +47,8 @@ typedef struct VFIOINTp {
/* function type for user side eventfd handler */
typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp);
+typedef struct VFIORegion VFIORegion;
+
struct VFIOPlatformDevice {
SysBusDevice sbdev;
VFIODevice vbasedev; /* not a QOM object */
diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h
new file mode 100644
index 0000000..ede6e0c
--- /dev/null
+++ b/include/hw/vfio/vfio-region.h
@@ -0,0 +1,48 @@
+/*
+ * VFIO region
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_REGION_H
+#define HW_VFIO_REGION_H
+
+#include "system/memory.h"
+
+typedef struct VFIOMmap {
+ MemoryRegion mem;
+ void *mmap;
+ off_t offset;
+ size_t size;
+} VFIOMmap;
+
+typedef struct VFIODevice VFIODevice;
+
+typedef struct VFIORegion {
+ struct VFIODevice *vbasedev;
+ off_t fd_offset; /* offset of region within device fd */
+ MemoryRegion *mem; /* slow, read/write access */
+ size_t size;
+ uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+ uint32_t nr_mmaps;
+ VFIOMmap *mmaps;
+ uint8_t nr; /* cache the region number for debug */
+ bool post_wr; /* writes can be posted */
+} VFIORegion;
+
+
+void vfio_region_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size);
+uint64_t vfio_region_read(void *opaque,
+ hwaddr addr, unsigned size);
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+ int index, const char *name);
+int vfio_region_mmap(VFIORegion *region);
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_unmap(VFIORegion *region);
+void vfio_region_exit(VFIORegion *region);
+void vfio_region_finalize(VFIORegion *region);
+
+#endif /* HW_VFIO_REGION_H */