about summary refs log tree commit diff
path: root/include/hw/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'include/hw/vfio')
-rw-r--r-- include/hw/vfio/vfio-amd-xgbe.h 46
-rw-r--r-- include/hw/vfio/vfio-calxeda-xgmac.h 43
-rw-r--r-- include/hw/vfio/vfio-container-base.h 186
-rw-r--r-- include/hw/vfio/vfio-container-legacy.h 39
-rw-r--r-- include/hw/vfio/vfio-container.h 288
-rw-r--r-- include/hw/vfio/vfio-cpr.h 88
-rw-r--r-- include/hw/vfio/vfio-device.h 93
-rw-r--r-- include/hw/vfio/vfio-platform.h 78
-rw-r--r-- include/hw/vfio/vfio-region.h 47
9 files changed, 469 insertions, 439 deletions
diff --git a/include/hw/vfio/vfio-amd-xgbe.h b/include/hw/vfio/vfio-amd-xgbe.h
deleted file mode 100644
index a894546..0000000
--- a/include/hw/vfio/vfio-amd-xgbe.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * VFIO AMD XGBE device
- *
- * Copyright Linaro Limited, 2015
- *
- * Authors:
- * Eric Auger <eric.auger@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HW_VFIO_VFIO_AMD_XGBE_H
-#define HW_VFIO_VFIO_AMD_XGBE_H
-
-#include "hw/vfio/vfio-platform.h"
-#include "qom/object.h"
-
-#define TYPE_VFIO_AMD_XGBE "vfio-amd-xgbe"
-
-/**
- * This device exposes:
- * - 5 MMIO regions: MAC, PCS, SerDes Rx/Tx regs,
- SerDes Integration Registers 1/2 & 2/2
- * - 2 level sensitive IRQs and optional DMA channel IRQs
- */
-struct VFIOAmdXgbeDevice {
- VFIOPlatformDevice vdev;
-};
-
-typedef struct VFIOAmdXgbeDevice VFIOAmdXgbeDevice;
-
-struct VFIOAmdXgbeDeviceClass {
- /*< private >*/
- VFIOPlatformDeviceClass parent_class;
- /*< public >*/
- DeviceRealize parent_realize;
-};
-
-typedef struct VFIOAmdXgbeDeviceClass VFIOAmdXgbeDeviceClass;
-
-DECLARE_OBJ_CHECKERS(VFIOAmdXgbeDevice, VFIOAmdXgbeDeviceClass,
- VFIO_AMD_XGBE_DEVICE, TYPE_VFIO_AMD_XGBE)
-
-#endif
diff --git a/include/hw/vfio/vfio-calxeda-xgmac.h b/include/hw/vfio/vfio-calxeda-xgmac.h
deleted file mode 100644
index 8482f15..0000000
--- a/include/hw/vfio/vfio-calxeda-xgmac.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * VFIO calxeda xgmac device
- *
- * Copyright Linaro Limited, 2014
- *
- * Authors:
- * Eric Auger <eric.auger@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HW_VFIO_VFIO_CALXEDA_XGMAC_H
-#define HW_VFIO_VFIO_CALXEDA_XGMAC_H
-
-#include "hw/vfio/vfio-platform.h"
-#include "qom/object.h"
-
-#define TYPE_VFIO_CALXEDA_XGMAC "vfio-calxeda-xgmac"
-
-/**
- * This device exposes:
- * - a single MMIO region corresponding to its register space
- * - 3 IRQS (main and 2 power related IRQs)
- */
-struct VFIOCalxedaXgmacDevice {
- VFIOPlatformDevice vdev;
-};
-typedef struct VFIOCalxedaXgmacDevice VFIOCalxedaXgmacDevice;
-
-struct VFIOCalxedaXgmacDeviceClass {
- /*< private >*/
- VFIOPlatformDeviceClass parent_class;
- /*< public >*/
- DeviceRealize parent_realize;
-};
-typedef struct VFIOCalxedaXgmacDeviceClass VFIOCalxedaXgmacDeviceClass;
-
-DECLARE_OBJ_CHECKERS(VFIOCalxedaXgmacDevice, VFIOCalxedaXgmacDeviceClass,
- VFIO_CALXEDA_XGMAC_DEVICE, TYPE_VFIO_CALXEDA_XGMAC)
-
-#endif
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
deleted file mode 100644
index 3d392b0..0000000
--- a/include/hw/vfio/vfio-container-base.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * VFIO BASE CONTAINER
- *
- * Copyright (C) 2023 Intel Corporation.
- * Copyright Red Hat, Inc. 2023
- *
- * Authors: Yi Liu <yi.l.liu@intel.com>
- * Eric Auger <eric.auger@redhat.com>
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H
-#define HW_VFIO_VFIO_CONTAINER_BASE_H
-
-#include "system/memory.h"
-
-typedef struct VFIODevice VFIODevice;
-typedef struct VFIOIOMMUClass VFIOIOMMUClass;
-
-typedef struct {
- unsigned long *bitmap;
- hwaddr size;
- hwaddr pages;
-} VFIOBitmap;
-
-typedef struct VFIOAddressSpace {
- AddressSpace *as;
- QLIST_HEAD(, VFIOContainerBase) containers;
- QLIST_ENTRY(VFIOAddressSpace) list;
-} VFIOAddressSpace;
-
-/*
- * This is the base object for vfio container backends
- */
-typedef struct VFIOContainerBase {
- Object parent;
- VFIOAddressSpace *space;
- MemoryListener listener;
- Error *error;
- bool initialized;
- uint64_t dirty_pgsizes;
- uint64_t max_dirty_bitmap_size;
- unsigned long pgsizes;
- unsigned int dma_max_mappings;
- bool dirty_pages_supported;
- bool dirty_pages_started; /* Protected by BQL */
- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
- QLIST_ENTRY(VFIOContainerBase) next;
- QLIST_HEAD(, VFIODevice) device_list;
- GList *iova_ranges;
- NotifierWithReturn cpr_reboot_notifier;
-} VFIOContainerBase;
-
-typedef struct VFIOGuestIOMMU {
- VFIOContainerBase *bcontainer;
- IOMMUMemoryRegion *iommu_mr;
- hwaddr iommu_offset;
- IOMMUNotifier n;
- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
-} VFIOGuestIOMMU;
-
-typedef struct VFIORamDiscardListener {
- VFIOContainerBase *bcontainer;
- MemoryRegion *mr;
- hwaddr offset_within_address_space;
- hwaddr size;
- uint64_t granularity;
- RamDiscardListener listener;
- QLIST_ENTRY(VFIORamDiscardListener) next;
-} VFIORamDiscardListener;
-
-VFIOAddressSpace *vfio_address_space_get(AddressSpace *as);
-void vfio_address_space_put(VFIOAddressSpace *space);
-void vfio_address_space_insert(VFIOAddressSpace *space,
- VFIOContainerBase *bcontainer);
-
-int vfio_container_dma_map(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
-int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all);
-bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp);
-void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section);
-int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
- bool start, Error **errp);
-bool vfio_container_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer);
-bool vfio_container_devices_dirty_tracking_is_supported(
- const VFIOContainerBase *bcontainer);
-int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp);
-
-GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer);
-
-static inline uint64_t
-vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
-{
- assert(bcontainer);
- return bcontainer->pgsizes;
-}
-
-#define TYPE_VFIO_IOMMU "vfio-iommu"
-#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
-#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
-#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
-
-OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
-
-struct VFIOIOMMUClass {
- ObjectClass parent_class;
-
- /* basic feature */
- bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
- void (*listener_begin)(VFIOContainerBase *bcontainer);
- void (*listener_commit)(VFIOContainerBase *bcontainer);
- int (*dma_map)(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
- /**
- * @dma_unmap
- *
- * Unmap an address range from the container.
- *
- * @bcontainer: #VFIOContainerBase to use for unmap
- * @iova: start address to unmap
- * @size: size of the range to unmap
- * @iotlb: The IOMMU TLB mapping entry (or NULL)
- * @unmap_all: if set, unmap the entire address space
- */
- int (*dma_unmap)(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all);
- bool (*attach_device)(const char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp);
- void (*detach_device)(VFIODevice *vbasedev);
-
- /* migration feature */
-
- /**
- * @set_dirty_page_tracking
- *
- * Start or stop dirty pages tracking on VFIO container
- *
- * @bcontainer: #VFIOContainerBase on which to de/activate dirty
- * page tracking
- * @start: indicates whether to start or stop dirty pages tracking
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns zero to indicate success and negative for error
- */
- int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
- bool start, Error **errp);
- /**
- * @query_dirty_bitmap
- *
- * Get bitmap of dirty pages from container
- *
- * @bcontainer: #VFIOContainerBase from which to get dirty pages
- * @vbmap: #VFIOBitmap internal bitmap structure
- * @iova: iova base address
- * @size: size of iova range
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns zero to indicate success and negative for error
- */
- int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
- /* PCI specific */
- int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
-
- /* SPAPR specific */
- bool (*add_window)(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp);
- void (*del_window)(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section);
- void (*release)(VFIOContainerBase *bcontainer);
-};
-
-#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
diff --git a/include/hw/vfio/vfio-container-legacy.h b/include/hw/vfio/vfio-container-legacy.h
new file mode 100644
index 0000000..74a72df
--- /dev/null
+++ b/include/hw/vfio/vfio-container-legacy.h
@@ -0,0 +1,39 @@
+/*
+ * VFIO container
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_CONTAINER_LEGACY_H
+#define HW_VFIO_CONTAINER_LEGACY_H
+
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
+
+typedef struct VFIOLegacyContainer VFIOLegacyContainer;
+typedef struct VFIODevice VFIODevice;
+
+typedef struct VFIOGroup {
+ int fd;
+ int groupid;
+ VFIOLegacyContainer *container;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOGroup) next;
+ QLIST_ENTRY(VFIOGroup) container_next;
+ bool ram_block_discard_allowed;
+} VFIOGroup;
+
+struct VFIOLegacyContainer {
+ VFIOContainer parent_obj;
+
+ int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+ unsigned iommu_type;
+ QLIST_HEAD(, VFIOGroup) group_list;
+ VFIOContainerCPR cpr;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOLegacyContainer, VFIO_IOMMU_LEGACY);
+
+#endif /* HW_VFIO_CONTAINER_LEGACY_H */
diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h
index afc498d..c4b58d6 100644
--- a/include/hw/vfio/vfio-container.h
+++ b/include/hw/vfio/vfio-container.h
@@ -1,36 +1,280 @@
/*
- * VFIO container
+ * VFIO BASE CONTAINER
*
- * Copyright Red Hat, Inc. 2025
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
-#ifndef HW_VFIO_CONTAINER_H
-#define HW_VFIO_CONTAINER_H
+#ifndef HW_VFIO_VFIO_CONTAINER_H
+#define HW_VFIO_VFIO_CONTAINER_H
-#include "hw/vfio/vfio-container-base.h"
+#include "system/memory.h"
-typedef struct VFIOContainer VFIOContainer;
typedef struct VFIODevice VFIODevice;
+typedef struct VFIOIOMMUClass VFIOIOMMUClass;
+
+typedef struct {
+ unsigned long *bitmap;
+ hwaddr size;
+ hwaddr pages;
+} VFIOBitmap;
-typedef struct VFIOGroup {
- int fd;
- int groupid;
- VFIOContainer *container;
+typedef struct VFIOAddressSpace {
+ AddressSpace *as;
+ QLIST_HEAD(, VFIOContainer) containers;
+ QLIST_ENTRY(VFIOAddressSpace) list;
+} VFIOAddressSpace;
+
+/*
+ * This is the base object for vfio container backends
+ */
+struct VFIOContainer {
+ Object parent_obj;
+
+ VFIOAddressSpace *space;
+ MemoryListener listener;
+ Error *error;
+ bool initialized;
+ uint64_t dirty_pgsizes;
+ uint64_t max_dirty_bitmap_size;
+ unsigned long pgsizes;
+ unsigned int dma_max_mappings;
+ bool dirty_pages_supported;
+ bool dirty_pages_started; /* Protected by BQL */
+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
+ QLIST_ENTRY(VFIOContainer) next;
QLIST_HEAD(, VFIODevice) device_list;
- QLIST_ENTRY(VFIOGroup) next;
- QLIST_ENTRY(VFIOGroup) container_next;
- bool ram_block_discard_allowed;
-} VFIOGroup;
+ GList *iova_ranges;
+ NotifierWithReturn cpr_reboot_notifier;
+};
+
+#define TYPE_VFIO_IOMMU "vfio-iommu"
+OBJECT_DECLARE_TYPE(VFIOContainer, VFIOIOMMUClass, VFIO_IOMMU)
+
+typedef struct VFIOGuestIOMMU {
+ VFIOContainer *bcontainer;
+ IOMMUMemoryRegion *iommu_mr;
+ hwaddr iommu_offset;
+ IOMMUNotifier n;
+ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+} VFIOGuestIOMMU;
+
+typedef struct VFIORamDiscardListener {
+ VFIOContainer *bcontainer;
+ MemoryRegion *mr;
+ hwaddr offset_within_address_space;
+ hwaddr size;
+ uint64_t granularity;
+ RamDiscardListener listener;
+ QLIST_ENTRY(VFIORamDiscardListener) next;
+} VFIORamDiscardListener;
+
+VFIOAddressSpace *vfio_address_space_get(AddressSpace *as);
+void vfio_address_space_put(VFIOAddressSpace *space);
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainer *bcontainer);
+
+int vfio_container_dma_map(VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+int vfio_container_dma_unmap(VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all);
+bool vfio_container_add_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp);
+void vfio_container_del_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section);
+int vfio_container_set_dirty_page_tracking(VFIOContainer *bcontainer,
+ bool start, Error **errp);
+bool vfio_container_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer);
+bool vfio_container_devices_dirty_tracking_is_supported(
+ const VFIOContainer *bcontainer);
+int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ uint64_t iova, uint64_t size,
+ hwaddr translated_addr, Error **errp);
+
+GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer);
+
+static inline uint64_t
+vfio_container_get_page_size_mask(const VFIOContainer *bcontainer)
+{
+ assert(bcontainer);
+ return bcontainer->pgsizes;
+}
+
+#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
+#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
+#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
+
+struct VFIOIOMMUClass {
+ ObjectClass parent_class;
+
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainer
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
+ bool (*setup)(VFIOContainer *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainer
+ */
+ void (*listener_begin)(VFIOContainer *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainer
+ */
+ void (*listener_commit)(VFIOContainer *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainer to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
+ int (*dma_map)(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+ /**
+ * @dma_map_file
+ *
+ * Map a file range for the container.
+ *
+ * @bcontainer: #VFIOContainer to use for map
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @fd: descriptor of the file to map
+ * @start: starting file offset of the range to map
+ * @readonly: map read only if true
+ */
+ int (*dma_map_file)(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ int fd, unsigned long start, bool readonly);
+ /**
+ * @dma_unmap
+ *
+ * Unmap an address range from the container.
+ *
+ * @bcontainer: #VFIOContainer to use for unmap
+ * @iova: start address to unmap
+ * @size: size of the range to unmap
+ * @iotlb: The IOMMU TLB mapping entry (or NULL)
+ * @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
+ int (*dma_unmap)(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
+ bool (*attach_device)(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp);
+
+ /**
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
+ void (*detach_device)(VFIODevice *vbasedev);
+
+ /* migration feature */
+
+ /**
+ * @set_dirty_page_tracking
+ *
+ * Start or stop dirty pages tracking on VFIO container
+ *
+ * @bcontainer: #VFIOContainer on which to de/activate dirty
+ * page tracking
+ * @start: indicates whether to start or stop dirty pages tracking
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error.
+ */
+ int (*set_dirty_page_tracking)(const VFIOContainer *bcontainer,
+ bool start, Error **errp);
+ /**
+ * @query_dirty_bitmap
+ *
+ * Get bitmap of dirty pages from container
+ *
+ * @bcontainer: #VFIOContainer from which to get dirty pages
+ * @vbmap: #VFIOBitmap internal bitmap structure
+ * @iova: iova base address
+ * @size: size of iova range
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error.
+ */
+ int (*query_dirty_bitmap)(const VFIOContainer *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
+ /* PCI specific */
+ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
+
+ /* SPAPR specific */
+ bool (*add_window)(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp);
+ void (*del_window)(VFIOContainer *bcontainer,
+ MemoryRegionSection *section);
+ void (*release)(VFIOContainer *bcontainer);
+};
-typedef struct VFIOContainer {
- VFIOContainerBase bcontainer;
- int fd; /* /dev/vfio/vfio, empowered by the attached groups */
- unsigned iommu_type;
- QLIST_HEAD(, VFIOGroup) group_list;
-} VFIOContainer;
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainer *bcontainer, MemoryRegionSection *section);
-OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
+void vfio_container_region_add(VFIOContainer *bcontainer,
+ MemoryRegionSection *section, bool cpr_remap);
-#endif /* HW_VFIO_CONTAINER_H */
+#endif /* HW_VFIO_VFIO_CONTAINER_H */
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000..81f4e24
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,88 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+#include "migration/misc.h"
+#include "system/memory.h"
+
+struct VFIOLegacyContainer;
+struct VFIOContainer;
+struct VFIOGroup;
+struct VFIODevice;
+struct VFIOPCIDevice;
+struct VFIOIOMMUFDContainer;
+struct IOMMUFDBackend;
+
+typedef int (*dma_map_fn)(const struct VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size, void *vaddr,
+ bool readonly, MemoryRegion *mr);
+
+typedef struct VFIOContainerCPR {
+ Error *blocker;
+ bool vaddr_unmapped;
+ NotifierWithReturn transfer_notifier;
+ MemoryListener remap_listener;
+} VFIOContainerCPR;
+
+typedef struct VFIODeviceCPR {
+ Error *mdev_blocker;
+ Error *id_blocker;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+} VFIODeviceCPR;
+
+typedef struct VFIOPCICPR {
+ NotifierWithReturn transfer_notifier;
+} VFIOPCICPR;
+
+bool vfio_legacy_cpr_register_container(struct VFIOLegacyContainer *container,
+ Error **errp);
+void vfio_legacy_cpr_unregister_container(
+ struct VFIOLegacyContainer *container);
+
+int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
+ Error **errp);
+
+bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container,
+ Error **errp);
+void vfio_iommufd_cpr_unregister_container(
+ struct VFIOIOMMUFDContainer *container);
+bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp);
+void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be);
+void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev);
+void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev);
+void vfio_cpr_load_device(struct VFIODevice *vbasedev);
+
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOLegacyContainer *container,
+ struct VFIOGroup *group, int fd);
+
+void vfio_cpr_giommu_remap(struct VFIOContainer *bcontainer,
+ MemoryRegionSection *section);
+
+bool vfio_cpr_ram_discard_register_listener(
+ struct VFIOContainer *bcontainer, MemoryRegionSection *section);
+
+void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr, int fd);
+int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr);
+void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr);
+
+extern const VMStateDescription vfio_cpr_pci_vmstate;
+extern const VMStateDescription vmstate_cpr_vfio_devices;
+
+void vfio_cpr_add_kvm_notifier(void);
+void vfio_cpr_pci_register_device(struct VFIOPCIDevice *vdev);
+void vfio_cpr_pci_unregister_device(struct VFIOPCIDevice *vdev);
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 8bcb3c1..7e9aed6 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -18,8 +18,8 @@
* Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
*/
-#ifndef HW_VFIO_VFIO_COMMON_H
-#define HW_VFIO_VFIO_COMMON_H
+#ifndef HW_VFIO_VFIO_DEVICE_H
+#define HW_VFIO_VFIO_DEVICE_H
#include "system/memory.h"
#include "qemu/queue.h"
@@ -27,7 +27,8 @@
#include <linux/vfio.h>
#endif
#include "system/system.h"
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/host_iommu_device.h"
#include "system/iommufd.h"
@@ -35,7 +36,7 @@
enum {
VFIO_DEVICE_TYPE_PCI = 0,
- VFIO_DEVICE_TYPE_PLATFORM = 1,
+ VFIO_DEVICE_TYPE_UNUSED = 1,
VFIO_DEVICE_TYPE_CCW = 2,
VFIO_DEVICE_TYPE_AP = 3,
};
@@ -46,13 +47,14 @@ typedef struct VFIOMigration VFIOMigration;
typedef struct IOMMUFDBackend IOMMUFDBackend;
typedef struct VFIOIOASHwpt VFIOIOASHwpt;
+typedef struct VFIOUserProxy VFIOUserProxy;
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
QLIST_ENTRY(VFIODevice) container_next;
QLIST_ENTRY(VFIODevice) global_next;
struct VFIOGroup *group;
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
char *sysfsdev;
char *name;
DeviceState *dev;
@@ -65,7 +67,10 @@ typedef struct VFIODevice {
bool ram_block_discard_allowed;
OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer;
+ OnOffAuto migration_load_config_after_iter;
+ uint64_t migration_max_queued_buffers_size;
bool migration_events;
+ bool use_region_fds;
VFIODeviceOps *ops;
VFIODeviceIOOps *io_ops;
unsigned int num_irqs;
@@ -84,6 +89,9 @@ typedef struct VFIODevice {
VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next;
struct vfio_region_info **reginfo;
+ int *region_fds;
+ VFIODeviceCPR cpr;
+ VFIOUserProxy *proxy;
} VFIODevice;
struct VFIODeviceOps {
@@ -164,36 +172,64 @@ struct VFIODeviceIOOps {
* @device_feature
*
* Fill in feature info for the given device.
+ *
+ * @vdev: #VFIODevice to use
+ * @feat: feature information to fill in
+ *
+ * Returns 0 on success or -errno.
*/
- int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *);
+ int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat);
/**
* @get_region_info
*
- * Fill in @info with information on the region given by @info->index.
+ * Get the information for a given region on the device.
+ *
+ * @vdev: #VFIODevice to use
+ * @info: set @info->index to the region index to look up; the rest of the
+ * struct will be filled in on success
+ * @fd: pointer to the fd for the region; will be -1 if not found
+ *
+ * Returns 0 on success or -errno.
*/
int (*get_region_info)(VFIODevice *vdev,
- struct vfio_region_info *info);
+ struct vfio_region_info *info, int *fd);
/**
* @get_irq_info
*
- * Fill in @irq with information on the IRQ given by @info->index.
+ * @vdev: #VFIODevice to use
+ * @irq: set @irq->index to the IRQ index to look up; the rest of the struct
+ * will be filled in on success
+ *
+ * Returns 0 on success or -errno.
*/
int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
/**
* @set_irqs
*
- * Configure IRQs as defined by @irqs.
+ * Configure IRQs.
+ *
+ * @vdev: #VFIODevice to use
+ * @irqs: IRQ configuration as defined by VFIO docs.
+ *
+ * Returns 0 on success or -errno.
*/
int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
/**
* @region_read
*
- * Read @size bytes from the region @nr at offset @off into the buffer
- * @data.
+ * Read part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to read
+ * @data: buffer to read into
+ *
+ * Returns number of bytes read on success or -errno.
*/
int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
void *data);
@@ -201,14 +237,22 @@ struct VFIODeviceIOOps {
/**
* @region_write
*
- * Write @size bytes to the region @nr at offset @off from the buffer
- * @data.
+ * Write part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to write
+ * @data: buffer to write from
+ * @post: true if this is a posted write
+ *
+ * Returns number of bytes written on success or -errno.
*/
int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
- void *data);
+ void *data, bool post);
};
-void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
+void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
struct vfio_device_info *info);
void vfio_device_unprepare(VFIODevice *vbasedev);
@@ -217,6 +261,18 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info);
+
+/**
+ * Return the fd for mapping this region. This is either the device's fd (for
+ * e.g. kernel vfio), or a per-region fd (for vfio-user).
+ *
+ * @vbasedev: #VFIODevice to use
+ * @index: region index
+ *
+ * Returns the fd.
+ */
+int vfio_device_get_region_fd(VFIODevice *vbasedev, int index);
+
bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
@@ -225,8 +281,11 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
/* Returns 0 on success, or a negative errno. */
bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_free_name(VFIODevice *vbasedev);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
DeviceState *dev, bool ram_discard);
int vfio_device_get_aw_bits(VFIODevice *vdev);
-#endif /* HW_VFIO_VFIO_COMMON_H */
+
+void vfio_kvm_device_close(void);
+#endif /* HW_VFIO_VFIO_DEVICE_H */
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
deleted file mode 100644
index 256d850..0000000
--- a/include/hw/vfio/vfio-platform.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * vfio based device assignment support - platform devices
- *
- * Copyright Linaro Limited, 2014
- *
- * Authors:
- * Kim Phillips <kim.phillips@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Based on vfio based PCI device assignment support:
- * Copyright Red Hat, Inc. 2012
- */
-
-#ifndef HW_VFIO_VFIO_PLATFORM_H
-#define HW_VFIO_VFIO_PLATFORM_H
-
-#include "hw/sysbus.h"
-#include "hw/vfio/vfio-device.h"
-#include "qemu/event_notifier.h"
-#include "qemu/queue.h"
-#include "qom/object.h"
-
-#define TYPE_VFIO_PLATFORM "vfio-platform"
-
-enum {
- VFIO_IRQ_INACTIVE = 0,
- VFIO_IRQ_PENDING = 1,
- VFIO_IRQ_ACTIVE = 2,
- /* VFIO_IRQ_ACTIVE_AND_PENDING cannot happen with VFIO */
-};
-
-typedef struct VFIOINTp {
- QLIST_ENTRY(VFIOINTp) next; /* entry for IRQ list */
- QSIMPLEQ_ENTRY(VFIOINTp) pqnext; /* entry for pending IRQ queue */
- EventNotifier *interrupt; /* eventfd triggered on interrupt */
- EventNotifier *unmask; /* eventfd for unmask on QEMU bypass */
- qemu_irq qemuirq;
- struct VFIOPlatformDevice *vdev; /* back pointer to device */
- int state; /* inactive, pending, active */
- uint8_t pin; /* index */
- uint32_t flags; /* IRQ info flags */
- bool kvm_accel; /* set when QEMU bypass through KVM enabled */
-} VFIOINTp;
-
-/* function type for user side eventfd handler */
-typedef void (*eventfd_user_side_handler_t)(VFIOINTp *intp);
-
-typedef struct VFIORegion VFIORegion;
-
-struct VFIOPlatformDevice {
- SysBusDevice sbdev;
- VFIODevice vbasedev; /* not a QOM object */
- VFIORegion **regions;
- QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQs */
- /* queue of pending IRQs */
- QSIMPLEQ_HEAD(, VFIOINTp) pending_intp_queue;
- char *compat; /* DT compatible values, separated by NUL */
- unsigned int num_compat; /* number of compatible values */
- uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
- QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */
- QemuMutex intp_mutex; /* protect the intp_list IRQ state */
- bool irqfd_allowed; /* debug option to force irqfd on/off */
-};
-typedef struct VFIOPlatformDevice VFIOPlatformDevice;
-
-struct VFIOPlatformDeviceClass {
- /*< private >*/
- SysBusDeviceClass parent_class;
- /*< public >*/
-};
-typedef struct VFIOPlatformDeviceClass VFIOPlatformDeviceClass;
-
-DECLARE_OBJ_CHECKERS(VFIOPlatformDevice, VFIOPlatformDeviceClass,
- VFIO_PLATFORM_DEVICE, TYPE_VFIO_PLATFORM)
-
-#endif /* HW_VFIO_VFIO_PLATFORM_H */
diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h
deleted file mode 100644
index cbffb26..0000000
--- a/include/hw/vfio/vfio-region.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * VFIO region
- *
- * Copyright Red Hat, Inc. 2025
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_REGION_H
-#define HW_VFIO_REGION_H
-
-#include "system/memory.h"
-
-typedef struct VFIOMmap {
- MemoryRegion mem;
- void *mmap;
- off_t offset;
- size_t size;
-} VFIOMmap;
-
-typedef struct VFIODevice VFIODevice;
-
-typedef struct VFIORegion {
- struct VFIODevice *vbasedev;
- off_t fd_offset; /* offset of region within device fd */
- MemoryRegion *mem; /* slow, read/write access */
- size_t size;
- uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
- uint32_t nr_mmaps;
- VFIOMmap *mmaps;
- uint8_t nr; /* cache the region number for debug */
-} VFIORegion;
-
-
-void vfio_region_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size);
-uint64_t vfio_region_read(void *opaque,
- hwaddr addr, unsigned size);
-int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
- int index, const char *name);
-int vfio_region_mmap(VFIORegion *region);
-void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
-void vfio_region_unmap(VFIORegion *region);
-void vfio_region_exit(VFIORegion *region);
-void vfio_region_finalize(VFIORegion *region);
-
-#endif /* HW_VFIO_REGION_H */