aboutsummaryrefslogtreecommitdiff
path: root/include/hw/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'include/hw/vfio')
-rw-r--r--include/hw/vfio/vfio-container-base.h87
-rw-r--r--include/hw/vfio/vfio-container.h2
-rw-r--r--include/hw/vfio/vfio-cpr.h57
-rw-r--r--include/hw/vfio/vfio-device.h74
-rw-r--r--include/hw/vfio/vfio-region.h1
5 files changed, 206 insertions, 15 deletions
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3d392b0..3cd86ec 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
@@ -109,19 +109,64 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
ObjectClass parent_class;
- /* basic feature */
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainerBase
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_begin)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction,
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_commit)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainerBase to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
/**
* @dma_unmap
*
@@ -132,12 +177,38 @@ struct VFIOIOMMUClass {
* @size: size of the range to unmap
* @iotlb: The IOMMU TLB mapping entry (or NULL)
* @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
+
+ /*
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
void (*detach_device)(VFIODevice *vbasedev);
/* migration feature */
@@ -152,7 +223,7 @@ struct VFIOIOMMUClass {
* @start: indicates whether to start or stop dirty pages tracking
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
@@ -167,7 +238,7 @@ struct VFIOIOMMUClass {
* @size: size of iova range
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
@@ -183,4 +254,10 @@ struct VFIOIOMMUClass {
void (*release)(VFIOContainerBase *bcontainer);
};
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
+void vfio_container_region_add(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section, bool cpr_remap);
+
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h
index afc498d..21e5807 100644
--- a/include/hw/vfio/vfio-container.h
+++ b/include/hw/vfio/vfio-container.h
@@ -10,6 +10,7 @@
#define HW_VFIO_CONTAINER_H
#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-cpr.h"
typedef struct VFIOContainer VFIOContainer;
typedef struct VFIODevice VFIODevice;
@@ -29,6 +30,7 @@ typedef struct VFIOContainer {
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
unsigned iommu_type;
QLIST_HEAD(, VFIOGroup) group_list;
+ VFIOContainerCPR cpr;
} VFIOContainer;
OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000..8bf85b9
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,57 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+#include "migration/misc.h"
+#include "system/memory.h"
+
+struct VFIOContainer;
+struct VFIOContainerBase;
+struct VFIOGroup;
+
+typedef struct VFIOContainerCPR {
+ Error *blocker;
+ bool vaddr_unmapped;
+ NotifierWithReturn transfer_notifier;
+ MemoryListener remap_listener;
+ int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+} VFIOContainerCPR;
+
+typedef struct VFIODeviceCPR {
+ Error *mdev_blocker;
+} VFIODeviceCPR;
+
+bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
+ Error **errp);
+void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
+
+int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
+ Error **errp);
+
+bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
+ Error **errp);
+void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+ struct VFIOGroup *group, int fd);
+
+void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section);
+
+bool vfio_cpr_ram_discard_register_listener(
+ struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
+extern const VMStateDescription vfio_cpr_pci_vmstate;
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 8bcb3c1..c616652 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -28,6 +28,7 @@
#endif
#include "system/system.h"
#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/host_iommu_device.h"
#include "system/iommufd.h"
@@ -46,6 +47,7 @@ typedef struct VFIOMigration VFIOMigration;
typedef struct IOMMUFDBackend IOMMUFDBackend;
typedef struct VFIOIOASHwpt VFIOIOASHwpt;
+typedef struct VFIOUserProxy VFIOUserProxy;
typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) next;
@@ -66,6 +68,7 @@ typedef struct VFIODevice {
OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer;
bool migration_events;
+ bool use_region_fds;
VFIODeviceOps *ops;
VFIODeviceIOOps *io_ops;
unsigned int num_irqs;
@@ -84,6 +87,9 @@ typedef struct VFIODevice {
VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next;
struct vfio_region_info **reginfo;
+ int *region_fds;
+ VFIODeviceCPR cpr;
+ VFIOUserProxy *proxy;
} VFIODevice;
struct VFIODeviceOps {
@@ -164,36 +170,64 @@ struct VFIODeviceIOOps {
* @device_feature
*
* Fill in feature info for the given device.
+ *
+ * @vdev: #VFIODevice to use
+ * @feat: feature information to fill in
+ *
+ * Returns 0 on success or -errno.
*/
- int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *);
+ int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat);
/**
* @get_region_info
*
- * Fill in @info with information on the region given by @info->index.
+ * Get the information for a given region on the device.
+ *
+ * @vdev: #VFIODevice to use
+ * @info: set @info->index to the region index to look up; the rest of the
+ * struct will be filled in on success
+ * @fd: pointer to the fd for the region; will be -1 if not found
+ *
+ * Returns 0 on success or -errno.
*/
int (*get_region_info)(VFIODevice *vdev,
- struct vfio_region_info *info);
+ struct vfio_region_info *info, int *fd);
/**
* @get_irq_info
*
- * Fill in @irq with information on the IRQ given by @info->index.
+ * @vdev: #VFIODevice to use
+ * @irq: set @irq->index to the IRQ index to look up; the rest of the struct
+ * will be filled in on success
+ *
+ * Returns 0 on success or -errno.
*/
int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
/**
* @set_irqs
*
- * Configure IRQs as defined by @irqs.
+ * Configure IRQs.
+ *
+ * @vdev: #VFIODevice to use
+ * @irqs: IRQ configuration as defined by VFIO docs.
+ *
+ * Returns 0 on success or -errno.
*/
int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
/**
* @region_read
*
- * Read @size bytes from the region @nr at offset @off into the buffer
- * @data.
+ * Read part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to read
+ * @data: buffer to read into
+ *
+ * Returns number of bytes read on success or -errno.
*/
int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
void *data);
@@ -201,11 +235,19 @@ struct VFIODeviceIOOps {
/**
* @region_write
*
- * Write @size bytes to the region @nr at offset @off from the buffer
- * @data.
+ * Write part of a region.
+ *
+ * @vdev: #VFIODevice to use
+ * @nr: region index
+ * @off: offset within the region
+ * @size: size in bytes to write
+ * @data: buffer to write from
+ * @post: true if this is a posted write
+ *
+ * Returns number of bytes write on success or -errno.
*/
int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
- void *data);
+ void *data, bool post);
};
void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
@@ -217,6 +259,18 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info);
+
+/**
+ * Return the fd for mapping this region. This is either the device's fd (for
+ * e.g. kernel vfio), or a per-region fd (for vfio-user).
+ *
+ * @vbasedev: #VFIODevice to use
+ * @index: region index
+ *
+ * Returns the fd.
+ */
+int vfio_device_get_region_fd(VFIODevice *vbasedev, int index);
+
bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type);
int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h
index cbffb26..ede6e0c 100644
--- a/include/hw/vfio/vfio-region.h
+++ b/include/hw/vfio/vfio-region.h
@@ -29,6 +29,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
+ bool post_wr; /* writes can be posted */
} VFIORegion;