diff options
Diffstat (limited to 'include/hw/vfio')
-rw-r--r-- | include/hw/vfio/vfio-container-base.h | 87 | ||||
-rw-r--r-- | include/hw/vfio/vfio-container.h | 2 | ||||
-rw-r--r-- | include/hw/vfio/vfio-cpr.h | 57 | ||||
-rw-r--r-- | include/hw/vfio/vfio-device.h | 74 | ||||
-rw-r--r-- | include/hw/vfio/vfio-region.h | 1 |
5 files changed, 206 insertions, 15 deletions
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3d392b0..3cd86ec 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); @@ -109,19 +109,64 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer) #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user" OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { ObjectClass parent_class; - /* basic feature */ + /** + * @setup + * + * Perform basic setup of the container, including configuring IOMMU + * capabilities, IOVA ranges, supported page sizes, etc. + * + * @bcontainer: #VFIOContainerBase + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + + /** + * @listener_begin + * + * Called at the beginning of an address space update transaction. + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_begin)(VFIOContainerBase *bcontainer); + + /** + * @listener_commit + * + * Called at the end of an address space update transaction, + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_commit)(VFIOContainerBase *bcontainer); + + /** + * @dma_map + * + * Map an address range into the container. Note that the memory region is + * referenced within an RCU read lock region across this call. + * + * @bcontainer: #VFIOContainerBase to use + * @iova: start address to map + * @size: size of the range to map + * @vaddr: process virtual address of mapping + * @readonly: true if mapping should be readonly + * @mr: the memory region for this mapping + * + * Returns 0 to indicate success and -errno otherwise. + */ int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); /** * @dma_unmap * @@ -132,12 +177,38 @@ struct VFIOIOMMUClass { * @size: size of the range to unmap * @iotlb: The IOMMU TLB mapping entry (or NULL) * @unmap_all: if set, unmap the entire address space + * + * Returns 0 to indicate success and -errno otherwise. */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); + + + /** + * @attach_device + * + * Associate the given device with a container and do some related + * initialization of the device context. + * + * @name: name of the device + * @vbasedev: the device + * @as: address space to use + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); + + /* + * @detach_device + * + * Detach the given device from its container and clean up any necessary + * state. + * + * @vbasedev: the device to disassociate + */ void (*detach_device)(VFIODevice *vbasedev); /* migration feature */ @@ -152,7 +223,7 @@ struct VFIOIOMMUClass { * @start: indicates whether to start or stop dirty pages tracking * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, bool start, Error **errp); @@ -167,7 +238,7 @@ struct VFIOIOMMUClass { * @size: size of iova range * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); @@ -183,4 +254,10 @@ struct VFIOIOMMUClass { void (*release)(VFIOContainerBase *bcontainer); }; +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +void vfio_container_region_add(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, bool cpr_remap); + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h index afc498d..21e5807 100644 --- a/include/hw/vfio/vfio-container.h +++ b/include/hw/vfio/vfio-container.h @@ -10,6 +10,7 @@ #define HW_VFIO_CONTAINER_H #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" typedef struct VFIOContainer VFIOContainer; typedef struct VFIODevice VFIODevice; @@ -29,6 +30,7 @@ typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ unsigned iommu_type; QLIST_HEAD(, VFIOGroup) group_list; + VFIOContainerCPR cpr; } VFIOContainer; OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h new file mode 100644 index 0000000..8bf85b9 --- /dev/null +++ b/include/hw/vfio/vfio-cpr.h @@ -0,0 +1,57 @@ +/* + * VFIO CPR + * + * Copyright (c) 2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_VFIO_CPR_H +#define HW_VFIO_VFIO_CPR_H + +#include "migration/misc.h" +#include "system/memory.h" + +struct VFIOContainer; +struct VFIOContainerBase; +struct VFIOGroup; + +typedef struct VFIOContainerCPR { + Error *blocker; + bool vaddr_unmapped; + NotifierWithReturn transfer_notifier; + MemoryListener remap_listener; + int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly, MemoryRegion *mr); +} VFIOContainerCPR; + +typedef struct VFIODeviceCPR { + Error *mdev_blocker; +} VFIODeviceCPR; + +bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, + Error **errp); +void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); + +int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, + Error **errp); + +bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, + Error **errp); +void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); + +int vfio_cpr_group_get_device_fd(int d, const char *name); + +bool vfio_cpr_container_match(struct VFIOContainer *container, + struct VFIOGroup *group, int fd); + +void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + +bool vfio_cpr_ram_discard_register_listener( + struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +extern const VMStateDescription vfio_cpr_pci_vmstate; + +#endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index 8bcb3c1..c616652 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -28,6 +28,7 @@ #endif #include "system/system.h" #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" #include "system/host_iommu_device.h" #include "system/iommufd.h" @@ -46,6 +47,7 @@ typedef struct VFIOMigration VFIOMigration; typedef struct IOMMUFDBackend IOMMUFDBackend; typedef struct VFIOIOASHwpt VFIOIOASHwpt; +typedef struct VFIOUserProxy VFIOUserProxy; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; @@ -66,6 +68,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; OnOffAuto migration_multifd_transfer; bool migration_events; + bool use_region_fds; VFIODeviceOps *ops; VFIODeviceIOOps *io_ops; unsigned int num_irqs; @@ -84,6 +87,9 @@ typedef struct VFIODevice { VFIOIOASHwpt *hwpt; QLIST_ENTRY(VFIODevice) hwpt_next; struct vfio_region_info **reginfo; + int *region_fds; + VFIODeviceCPR cpr; + VFIOUserProxy *proxy; } VFIODevice; struct VFIODeviceOps { @@ -164,36 +170,64 @@ struct VFIODeviceIOOps { * @device_feature * * Fill in feature info for the given device. + * + * @vdev: #VFIODevice to use + * @feat: feature information to fill in + * + * Returns 0 on success or -errno. */ - int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat); /** * @get_region_info * - * Fill in @info with information on the region given by @info->index. + * Get the information for a given region on the device. + * + * @vdev: #VFIODevice to use + * @info: set @info->index to the region index to look up; the rest of the + * struct will be filled in on success + * @fd: pointer to the fd for the region; will be -1 if not found + * + * Returns 0 on success or -errno. */ int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); /** * @get_irq_info * - * Fill in @irq with information on the IRQ given by @info->index. + * @vdev: #VFIODevice to use + * @irq: set @irq->index to the IRQ index to look up; the rest of the struct + * will be filled in on success + * + * Returns 0 on success or -errno. */ int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); /** * @set_irqs * - * Configure IRQs as defined by @irqs. + * Configure IRQs. + * + * @vdev: #VFIODevice to use + * @irqs: IRQ configuration as defined by VFIO docs. + * + * Returns 0 on success or -errno. */ int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); /** * @region_read * - * Read @size bytes from the region @nr at offset @off into the buffer - * @data. + * Read part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to read + * @data: buffer to read into + * + * Returns number of bytes read on success or -errno. */ int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, void *data); @@ -201,11 +235,19 @@ struct VFIODeviceIOOps { /** * @region_write * - * Write @size bytes to the region @nr at offset @off from the buffer - * @data. + * Write part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to write + * @data: buffer to write from + * @post: true if this is a posted write + * + * Returns number of bytes write on success or -errno. */ int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, - void *data); + void *data, bool post); }; void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, @@ -217,6 +259,18 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info); int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, uint32_t subtype, struct vfio_region_info **info); + +/** + * Return the fd for mapping this region. This is either the device's fd (for + * e.g. kernel vfio), or a per-region fd (for vfio-user). + * + * @vbasedev: #VFIODevice to use + * @index: region index + * + * Returns the fd. + */ +int vfio_device_get_region_fd(VFIODevice *vbasedev, int index); + bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h index cbffb26..ede6e0c 100644 --- a/include/hw/vfio/vfio-region.h +++ b/include/hw/vfio/vfio-region.h @@ -29,6 +29,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + bool post_wr; /* writes can be posted */ } VFIORegion; |