diff options
Diffstat (limited to 'hw/vfio/iommufd.c')
-rw-r--r-- | hw/vfio/iommufd.c | 211 |
1 files changed, 154 insertions, 57 deletions
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 42c8412..48c590b 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -15,19 +15,28 @@ #include <linux/vfio.h> #include <linux/iommufd.h> -#include "hw/vfio/vfio-common.h" +#include "hw/vfio/vfio-device.h" #include "qemu/error-report.h" #include "trace.h" #include "qapi/error.h" #include "system/iommufd.h" #include "hw/qdev-core.h" +#include "hw/vfio/vfio-cpr.h" #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/cpr.h" #include "pci.h" +#include "vfio-iommufd.h" +#include "vfio-helpers.h" +#include "vfio-listener.h" + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ + TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -37,13 +46,42 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, iova, size, vaddr, readonly); } +static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_file_dma(container->be, + container->ioas_id, + iova, size, fd, start, readonly); +} + static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + /* unmap in halves */ + if (unmap_all) { + Int128 llsize = int128_rshift(int128_2_64(), 1); + int ret; + + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + 0, int128_get64(llsize)); + + if (ret == 0) { + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + int128_get64(llsize), + int128_get64(llsize)); + } + + return ret; + } + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ return iommufd_backend_unmap_dma(container->be, container->ioas_id, iova, size); @@ -84,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) goto err_kvm_device_add; } + if (cpr_is_incoming()) { + goto skip_bind; + } + /* Bind device to iommufd */ bind.iommufd = iommufd->fd; if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { @@ -95,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) vbasedev->devid = bind.out_devid; trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, vbasedev->fd, vbasedev->devid); + +skip_bind: return true; err_bind: iommufd_cdev_kvm_device_del(vbasedev); @@ -280,14 +324,22 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, { ERRP_GUARD(); IOMMUFDBackend *iommufd = vbasedev->iommufd; - uint32_t flags = 0; + uint32_t type, flags = 0; + uint64_t hw_caps; VFIOIOASHwpt *hwpt; uint32_t hwpt_id; int ret; /* Try to find a domain */ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (!cpr_is_incoming()) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) { + ret = 0; + } else { + continue; + } + if (ret) { /* -EINVAL means the domain is incompatible with the device. */ if (ret == -EINVAL) { @@ -304,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } else { vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); return true; @@ -317,10 +370,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, * vfio_migration_realize() may decide to use VF dirty tracking * instead. */ - if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { + if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid, + &type, NULL, 0, &hw_caps, errp)) { + return false; + } + + if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } + if (cpr_is_incoming()) { + hwpt_id = vbasedev->cpr.hwpt_id; + goto skip_alloc; + } + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, container->ioas_id, flags, IOMMU_HWPT_DATA_NONE, 0, NULL, @@ -328,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt_id); + return false; + } + +skip_alloc: hwpt = g_malloc0(sizeof(*hwpt)); hwpt->hwpt_id = hwpt_id; hwpt->hwpt_flags = flags; QLIST_INIT(&hwpt->device_list); - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); - if (ret) { - iommufd_backend_free_id(container->be, hwpt->hwpt_id); - g_free(hwpt); - return false; - } - vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); @@ -378,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, return iommufd_cdev_autodomains_get(vbasedev, container, errp); } - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* If CPR, we are already attached to ioas_id. */ + return cpr_is_incoming() || + !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -403,7 +469,8 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - memory_listener_unregister(&bcontainer->listener); + vfio_iommufd_cpr_unregister_container(container); + vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); } @@ -466,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, VFIOAddressSpace *space; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, devfd; + bool res; uint32_t ioas_id; Error *err = NULL; const VFIOIOMMUClass *iommufd_vioc = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + vfio_cpr_load_device(vbasedev); + if (vbasedev->fd < 0) { devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); if (devfd < 0) { @@ -485,18 +555,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_connect_bind; } - space = vfio_get_address_space(as); - - /* - * The HostIOMMUDevice data from legacy backend is static and doesn't need - * any information from the (type1-iommu) backend to be initialized. In - * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd - * FD to be connected and having a devid to be able to successfully call - * iommufd_backend_get_device_info(). - */ - if (!vfio_device_hiod_realize(vbasedev, errp)) { - goto err_alloc_ioas; - } + space = vfio_address_space_get(as); /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { @@ -505,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, vbasedev->iommufd != container->be) { continue; } - if (!iommufd_cdev_attach_container(vbasedev, container, &err)) { + + if (!cpr_is_incoming()) { + res = iommufd_cdev_attach_container(vbasedev, container, &err); + } else if (vbasedev->cpr.ioas_id == container->ioas_id) { + res = true; + } else { + continue; + } + + if (!res) { const char *msg = error_get_pretty(err); trace_iommufd_cdev_fail_attach_existing_container(msg); @@ -522,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } } + if (cpr_is_incoming()) { + ioas_id = vbasedev->cpr.ioas_id; + goto skip_ioas_alloc; + } + /* Need to allocate a new dedicated container */ if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) { goto err_alloc_ioas; @@ -529,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); +skip_ioas_alloc: container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; QLIST_INIT(&container->hwpt_list); + vbasedev->cpr.ioas_id = ioas_id; bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -555,12 +630,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, bcontainer->pgsizes = qemu_real_host_page_size(); } - bcontainer->listener = vfio_memory_listener; - memory_listener_register(&bcontainer->listener, bcontainer->space->as); + if (!vfio_listener_register(bcontainer, errp)) { + goto err_listener_register; + } - if (bcontainer->error) { - error_propagate_prepend(errp, bcontainer->error, - "memory listener initialization failed: "); + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -573,7 +647,12 @@ found_container: goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + /* + * Do not move this code before attachment! The nested IOMMU support + * needs device and hwpt id which are generated only after attachment. + */ + if (!vfio_device_hiod_create_and_realize(vbasedev, + TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) { goto err_listener_register; } @@ -585,14 +664,8 @@ found_container: iommufd_cdev_ram_block_discard_disable(false); } - vbasedev->group = 0; - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -605,7 +678,7 @@ err_discard_disable: err_attach_container: iommufd_cdev_container_destroy(container); err_alloc_ioas: - vfio_put_address_space(space); + vfio_address_space_put(space); iommufd_cdev_unbind_and_disconnect(vbasedev); err_connect_bind: close(vbasedev->fd); @@ -619,19 +692,18 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; + vfio_device_unprepare(vbasedev); if (!vbasedev->ram_block_discard_allowed) { iommufd_cdev_ram_block_discard_disable(false); } - vfio_cpr_unregister_container(bcontainer); + object_unref(vbasedev->hiod); iommufd_cdev_detach_container(vbasedev, container); iommufd_cdev_container_destroy(container); - vfio_put_address_space(space); + vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } @@ -786,13 +858,12 @@ out_single: return ret; } -static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) +static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); - vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; - vioc->dma_map = iommufd_cdev_map; + vioc->dma_map_file = iommufd_cdev_map_file; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; @@ -801,21 +872,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; +static bool +host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); +} + +static bool +host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); +} + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, Error **errp) { VFIODevice *vdev = opaque; + HostIOMMUDeviceIOMMUFD *idev; HostIOMMUDeviceCaps *caps = &hiod->caps; + VendorCaps *vendor_caps = &caps->vendor_caps; enum iommu_hw_info_type type; - union { - struct iommu_hw_info_vtd vtd; - } data; uint64_t hw_caps; hiod->agent = opaque; - if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, + vendor_caps, sizeof(*vendor_caps), &hw_caps, errp)) { return false; } @@ -824,6 +912,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, caps->type = type; caps->hw_caps = hw_caps; + idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + idev->iommufd = vdev->iommufd; + idev->devid = vdev->devid; + idev->hwpt_id = vdev->hwpt->hwpt_id; + return true; } @@ -846,13 +939,17 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod) } -static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) +static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data) { HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask; + + idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; + idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; }; static const TypeInfo types[] = { |