diff options
Diffstat (limited to 'hw/vfio/iommufd.c')
-rw-r--r-- | hw/vfio/iommufd.c | 310 |
1 files changed, 267 insertions, 43 deletions
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 7b5f87a..d3efef7 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -15,19 +15,27 @@ #include <linux/vfio.h> #include <linux/iommufd.h> -#include "hw/vfio/vfio-common.h" +#include "hw/vfio/vfio-device.h" #include "qemu/error-report.h" #include "trace.h" #include "qapi/error.h" -#include "sysemu/iommufd.h" +#include "system/iommufd.h" #include "hw/qdev-core.h" -#include "sysemu/reset.h" +#include "hw/vfio/vfio-cpr.h" +#include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" #include "pci.h" +#include "vfio-iommufd.h" +#include "vfio-helpers.h" +#include "vfio-listener.h" + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ + TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -39,11 +47,28 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + /* unmap in halves */ + if (unmap_all) { + Int128 llsize = int128_rshift(int128_2_64(), 1); + int ret; + + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + 0, int128_get64(llsize)); + + if (ret == 0) { + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + int128_get64(llsize), + int128_get64(llsize)); + } + + return ret; + } + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ return iommufd_backend_unmap_dma(container->be, container->ioas_id, iova, size); @@ -110,6 +135,68 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) iommufd_backend_disconnect(vbasedev->iommufd); } +static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt) +{ + return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; +} + +static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, + bool start, Error **errp) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + VFIOIOASHwpt *hwpt; + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + + if (!iommufd_backend_set_dirty_tracking(container->be, + hwpt->hwpt_id, start, errp)) { + goto err; + } + } + + return 0; + +err: + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + iommufd_backend_set_dirty_tracking(container->be, + hwpt->hwpt_id, !start, NULL); + } + return -EINVAL; +} + +static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) +{ + VFIOIOMMUFDContainer *container = container_of(bcontainer, + VFIOIOMMUFDContainer, + bcontainer); + unsigned long page_size = qemu_real_host_page_size(); + VFIOIOASHwpt *hwpt; + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + if (!iommufd_hwpt_dirty_tracking(hwpt)) { + continue; + } + + if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id, + iova, size, page_size, + (uint64_t *)vbmap->bitmap, + errp)) { + return -EINVAL; + } + } + + return 0; +} + static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) { ERRP_GUARD(); @@ -172,7 +259,7 @@ out: return ret; } -static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, +static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, Error **errp) { int iommufd = vbasedev->iommufd->fd; @@ -187,12 +274,12 @@ static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, error_setg_errno(errp, errno, "[iommufd=%d] error attach %s (%d) to id=%d", iommufd, vbasedev->name, vbasedev->fd, id); - return false; + return -errno; } trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, vbasedev->fd, id); - return true; + return 0; } static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) @@ -212,11 +299,117 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) return true; } +static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, + VFIOIOMMUFDContainer *container, + Error **errp) +{ + ERRP_GUARD(); + IOMMUFDBackend *iommufd = vbasedev->iommufd; + uint32_t type, flags = 0; + uint64_t hw_caps; + VFIOIOASHwpt *hwpt; + uint32_t hwpt_id; + int ret; + + /* Try to find a domain */ + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (ret) { + /* -EINVAL means the domain is incompatible with the device. */ + if (ret == -EINVAL) { + /* + * It is an expected failure and it just means we will try + * another domain, or create one if no existing compatible + * domain is found. Hence why the error is discarded below. + */ + error_free(*errp); + *errp = NULL; + continue; + } + + return false; + } else { + vbasedev->hwpt = hwpt; + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + return true; + } + } + + /* + * This is quite early and VFIO Migration state isn't yet fully + * initialized, thus rely only on IOMMU hardware capabilities as to + * whether IOMMU dirty tracking is going to be requested. Later + * vfio_migration_realize() may decide to use VF dirty tracking + * instead. + */ + if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid, + &type, NULL, 0, &hw_caps, errp)) { + return false; + } + + if (hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) { + flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + } + + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, + container->ioas_id, flags, + IOMMU_HWPT_DATA_NONE, 0, NULL, + &hwpt_id, errp)) { + return false; + } + + hwpt = g_malloc0(sizeof(*hwpt)); + hwpt->hwpt_id = hwpt_id; + hwpt->hwpt_flags = flags; + QLIST_INIT(&hwpt->device_list); + + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt->hwpt_id); + g_free(hwpt); + return false; + } + + vbasedev->hwpt = hwpt; + vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); + QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); + QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); + container->bcontainer.dirty_pages_supported |= + vbasedev->iommu_dirty_tracking; + if (container->bcontainer.dirty_pages_supported && + !vbasedev->iommu_dirty_tracking) { + warn_report("IOMMU instance for device %s doesn't support dirty tracking", + vbasedev->name); + } + return true; +} + +static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev, + VFIOIOMMUFDContainer *container) +{ + VFIOIOASHwpt *hwpt = vbasedev->hwpt; + + QLIST_REMOVE(vbasedev, hwpt_next); + vbasedev->hwpt = NULL; + + if (QLIST_EMPTY(&hwpt->device_list)) { + QLIST_REMOVE(hwpt, next); + iommufd_backend_free_id(container->be, hwpt->hwpt_id); + g_free(hwpt); + } +} + static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, VFIOIOMMUFDContainer *container, Error **errp) { - return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* mdevs aren't physical devices and will fail with auto domains */ + if (!vbasedev->mdev) { + return iommufd_cdev_autodomains_get(vbasedev, container, errp); + } + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -227,6 +420,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev, if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { error_report_err(err); } + + if (vbasedev->hwpt) { + iommufd_cdev_autodomains_put(vbasedev, container); + } + } static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) @@ -236,7 +434,8 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - memory_listener_unregister(&bcontainer->listener); + vfio_cpr_unregister_container(bcontainer); + vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); } @@ -318,7 +517,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_connect_bind; } - space = vfio_get_address_space(as); + space = vfio_address_space_get(as); /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { @@ -336,8 +535,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } else { ret = iommufd_cdev_ram_block_discard_disable(true); if (ret) { - error_setg(errp, - "Cannot set discarding of RAM broken (%d)", ret); + error_setg_errno(errp, -ret, + "Cannot set discarding of RAM broken"); goto err_discard_disable; } goto found_container; @@ -354,6 +553,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; + QLIST_INIT(&container->hwpt_list); bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -364,6 +564,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, ret = iommufd_cdev_ram_block_discard_disable(true); if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); goto err_discard_disable; } @@ -375,12 +576,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, bcontainer->pgsizes = qemu_real_host_page_size(); } - bcontainer->listener = vfio_memory_listener; - memory_listener_register(&bcontainer->listener, bcontainer->space->as); + if (!vfio_listener_register(bcontainer, errp)) { + goto err_listener_register; + } - if (bcontainer->error) { - error_propagate_prepend(errp, bcontainer->error, - "memory listener initialization failed: "); + if (!vfio_cpr_register_container(bcontainer, errp)) { goto err_listener_register; } @@ -393,7 +593,12 @@ found_container: goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + /* + * Do not move this code before attachment! The nested IOMMU support + * needs device and hwpt id which are generated only after attachment. + */ + if (!vfio_device_hiod_create_and_realize(vbasedev, + TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) { goto err_listener_register; } @@ -405,14 +610,7 @@ found_container: iommufd_cdev_ram_block_discard_disable(false); } - vbasedev->group = 0; - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + vfio_device_prepare(vbasedev, bcontainer, &dev_info); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -425,7 +623,7 @@ err_discard_disable: err_attach_container: iommufd_cdev_container_destroy(container); err_alloc_ioas: - vfio_put_address_space(space); + vfio_address_space_put(space); iommufd_cdev_unbind_and_disconnect(vbasedev); err_connect_bind: close(vbasedev->fd); @@ -439,18 +637,16 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; + vfio_device_unprepare(vbasedev); if (!vbasedev->ram_block_discard_allowed) { iommufd_cdev_ram_block_discard_disable(false); } - vfio_cpr_unregister_container(bcontainer); + object_unref(vbasedev->hiod); iommufd_cdev_detach_container(vbasedev, container); iommufd_cdev_container_destroy(container); - vfio_put_address_space(space); + vfio_address_space_put(space); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); @@ -606,39 +802,63 @@ out_single: return ret; } -static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) +static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); - vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; - vioc->dma_map = iommufd_cdev_map; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking; + vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; +static bool +host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); +} + +static bool +host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); +} + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, Error **errp) { VFIODevice *vdev = opaque; + HostIOMMUDeviceIOMMUFD *idev; HostIOMMUDeviceCaps *caps = &hiod->caps; + VendorCaps *vendor_caps = &caps->vendor_caps; enum iommu_hw_info_type type; - union { - struct iommu_hw_info_vtd vtd; - } data; + uint64_t hw_caps; hiod->agent = opaque; - if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), errp)) { + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, + vendor_caps, sizeof(*vendor_caps), + &hw_caps, errp)) { return false; } hiod->name = g_strdup(vdev->name); caps->type = type; - caps->aw_bits = vfio_device_get_aw_bits(vdev); + caps->hw_caps = hw_caps; + + idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + idev->iommufd = vdev->iommufd; + idev->devid = vdev->devid; + idev->hwpt_id = vdev->hwpt->hwpt_id; return true; } @@ -662,13 +882,17 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod) } -static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) +static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data) { HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask; + + idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; + idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; }; static const TypeInfo types[] = { |