Diffstat (limited to 'hw')
-rw-r--r-- | hw/core/register.c | 1
-rw-r--r-- | hw/hyperv/hv-balloon.c | 12
-rw-r--r-- | hw/intc/arm_gicv3_cpuif.c | 10
-rw-r--r-- | hw/ppc/spapr_pci_vfio.c | 14
-rw-r--r-- | hw/s390x/s390-pci-vfio.c | 16
-rw-r--r-- | hw/sd/sdhci.c | 4
-rw-r--r-- | hw/usb/hcd-uhci.c | 10
-rw-r--r-- | hw/vfio-user/container.c | 18
-rw-r--r-- | hw/vfio-user/container.h | 4
-rw-r--r-- | hw/vfio-user/pci.c | 35
-rw-r--r-- | hw/vfio/container-base.c | 347
-rw-r--r-- | hw/vfio/container-legacy.c | 1277
-rw-r--r-- | hw/vfio/container.c | 1325
-rw-r--r-- | hw/vfio/cpr-iommufd.c | 4
-rw-r--r-- | hw/vfio/cpr-legacy.c | 43
-rw-r--r-- | hw/vfio/device.c | 4
-rw-r--r-- | hw/vfio/iommufd.c | 48
-rw-r--r-- | hw/vfio/listener.c | 74
-rw-r--r-- | hw/vfio/meson.build | 2
-rw-r--r-- | hw/vfio/pci-quirks.c | 9
-rw-r--r-- | hw/vfio/pci.c | 72
-rw-r--r-- | hw/vfio/pci.h | 2
-rw-r--r-- | hw/vfio/region.c | 3
-rw-r--r-- | hw/vfio/spapr.c | 52
-rw-r--r-- | hw/vfio/types.h | 4
-rw-r--r-- | hw/vfio/vfio-iommufd.h | 9
-rw-r--r-- | hw/vfio/vfio-listener.h | 4
-rw-r--r-- | hw/xen/xen_pt_msi.c | 11
28 files changed, 1687 insertions, 1727 deletions
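Most of the churn below is a rename of the VFIO container QOM hierarchy: the instance struct of the abstract TYPE_VFIO_IOMMU type becomes VFIOContainer (formerly VFIOContainerBase, container-base.c), while the type1/sPAPR backend moves to container-legacy.c as VFIOLegacyContainer (TYPE_VFIO_IOMMU_LEGACY, formerly VFIOContainer). Below is a minimal standalone sketch of the resulting layering, reconstructed from the TypeInfo entries in the diff; it uses stub types and an abbreviated field list rather than QEMU's real headers.

/* Standalone sketch (stub types, not QEMU's headers) of the container
 * layering this series establishes; field lists are abbreviated. */
#include <stdbool.h>

typedef struct Object { int unused; } Object;      /* QOM base, stubbed */
typedef struct VFIOAddressSpace VFIOAddressSpace;  /* opaque here */

typedef struct VFIOContainer {       /* was VFIOContainerBase */
    Object parent_obj;               /* instance struct of TYPE_VFIO_IOMMU */
    VFIOAddressSpace *space;
    bool dirty_pages_supported;
    /* ... */
} VFIOContainer;

typedef struct VFIOLegacyContainer { /* was VFIOContainer */
    VFIOContainer parent_obj;        /* TYPE_VFIO_IOMMU_LEGACY derives from
                                      * TYPE_VFIO_IOMMU */
    int fd;                          /* /dev/vfio/vfio container fd */
    unsigned iommu_type;             /* e.g. VFIO_TYPE1v2_IOMMU */
    /* ... group_list, etc. */
} VFIOLegacyContainer;

The vfio-user and iommufd backends subclass the same base in their own files, which is why the diff touches hw/vfio-user/ and hw/vfio/iommufd.c as well.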
diff --git a/hw/core/register.c b/hw/core/register.c index 8f63d9f..3340df7 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -314,7 +314,6 @@ RegisterInfoArray *register_init_block64(DeviceState *owner, void register_finalize_block(RegisterInfoArray *r_array) { - object_unparent(OBJECT(&r_array->mem)); g_free(r_array->r); g_free(r_array); } diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c index 6dbcb2d..2d6d7db 100644 --- a/hw/hyperv/hv-balloon.c +++ b/hw/hyperv/hv-balloon.c @@ -1475,16 +1475,6 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) balloon->mr->align = memory_region_get_alignment(hostmem_mr); } -static void hv_balloon_free_mr(HvBalloon *balloon) -{ - if (!balloon->mr) { - return; - } - - object_unparent(OBJECT(balloon->mr)); - g_clear_pointer(&balloon->mr, g_free); -} - static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp) { ERRP_GUARD(); @@ -1580,7 +1570,7 @@ static void hv_balloon_vmdev_reset(VMBusDevice *vdev) */ static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon) { - hv_balloon_free_mr(balloon); + g_clear_pointer(&balloon->mr, g_free); balloon->addr = 0; balloon->memslot_count = 0; diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 4b4cf09..72e91f9 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -3037,15 +3037,7 @@ void gicv3_init_cpuif(GICv3State *s) * cpu->gic_pribits */ - /* Note that we can't just use the GICv3CPUState as an opaque pointer - * in define_arm_cp_regs_with_opaque(), because when we're called back - * it might be with code translated by CPU 0 but run by CPU 1, in - * which case we'd get the wrong value. - * So instead we define the regs with no ri->opaque info, and - * get back to the GICv3CPUState from the CPUARMState. - * - * These CP regs callbacks can be called from either TCG or HVF code. - */ + /* These CP regs callbacks can be called from either TCG or HVF. 
*/ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); /* diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 7e1c71e..a748a0b 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -24,7 +24,7 @@ #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" #include "hw/pci/pci_device.h" -#include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-container-legacy.h" #include "qemu/error-report.h" #include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ @@ -32,7 +32,7 @@ * Interfaces for IBM EEH (Enhanced Error Handling) */ #ifdef CONFIG_VFIO_PCI -static bool vfio_eeh_container_ok(VFIOContainer *container) +static bool vfio_eeh_container_ok(VFIOLegacyContainer *container) { /* * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO @@ -60,7 +60,7 @@ static bool vfio_eeh_container_ok(VFIOContainer *container) return true; } -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +static int vfio_eeh_container_op(VFIOLegacyContainer *container, uint32_t op) { struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), @@ -83,10 +83,10 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) return ret; } -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as) { VFIOAddressSpace *space = vfio_address_space_get(as); - VFIOContainerBase *bcontainer = NULL; + VFIOContainer *bcontainer = NULL; if (QLIST_EMPTY(&space->containers)) { /* No containers to act on */ @@ -111,14 +111,14 @@ out: static bool vfio_eeh_as_ok(AddressSpace *as) { - VFIOContainer *container = vfio_eeh_as_container(as); + VFIOLegacyContainer *container = vfio_eeh_as_container(as); return (container != NULL) && vfio_eeh_container_ok(container); } static int vfio_eeh_as_op(AddressSpace *as, uint32_t op) { - VFIOContainer *container = vfio_eeh_as_container(as); + VFIOLegacyContainer *container = vfio_eeh_as_container(as); if (!container) { return -ENODEV; diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c index 938a551..9e31029 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -20,7 +20,7 @@ #include "hw/s390x/s390-pci-clp.h" #include "hw/s390x/s390-pci-vfio.h" #include "hw/vfio/pci.h" -#include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-container-legacy.h" #include "hw/vfio/vfio-helpers.h" /* @@ -62,7 +62,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, { S390PCIDMACount *cnt; uint32_t avail; - VFIOPCIDevice *vpdev = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vpdev = VFIO_PCI_DEVICE(pbdev->pdev); int id; assert(vpdev); @@ -108,7 +108,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev); uint64_t vfio_size; hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); @@ -162,7 +162,7 @@ static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); @@ -185,7 +185,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, struct vfio_device_info_cap_zpci_group *cap; S390pciState *s = s390_get_phb(); ClpRspQueryPciGrp *resgrp; - VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); + 
VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev); uint8_t start_gid = pbdev->zpci_fn.pfgid; hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); @@ -264,7 +264,7 @@ static void s390_pci_read_util(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_util *cap; - VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL); @@ -291,7 +291,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_pfip *cap; - VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP); @@ -314,7 +314,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) { - VFIOPCIDevice *vfio_pci = VFIO_PCI_BASE(pbdev->pdev); + VFIOPCIDevice *vfio_pci = VFIO_PCI_DEVICE(pbdev->pdev); return vfio_get_device_info(vfio_pci->vbasedev.fd); } diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 3c897e5..89b595c 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1578,10 +1578,6 @@ static void sdhci_sysbus_finalize(Object *obj) { SDHCIState *s = SYSBUS_SDHCI(obj); - if (s->dma_mr) { - object_unparent(OBJECT(s->dma_mr)); - } - sdhci_uninitfn(s); } diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 4822c70..e207d05 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -735,6 +735,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr, bool spd; bool queuing = (q != NULL); uint8_t pid = td->token & 0xff; + uint8_t ep_id = (td->token >> 15) & 0xf; UHCIAsync *async; async = uhci_async_find_td(s, td_addr); @@ -778,9 +779,14 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr, switch (pid) { case USB_TOKEN_OUT: - case USB_TOKEN_SETUP: case USB_TOKEN_IN: break; + case USB_TOKEN_SETUP: + /* SETUP is only valid to endpoint 0 */ + if (ep_id == 0) { + break; + } + /* fallthrough */ default: /* invalid pid : frame interrupted */ s->status |= UHCI_STS_HCPERR; @@ -829,7 +835,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr, return uhci_handle_td_error(s, td, td_addr, USB_RET_NODEV, int_mask); } - ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf); + ep = usb_ep_get(dev, pid, ep_id); q = uhci_queue_new(s, qh_addr, td, ep); } async = uhci_async_alloc(q, td_addr); diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index 3cdbd44..411eb7b 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -22,14 +22,14 @@ * will fire during memory update transactions. These depend on BQL being held, * so do any resulting map/demap ops async while keeping BQL. 
*/ -static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) +static void vfio_user_listener_begin(VFIOContainer *bcontainer) { VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); container->proxy->async_ops = true; } -static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) +static void vfio_user_listener_commit(VFIOContainer *bcontainer) { VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); @@ -38,7 +38,7 @@ static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) vfio_user_wait_reqs(container->proxy); } -static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, +static int vfio_user_dma_unmap(const VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) { @@ -80,7 +80,7 @@ static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, return ret; } -static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, +static int vfio_user_dma_map(const VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mrp) { @@ -154,14 +154,14 @@ static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, } static int -vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, +vfio_user_set_dirty_page_tracking(const VFIOContainer *bcontainer, bool start, Error **errp) { error_setg_errno(errp, ENOTSUP, "Not supported"); return -ENOTSUP; } -static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, +static int vfio_user_query_dirty_bitmap(const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { @@ -169,7 +169,7 @@ static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, return -ENOTSUP; } -static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) +static bool vfio_user_setup(VFIOContainer *bcontainer, Error **errp) { VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); @@ -202,7 +202,7 @@ static VFIOUserContainer * vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, Error **errp) { - VFIOContainerBase *bcontainer; + VFIOContainer *bcontainer; VFIOUserContainer *container; VFIOAddressSpace *space; VFIOIOMMUClass *vioc; @@ -260,7 +260,7 @@ put_space_exit: static void vfio_user_container_disconnect(VFIOUserContainer *container) { - VFIOContainerBase *bcontainer = VFIO_IOMMU(container); + VFIOContainer *bcontainer = VFIO_IOMMU(container); VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); VFIOAddressSpace *space = bcontainer->space; diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h index 96aa678..a2b42e3 100644 --- a/hw/vfio-user/container.h +++ b/hw/vfio-user/container.h @@ -9,12 +9,12 @@ #include "qemu/osdep.h" -#include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-container.h" #include "hw/vfio-user/proxy.h" /* MMU container sub-class for vfio-user. 
*/ struct VFIOUserContainer { - VFIOContainerBase parent_obj; + VFIOContainer parent_obj; VFIOUserProxy *proxy; }; diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c index e2c3097..b53ed3b 100644 --- a/hw/vfio-user/pci.c +++ b/hw/vfio-user/pci.c @@ -234,9 +234,10 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); - VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; const char *sock_name; + AddressSpace *as; SocketAddress addr; VFIOUserProxy *proxy; @@ -343,10 +344,10 @@ error: vfio_pci_put_device(vdev); } -static void vfio_user_instance_init(Object *obj) +static void vfio_user_pci_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj); VFIODevice *vbasedev = &vdev->vbasedev; device_add_bootindex_property(obj, &vdev->bootindex, @@ -369,9 +370,9 @@ static void vfio_user_instance_init(Object *obj) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } -static void vfio_user_instance_finalize(Object *obj) +static void vfio_user_pci_finalize(Object *obj) { - VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj); VFIODevice *vbasedev = &vdev->vbasedev; if (vdev->msix != NULL) { @@ -387,7 +388,7 @@ static void vfio_user_instance_finalize(Object *obj) static void vfio_user_pci_reset(DeviceState *dev) { - VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); + VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev); VFIODevice *vbasedev = &vdev->vbasedev; vfio_pci_pre_reset(vdev); @@ -399,7 +400,7 @@ static void vfio_user_pci_reset(DeviceState *dev) vfio_pci_post_reset(vdev); } -static const Property vfio_user_pci_dev_properties[] = { +static const Property vfio_user_pci_properties[] = { DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, @@ -421,7 +422,7 @@ static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); bool success; - if (VFIO_PCI_BASE(udev)->vbasedev.proxy) { + if (VFIO_PCI_DEVICE(udev)->vbasedev.proxy) { error_setg(errp, "Proxy is connected"); return; } @@ -445,13 +446,13 @@ static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, } } -static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) +static void vfio_user_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); device_class_set_legacy_reset(dc, vfio_user_pci_reset); - device_class_set_props(dc, vfio_user_pci_dev_properties); + device_class_set_props(dc, vfio_user_pci_properties); object_class_property_add(klass, "socket", "SocketAddress", NULL, vfio_user_pci_set_socket, NULL, NULL); @@ -462,18 +463,18 @@ static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) pdc->realize = vfio_user_pci_realize; } -static const TypeInfo vfio_user_pci_dev_info = { +static const TypeInfo vfio_user_pci_info = { .name = TYPE_VFIO_USER_PCI, - .parent = TYPE_VFIO_PCI_BASE, + .parent = TYPE_VFIO_PCI_DEVICE, .instance_size = sizeof(VFIOUserPCIDevice), - .class_init = vfio_user_pci_dev_class_init, - .instance_init = vfio_user_instance_init, - .instance_finalize = vfio_user_instance_finalize, + .class_init = vfio_user_pci_class_init, + .instance_init = vfio_user_pci_init, + .instance_finalize = 
vfio_user_pci_finalize, }; static void register_vfio_user_dev_type(void) { - type_register_static(&vfio_user_pci_dev_info); + type_register_static(&vfio_user_pci_info); } - type_init(register_vfio_user_dev_type) +type_init(register_vfio_user_dev_type) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c deleted file mode 100644 index 5630497..0000000 --- a/hw/vfio/container-base.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * VFIO BASE CONTAINER - * - * Copyright (C) 2023 Intel Corporation. - * Copyright Red Hat, Inc. 2023 - * - * Authors: Yi Liu <yi.l.liu@intel.com> - * Eric Auger <eric.auger@redhat.com> - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -#include <sys/ioctl.h> -#include <linux/vfio.h> - -#include "qemu/osdep.h" -#include "system/tcg.h" -#include "system/ram_addr.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "hw/vfio/vfio-container-base.h" -#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */ -#include "system/reset.h" -#include "vfio-helpers.h" - -#include "trace.h" - -static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces = - QLIST_HEAD_INITIALIZER(vfio_address_spaces); - -VFIOAddressSpace *vfio_address_space_get(AddressSpace *as) -{ - VFIOAddressSpace *space; - - QLIST_FOREACH(space, &vfio_address_spaces, list) { - if (space->as == as) { - return space; - } - } - - /* No suitable VFIOAddressSpace, create a new one */ - space = g_malloc0(sizeof(*space)); - space->as = as; - QLIST_INIT(&space->containers); - - if (QLIST_EMPTY(&vfio_address_spaces)) { - qemu_register_reset(vfio_device_reset_handler, NULL); - } - - QLIST_INSERT_HEAD(&vfio_address_spaces, space, list); - - return space; -} - -void vfio_address_space_put(VFIOAddressSpace *space) -{ - if (!QLIST_EMPTY(&space->containers)) { - return; - } - - QLIST_REMOVE(space, list); - g_free(space); - - if (QLIST_EMPTY(&vfio_address_spaces)) { - qemu_unregister_reset(vfio_device_reset_handler, NULL); - } -} - -void vfio_address_space_insert(VFIOAddressSpace *space, - VFIOContainerBase *bcontainer) -{ - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); - bcontainer->space = space; -} - -int vfio_container_dma_map(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly, MemoryRegion *mr) -{ - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - RAMBlock *rb = mr->ram_block; - int mfd = rb ? 
qemu_ram_get_fd(rb) : -1; - - if (mfd >= 0 && vioc->dma_map_file) { - unsigned long start = vaddr - qemu_ram_get_host_addr(rb); - unsigned long offset = qemu_ram_get_fd_offset(rb); - - return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, - readonly); - } - g_assert(vioc->dma_map); - return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); -} - -int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb, bool unmap_all) -{ - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - - g_assert(vioc->dma_unmap); - return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all); -} - -bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, - MemoryRegionSection *section, - Error **errp) -{ - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - - if (!vioc->add_window) { - return true; - } - - return vioc->add_window(bcontainer, section, errp); -} - -void vfio_container_del_section_window(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) -{ - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - - if (!vioc->del_window) { - return; - } - - return vioc->del_window(bcontainer, section); -} - -int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start, Error **errp) -{ - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - int ret; - - if (!bcontainer->dirty_pages_supported) { - return 0; - } - - g_assert(vioc->set_dirty_page_tracking); - if (bcontainer->dirty_pages_started == start) { - return 0; - } - - ret = vioc->set_dirty_page_tracking(bcontainer, start, errp); - if (!ret) { - bcontainer->dirty_pages_started = start; - } - - return ret; -} - -static bool vfio_container_devices_dirty_tracking_is_started( - const VFIOContainerBase *bcontainer) -{ - VFIODevice *vbasedev; - - QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (!vbasedev->dirty_tracking) { - return false; - } - } - - return true; -} - -bool vfio_container_dirty_tracking_is_started( - const VFIOContainerBase *bcontainer) -{ - return vfio_container_devices_dirty_tracking_is_started(bcontainer) || - bcontainer->dirty_pages_started; -} - -bool vfio_container_devices_dirty_tracking_is_supported( - const VFIOContainerBase *bcontainer) -{ - VFIODevice *vbasedev; - - QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) { - return false; - } - if (!vbasedev->dirty_pages_supported) { - return false; - } - } - - return true; -} - -static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, - hwaddr size, void *bitmap) -{ - uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + - sizeof(struct vfio_device_feature_dma_logging_report), - sizeof(uint64_t))] = {}; - struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; - struct vfio_device_feature_dma_logging_report *report = - (struct vfio_device_feature_dma_logging_report *)feature->data; - - report->iova = iova; - report->length = size; - report->page_size = qemu_real_host_page_size(); - report->bitmap = (uintptr_t)bitmap; - - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_GET | - VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - - return vbasedev->io_ops->device_feature(vbasedev, feature); -} - -static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) -{ - VFIOIOMMUClass *vioc = 
VFIO_IOMMU_GET_CLASS(bcontainer); - - g_assert(vioc->query_dirty_bitmap); - return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, - errp); -} - -static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) -{ - VFIODevice *vbasedev; - int ret; - - QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - ret = vfio_device_dma_logging_report(vbasedev, iova, size, - vbmap->bitmap); - if (ret) { - error_setg_errno(errp, -ret, - "%s: Failed to get DMA logging report, iova: " - "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx, - vbasedev->name, iova, size); - - return ret; - } - } - - return 0; -} - -int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr, Error **errp) -{ - bool all_device_dirty_tracking = - vfio_container_devices_dirty_tracking_is_supported(bcontainer); - uint64_t dirty_pages; - VFIOBitmap vbmap; - int ret; - - if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { - cpu_physical_memory_set_dirty_range(ram_addr, size, - tcg_enabled() ? DIRTY_CLIENTS_ALL : - DIRTY_CLIENTS_NOCODE); - return 0; - } - - ret = vfio_bitmap_alloc(&vbmap, size); - if (ret) { - error_setg_errno(errp, -ret, - "Failed to allocate dirty tracking bitmap"); - return ret; - } - - if (all_device_dirty_tracking) { - ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size, - errp); - } else { - ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size, - errp); - } - - if (ret) { - goto out; - } - - dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, - vbmap.pages); - - trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr, - dirty_pages); -out: - g_free(vbmap.bitmap); - - return ret; -} - -static gpointer copy_iova_range(gconstpointer src, gpointer data) -{ - Range *source = (Range *)src; - Range *dest = g_new(Range, 1); - - range_set_bounds(dest, range_lob(source), range_upb(source)); - return dest; -} - -GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer) -{ - assert(bcontainer); - return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL); -} - -static void vfio_container_instance_finalize(Object *obj) -{ - VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); - VFIOGuestIOMMU *giommu, *tmp; - - QLIST_SAFE_REMOVE(bcontainer, next); - - QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { - memory_region_unregister_iommu_notifier( - MEMORY_REGION(giommu->iommu_mr), &giommu->n); - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } - - g_list_free_full(bcontainer->iova_ranges, g_free); -} - -static void vfio_container_instance_init(Object *obj) -{ - VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); - - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); -} - -static const TypeInfo types[] = { - { - .name = TYPE_VFIO_IOMMU, - .parent = TYPE_OBJECT, - .instance_init = vfio_container_instance_init, - .instance_finalize = vfio_container_instance_finalize, - .instance_size = sizeof(VFIOContainerBase), - .class_size = sizeof(VFIOIOMMUClass), - .abstract = true, - }, -}; - -DEFINE_TYPES(types) diff --git a/hw/vfio/container-legacy.c b/hw/vfio/container-legacy.c new file mode 100644 index 0000000..c0f87f7 --- /dev/null +++ 
b/hw/vfio/container-legacy.c @@ -0,0 +1,1277 @@ +/* + * generic functions used by VFIO devices + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson <alex.williamson@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#include "qemu/osdep.h" +#include <sys/ioctl.h> +#include <linux/vfio.h> + +#include "hw/vfio/vfio-device.h" +#include "system/address-spaces.h" +#include "system/memory.h" +#include "system/ram_addr.h" +#include "qemu/error-report.h" +#include "qemu/range.h" +#include "system/reset.h" +#include "trace.h" +#include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/blocker.h" +#include "pci.h" +#include "hw/vfio/vfio-container-legacy.h" +#include "vfio-helpers.h" +#include "vfio-listener.h" + +#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" + +typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; +static VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); + +static int vfio_ram_block_discard_disable(VFIOLegacyContainer *container, + bool state) +{ + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + /* + * We support coordinated discarding of RAM via the RamDiscardManager. + */ + return ram_block_uncoordinated_discard_disable(state); + default: + /* + * VFIO_SPAPR_TCE_IOMMU most probably works just fine with + * RamDiscardManager, however, it is completely untested. + * + * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does + * completely the opposite of managing mapping/pinning dynamically as + * required by RamDiscardManager. We would have to special-case sections + * with a RamDiscardManager. + */ + return ram_block_discard_disable(state); + } +} + +static int vfio_dma_unmap_bitmap(const VFIOLegacyContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) +{ + const VFIOContainer *bcontainer = VFIO_IOMMU(container); + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; + int ret; + + ret = vfio_bitmap_alloc(&vbmap, size); + if (ret) { + return ret; + } + + unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); + + unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); + unmap->iova = iova; + unmap->size = size; + unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; + bitmap = (struct vfio_bitmap *)&unmap->data; + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize + * to qemu_real_host_page_size. 
+ */ + bitmap->pgsize = qemu_real_host_page_size(); + bitmap->size = vbmap.size; + bitmap->data = (__u64 *)vbmap.bitmap; + + if (vbmap.size > bcontainer->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); + ret = -E2BIG; + goto unmap_exit; + } + + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + if (!ret) { + cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, + iotlb->translated_addr, vbmap.pages); + } else { + error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); + } + +unmap_exit: + g_free(unmap); + g_free(vbmap.bitmap); + + return ret; +} + +static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) +{ + const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, + .iova = iova, + .size = size, + }; + bool need_dirty_sync = false; + int ret; + Error *local_err = NULL; + + g_assert(!cpr_is_incoming()); + + if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { + if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && + bcontainer->dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + + need_dirty_sync = true; + } + + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c + * v4.15) where an overflow in its wrap-around check prevents us from + * unmapping the last page of the address space. Test for the error + * condition and re-try the unmap excluding the last page. The + * expectation is that we've never mapped the last page anyway and this + * unmap request comes via vIOMMU support which also makes it unlikely + * that this page is used. This bug was introduced well after type1 v2 + * support was introduced, so we shouldn't need to test for v1. A fix + * is queued for kernel v5.0 so this workaround can be removed once + * affected kernels are sufficiently deprecated. + */ + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_legacy_dma_unmap_overflow_workaround(); + unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); + continue; + } + return -errno; + } + + if (need_dirty_sync) { + ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, + iotlb->translated_addr, &local_err); + if (ret) { + error_report_err(local_err); + return ret; + } + } + + return 0; +} + +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_legacy_dma_unmap(const VFIOContainer *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + int ret; + + if (unmap_all) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + Int128 llsize = int128_rshift(int128_2_64(), 1); + + ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize), + iotlb); + + if (ret == 0) { + ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize), + int128_get64(llsize), iotlb); + } + + } else { + ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb); + } + + return ret; +} + +static int vfio_legacy_dma_map(const VFIOContainer *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) +{ + const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + /* + * Try the mapping, if it fails with EBUSY, unmap the region and try + * again. This shouldn't be necessary, but we sometimes see it in + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || + (errno == EBUSY && + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } + + return -errno; +} + +static int +vfio_legacy_set_dirty_page_tracking(const VFIOContainer *bcontainer, + bool start, Error **errp) +{ + const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; + } + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + if (ret) { + ret = -errno; + error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x", + dirty.flags); + } + + return ret; +} + +static int vfio_legacy_query_dirty_bitmap(const VFIOContainer *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) +{ + const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; + + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); + dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize + * to qemu_real_host_page_size. 
+ */ + range->bitmap.pgsize = qemu_real_host_page_size(); + range->bitmap.size = vbmap->size; + range->bitmap.data = (__u64 *)vbmap->bitmap; + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (ret) { + ret = -errno; + error_setg_errno(errp, errno, + "Failed to get dirty bitmap for iova: 0x%"PRIx64 + " size: 0x%"PRIx64, (uint64_t)range->iova, + (uint64_t)range->size); + } + + g_free(dbitmap); + + return ret; +} + +static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + VFIOContainer *bcontainer) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; + + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + if (!hdr) { + return false; + } + + cap = (void *)hdr; + + for (int i = 0; i < cap->nr_iovas; i++) { + Range *range = g_new(Range, 1); + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); + bcontainer->iova_ranges = + range_list_insert(bcontainer->iova_ranges, range); + } + + return true; +} + +static void vfio_group_add_kvm_device(VFIOGroup *group) +{ + Error *err = NULL; + + if (vfio_kvm_device_add_fd(group->fd, &err)) { + error_reportf_err(err, "group ID %d: ", group->groupid); + } +} + +static void vfio_group_del_kvm_device(VFIOGroup *group) +{ + Error *err = NULL; + + if (vfio_kvm_device_del_fd(group->fd, &err)) { + error_reportf_err(err, "group ID %d: ", group->groupid); + } +} + +/* + * vfio_get_iommu_type - selects the richest iommu_type (v2 first) + */ +static int vfio_get_iommu_type(int container_fd, + Error **errp) +{ + int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, + VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; + int i; + + for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { + if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + return iommu_types[i]; + } + } + error_setg(errp, "No available IOMMU models"); + return -EINVAL; +} + +/* + * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type + */ +static const char *vfio_get_iommu_class_name(int iommu_type) +{ + switch (iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + return TYPE_VFIO_IOMMU_LEGACY; + break; + case VFIO_SPAPR_TCE_v2_IOMMU: + case VFIO_SPAPR_TCE_IOMMU: + return TYPE_VFIO_IOMMU_SPAPR; + break; + default: + g_assert_not_reached(); + }; +} + +static bool vfio_set_iommu(int container_fd, int group_fd, + int *iommu_type, Error **errp) +{ + if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) { + error_setg_errno(errp, errno, "Failed to set group container"); + return false; + } + + while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) { + if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + /* + * On sPAPR, despite the IOMMU subdriver always advertises v1 and + * v2, the running platform may not support v2 and there is no + * way to guess it until an IOMMU group gets added to the container. + * So in case it fails with v2, try v1 as a fallback. + */ + *iommu_type = VFIO_SPAPR_TCE_IOMMU; + continue; + } + error_setg_errno(errp, errno, "Failed to set iommu for container"); + return false; + } + + return true; +} + +static VFIOLegacyContainer *vfio_create_container(int fd, VFIOGroup *group, + Error **errp) +{ + int iommu_type; + const char *vioc_name; + VFIOLegacyContainer *container; + + iommu_type = vfio_get_iommu_type(fd, errp); + if (iommu_type < 0) { + return NULL; + } + + /* + * During CPR, just set the container type and skip the ioctls, as the + * container and group are already configured in the kernel. 
+ */ + if (!cpr_is_incoming() && + !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + return NULL; + } + + vioc_name = vfio_get_iommu_class_name(iommu_type); + + container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); + container->fd = fd; + container->iommu_type = iommu_type; + return container; +} + +static int vfio_get_iommu_info(VFIOLegacyContainer *container, + struct vfio_iommu_type1_info **info) +{ + + size_t argsz = sizeof(struct vfio_iommu_type1_info); + + *info = g_new0(struct vfio_iommu_type1_info, 1); +again: + (*info)->argsz = argsz; + + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { + g_free(*info); + *info = NULL; + return -errno; + } + + if (((*info)->argsz > argsz)) { + argsz = (*info)->argsz; + *info = g_realloc(*info, argsz); + goto again; + } + + return 0; +} + +static struct vfio_info_cap_header * +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +{ + struct vfio_info_cap_header *hdr; + void *ptr = info; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { + return NULL; + } + + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } + } + + return NULL; +} + +static void vfio_get_iommu_info_migration(VFIOLegacyContainer *container, + struct vfio_iommu_type1_info *info) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; + VFIOContainer *bcontainer = VFIO_IOMMU(container); + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { + return; + } + + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, + header); + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { + bcontainer->dirty_pages_supported = true; + bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +} + +static bool vfio_legacy_setup(VFIOContainer *bcontainer, Error **errp) +{ + VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + g_autofree struct vfio_iommu_type1_info *info = NULL; + int ret; + + ret = vfio_get_iommu_info(container, &info); + if (ret) { + error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); + return false; + } + + if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { + bcontainer->pgsizes = info->iova_pgsizes; + } else { + bcontainer->pgsizes = qemu_real_host_page_size(); + } + + if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { + bcontainer->dma_max_mappings = 65535; + } + + vfio_get_info_iova_range(info, bcontainer); + + vfio_get_iommu_info_migration(container, info); + return true; +} + +static bool vfio_container_attach_discard_disable( + VFIOLegacyContainer *container, VFIOGroup *group, Error **errp) +{ + int ret; + + /* + * VFIO is currently incompatible with discarding of RAM insofar as the + * madvise to purge (zap) the page from QEMU's address space does not + * interact with the memory API and therefore leaves stale virtual to + * physical mappings in the IOMMU if the page was previously pinned. We + * therefore set discarding broken for each group added to a container, + * whether the container is used individually or shared. 
This provides + * us with options to allow devices within a group to opt-in and allow + * discarding, so long as it is done consistently for a group (for instance + * if the device is an mdev device where it is known that the host vendor + * driver will never pin pages outside of the working set of the guest + * driver, which would thus not be discarding candidates). + * + * The first opportunity to induce pinning occurs here where we attempt to + * attach the group to existing containers within the AddressSpace. If any + * pages are already zapped from the virtual address space, such as from + * previous discards, new pinning will cause valid mappings to be + * re-established. Likewise, when the overall MemoryListener for a new + * container is registered, a replay of mappings within the AddressSpace + * will occur, re-establishing any previously zapped pages as well. + * + * Especially virtio-balloon is currently only prevented from discarding + * new memory, it will not yet set ram_block_discard_set_required() and + * therefore, neither stops us here or deals with the sudden memory + * consumption of inflated memory. + * + * We do support discarding of memory coordinated via the RamDiscardManager + * with some IOMMU types. vfio_ram_block_discard_disable() handles the + * details once we know which type of IOMMU we are using. + */ + + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + } + return !ret; +} + +static bool vfio_container_group_add(VFIOLegacyContainer *container, + VFIOGroup *group, Error **errp) +{ + if (!vfio_container_attach_discard_disable(container, group, errp)) { + return false; + } + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_group_add_kvm_device(group); + /* + * Remember the container fd for each group, so we can attach to the same + * container after CPR. + */ + cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); + return true; +} + +static void vfio_container_group_del(VFIOLegacyContainer *container, + VFIOGroup *group) +{ + QLIST_REMOVE(group, container_next); + group->container = NULL; + vfio_group_del_kvm_device(group); + vfio_ram_block_discard_disable(container, false); + cpr_delete_fd("vfio_container_for_group", group->groupid); +} + +static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, + Error **errp) +{ + VFIOLegacyContainer *container; + VFIOContainer *bcontainer; + int ret, fd = -1; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc = NULL; + bool new_container = false; + bool group_was_added = false; + + space = vfio_address_space_get(as); + fd = cpr_find_fd("vfio_container_for_group", group->groupid); + + if (!cpr_is_incoming()) { + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = VFIO_IOMMU_LEGACY(bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + return vfio_container_group_add(container, group, errp); + } + } + + fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); + if (fd < 0) { + goto fail; + } + } else { + /* + * For incoming CPR, the group is already attached in the kernel. + * If a container with matching fd is found, then update the + * userland group list and return. If not, then after the loop, + * create the container struct and group list. 
+ */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = VFIO_IOMMU_LEGACY(bcontainer); + + if (vfio_cpr_container_match(container, group, fd)) { + return vfio_container_group_add(container, group, errp); + } + } + } + + ret = ioctl(fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + error_setg(errp, "supported vfio version: %d, " + "reported version: %d", VFIO_API_VERSION, ret); + goto fail; + } + + container = vfio_create_container(fd, group, errp); + if (!container) { + goto fail; + } + new_container = true; + bcontainer = VFIO_IOMMU(container); + + if (!vfio_legacy_cpr_register_container(container, errp)) { + goto fail; + } + + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); + + if (!vioc->setup(bcontainer, errp)) { + goto fail; + } + + vfio_address_space_insert(space, bcontainer); + + if (!vfio_container_group_add(container, group, errp)) { + goto fail; + } + group_was_added = true; + + /* + * If CPR, register the listener later, after all state that may + * affect regions and mapping boundaries has been cpr load'ed. Later, + * the listener will invoke its callback on each flat section and call + * dma_map to supply the new vaddr, and the calls will match the mappings + * remembered by the kernel. + */ + if (!cpr_is_incoming()) { + if (!vfio_listener_register(bcontainer, errp)) { + goto fail; + } + } + + bcontainer->initialized = true; + + return true; + +fail: + if (new_container) { + vfio_listener_unregister(bcontainer); + } + + if (group_was_added) { + vfio_container_group_del(container, group); + } + if (vioc && vioc->release) { + vioc->release(bcontainer); + } + if (new_container) { + vfio_legacy_cpr_unregister_container(container); + object_unref(container); + } + if (fd >= 0) { + close(fd); + } + vfio_address_space_put(space); + + return false; +} + +static void vfio_container_disconnect(VFIOGroup *group) +{ + VFIOLegacyContainer *container = group->container; + VFIOContainer *bcontainer = VFIO_IOMMU(container); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + QLIST_REMOVE(group, container_next); + group->container = NULL; + cpr_delete_fd("vfio_container_for_group", group->groupid); + + /* + * Explicitly release the listener first before unset container, + * since unset may destroy the backend container if it's the last + * group. + */ + if (QLIST_EMPTY(&container->group_list)) { + vfio_listener_unregister(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); + } + } + + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from container", + group->groupid); + } + + if (QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = bcontainer->space; + + trace_vfio_container_disconnect(container->fd); + vfio_legacy_cpr_unregister_container(container); + close(container->fd); + object_unref(container); + + vfio_address_space_put(space); + } +} + +static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) +{ + ERRP_GUARD(); + VFIOGroup *group; + char path[32]; + struct vfio_group_status status = { .argsz = sizeof(status) }; + + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? 
*/ + if (VFIO_IOMMU(group->container)->space->as == as) { + return group; + } else { + error_setg(errp, "group %d used in multiple address spaces", + group->groupid); + return NULL; + } + } + } + + group = g_malloc0(sizeof(*group)); + + snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); + group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); + if (group->fd < 0) { + goto free_group_exit; + } + + if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { + error_setg_errno(errp, errno, "failed to get group %d status", groupid); + goto close_fd_exit; + } + + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + error_setg(errp, "group %d is not viable", groupid); + error_append_hint(errp, + "Please ensure all devices within the iommu_group " + "are bound to their vfio bus driver.\n"); + goto close_fd_exit; + } + + group->groupid = groupid; + QLIST_INIT(&group->device_list); + + if (!vfio_container_connect(group, as, errp)) { + error_prepend(errp, "failed to setup container for group %d: ", + groupid); + goto close_fd_exit; + } + + QLIST_INSERT_HEAD(&vfio_group_list, group, next); + + return group; + +close_fd_exit: + cpr_delete_fd("vfio_group", groupid); + close(group->fd); + +free_group_exit: + g_free(group); + + return NULL; +} + +static void vfio_group_put(VFIOGroup *group) +{ + if (!group || !QLIST_EMPTY(&group->device_list)) { + return; + } + + if (!group->ram_block_discard_allowed) { + vfio_ram_block_discard_disable(group->container, false); + } + vfio_group_del_kvm_device(group); + vfio_container_disconnect(group); + QLIST_REMOVE(group, next); + trace_vfio_group_put(group->fd); + cpr_delete_fd("vfio_group", group->groupid); + close(group->fd); + g_free(group); +} + +static bool vfio_device_get(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp) +{ + g_autofree struct vfio_device_info *info = NULL; + int fd; + + fd = vfio_cpr_group_get_device_fd(group->fd, name); + if (fd < 0) { + error_setg_errno(errp, errno, "error getting device from group %d", + group->groupid); + error_append_hint(errp, + "Verify all devices in group %d are bound to vfio-<bus> " + "or pci-stub and not already in use\n", group->groupid); + return false; + } + + info = vfio_get_device_info(fd); + if (!info) { + error_setg_errno(errp, errno, "error getting device info"); + goto fail; + } + + /* + * Set discarding of RAM as not broken for this group if the driver knows + * the device operates compatibly with discarding. Setting must be + * consistent per group, but since compatibility is really only possible + * with mdev currently, we expect singleton groups. 
+ */ + if (vbasedev->ram_block_discard_allowed != + group->ram_block_discard_allowed) { + if (!QLIST_EMPTY(&group->device_list)) { + error_setg(errp, "Inconsistent setting of support for discarding " + "RAM (e.g., balloon) within group"); + goto fail; + } + + if (!group->ram_block_discard_allowed) { + group->ram_block_discard_allowed = true; + vfio_ram_block_discard_disable(group->container, false); + } + } + + vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info); + + vbasedev->fd = fd; + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + + trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); + + return true; + +fail: + close(fd); + cpr_delete_fd(name, 0); + return false; +} + +static void vfio_device_put(VFIODevice *vbasedev) +{ + if (!vbasedev->group) { + return; + } + QLIST_REMOVE(vbasedev, next); + vbasedev->group = NULL; + trace_vfio_device_put(vbasedev->fd); + cpr_delete_fd(vbasedev->name, 0); + close(vbasedev->fd); +} + +static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp) +{ + char *tmp, group_path[PATH_MAX]; + g_autofree char *group_name = NULL; + int ret, groupid; + ssize_t len; + + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len <= 0 || len >= sizeof(group_path)) { + ret = len < 0 ? -errno : -ENAMETOOLONG; + error_setg_errno(errp, -ret, "no iommu_group found"); + return ret; + } + + group_path[len] = 0; + + group_name = g_path_get_basename(group_path); + if (sscanf(group_name, "%d", &groupid) != 1) { + error_setg_errno(errp, errno, "failed to read %s", group_path); + return -errno; + } + return groupid; +} + +/* + * vfio_device_attach: attach a device to a security context + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + int groupid = vfio_device_get_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; + + if (groupid < 0) { + return false; + } + + trace_vfio_device_attach(vbasedev->name, groupid); + + group = vfio_group_get(groupid, as, errp); + if (!group) { + return false; + } + + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { + error_setg(errp, "device is already attached"); + goto group_put_exit; + } + } + if (!vfio_device_get(group, name, vbasedev, errp)) { + goto group_put_exit; + } + + if (!vfio_device_hiod_create_and_realize(vbasedev, + TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + errp)) { + goto device_put_exit; + } + + if (vbasedev->mdev) { + error_setg(&vbasedev->cpr.mdev_blocker, + "CPR does not support vfio mdev %s", vbasedev->name); + if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) < 0) { + goto hiod_unref_exit; + } + } + + return true; + +hiod_unref_exit: + object_unref(vbasedev->hiod); +device_put_exit: + vfio_device_put(vbasedev); +group_put_exit: + vfio_group_put(group); + return false; +} + +static void vfio_legacy_detach_device(VFIODevice *vbasedev) +{ + VFIOGroup *group = vbasedev->group; + + trace_vfio_device_detach(vbasedev->name, group->groupid); + + vfio_device_unprepare(vbasedev); + + migrate_del_blocker(&vbasedev->cpr.mdev_blocker); + object_unref(vbasedev->hiod); + vfio_device_put(vbasedev); + vfio_group_put(group); +} + +static int 
vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) +{ + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + VFIOGroup *group; + struct vfio_pci_hot_reset_info *info = NULL; + struct vfio_pci_dependent_device *devices; + struct vfio_pci_hot_reset *reset; + int32_t *fds; + int ret, i, count; + bool multi = false; + + trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); + + if (!single) { + vfio_pci_pre_reset(vdev); + } + vdev->vbasedev.needs_reset = false; + + ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); + + if (ret) { + goto out_single; + } + devices = &info->devices[0]; + + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); + + /* Verify that we have all the groups required */ + for (i = 0; i < info->count; i++) { + PCIHostDeviceAddress host; + VFIOPCIDevice *tmp; + VFIODevice *vbasedev_iter; + + host.domain = devices[i].segment; + host.bus = devices[i].bus; + host.slot = PCI_SLOT(devices[i].devfn); + host.function = PCI_FUNC(devices[i].devfn); + + trace_vfio_pci_hot_reset_dep_devices(host.domain, + host.bus, host.slot, host.function, devices[i].group_id); + + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { + continue; + } + + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == devices[i].group_id) { + break; + } + } + + if (!group) { + if (!vdev->has_pm_reset) { + error_report("vfio: Cannot reset device %s, " + "depends on group %d which is not owned.", + vdev->vbasedev.name, devices[i].group_id); + } + ret = -EPERM; + goto out; + } + + /* Prep dependent devices for reset and clear our marker. */ + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (!vbasedev_iter->dev->realized || + !vfio_pci_from_vfio_device(vbasedev_iter)) { + continue; + } + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { + if (single) { + ret = -EINVAL; + goto out_single; + } + vfio_pci_pre_reset(tmp); + tmp->vbasedev.needs_reset = false; + multi = true; + break; + } + } + } + + if (!single && !multi) { + ret = -EINVAL; + goto out_single; + } + + /* Determine how many group fds need to be passed */ + count = 0; + QLIST_FOREACH(group, &vfio_group_list, next) { + for (i = 0; i < info->count; i++) { + if (group->groupid == devices[i].group_id) { + count++; + break; + } + } + } + + reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); + reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); + fds = &reset->group_fds[0]; + + /* Fill in group fds */ + QLIST_FOREACH(group, &vfio_group_list, next) { + for (i = 0; i < info->count; i++) { + if (group->groupid == devices[i].group_id) { + fds[reset->count++] = group->fd; + break; + } + } + } + + /* Bus reset! */ + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); + g_free(reset); + if (ret) { + ret = -errno; + } + + trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, + ret ? 
strerror(errno) : "Success"); + +out: + /* Re-enable INTx on affected devices */ + for (i = 0; i < info->count; i++) { + PCIHostDeviceAddress host; + VFIOPCIDevice *tmp; + VFIODevice *vbasedev_iter; + + host.domain = devices[i].segment; + host.bus = devices[i].bus; + host.slot = PCI_SLOT(devices[i].devfn); + host.function = PCI_FUNC(devices[i].devfn); + + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { + continue; + } + + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == devices[i].group_id) { + break; + } + } + + if (!group) { + break; + } + + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (!vbasedev_iter->dev->realized || + !vfio_pci_from_vfio_device(vbasedev_iter)) { + continue; + } + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { + vfio_pci_post_reset(tmp); + break; + } + } + } +out_single: + if (!single) { + vfio_pci_post_reset(vdev); + } + g_free(info); + + return ret; +} + +static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data) +{ + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + + vioc->setup = vfio_legacy_setup; + vioc->dma_map = vfio_legacy_dma_map; + vioc->dma_unmap = vfio_legacy_dma_unmap; + vioc->attach_device = vfio_legacy_attach_device; + vioc->detach_device = vfio_legacy_detach_device; + vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; + vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; + vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; +}; + +static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + + hiod->name = g_strdup(vdev->name); + hiod->agent = opaque; + + return true; +} + +static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, + Error **errp) +{ + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return vfio_device_get_aw_bits(hiod->agent); + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } +} + +static GList * +hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod) +{ + VFIODevice *vdev = hiod->agent; + + g_assert(vdev); + return vfio_container_get_iova_ranges(vdev->bcontainer); +} + +static uint64_t +hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod) +{ + VFIODevice *vdev = hiod->agent; + + g_assert(vdev); + return vfio_container_get_page_size_mask(vdev->bcontainer); +} + +static void vfio_iommu_legacy_instance_init(Object *obj) +{ + VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(obj); + + QLIST_INIT(&container->group_list); +} + +static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->realize = hiod_legacy_vfio_realize; + hioc->get_cap = hiod_legacy_vfio_get_cap; + hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges; + hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask; +}; + +static const TypeInfo types[] = { + { + .name = TYPE_VFIO_IOMMU_LEGACY, + .parent = TYPE_VFIO_IOMMU, + .instance_init = vfio_iommu_legacy_instance_init, + .instance_size = sizeof(VFIOLegacyContainer), + .class_init = vfio_iommu_legacy_class_init, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_legacy_vfio_class_init, + } +}; + +DEFINE_TYPES(types) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 030c6d3..250b20f 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1,1275 +1,350 @@ /* - * 
generic functions used by VFIO devices + * VFIO BASE CONTAINER * - * Copyright Red Hat, Inc. 2012 + * Copyright (C) 2023 Intel Corporation. + * Copyright Red Hat, Inc. 2023 * - * Authors: - * Alex Williamson <alex.williamson@redhat.com> + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Based on qemu-kvm device-assignment: - * Adapted for KVM by Qumranet. - * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) - * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) - * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) - * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) - * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + * SPDX-License-Identifier: GPL-2.0-or-later */ -#include "qemu/osdep.h" #include <sys/ioctl.h> #include <linux/vfio.h> -#include "hw/vfio/vfio-device.h" -#include "system/address-spaces.h" -#include "system/memory.h" +#include "qemu/osdep.h" +#include "system/tcg.h" #include "system/ram_addr.h" -#include "qemu/error-report.h" -#include "qemu/range.h" -#include "system/reset.h" -#include "trace.h" #include "qapi/error.h" -#include "migration/cpr.h" -#include "migration/blocker.h" -#include "pci.h" +#include "qemu/error-report.h" #include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */ +#include "system/reset.h" #include "vfio-helpers.h" -#include "vfio-listener.h" - -#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" - -typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; -static VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); - -static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) -{ - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - /* - * We support coordinated discarding of RAM via the RamDiscardManager. - */ - return ram_block_uncoordinated_discard_disable(state); - default: - /* - * VFIO_SPAPR_TCE_IOMMU most probably works just fine with - * RamDiscardManager, however, it is completely untested. - * - * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does - * completely the opposite of managing mapping/pinning dynamically as - * required by RamDiscardManager. We would have to special-case sections - * with a RamDiscardManager. - */ - return ram_block_discard_disable(state); - } -} - -static int vfio_dma_unmap_bitmap(const VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) -{ - const VFIOContainerBase *bcontainer = VFIO_IOMMU(container); - struct vfio_iommu_type1_dma_unmap *unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; - int ret; - - ret = vfio_bitmap_alloc(&vbmap, size); - if (ret) { - return ret; - } - - unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); - - unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); - unmap->iova = iova; - unmap->size = size; - unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; - bitmap = (struct vfio_bitmap *)&unmap->data; - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize - * to qemu_real_host_page_size. 
- */ - bitmap->pgsize = qemu_real_host_page_size(); - bitmap->size = vbmap.size; - bitmap->data = (__u64 *)vbmap.bitmap; - - if (vbmap.size > bcontainer->max_dirty_bitmap_size) { - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); - ret = -E2BIG; - goto unmap_exit; - } - - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); - if (!ret) { - cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, - iotlb->translated_addr, vbmap.pages); - } else { - error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); - } - -unmap_exit: - g_free(unmap); - g_free(vbmap.bitmap); - - return ret; -} - -static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) -{ - const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, - .iova = iova, - .size = size, - }; - bool need_dirty_sync = false; - int ret; - Error *local_err = NULL; - - g_assert(!cpr_is_incoming()); - - if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { - if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && - bcontainer->dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } - need_dirty_sync = true; - } - - while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - /* - * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c - * v4.15) where an overflow in its wrap-around check prevents us from - * unmapping the last page of the address space. Test for the error - * condition and re-try the unmap excluding the last page. The - * expectation is that we've never mapped the last page anyway and this - * unmap request comes via vIOMMU support which also makes it unlikely - * that this page is used. This bug was introduced well after type1 v2 - * support was introduced, so we shouldn't need to test for v1. A fix - * is queued for kernel v5.0 so this workaround can be removed once - * affected kernels are sufficiently deprecated. - */ - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { - trace_vfio_legacy_dma_unmap_overflow_workaround(); - unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); - continue; - } - return -errno; - } - - if (need_dirty_sync) { - ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size, - iotlb->translated_addr, &local_err); - if (ret) { - error_report_err(local_err); - return ret; - } - } +#include "trace.h" - return 0; -} +static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces = + QLIST_HEAD_INITIALIZER(vfio_address_spaces); -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb, bool unmap_all) +VFIOAddressSpace *vfio_address_space_get(AddressSpace *as) { - int ret; - - if (unmap_all) { - /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ - Int128 llsize = int128_rshift(int128_2_64(), 1); - - ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize), - iotlb); + VFIOAddressSpace *space; - if (ret == 0) { - ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize), - int128_get64(llsize), iotlb); + QLIST_FOREACH(space, &vfio_address_spaces, list) { + if (space->as == as) { + return space; } - - } else { - ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb); - } - - return ret; -} - -static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly, - MemoryRegion *mr) -{ - const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, - .vaddr = (__u64)(uintptr_t)vaddr, - .iova = iova, - .size = size, - }; - - if (!readonly) { - map.flags |= VFIO_DMA_MAP_FLAG_WRITE; - } - - /* - * Try the mapping, if it fails with EBUSY, unmap the region and try - * again. This shouldn't be necessary, but we sometimes see it in - * the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || - (errno == EBUSY && - vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } - - return -errno; -} - -static int -vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, - bool start, Error **errp) -{ - const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), - }; - - if (start) { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; - } else { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; - } - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); - if (ret) { - ret = -errno; - error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x", - dirty.flags); - } - - return ret; -} - -static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) -{ - const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; - - dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); - - dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); - dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; - range->iova = iova; - range->size = size; - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize - * to qemu_real_host_page_size. 
- */ - range->bitmap.pgsize = qemu_real_host_page_size(); - range->bitmap.size = vbmap->size; - range->bitmap.data = (__u64 *)vbmap->bitmap; - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); - if (ret) { - ret = -errno; - error_setg_errno(errp, errno, - "Failed to get dirty bitmap for iova: 0x%"PRIx64 - " size: 0x%"PRIx64, (uint64_t)range->iova, - (uint64_t)range->size); - } - - g_free(dbitmap); - - return ret; -} - -static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, - VFIOContainerBase *bcontainer) -{ - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_iova_range *cap; - - hdr = vfio_get_iommu_type1_info_cap(info, - VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); - if (!hdr) { - return false; } - cap = (void *)hdr; - - for (int i = 0; i < cap->nr_iovas; i++) { - Range *range = g_new(Range, 1); + /* No suitable VFIOAddressSpace, create a new one */ + space = g_malloc0(sizeof(*space)); + space->as = as; + QLIST_INIT(&space->containers); - range_set_bounds(range, cap->iova_ranges[i].start, - cap->iova_ranges[i].end); - bcontainer->iova_ranges = - range_list_insert(bcontainer->iova_ranges, range); + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_register_reset(vfio_device_reset_handler, NULL); } - return true; -} - -static void vfio_group_add_kvm_device(VFIOGroup *group) -{ - Error *err = NULL; + QLIST_INSERT_HEAD(&vfio_address_spaces, space, list); - if (vfio_kvm_device_add_fd(group->fd, &err)) { - error_reportf_err(err, "group ID %d: ", group->groupid); - } + return space; } -static void vfio_group_del_kvm_device(VFIOGroup *group) +void vfio_address_space_put(VFIOAddressSpace *space) { - Error *err = NULL; - - if (vfio_kvm_device_del_fd(group->fd, &err)) { - error_reportf_err(err, "group ID %d: ", group->groupid); + if (!QLIST_EMPTY(&space->containers)) { + return; } -} -/* - * vfio_get_iommu_type - selects the richest iommu_type (v2 first) - */ -static int vfio_get_iommu_type(int container_fd, - Error **errp) -{ - int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, - VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; - int i; + QLIST_REMOVE(space, list); + g_free(space); - for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { - return iommu_types[i]; - } + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_unregister_reset(vfio_device_reset_handler, NULL); } - error_setg(errp, "No available IOMMU models"); - return -EINVAL; } -/* - * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type - */ -static const char *vfio_get_iommu_class_name(int iommu_type) +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainer *bcontainer) { - switch (iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - return TYPE_VFIO_IOMMU_LEGACY; - break; - case VFIO_SPAPR_TCE_v2_IOMMU: - case VFIO_SPAPR_TCE_IOMMU: - return TYPE_VFIO_IOMMU_SPAPR; - break; - default: - g_assert_not_reached(); - }; + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + bcontainer->space = space; } -static bool vfio_set_iommu(int container_fd, int group_fd, - int *iommu_type, Error **errp) +int vfio_container_dma_map(VFIOContainer *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly, MemoryRegion *mr) { - if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) { - error_setg_errno(errp, errno, "Failed to set group container"); - return false; - } - - while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) { - if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - 
/* - * On sPAPR, despite the IOMMU subdriver always advertises v1 and - * v2, the running platform may not support v2 and there is no - * way to guess it until an IOMMU group gets added to the container. - * So in case it fails with v2, try v1 as a fallback. - */ - *iommu_type = VFIO_SPAPR_TCE_IOMMU; - continue; - } - error_setg_errno(errp, errno, "Failed to set iommu for container"); - return false; - } - - return true; -} - -static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, - Error **errp) -{ - int iommu_type; - const char *vioc_name; - VFIOContainer *container; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + RAMBlock *rb = mr->ram_block; + int mfd = rb ? qemu_ram_get_fd(rb) : -1; - iommu_type = vfio_get_iommu_type(fd, errp); - if (iommu_type < 0) { - return NULL; - } + if (mfd >= 0 && vioc->dma_map_file) { + unsigned long start = vaddr - qemu_ram_get_host_addr(rb); + unsigned long offset = qemu_ram_get_fd_offset(rb); - /* - * During CPR, just set the container type and skip the ioctls, as the - * container and group are already configured in the kernel. - */ - if (!cpr_is_incoming() && - !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { - return NULL; + return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, + readonly); } - - vioc_name = vfio_get_iommu_class_name(iommu_type); - - container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); - container->fd = fd; - container->iommu_type = iommu_type; - return container; + g_assert(vioc->dma_map); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } -static int vfio_get_iommu_info(VFIOContainer *container, - struct vfio_iommu_type1_info **info) +int vfio_container_dma_unmap(VFIOContainer *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - size_t argsz = sizeof(struct vfio_iommu_type1_info); - - *info = g_new0(struct vfio_iommu_type1_info, 1); -again: - (*info)->argsz = argsz; - - if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { - g_free(*info); - *info = NULL; - return -errno; - } - - if (((*info)->argsz > argsz)) { - argsz = (*info)->argsz; - *info = g_realloc(*info, argsz); - goto again; - } - - return 0; + g_assert(vioc->dma_unmap); + return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all); } -static struct vfio_info_cap_header * -vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +bool vfio_container_add_section_window(VFIOContainer *bcontainer, + MemoryRegionSection *section, + Error **errp) { - struct vfio_info_cap_header *hdr; - void *ptr = info; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { - return NULL; + if (!vioc->add_window) { + return true; } - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { - if (hdr->id == id) { - return hdr; - } - } - - return NULL; + return vioc->add_window(bcontainer, section, errp); } -static void vfio_get_iommu_info_migration(VFIOContainer *container, - struct vfio_iommu_type1_info *info) +void vfio_container_del_section_window(VFIOContainer *bcontainer, + MemoryRegionSection *section) { - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_migration *cap_mig; - VFIOContainerBase *bcontainer = VFIO_IOMMU(container); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); - if (!hdr) { + if (!vioc->del_window) { return; } - cap_mig = 
container_of(hdr, struct vfio_iommu_type1_info_cap_migration, - header); - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { - bcontainer->dirty_pages_supported = true; - bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; - } + return vioc->del_window(bcontainer, section); } -static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) +int vfio_container_set_dirty_page_tracking(VFIOContainer *bcontainer, + bool start, Error **errp) { - VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); - g_autofree struct vfio_iommu_type1_info *info = NULL; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); int ret; - ret = vfio_get_iommu_info(container, &info); - if (ret) { - error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); - return false; + if (!bcontainer->dirty_pages_supported) { + return 0; } - if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { - bcontainer->pgsizes = info->iova_pgsizes; - } else { - bcontainer->pgsizes = qemu_real_host_page_size(); + g_assert(vioc->set_dirty_page_tracking); + if (bcontainer->dirty_pages_started == start) { + return 0; } - if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { - bcontainer->dma_max_mappings = 65535; + ret = vioc->set_dirty_page_tracking(bcontainer, start, errp); + if (!ret) { + bcontainer->dirty_pages_started = start; } - vfio_get_info_iova_range(info, bcontainer); - - vfio_get_iommu_info_migration(container, info); - return true; + return ret; } -static bool vfio_container_attach_discard_disable(VFIOContainer *container, - VFIOGroup *group, Error **errp) +static bool vfio_container_devices_dirty_tracking_is_started( + const VFIOContainer *bcontainer) { - int ret; + VFIODevice *vbasedev; - /* - * VFIO is currently incompatible with discarding of RAM insofar as the - * madvise to purge (zap) the page from QEMU's address space does not - * interact with the memory API and therefore leaves stale virtual to - * physical mappings in the IOMMU if the page was previously pinned. We - * therefore set discarding broken for each group added to a container, - * whether the container is used individually or shared. This provides - * us with options to allow devices within a group to opt-in and allow - * discarding, so long as it is done consistently for a group (for instance - * if the device is an mdev device where it is known that the host vendor - * driver will never pin pages outside of the working set of the guest - * driver, which would thus not be discarding candidates). - * - * The first opportunity to induce pinning occurs here where we attempt to - * attach the group to existing containers within the AddressSpace. If any - * pages are already zapped from the virtual address space, such as from - * previous discards, new pinning will cause valid mappings to be - * re-established. Likewise, when the overall MemoryListener for a new - * container is registered, a replay of mappings within the AddressSpace - * will occur, re-establishing any previously zapped pages as well. - * - * Especially virtio-balloon is currently only prevented from discarding - * new memory, it will not yet set ram_block_discard_set_required() and - * therefore, neither stops us here or deals with the sudden memory - * consumption of inflated memory. 
- * - * We do support discarding of memory coordinated via the RamDiscardManager - * with some IOMMU types. vfio_ram_block_discard_disable() handles the - * details once we know which type of IOMMU we are using. - */ - - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { - error_report("vfio: error disconnecting group %d from" - " container", group->groupid); + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_tracking) { + return false; } } - return !ret; -} -static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, - Error **errp) -{ - if (!vfio_container_attach_discard_disable(container, group, errp)) { - return false; - } - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_group_add_kvm_device(group); - /* - * Remember the container fd for each group, so we can attach to the same - * container after CPR. - */ - cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); return true; } -static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) +bool vfio_container_dirty_tracking_is_started( + const VFIOContainer *bcontainer) { - QLIST_REMOVE(group, container_next); - group->container = NULL; - vfio_group_del_kvm_device(group); - vfio_ram_block_discard_disable(container, false); - cpr_delete_fd("vfio_container_for_group", group->groupid); + return vfio_container_devices_dirty_tracking_is_started(bcontainer) || + bcontainer->dirty_pages_started; } -static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, - Error **errp) +bool vfio_container_devices_dirty_tracking_is_supported( + const VFIOContainer *bcontainer) { - VFIOContainer *container; - VFIOContainerBase *bcontainer; - int ret, fd = -1; - VFIOAddressSpace *space; - VFIOIOMMUClass *vioc = NULL; - bool new_container = false; - bool group_was_added = false; - - space = vfio_address_space_get(as); - fd = cpr_find_fd("vfio_container_for_group", group->groupid); - - if (!cpr_is_incoming()) { - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = VFIO_IOMMU_LEGACY(bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - return vfio_container_group_add(container, group, errp); - } - } + VFIODevice *vbasedev; - fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); - if (fd < 0) { - goto fail; + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) { + return false; } - } else { - /* - * For incoming CPR, the group is already attached in the kernel. - * If a container with matching fd is found, then update the - * userland group list and return. If not, then after the loop, - * create the container struct and group list. 
- */ - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = VFIO_IOMMU_LEGACY(bcontainer); - - if (vfio_cpr_container_match(container, group, fd)) { - return vfio_container_group_add(container, group, errp); - } - } - } - - ret = ioctl(fd, VFIO_GET_API_VERSION); - if (ret != VFIO_API_VERSION) { - error_setg(errp, "supported vfio version: %d, " - "reported version: %d", VFIO_API_VERSION, ret); - goto fail; - } - - container = vfio_create_container(fd, group, errp); - if (!container) { - goto fail; - } - new_container = true; - bcontainer = VFIO_IOMMU(container); - - if (!vfio_legacy_cpr_register_container(container, errp)) { - goto fail; - } - - vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - assert(vioc->setup); - - if (!vioc->setup(bcontainer, errp)) { - goto fail; - } - - vfio_address_space_insert(space, bcontainer); - - if (!vfio_container_group_add(container, group, errp)) { - goto fail; - } - group_was_added = true; - - /* - * If CPR, register the listener later, after all state that may - * affect regions and mapping boundaries has been cpr load'ed. Later, - * the listener will invoke its callback on each flat section and call - * dma_map to supply the new vaddr, and the calls will match the mappings - * remembered by the kernel. - */ - if (!cpr_is_incoming()) { - if (!vfio_listener_register(bcontainer, errp)) { - goto fail; + if (!vbasedev->dirty_pages_supported) { + return false; } } - bcontainer->initialized = true; - return true; - -fail: - if (new_container) { - vfio_listener_unregister(bcontainer); - } - - if (group_was_added) { - vfio_container_group_del(container, group); - } - if (vioc && vioc->release) { - vioc->release(bcontainer); - } - if (new_container) { - vfio_legacy_cpr_unregister_container(container); - object_unref(container); - } - if (fd >= 0) { - close(fd); - } - vfio_address_space_put(space); - - return false; -} - -static void vfio_container_disconnect(VFIOGroup *group) -{ - VFIOContainer *container = group->container; - VFIOContainerBase *bcontainer = VFIO_IOMMU(container); - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - - QLIST_REMOVE(group, container_next); - group->container = NULL; - cpr_delete_fd("vfio_container_for_group", group->groupid); - - /* - * Explicitly release the listener first before unset container, - * since unset may destroy the backend container if it's the last - * group. - */ - if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_unregister(bcontainer); - if (vioc->release) { - vioc->release(bcontainer); - } - } - - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { - error_report("vfio: error disconnecting group %d from container", - group->groupid); - } - - if (QLIST_EMPTY(&container->group_list)) { - VFIOAddressSpace *space = bcontainer->space; - - trace_vfio_container_disconnect(container->fd); - vfio_legacy_cpr_unregister_container(container); - close(container->fd); - object_unref(container); - - vfio_address_space_put(space); - } } -static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) +static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + hwaddr size, void *bitmap) { - ERRP_GUARD(); - VFIOGroup *group; - char path[32]; - struct vfio_group_status status = { .argsz = sizeof(status) }; - - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? 
*/ - if (VFIO_IOMMU(group->container)->space->as == as) { - return group; - } else { - error_setg(errp, "group %d used in multiple address spaces", - group->groupid); - return NULL; - } - } - } + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_dma_logging_report), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_dma_logging_report *report = + (struct vfio_device_feature_dma_logging_report *)feature->data; - group = g_malloc0(sizeof(*group)); + report->iova = iova; + report->length = size; + report->page_size = qemu_real_host_page_size(); + report->bitmap = (uintptr_t)bitmap; - snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); - if (group->fd < 0) { - goto free_group_exit; - } + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | + VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { - error_setg_errno(errp, errno, "failed to get group %d status", groupid); - goto close_fd_exit; - } - - if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - error_setg(errp, "group %d is not viable", groupid); - error_append_hint(errp, - "Please ensure all devices within the iommu_group " - "are bound to their vfio bus driver.\n"); - goto close_fd_exit; - } - - group->groupid = groupid; - QLIST_INIT(&group->device_list); - - if (!vfio_container_connect(group, as, errp)) { - error_prepend(errp, "failed to setup container for group %d: ", - groupid); - goto close_fd_exit; - } - - QLIST_INSERT_HEAD(&vfio_group_list, group, next); - - return group; - -close_fd_exit: - cpr_delete_fd("vfio_group", groupid); - close(group->fd); - -free_group_exit: - g_free(group); - - return NULL; -} - -static void vfio_group_put(VFIOGroup *group) -{ - if (!group || !QLIST_EMPTY(&group->device_list)) { - return; - } - - if (!group->ram_block_discard_allowed) { - vfio_ram_block_discard_disable(group->container, false); - } - vfio_group_del_kvm_device(group); - vfio_container_disconnect(group); - QLIST_REMOVE(group, next); - trace_vfio_group_put(group->fd); - cpr_delete_fd("vfio_group", group->groupid); - close(group->fd); - g_free(group); -} - -static bool vfio_device_get(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp) -{ - g_autofree struct vfio_device_info *info = NULL; - int fd; - - fd = vfio_cpr_group_get_device_fd(group->fd, name); - if (fd < 0) { - error_setg_errno(errp, errno, "error getting device from group %d", - group->groupid); - error_append_hint(errp, - "Verify all devices in group %d are bound to vfio-<bus> " - "or pci-stub and not already in use\n", group->groupid); - return false; - } - - info = vfio_get_device_info(fd); - if (!info) { - error_setg_errno(errp, errno, "error getting device info"); - goto fail; - } - - /* - * Set discarding of RAM as not broken for this group if the driver knows - * the device operates compatibly with discarding. Setting must be - * consistent per group, but since compatibility is really only possible - * with mdev currently, we expect singleton groups. 
- */ - if (vbasedev->ram_block_discard_allowed != - group->ram_block_discard_allowed) { - if (!QLIST_EMPTY(&group->device_list)) { - error_setg(errp, "Inconsistent setting of support for discarding " - "RAM (e.g., balloon) within group"); - goto fail; - } - - if (!group->ram_block_discard_allowed) { - group->ram_block_discard_allowed = true; - vfio_ram_block_discard_disable(group->container, false); - } - } - - vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info); - - vbasedev->fd = fd; - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - - trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); - - return true; - -fail: - close(fd); - cpr_delete_fd(name, 0); - return false; -} - -static void vfio_device_put(VFIODevice *vbasedev) -{ - if (!vbasedev->group) { - return; - } - QLIST_REMOVE(vbasedev, next); - vbasedev->group = NULL; - trace_vfio_device_put(vbasedev->fd); - cpr_delete_fd(vbasedev->name, 0); - close(vbasedev->fd); + return vbasedev->io_ops->device_feature(vbasedev, feature); } -static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp) +static int vfio_container_iommu_query_dirty_bitmap( + const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) { - char *tmp, group_path[PATH_MAX]; - g_autofree char *group_name = NULL; - int ret, groupid; - ssize_t len; - - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); - len = readlink(tmp, group_path, sizeof(group_path)); - g_free(tmp); - - if (len <= 0 || len >= sizeof(group_path)) { - ret = len < 0 ? -errno : -ENAMETOOLONG; - error_setg_errno(errp, -ret, "no iommu_group found"); - return ret; - } - - group_path[len] = 0; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - group_name = g_path_get_basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - return -errno; - } - return groupid; + g_assert(vioc->query_dirty_bitmap); + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, + errp); } -/* - * vfio_device_attach: attach a device to a security context - * @name and @vbasedev->name are likely to be different depending - * on the type of the device, hence the need for passing @name - */ -static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) +static int vfio_container_devices_query_dirty_bitmap( + const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) { - int groupid = vfio_device_get_groupid(vbasedev, errp); - VFIODevice *vbasedev_iter; - VFIOGroup *group; - - if (groupid < 0) { - return false; - } - - trace_vfio_device_attach(vbasedev->name, groupid); - - group = vfio_group_get(groupid, as, errp); - if (!group) { - return false; - } - - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_setg(errp, "device is already attached"); - goto group_put_exit; - } - } - if (!vfio_device_get(group, name, vbasedev, errp)) { - goto group_put_exit; - } + VFIODevice *vbasedev; + int ret; - if (!vfio_device_hiod_create_and_realize(vbasedev, - TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, - errp)) { - goto device_put_exit; - } + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + ret = vfio_device_dma_logging_report(vbasedev, iova, size, + vbmap->bitmap); + if (ret) { + error_setg_errno(errp, -ret, + "%s: Failed to get DMA logging report, iova: " + "0x%" 
HWADDR_PRIx ", size: 0x%" HWADDR_PRIx, + vbasedev->name, iova, size); - if (vbasedev->mdev) { - error_setg(&vbasedev->cpr.mdev_blocker, - "CPR does not support vfio mdev %s", vbasedev->name); - if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp, - MIG_MODE_CPR_TRANSFER, -1) < 0) { - goto hiod_unref_exit; + return ret; } } - return true; - -hiod_unref_exit: - object_unref(vbasedev->hiod); -device_put_exit: - vfio_device_put(vbasedev); -group_put_exit: - vfio_group_put(group); - return false; + return 0; } -static void vfio_legacy_detach_device(VFIODevice *vbasedev) +int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer, + uint64_t iova, uint64_t size, + ram_addr_t ram_addr, Error **errp) { - VFIOGroup *group = vbasedev->group; - - trace_vfio_device_detach(vbasedev->name, group->groupid); - - vfio_device_unprepare(vbasedev); - - migrate_del_blocker(&vbasedev->cpr.mdev_blocker); - object_unref(vbasedev->hiod); - vfio_device_put(vbasedev); - vfio_group_put(group); -} + bool all_device_dirty_tracking = + vfio_container_devices_dirty_tracking_is_supported(bcontainer); + uint64_t dirty_pages; + VFIOBitmap vbmap; + int ret; -static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) -{ - VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); - VFIOGroup *group; - struct vfio_pci_hot_reset_info *info = NULL; - struct vfio_pci_dependent_device *devices; - struct vfio_pci_hot_reset *reset; - int32_t *fds; - int ret, i, count; - bool multi = false; - - trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); - - if (!single) { - vfio_pci_pre_reset(vdev); + if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); + return 0; } - vdev->vbasedev.needs_reset = false; - - ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); + ret = vfio_bitmap_alloc(&vbmap, size); if (ret) { - goto out_single; - } - devices = &info->devices[0]; - - trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); - - /* Verify that we have all the groups required */ - for (i = 0; i < info->count; i++) { - PCIHostDeviceAddress host; - VFIOPCIDevice *tmp; - VFIODevice *vbasedev_iter; - - host.domain = devices[i].segment; - host.bus = devices[i].bus; - host.slot = PCI_SLOT(devices[i].devfn); - host.function = PCI_FUNC(devices[i].devfn); - - trace_vfio_pci_hot_reset_dep_devices(host.domain, - host.bus, host.slot, host.function, devices[i].group_id); - - if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { - continue; - } - - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == devices[i].group_id) { - break; - } - } - - if (!group) { - if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %s, " - "depends on group %d which is not owned.", - vdev->vbasedev.name, devices[i].group_id); - } - ret = -EPERM; - goto out; - } - - /* Prep dependent devices for reset and clear our marker. 
*/ - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (!vbasedev_iter->dev->realized || - !vfio_pci_from_vfio_device(vbasedev_iter)) { - continue; - } - tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { - if (single) { - ret = -EINVAL; - goto out_single; - } - vfio_pci_pre_reset(tmp); - tmp->vbasedev.needs_reset = false; - multi = true; - break; - } - } - } - - if (!single && !multi) { - ret = -EINVAL; - goto out_single; - } - - /* Determine how many group fds need to be passed */ - count = 0; - QLIST_FOREACH(group, &vfio_group_list, next) { - for (i = 0; i < info->count; i++) { - if (group->groupid == devices[i].group_id) { - count++; - break; - } - } + error_setg_errno(errp, -ret, + "Failed to allocate dirty tracking bitmap"); + return ret; } - reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); - reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); - fds = &reset->group_fds[0]; - - /* Fill in group fds */ - QLIST_FOREACH(group, &vfio_group_list, next) { - for (i = 0; i < info->count; i++) { - if (group->groupid == devices[i].group_id) { - fds[reset->count++] = group->fd; - break; - } - } + if (all_device_dirty_tracking) { + ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size, + errp); + } else { + ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size, + errp); } - /* Bus reset! */ - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); - g_free(reset); if (ret) { - ret = -errno; + goto out; } - trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, - ret ? strerror(errno) : "Success"); + dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, + vbmap.pages); + trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr, + dirty_pages); out: - /* Re-enable INTx on affected devices */ - for (i = 0; i < info->count; i++) { - PCIHostDeviceAddress host; - VFIOPCIDevice *tmp; - VFIODevice *vbasedev_iter; - - host.domain = devices[i].segment; - host.bus = devices[i].bus; - host.slot = PCI_SLOT(devices[i].devfn); - host.function = PCI_FUNC(devices[i].devfn); - - if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { - continue; - } - - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == devices[i].group_id) { - break; - } - } - - if (!group) { - break; - } - - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (!vbasedev_iter->dev->realized || - !vfio_pci_from_vfio_device(vbasedev_iter)) { - continue; - } - tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { - vfio_pci_post_reset(tmp); - break; - } - } - } -out_single: - if (!single) { - vfio_pci_post_reset(vdev); - } - g_free(info); + g_free(vbmap.bitmap); return ret; } -static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data) +static gpointer copy_iova_range(gconstpointer src, gpointer data) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); - - vioc->setup = vfio_legacy_setup; - vioc->dma_map = vfio_legacy_dma_map; - vioc->dma_unmap = vfio_legacy_dma_unmap; - vioc->attach_device = vfio_legacy_attach_device; - vioc->detach_device = vfio_legacy_detach_device; - vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; - vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; - vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; -}; - -static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, - Error **errp) -{ - 
VFIODevice *vdev = opaque;
+    Range *source = (Range *)src;
+    Range *dest = g_new(Range, 1);
 
-    hiod->name = g_strdup(vdev->name);
-    hiod->agent = opaque;
-
-    return true;
+    range_set_bounds(dest, range_lob(source), range_upb(source));
+    return dest;
 }
 
-static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
-                                    Error **errp)
+GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer)
 {
-    switch (cap) {
-    case HOST_IOMMU_DEVICE_CAP_AW_BITS:
-        return vfio_device_get_aw_bits(hiod->agent);
-    default:
-        error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
-        return -EINVAL;
-    }
+    assert(bcontainer);
+    return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
 }
 
-static GList *
-hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
+static void vfio_container_instance_finalize(Object *obj)
 {
-    VFIODevice *vdev = hiod->agent;
+    VFIOContainer *bcontainer = VFIO_IOMMU(obj);
+    VFIOGuestIOMMU *giommu, *tmp;
 
-    g_assert(vdev);
-    return vfio_container_get_iova_ranges(vdev->bcontainer);
-}
+    QLIST_SAFE_REMOVE(bcontainer, next);
 
-static uint64_t
-hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
-{
-    VFIODevice *vdev = hiod->agent;
+    QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
+        memory_region_unregister_iommu_notifier(
+                MEMORY_REGION(giommu->iommu_mr), &giommu->n);
+        QLIST_REMOVE(giommu, giommu_next);
+        g_free(giommu);
+    }
 
-    g_assert(vdev);
-    return vfio_container_get_page_size_mask(vdev->bcontainer);
+    g_list_free_full(bcontainer->iova_ranges, g_free);
 }
 
-static void vfio_iommu_legacy_instance_init(Object *obj)
+static void vfio_container_instance_init(Object *obj)
 {
-    VFIOContainer *container = VFIO_IOMMU_LEGACY(obj);
+    VFIOContainer *bcontainer = VFIO_IOMMU(obj);
 
-    QLIST_INIT(&container->group_list);
+    bcontainer->error = NULL;
+    bcontainer->dirty_pages_supported = false;
+    bcontainer->dma_max_mappings = 0;
+    bcontainer->iova_ranges = NULL;
+    QLIST_INIT(&bcontainer->giommu_list);
+    QLIST_INIT(&bcontainer->vrdl_list);
 }
 
-static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data)
-{
-    HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
-
-    hioc->realize = hiod_legacy_vfio_realize;
-    hioc->get_cap = hiod_legacy_vfio_get_cap;
-    hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
-    hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask;
-};
-
 static const TypeInfo types[] = {
     {
-        .name = TYPE_VFIO_IOMMU_LEGACY,
-        .parent = TYPE_VFIO_IOMMU,
-        .instance_init = vfio_iommu_legacy_instance_init,
+        .name = TYPE_VFIO_IOMMU,
+        .parent = TYPE_OBJECT,
+        .instance_init = vfio_container_instance_init,
+        .instance_finalize = vfio_container_instance_finalize,
         .instance_size = sizeof(VFIOContainer),
-        .class_init = vfio_iommu_legacy_class_init,
-    }, {
-        .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
-        .parent = TYPE_HOST_IOMMU_DEVICE,
-        .class_init = hiod_legacy_vfio_class_init,
-    }
+        .class_size = sizeof(VFIOIOMMUClass),
+        .abstract = true,
+    },
 };
 
 DEFINE_TYPES(types)
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
index 148a06d..1d70c87 100644
--- a/hw/vfio/cpr-iommufd.c
+++ b/hw/vfio/cpr-iommufd.c
@@ -176,7 +176,7 @@ void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
 bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
                                          Error **errp)
 {
-    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOContainer *bcontainer = VFIO_IOMMU(container);
 
     migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
                                 vfio_cpr_reboot_notifier,
@@ -189,7 +189,7 @@ bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
 
 void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
 {
-    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOContainer *bcontainer = VFIO_IOMMU(container);
 
     migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
 }
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index 8f43719..bbf7a0d 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -7,7 +7,7 @@
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
 #include "qemu/osdep.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
 #include "hw/vfio/vfio-device.h"
 #include "hw/vfio/vfio-listener.h"
 #include "migration/blocker.h"
@@ -17,7 +17,8 @@
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 
-static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
+static bool vfio_dma_unmap_vaddr_all(VFIOLegacyContainer *container,
+                                     Error **errp)
 {
     struct vfio_iommu_type1_dma_unmap unmap = {
         .argsz = sizeof(unmap),
@@ -37,11 +38,11 @@ static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
  * Set the new @vaddr for any mappings registered during cpr load.
  * The incoming state is cleared thereafter.
  */
-static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
+static int vfio_legacy_cpr_dma_map(const VFIOContainer *bcontainer,
                                    hwaddr iova, ram_addr_t size, void *vaddr,
                                    bool readonly, MemoryRegion *mr)
 {
-    const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+    const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
 
     struct vfio_iommu_type1_dma_map map = {
         .argsz = sizeof(map),
@@ -63,12 +64,13 @@ static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
 static void vfio_region_remap(MemoryListener *listener,
                               MemoryRegionSection *section)
 {
-    VFIOContainer *container = container_of(listener, VFIOContainer,
-                                            cpr.remap_listener);
+    VFIOLegacyContainer *container = container_of(listener,
+                                                  VFIOLegacyContainer,
+                                                  cpr.remap_listener);
     vfio_container_region_add(VFIO_IOMMU(container), section, true);
 }
 
-static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
+static bool vfio_cpr_supported(VFIOLegacyContainer *container, Error **errp)
 {
     if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
         error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
@@ -85,7 +87,7 @@ static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
 
 static int vfio_container_pre_save(void *opaque)
 {
-    VFIOContainer *container = opaque;
+    VFIOLegacyContainer *container = opaque;
     Error *local_err = NULL;
 
     if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
@@ -97,8 +99,8 @@ static int vfio_container_pre_save(void *opaque)
 
 static int vfio_container_post_load(void *opaque, int version_id)
 {
-    VFIOContainer *container = opaque;
-    VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+    VFIOLegacyContainer *container = opaque;
+    VFIOContainer *bcontainer = VFIO_IOMMU(container);
     VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
     dma_map_fn saved_dma_map = vioc->dma_map;
     Error *local_err = NULL;
@@ -133,9 +135,9 @@ static const VMStateDescription vfio_container_vmstate = {
 static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
                                   MigrationEvent *e, Error **errp)
 {
-    VFIOContainer *container =
-        container_of(notifier, VFIOContainer, cpr.transfer_notifier);
-    VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+    VFIOLegacyContainer *container =
+        container_of(notifier, 
VFIOLegacyContainer, cpr.transfer_notifier); + VFIOContainer *bcontainer = VFIO_IOMMU(container); if (e->type != MIG_EVENT_PRECOPY_FAILED) { return 0; @@ -165,9 +167,10 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, return 0; } -bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) +bool vfio_legacy_cpr_register_container(VFIOLegacyContainer *container, + Error **errp) { - VFIOContainerBase *bcontainer = VFIO_IOMMU(container); + VFIOContainer *bcontainer = VFIO_IOMMU(container); Error **cpr_blocker = &container->cpr.blocker; migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, @@ -189,9 +192,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) return true; } -void vfio_legacy_cpr_unregister_container(VFIOContainer *container) +void vfio_legacy_cpr_unregister_container(VFIOLegacyContainer *container) { - VFIOContainerBase *bcontainer = VFIO_IOMMU(container); + VFIOContainer *bcontainer = VFIO_IOMMU(container); migration_remove_notifier(&bcontainer->cpr_reboot_notifier); migrate_del_blocker(&container->cpr.blocker); @@ -207,7 +210,7 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container) * The giommu already exists. Find it and replay it, which calls * vfio_legacy_cpr_dma_map further down the stack. */ -void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer, +void vfio_cpr_giommu_remap(VFIOContainer *bcontainer, MemoryRegionSection *section) { VFIOGuestIOMMU *giommu = NULL; @@ -232,7 +235,7 @@ void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer, * The ram discard listener already exists. Call its populate function * directly, which calls vfio_legacy_cpr_dma_map. */ -bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer, +bool vfio_cpr_ram_discard_register_listener(VFIOContainer *bcontainer, MemoryRegionSection *section) { VFIORamDiscardListener *vrdl = @@ -263,7 +266,7 @@ static bool same_device(int fd1, int fd2) return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev; } -bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group, +bool vfio_cpr_container_match(VFIOLegacyContainer *container, VFIOGroup *group, int fd) { if (container->fd == fd) { diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 08f12ac..64f8750 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -423,7 +423,7 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev, VFIODevice *vfio_get_vfio_device(Object *obj) { if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { - return &VFIO_PCI_BASE(obj)->vbasedev; + return &VFIO_PCI_DEVICE(obj)->vbasedev; } else { return NULL; } @@ -460,7 +460,7 @@ void vfio_device_detach(VFIODevice *vbasedev) VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } -void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, +void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer, struct vfio_device_info *info) { int i; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 8c27222..f0ffe23 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -34,36 +34,33 @@ #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" -static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, +static int iommufd_cdev_map(const VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr) { - const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + const 
VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer); return iommufd_backend_map_dma(container->be, container->ioas_id, iova, size, vaddr, readonly); } -static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, +static int iommufd_cdev_map_file(const VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, int fd, unsigned long start, bool readonly) { - const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer); return iommufd_backend_map_file_dma(container->be, container->ioas_id, iova, size, fd, start, readonly); } -static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, +static int iommufd_cdev_unmap(const VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) { - const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer); /* unmap in halves */ if (unmap_all) { @@ -159,11 +156,10 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt) return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } -static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, +static int iommufd_set_dirty_page_tracking(const VFIOContainer *bcontainer, bool start, Error **errp) { - const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer); VFIOIOASHwpt *hwpt; QLIST_FOREACH(hwpt, &container->hwpt_list, next) { @@ -190,13 +186,11 @@ err: return -EINVAL; } -static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer, +static int iommufd_query_dirty_bitmap(const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { - VFIOIOMMUFDContainer *container = container_of(bcontainer, - VFIOIOMMUFDContainer, - bcontainer); + VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer); unsigned long page_size = qemu_real_host_page_size(); VFIOIOASHwpt *hwpt; @@ -324,6 +318,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, { ERRP_GUARD(); IOMMUFDBackend *iommufd = vbasedev->iommufd; + VFIOContainer *bcontainer = VFIO_IOMMU(container); uint32_t type, flags = 0; uint64_t hw_caps; VFIOIOASHwpt *hwpt; @@ -408,9 +403,9 @@ skip_alloc: vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); - container->bcontainer.dirty_pages_supported |= + bcontainer->dirty_pages_supported |= vbasedev->iommu_dirty_tracking; - if (container->bcontainer.dirty_pages_supported && + if (bcontainer->dirty_pages_supported && !vbasedev->iommu_dirty_tracking) { warn_report("IOMMU instance for device %s doesn't support dirty tracking", vbasedev->name); @@ -464,7 +459,7 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev, static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) { - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainer *bcontainer = VFIO_IOMMU(container); if (!QLIST_EMPTY(&bcontainer->device_list)) { return; @@ -486,7 +481,7 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, uint32_t ioas_id, Error **errp) { - VFIOContainerBase *bcontainer = 
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index e093833..3b6f17f 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -52,7 +52,7 @@
  */
 
-static bool vfio_log_sync_needed(const VFIOContainerBase *bcontainer)
+static bool vfio_log_sync_needed(const VFIOContainer *bcontainer)
 {
     VFIODevice *vbasedev;
 
@@ -125,7 +125,7 @@ static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
 static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
-    VFIOContainerBase *bcontainer = giommu->bcontainer;
+    VFIOContainer *bcontainer = giommu->bcontainer;
     hwaddr iova = iotlb->iova + giommu->iommu_offset;
     MemoryRegion *mr;
     hwaddr xlat;
@@ -202,7 +202,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
 {
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
+    VFIOContainer *bcontainer = vrdl->bcontainer;
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
     int ret;
@@ -220,7 +220,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
 {
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
+    VFIOContainer *bcontainer = vrdl->bcontainer;
     const hwaddr end = section->offset_within_region +
                        int128_get64(section->size);
     hwaddr start, next, iova;
@@ -250,7 +250,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
     return 0;
 }
 
-static bool vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
+static bool vfio_ram_discard_register_listener(VFIOContainer *bcontainer,
                                                MemoryRegionSection *section,
                                                Error **errp)
 {
@@ -328,7 +328,7 @@ static bool vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
     return true;
 }
 
-static void vfio_ram_discard_unregister_listener(VFIOContainerBase *bcontainer,
+static void vfio_ram_discard_unregister_listener(VFIOContainer *bcontainer,
                                                  MemoryRegionSection *section)
 {
     RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
@@ -396,7 +396,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section,
     return true;
 }
 
-static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
+static bool vfio_get_section_iova_range(VFIOContainer *bcontainer,
                                         MemoryRegionSection *section,
                                         hwaddr *out_iova, hwaddr *out_end,
                                         Int128 *out_llend)
@@ -423,9 +423,9 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
 
 static void vfio_listener_begin(MemoryListener *listener)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
-    void (*listener_begin)(VFIOContainerBase *bcontainer);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
+    void (*listener_begin)(VFIOContainer *bcontainer);
 
     listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
 
@@ -436,9 +436,9 @@ static void vfio_listener_begin(MemoryListener *listener)
 
 static void vfio_listener_commit(MemoryListener *listener)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
-    void (*listener_commit)(VFIOContainerBase *bcontainer);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
+    void (*listener_commit)(VFIOContainer *bcontainer);
 
     listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit;
 
@@ -460,7 +460,7 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
 }
 
 VFIORamDiscardListener *vfio_find_ram_discard_listener(
-    VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+    VFIOContainer *bcontainer, MemoryRegionSection *section)
 {
     VFIORamDiscardListener *vrdl = NULL;
 
@@ -482,12 +482,12 @@ VFIORamDiscardListener *vfio_find_ram_discard_listener(
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
     vfio_container_region_add(bcontainer, section, false);
 }
 
-void vfio_container_region_add(VFIOContainerBase *bcontainer,
+void vfio_container_region_add(VFIOContainer *bcontainer,
                                MemoryRegionSection *section,
                                bool cpr_remap)
 {
@@ -656,8 +656,8 @@ fail:
 static void vfio_listener_region_del(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
     hwaddr iova, end;
     Int128 llend, llsize;
     int ret;
@@ -744,13 +744,13 @@ typedef struct VFIODirtyRanges {
 } VFIODirtyRanges;
 
 typedef struct VFIODirtyRangesListener {
-    VFIOContainerBase *bcontainer;
+    VFIOContainer *bcontainer;
     VFIODirtyRanges ranges;
     MemoryListener listener;
 } VFIODirtyRangesListener;
 
 static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
-                                     VFIOContainerBase *bcontainer)
+                                     VFIOContainer *bcontainer)
 {
     VFIOPCIDevice *pcidev;
     VFIODevice *vbasedev;
@@ -835,7 +835,7 @@ static const MemoryListener vfio_dirty_tracking_listener = {
     .region_add = vfio_dirty_tracking_update,
 };
-static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
+static void vfio_dirty_tracking_init(VFIOContainer *bcontainer,
                                      VFIODirtyRanges *ranges)
 {
     VFIODirtyRangesListener dirty;
 
@@ -860,7 +860,7 @@ static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
     memory_listener_unregister(&dirty.listener);
 }
 
-static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
+static void vfio_devices_dma_logging_stop(VFIOContainer *bcontainer)
 {
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
                               sizeof(uint64_t))] = {};
@@ -889,7 +889,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
 }
 
 static struct vfio_device_feature *
-vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
+vfio_device_feature_dma_logging_start_create(VFIOContainer *bcontainer,
                                              VFIODirtyRanges *tracking)
 {
     struct vfio_device_feature *feature;
@@ -962,7 +962,7 @@ static void vfio_device_feature_dma_logging_start_destroy(
     g_free(feature);
 }
 
-static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+static bool vfio_devices_dma_logging_start(VFIOContainer *bcontainer,
                                            Error **errp)
 {
     struct vfio_device_feature *feature;
@@ -1006,8 +1006,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
                                            Error **errp)
 {
     ERRP_GUARD();
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
     bool ret;
 
     if (vfio_container_devices_dirty_tracking_is_supported(bcontainer)) {
@@ -1024,8 +1024,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
 
 static void vfio_listener_log_global_stop(MemoryListener *listener)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
     Error *local_err = NULL;
     int ret = 0;
 
@@ -1057,7 +1057,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     vfio_giommu_dirty_notifier *gdn = container_of(n,
                                                    vfio_giommu_dirty_notifier, n);
     VFIOGuestIOMMU *giommu = gdn->giommu;
-    VFIOContainerBase *bcontainer = giommu->bcontainer;
+    VFIOContainer *bcontainer = giommu->bcontainer;
     hwaddr iova = iotlb->iova + giommu->iommu_offset;
     ram_addr_t translated_addr;
     Error *local_err = NULL;
@@ -1127,7 +1127,7 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
 }
 
 static int
-vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *bcontainer,
                                             MemoryRegionSection *section)
 {
     RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
@@ -1143,7 +1143,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
                                                 &vrdl);
 }
 
-static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *bcontainer,
                                         MemoryRegionSection *section)
 {
     VFIOGuestIOMMU *giommu;
@@ -1180,7 +1180,7 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
     return 0;
 }
 
-static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_dirty_bitmap(VFIOContainer *bcontainer,
                                   MemoryRegionSection *section, Error **errp)
 {
     ram_addr_t ram_addr;
@@ -1209,8 +1209,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
 static void vfio_listener_log_sync(MemoryListener *listener,
                                    MemoryRegionSection *section)
 {
-    VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
-                                                 listener);
+    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+                                             listener);
     int ret;
     Error *local_err = NULL;
 
@@ -1241,7 +1241,7 @@ static const MemoryListener vfio_memory_listener = {
     .log_sync = vfio_listener_log_sync,
 };
 
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp)
 {
     bcontainer->listener = vfio_memory_listener;
     memory_listener_register(&bcontainer->listener, bcontainer->space->as);
@@ -1255,7 +1255,7 @@ bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
     return true;
 }
 
-void vfio_listener_unregister(VFIOContainerBase *bcontainer)
+void vfio_listener_unregister(VFIOContainer *bcontainer)
 {
     memory_listener_unregister(&bcontainer->listener);
 }
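Worth noting on the listener.c hunks as a whole: vfio_listener_register() copies the static vfio_memory_listener template into the container and attaches it to the container's address space, and every callback then recovers the owning container with container_of(), since the MemoryListener is embedded in VFIOContainer rather than handed an opaque pointer. A condensed sketch of that round trip, with hypothetical example_* names and include paths assumed from this tree:

#include "qemu/osdep.h"
#include "system/memory.h"              /* MemoryListener, assumed path */
#include "hw/vfio/vfio-container.h"     /* VFIOContainer, as renamed here */

static void example_region_add(MemoryListener *listener,
                               MemoryRegionSection *section)
{
    /* Recover the embedding container from the listener field. */
    VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
                                             listener);
    (void)bcontainer;
}

static const MemoryListener example_listener = {
    .name = "example",
    .region_add = example_region_add,
};

/* Registration mirrors vfio_listener_register(): copy the template
 * into the container, then attach it to the container's AddressSpace. */
static void example_register(VFIOContainer *bcontainer)
{
    bcontainer->listener = example_listener;
    memory_listener_register(&bcontainer->listener, bcontainer->space->as);
}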
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index d3ed3cb..82f6869 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -3,8 +3,8 @@ vfio_ss = ss.source_set()
 vfio_ss.add(files(
   'listener.c',
-  'container-base.c',
   'container.c',
+  'container-legacy.c',
   'helpers.c',
 ))
 vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index c97606d..b5da6af 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1159,15 +1159,12 @@ void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
 
 void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
 {
-    int i, j;
+    int i;
 
     for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
         while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
             VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
             QLIST_REMOVE(quirk, next);
-            for (j = 0; j < quirk->nr_mem; j++) {
-                object_unparent(OBJECT(&quirk->mem[j]));
-            }
             g_free(quirk->mem);
             g_free(quirk->data);
             g_free(quirk);
@@ -1207,14 +1204,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
 void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
-    int i;
 
     while (!QLIST_EMPTY(&bar->quirks)) {
         VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
         QLIST_REMOVE(quirk, next);
-        for (i = 0; i < quirk->nr_mem; i++) {
-            object_unparent(OBJECT(&quirk->mem[i]));
-        }
         g_free(quirk->mem);
         g_free(quirk->data);
         g_free(quirk);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d14e96b..5b022da 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -305,7 +305,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
 
 static void vfio_intx_routing_notifier(PCIDevice *pdev)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     PCIINTxRoute route;
 
     if (vdev->interrupt != VFIO_INT_INTx) {
@@ -660,7 +660,7 @@ void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr)
 static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
                                    MSIMessage *msg, IOHandler *handler)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIOMSIVector *vector;
     int ret;
     bool resizing = !!(vdev->nr_vectors < nr + 1);
@@ -755,7 +755,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
 
 static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIOMSIVector *vector = &vdev->msi_vectors[nr];
 
     trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
@@ -1346,7 +1346,7 @@ static const MemoryRegionOps vfio_vga_ops = {
  */
 static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIORegion *region = &vdev->bars[bar].region;
     MemoryRegion *mmap_mr, *region_mr, *base_mr;
     PCIIORegion *r;
@@ -1392,7 +1392,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
  */
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
     uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
 
@@ -1426,7 +1426,7 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
 void vfio_pci_write_config(PCIDevice *pdev,
                            uint32_t addr, uint32_t val, int len)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
     uint32_t val_le = cpu_to_le32(val);
     int ret;
@@ -2025,7 +2025,6 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
         vfio_region_finalize(&bar->region);
         if (bar->mr) {
             assert(bar->size);
-            object_unparent(OBJECT(bar->mr));
             g_free(bar->mr);
             bar->mr = NULL;
         }
@@ -2033,9 +2032,6 @@
 
     if (vdev->vga) {
         vfio_vga_quirk_finalize(vdev);
-        for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
-            object_unparent(OBJECT(&vdev->vga->region[i].mem));
-        }
         g_free(vdev->vga);
     }
 }
@@ -3396,7 +3392,7 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
 static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
 {
     ERRP_GUARD();
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
     int i;
     char uuid[UUID_STR_LEN];
@@ -3554,16 +3550,16 @@ error:
     error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
 }
 
-static void vfio_instance_finalize(Object *obj)
+static void vfio_pci_finalize(Object *obj)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
 
     vfio_pci_put_device(vdev);
 }
 
 static void vfio_exitfn(PCIDevice *pdev)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
     VFIODevice *vbasedev = &vdev->vbasedev;
 
     vfio_unregister_req_notifier(vdev);
@@ -3587,7 +3583,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 
 static void vfio_pci_reset(DeviceState *dev)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev);
 
     /* Do not reset the device during qemu_system_reset prior to cpr load */
     if (cpr_is_incoming()) {
@@ -3629,10 +3625,10 @@ post_reset:
     vfio_pci_post_reset(vdev);
 }
 
-static void vfio_instance_init(Object *obj)
+static void vfio_pci_init(Object *obj)
 {
     PCIDevice *pci_dev = PCI_DEVICE(obj);
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
     VFIODevice *vbasedev = &vdev->vbasedev;
 
     device_add_bootindex_property(obj, &vdev->bootindex,
@@ -3660,7 +3656,7 @@ static void vfio_instance_init(Object *obj)
     pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR;
 }
 
-static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_device_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
@@ -3672,12 +3668,12 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
     pdc->config_write = vfio_pci_write_config;
 }
 
-static const TypeInfo vfio_pci_base_dev_info = {
-    .name = TYPE_VFIO_PCI_BASE,
+static const TypeInfo vfio_pci_device_info = {
+    .name = TYPE_VFIO_PCI_DEVICE,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(VFIOPCIDevice),
     .abstract = true,
-    .class_init = vfio_pci_base_dev_class_init,
+    .class_init = vfio_pci_device_class_init,
     .interfaces = (const InterfaceInfo[]) {
         { INTERFACE_PCIE_DEVICE },
         { INTERFACE_CONVENTIONAL_PCI_DEVICE },
@@ -3687,7 +3683,7 @@
 
 static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
 
-static const Property vfio_pci_dev_properties[] = {
+static const Property vfio_pci_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
     DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
     DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
@@ -3762,18 +3758,18 @@
 #ifdef CONFIG_IOMMUFD
 static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
 {
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+    VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
 
     vfio_device_set_fd(&vdev->vbasedev, str, errp);
 }
 #endif
 
-static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
 
     device_class_set_legacy_reset(dc, vfio_pci_reset);
-    device_class_set_props(dc, vfio_pci_dev_properties);
+    device_class_set_props(dc, vfio_pci_properties);
 #ifdef CONFIG_IOMMUFD
     object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
 #endif
@@ -3916,15 +3912,15 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
                                           "multifd channels");
 }
 
-static const TypeInfo vfio_pci_dev_info = {
+static const TypeInfo vfio_pci_info = {
     .name = TYPE_VFIO_PCI,
-    .parent = TYPE_VFIO_PCI_BASE,
-    .class_init = vfio_pci_dev_class_init,
-    .instance_init = vfio_instance_init,
-    .instance_finalize = vfio_instance_finalize,
+    .parent = TYPE_VFIO_PCI_DEVICE,
+    .class_init = vfio_pci_class_init,
+    .instance_init = vfio_pci_init,
+    .instance_finalize = vfio_pci_finalize,
 };
 
-static const Property vfio_pci_dev_nohotplug_properties[] = {
+static const Property vfio_pci_nohotplug_properties[] = {
     DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
     DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice,
                      use_legacy_x86_rom, false),
@@ -3932,12 +3928,12 @@
                             ON_OFF_AUTO_AUTO),
 };
 
-static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
+static void vfio_pci_nohotplug_class_init(ObjectClass *klass,
                                               const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
+    device_class_set_props(dc, vfio_pci_nohotplug_properties);
     dc->hotpluggable = false;
 
     object_class_property_set_description(klass, /* 3.1 */
@@ -3953,11 +3949,11 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
                                           "Controls loading of a legacy VGA BIOS ROM");
 }
 
-static const TypeInfo vfio_pci_nohotplug_dev_info = {
+static const TypeInfo vfio_pci_nohotplug_info = {
     .name = TYPE_VFIO_PCI_NOHOTPLUG,
     .parent = TYPE_VFIO_PCI,
     .instance_size = sizeof(VFIOPCIDevice),
-    .class_init = vfio_pci_nohotplug_dev_class_init,
+    .class_init = vfio_pci_nohotplug_class_init,
 };
 
 static void register_vfio_pci_dev_type(void)
 {
@@ -3973,9 +3969,9 @@
     vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
     vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
 
-    type_register_static(&vfio_pci_base_dev_info);
-    type_register_static(&vfio_pci_dev_info);
-    type_register_static(&vfio_pci_nohotplug_dev_info);
+    type_register_static(&vfio_pci_device_info);
+    type_register_static(&vfio_pci_info);
+    type_register_static(&vfio_pci_nohotplug_info);
 }
 
 type_init(register_vfio_pci_dev_type)
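After these renames the vfio-pci QOM chain reads: abstract TYPE_VFIO_PCI_DEVICE carrying the shared struct VFIOPCIDevice, concrete TYPE_VFIO_PCI, and TYPE_VFIO_PCI_NOHOTPLUG derived from it. A stripped-down sketch of registering such a chain, reduced from the diff rather than a verbatim copy (the example_* names are hypothetical):

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_device.h"
#include "pci.h"                 /* VFIOPCIDevice, as in hw/vfio/pci.c */

/* Abstract base: owns the shared instance struct; .abstract = true
 * means it can only be subclassed, never instantiated directly. */
static const TypeInfo example_vfio_pci_device_info = {
    .name          = "vfio-pci-device",
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VFIOPCIDevice),
    .abstract      = true,
};

/* Concrete type: inherits instance_size and class hooks from the base. */
static const TypeInfo example_vfio_pci_info = {
    .name   = "vfio-pci",
    .parent = "vfio-pci-device",
};

static void example_register_types(void)
{
    type_register_static(&example_vfio_pci_device_info);
    type_register_static(&example_vfio_pci_info);
}

type_init(example_register_types)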
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index e0aef82..0f78cf9 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -120,7 +120,7 @@ typedef struct VFIOMSIXInfo {
     MemoryRegion *pba_region;
 } VFIOMSIXInfo;
 
-OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE)
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_DEVICE)
 
 struct VFIOPCIDevice {
     PCIDevice parent_obj;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index d04c57d..b165ab0 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -365,12 +365,9 @@ void vfio_region_finalize(VFIORegion *region)
 
     for (i = 0; i < region->nr_mmaps; i++) {
         if (region->mmaps[i].mmap) {
             munmap(region->mmaps[i].mmap, region->mmaps[i].size);
-            object_unparent(OBJECT(&region->mmaps[i].mem));
         }
     }
 
-    object_unparent(OBJECT(region->mem));
-
     g_free(region->mem);
     g_free(region->mmaps);
 
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index c41e458..8d9d68d 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -15,7 +15,7 @@
 #include "system/hostmem.h"
 #include "system/address-spaces.h"
 
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
 #include "hw/hw.h"
 #include "system/ram_addr.h"
 #include "qemu/error-report.h"
@@ -30,12 +30,13 @@ typedef struct VFIOHostDMAWindow {
     QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
 } VFIOHostDMAWindow;
 
-typedef struct VFIOSpaprContainer {
-    VFIOContainer container;
+struct VFIOSpaprContainer {
+    VFIOLegacyContainer parent_obj;
+
     MemoryListener prereg_listener;
     QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
     unsigned int levels;
-} VFIOSpaprContainer;
+};
 
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);
 
@@ -61,8 +62,8 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
 {
     VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
                                                   prereg_listener);
-    VFIOContainer *container = &scontainer->container;
-    VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
+    VFIOContainer *bcontainer = VFIO_IOMMU(container);
     const hwaddr gpa = section->offset_within_address_space;
     hwaddr end;
     int ret;
@@ -121,7 +122,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener,
 {
     VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
                                                   prereg_listener);
-    VFIOContainer *container = &scontainer->container;
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
     const hwaddr gpa = section->offset_within_address_space;
     hwaddr end;
     int ret;
@@ -218,7 +219,7 @@ static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container,
     return hostwin_found ? hostwin : NULL;
 }
-static int vfio_spapr_remove_window(VFIOContainer *container,
+static int vfio_spapr_remove_window(VFIOLegacyContainer *container,
                                     hwaddr offset_within_address_space)
 {
     struct vfio_iommu_spapr_tce_remove remove = {
@@ -239,14 +240,13 @@ static int vfio_spapr_remove_window(VFIOContainer *container,
     return 0;
 }
 
-static bool vfio_spapr_create_window(VFIOContainer *container,
+static bool vfio_spapr_create_window(VFIOLegacyContainer *container,
                                      MemoryRegionSection *section,
                                      hwaddr *pgsize, Error **errp)
 {
     int ret = 0;
-    VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
-    VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
-                                                  container);
+    VFIOContainer *bcontainer = VFIO_IOMMU(container);
+    VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(bcontainer);
     IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
     uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
     unsigned entries, bits_total, bits_per_level, max_levels, ddw_levels;
@@ -348,13 +348,12 @@ static bool vfio_spapr_create_window(VFIOContainer *container,
 }
 
 static bool
-vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_add_section_window(VFIOContainer *bcontainer,
                                         MemoryRegionSection *section,
                                         Error **errp)
 {
-    VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
-    VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
-                                                  container);
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+    VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
     VFIOHostDMAWindow *hostwin;
     hwaddr pgsize = 0;
     int ret;
@@ -439,12 +438,11 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
 }
 
 static void
-vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_del_section_window(VFIOContainer *bcontainer,
                                         MemoryRegionSection *section)
 {
-    VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
-    VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
-                                                  container);
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+    VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
 
     if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
         return;
@@ -461,11 +459,10 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
     }
 }
 
-static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
+static void vfio_spapr_container_release(VFIOContainer *bcontainer)
 {
-    VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
-    VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
-                                                  container);
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+    VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
     VFIOHostDMAWindow *hostwin, *next;
 
     if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
@@ -478,12 +475,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
     }
 }
 
-static bool vfio_spapr_container_setup(VFIOContainerBase *bcontainer,
+static bool vfio_spapr_container_setup(VFIOContainer *bcontainer,
                                        Error **errp)
 {
-    VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
-    VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
-                                                  container);
+    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+    VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
     struct vfio_iommu_spapr_tce_info info;
     bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
     int ret, fd = container->fd;
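The spapr conversion above follows the same recipe as the iommufd one: the embedded base struct becomes a parent_obj first member, and container_of() downcasts give way to checked QOM casts. A small sketch of how the casts then compose, assuming the VFIO_IOMMU_SPAPR()/VFIO_IOMMU_LEGACY()/VFIO_IOMMU() macros this series declares via OBJECT_DECLARE_SIMPLE_TYPE(); example_casts is a hypothetical illustration:

#include "qemu/osdep.h"

/* scontainer is a VFIOSpaprContainer as laid out above, with
 * VFIOLegacyContainer as its first (parent_obj) member. */
static void example_casts(VFIOSpaprContainer *scontainer)
{
    /* Upcasts: checked at runtime by QOM, no container_of() math. */
    VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
    VFIOContainer *bcontainer = VFIO_IOMMU(scontainer);

    /* And the matching downcast from the generic base type. */
    VFIOSpaprContainer *again = VFIO_IOMMU_SPAPR(bcontainer);

    assert(again == scontainer);
    (void)container;
}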
diff --git a/hw/vfio/types.h b/hw/vfio/types.h
index c19334f..5482d90 100644
--- a/hw/vfio/types.h
+++ b/hw/vfio/types.h
@@ -9,11 +9,11 @@
 #define HW_VFIO_VFIO_TYPES_H
 
 /*
- * TYPE_VFIO_PCI_BASE is an abstract type used to share code
+ * TYPE_VFIO_PCI_DEVICE is an abstract type used to share code
  * between VFIO implementations that use a kernel driver
  * with those that use user sockets.
  */
-#define TYPE_VFIO_PCI_BASE "vfio-pci-base"
+#define TYPE_VFIO_PCI_DEVICE "vfio-pci-device"
 
 #define TYPE_VFIO_PCI "vfio-pci"
 /* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */
diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h
index 07ea0f4..6b28e1f 100644
--- a/hw/vfio/vfio-iommufd.h
+++ b/hw/vfio/vfio-iommufd.h
@@ -9,7 +9,7 @@
 #ifndef HW_VFIO_VFIO_IOMMUFD_H
 #define HW_VFIO_VFIO_IOMMUFD_H
 
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
 
 typedef struct VFIODevice VFIODevice;
 
@@ -22,12 +22,13 @@ typedef struct VFIOIOASHwpt {
 
 typedef struct IOMMUFDBackend IOMMUFDBackend;
 
-typedef struct VFIOIOMMUFDContainer {
-    VFIOContainerBase bcontainer;
+struct VFIOIOMMUFDContainer {
+    VFIOContainer parent_obj;
+
     IOMMUFDBackend *be;
     uint32_t ioas_id;
     QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
-} VFIOIOMMUFDContainer;
+};
 
 OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
 
diff --git a/hw/vfio/vfio-listener.h b/hw/vfio/vfio-listener.h
index eb69ddd..a90674c 100644
--- a/hw/vfio/vfio-listener.h
+++ b/hw/vfio/vfio-listener.h
@@ -9,7 +9,7 @@
 #ifndef HW_VFIO_VFIO_LISTENER_H
 #define HW_VFIO_VFIO_LISTENER_H
 
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_listener_unregister(VFIOContainerBase *bcontainer);
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp);
+void vfio_listener_unregister(VFIOContainer *bcontainer);
 
 #endif /* HW_VFIO_VFIO_LISTENER_H */
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index 09cca4e..e9ba173 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -637,14 +637,5 @@ void xen_pt_msix_unmap(XenPCIPassthroughState *s)
 
 void xen_pt_msix_delete(XenPCIPassthroughState *s)
 {
-    XenPTMSIX *msix = s->msix;
-
-    if (!msix) {
-        return;
-    }
-
-    object_unparent(OBJECT(&msix->mmio));
-
-    g_free(s->msix);
-    s->msix = NULL;
+    g_clear_pointer(&s->msix, g_free);
 }
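The xen_pt_msi.c hunk shows the series' other recurring simplification: an open-coded NULL check, g_free(), and pointer reset collapse into g_clear_pointer(), while the object_unparent() call on the embedded MemoryRegion is dropped as in the other files. The two forms below are equivalent for a g_free()-allocated pointer; this is a sketch only, with the xen_pt.h include and the msix_delete_* names assumed for illustration:

#include <glib.h>
#include "hw/xen/xen_pt.h"    /* XenPCIPassthroughState, assumed */

/* Open-coded form removed above: bail out on NULL, free, reset. */
static void msix_delete_open_coded(XenPCIPassthroughState *s)
{
    if (!s->msix) {
        return;
    }
    g_free(s->msix);
    s->msix = NULL;
}

/* g_clear_pointer() equivalent: calls g_free() only if the pointer is
 * non-NULL, then always leaves s->msix set to NULL, so no dangling
 * pointer survives the delete. */
static void msix_delete_clear_pointer(XenPCIPassthroughState *s)
{
    g_clear_pointer(&s->msix, g_free);
}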