diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-03-17 19:30:13 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-03-17 19:30:13 +0000 |
commit | 2255564fd21059960966b47212def9069cb56077 (patch) | |
tree | 451e8764c53b9283a555ac50b6aad16dd0b55826 /hw | |
parent | 69259911f948ad2755bd1f2c999dd60ac322c890 (diff) | |
parent | 758b96b61d5cbc19204f340012d5a325f0a2105b (diff) | |
download | qemu-2255564fd21059960966b47212def9069cb56077.zip qemu-2255564fd21059960966b47212def9069cb56077.tar.gz qemu-2255564fd21059960966b47212def9069cb56077.tar.bz2 |
Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20210316.0' into staging
VFIO update 2021-03-16
* Fix "listerner" typo (Zenghui Yu)
* Inclusive language and MAINTAINERS update (Philippe Mathieu-Daudé)
* vIOMMU unmap notifier fixes (Eric Auger)
* Migration fixes and optimizations (Shenming Lu)
* Use host page size for dirty bitmap (Kunkun Jiang)
* Use log_global_start/stop to switch dirty tracking (Keqian Zhu)
# gpg: Signature made Tue 16 Mar 2021 16:59:10 GMT
# gpg: using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>" [full]
# gpg: aka "Alex Williamson <alex@shazbot.org>" [full]
# gpg: aka "Alex Williamson <alwillia@redhat.com>" [full]
# gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>" [full]
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22
* remotes/awilliam/tags/vfio-update-20210316.0:
vfio/migrate: Move switch of dirty tracking into vfio_memory_listener
vfio: Support host translation granule size
vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration
vfio: Set the priority of the VFIO VM state change handler explicitly
vfio: Move the saving of the config space to the right place in VFIO migration
spapr_iommu: Fix vhost integration regression
vfio: Do not register any IOMMU_NOTIFIER_DEVIOTLB_UNMAP notifier
MAINTAINERS: Cover docs/igd-assign.txt in VFIO section
hw/vfio/pci-quirks: Replace the word 'blacklist'
vfio: Fix vfio_listener_log_sync function name typo
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/ppc/spapr_iommu.c | 5 |
-rw-r--r-- | hw/vfio/common.c | 103 |
-rw-r--r-- | hw/vfio/migration.c | 63 |
-rw-r--r-- | hw/vfio/pci-quirks.c | 14 |
-rw-r--r-- | hw/vfio/pci.c | 24 |
-rw-r--r-- | hw/vfio/pci.h | 2 |
-rw-r--r-- | hw/vfio/trace-events | 2 |
7 files changed, 118 insertions, 95 deletions
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 30352df..24537ff 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -212,6 +212,11 @@ static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, { struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu); + if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { + error_setg(errp, "spart_tce does not support dev-iotlb yet"); + return -EINVAL; + } + if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) { spapr_tce_set_need_vfio(tbl, true); } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 6ff1daa..ae5654f 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -311,7 +311,7 @@ bool vfio_mig_active(void) return true; } -static bool vfio_devices_all_saving(VFIOContainer *container) +static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) { VFIOGroup *group; VFIODevice *vbasedev; @@ -329,13 +329,8 @@ static bool vfio_devices_all_saving(VFIOContainer *container) return false; } - if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { - if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) - && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { - return false; - } - continue; - } else { + if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) + && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { return false; } } @@ -378,7 +373,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, { struct vfio_iommu_type1_dma_unmap *unmap; struct vfio_bitmap *bitmap; - uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; + uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size; int ret; unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); @@ -390,12 +385,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, bitmap = (struct vfio_bitmap *)&unmap->data; /* - * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of - 
* TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to - * TARGET_PAGE_SIZE. + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize + * to qemu_real_host_page_size. */ - bitmap->pgsize = TARGET_PAGE_SIZE; + bitmap->pgsize = qemu_real_host_page_size; bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; @@ -674,16 +669,17 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { error_report("%s received unaligned region", __func__); return; } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); if (int128_ge(int128_make64(iova), llend)) { return; @@ -787,7 +783,7 @@ static void vfio_listener_region_add(MemoryListener *listener, iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, MEMTXATTRS_UNSPECIFIED); iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, - IOMMU_NOTIFIER_ALL, + IOMMU_NOTIFIER_IOTLB_EVENTS, section->offset_within_region, int128_get64(llend), iommu_idx); @@ -892,8 +888,9 @@ static void vfio_listener_region_del(MemoryListener *listener, return; } - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & 
~qemu_real_host_page_mask))) { error_report("%s received unaligned region", __func__); return; } @@ -921,10 +918,10 @@ static void vfio_listener_region_del(MemoryListener *listener, */ } - iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); llend = int128_make64(section->offset_within_address_space); llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); if (int128_ge(int128_make64(iova), llend)) { return; @@ -987,6 +984,40 @@ static void vfio_listener_region_del(MemoryListener *listener, } } +static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) +{ + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; + } + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + if (ret) { + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, errno); + } +} + +static void vfio_listener_log_global_start(MemoryListener *listener) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + + vfio_set_dirty_page_tracking(container, true); +} + +static void vfio_listener_log_global_stop(MemoryListener *listener) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + + vfio_set_dirty_page_tracking(container, false); +} + static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, uint64_t size, ram_addr_t ram_addr) { @@ -1004,13 +1035,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, range->size = size; /* - * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of - * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to - * TARGET_PAGE_SIZE. 
+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize + * to qemu_real_host_page_size. */ - range->bitmap.pgsize = TARGET_PAGE_SIZE; + range->bitmap.pgsize = qemu_real_host_page_size; - pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; + pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size; range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; range->bitmap.data = g_try_malloc0(range->bitmap.size); @@ -1114,11 +1145,11 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, section->offset_within_region; return vfio_get_dirty_bitmap(container, - TARGET_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr); + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); } -static void vfio_listerner_log_sync(MemoryListener *listener, +static void vfio_listener_log_sync(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainer *container = container_of(listener, VFIOContainer, listener); @@ -1128,7 +1159,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, return; } - if (vfio_devices_all_saving(container)) { + if (vfio_devices_all_dirty_tracking(container)) { vfio_sync_dirty_bitmap(container, section); } } @@ -1136,7 +1167,9 @@ static void vfio_listerner_log_sync(MemoryListener *listener, static const MemoryListener vfio_memory_listener = { .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, - .log_sync = vfio_listerner_log_sync, + .log_global_start = vfio_listener_log_global_start, + .log_global_stop = vfio_listener_log_global_stop, + .log_sync = vfio_listener_log_sync, }; static void vfio_listener_release(VFIOContainer *container) @@ -1655,10 +1688,10 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, header); /* - * cpu_physical_memory_set_dirty_lebitmap() 
expects pages in bitmap of - * TARGET_PAGE_SIZE to mark those dirty. + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. */ - if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size) { container->dirty_pages_supported = true; container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; container->dirty_pgsizes = cap_mig->pgsize_bitmap; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 134bdcc..384576c 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -395,40 +395,10 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) return qemu_file_get_error(f); } -static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) -{ - int ret; - VFIOMigration *migration = vbasedev->migration; - VFIOContainer *container = vbasedev->group->container; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), - }; - - if (start) { - if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; - } else { - return -EINVAL; - } - } else { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; - } - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); - if (ret) { - error_report("Failed to set dirty tracking flag 0x%x errno: %d", - dirty.flags, errno); - return -errno; - } - return ret; -} - static void vfio_migration_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - vfio_set_dirty_page_tracking(vbasedev, false); - if (migration->region.mmaps) { vfio_region_unmap(&migration->region); } @@ -469,11 +439,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) return ret; } - ret = vfio_set_dirty_page_tracking(vbasedev, true); - if (ret) { - return ret; - } - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ret = qemu_file_get_error(f); @@ -575,11 +540,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque) return ret; } - ret = vfio_save_device_config_state(f, opaque); - if (ret) { - return ret; - } - ret = vfio_update_pending(vbasedev); if (ret) { return ret; @@ -620,6 +580,19 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) return ret; } +static void vfio_save_state(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + int ret; + + ret = vfio_save_device_config_state(f, opaque); + if (ret) { + error_report("%s: Failed to save device config space", + vbasedev->name); + qemu_file_set_error(f, ret); + } +} + static int vfio_load_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -670,11 +643,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) switch (data) { case VFIO_MIG_FLAG_DEV_CONFIG_STATE: { - ret = vfio_load_device_config_state(f, opaque); - if (ret) { - return ret; - } - break; + return vfio_load_device_config_state(f, opaque); } case VFIO_MIG_FLAG_DEV_SETUP_STATE: { @@ -720,6 +689,7 @@ static SaveVMHandlers savevm_vfio_handlers = { .save_live_pending = vfio_save_pending, .save_live_iterate = vfio_save_iterate, .save_live_complete_precopy = vfio_save_complete_precopy, + .save_state = vfio_save_state, .load_setup = vfio_load_setup, .load_cleanup = vfio_load_cleanup, .load_state = vfio_load_state, @@ -857,7 +827,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, vbasedev); - migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, + vfio_vmstate_change, vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c5c4c61..b90cf3d 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -44,19 +44,19 @@ static const struct { uint32_t vendor; uint32_t device; -} 
romblacklist[] = { +} rom_denylist[] = { { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */ }; -bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev) +bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev) { int i; - for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) { - if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) { - trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name, - romblacklist[i].vendor, - romblacklist[i].device); + for (i = 0 ; i < ARRAY_SIZE(rom_denylist); i++) { + if (vfio_pci_is(vdev, rom_denylist[i].vendor, rom_denylist[i].device)) { + trace_vfio_quirk_rom_in_denylist(vdev->vbasedev.name, + rom_denylist[i].vendor, + rom_denylist[i].device); return true; } } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index f74be78..5c65aa0 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -569,6 +569,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) static void vfio_msix_enable(VFIOPCIDevice *vdev) { + PCIDevice *pdev = &vdev->pdev; + unsigned int nr, max_vec = 0; + vfio_disable_interrupts(vdev); vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); @@ -587,11 +590,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) * triggering to userspace, then immediately release the vector, leaving * the physical device with no vectors enabled, but MSI-X enabled, just * like the guest view. + * If there are already unmasked vectors (in migration resume phase and + * some guest startups) which will be enabled soon, we can allocate all + * of them here to avoid inefficiently disabling and enabling vectors + * repeatedly later. 
*/ - vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL); - vfio_msix_vector_release(&vdev->pdev, 0); + if (!pdev->msix_function_masked) { + for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) { + if (!msix_is_masked(pdev, nr)) { + max_vec = nr; + } + } + } + vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL); + vfio_msix_vector_release(pdev, max_vec); - if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } @@ -900,7 +914,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ - if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { + if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); @@ -927,7 +941,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) return; } - if (vfio_blacklist_opt_rom(vdev)) { + if (vfio_opt_rom_in_denylist(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 1574ef9..6477751 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -197,7 +197,7 @@ void vfio_pci_write_config(PCIDevice *pdev, uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); -bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); +bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index c0e75f2..079f53a 100644 --- 
a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -49,7 +49,7 @@ vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x" # pci-quirks.c -vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" +vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64 |