diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-07-10 09:17:06 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-07-10 09:17:06 +0100 |
commit | fcb237e64f9d026c03d635579c7b288d0008a6e5 (patch) | |
tree | c508291b3bd8e5e4445ae9b763ba66ce2bc9e37f /hw | |
parent | 2ff49e96accc8fd9a38e9abd16f0cfa0adab1605 (diff) | |
parent | c00aac6f1428d40a4ca2ab9b89070afc2a5bf979 (diff) | |
download | qemu-fcb237e64f9d026c03d635579c7b288d0008a6e5.zip qemu-fcb237e64f9d026c03d635579c7b288d0008a6e5.tar.gz qemu-fcb237e64f9d026c03d635579c7b288d0008a6e5.tar.bz2 |
Merge tag 'pull-vfio-20230710' of https://github.com/legoater/qemu into staging
vfio queue:
* Fixes in error handling paths of VFIO PCI devices
* Improvements of reported errors for VFIO migration
* Linux header update
* Enablement of AtomicOps completers on root ports
* Fix for unplug of passthrough AP devices
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmSrug0ACgkQUaNDx8/7
# 7KHYCRAAt6UeZi8nKPlN+cs6guOagCcAJOu13nm7XN0bFxjYf/Q2t618cpM7PLSk
# h+4VGsMUVJ1dumcCkBmv7LAn0G6CpVR3VDi5QuGfMODRhpWfSoaypPIizRgrbarL
# lSyaVaPIaddlDZ4AIfFA9Ebnytvm5/ecsyTr0cv7OejVKWI/jN6bC/v36AmNQKKQ
# J5RCDpQ6fOsdqf0Dzvn7xjuHRE4DYtsWkVoslDoBQMgPWHLF8UwRu/OPD6cBQYAR
# /fmgoOkkNDMdN3laqwAyfAUjKfOFpLuZzJ5KNFjtkBiktm66dw4Y8/lWoChVR+S6
# PRZ3nk0HxyzB96zCytfggBX905PBD54LIuockRaYKTlTxT19C3fDjDz5tsjKNhLR
# aFec4KiJaUJj0fa/Vw8DB/WUbCgbOXGHiWhY8vNdpVoc9AZe8xj9z4nB3hmzx1i/
# lZhsM/s3kTNHpVGlW7vTfbToFBmt1eoglu+ILe/HeHLi8LjzCsHy+wR5c0n0/HVI
# fLUuUS1AGQvi8+HCCUi7gwzpJkl4rPJsPx51wfXJk+q/3GQ8g9Mg9qotHNHm4N60
# zq/I5VqqEkJzdaMjup04ZqsMAWqGrnU2f4aNPvBhgaeO9CQE/buIsA34buQRwiG4
# wTodqm0jrkx0Z59jliZ0mFU/LxMvhMaQCEh+OdyZ9vRtfLBjF4c=
# =U2Hc
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 10 Jul 2023 08:58:05 AM BST
# gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@kaod.org>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1
* tag 'pull-vfio-20230710' of https://github.com/legoater/qemu:
vfio/pci: Enable AtomicOps completers on root ports
pcie: Add a PCIe capability version helper
s390x/ap: Wire up the device request notifier interface
linux-headers: update to v6.5-rc1
vfio: Fix null pointer dereference bug in vfio_bars_finalize()
vfio/migration: Return bool type for vfio_migration_realize()
vfio/migration: Remove print of "Migration disabled"
vfio/migration: Free resources when vfio_migration_realize fails
vfio/migration: Change vIOMMU blocker from global to per device
vfio/pci: Disable INTx in vfio_realize error path
hw/vfio/pci-quirks: Sanitize capability pointer
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/pci/pcie.c | 7 | ||||
-rw-r--r-- | hw/vfio/ap.c | 113 | ||||
-rw-r--r-- | hw/vfio/common.c | 51 | ||||
-rw-r--r-- | hw/vfio/migration.c | 51 | ||||
-rw-r--r-- | hw/vfio/pci-quirks.c | 10 | ||||
-rw-r--r-- | hw/vfio/pci.c | 91 | ||||
-rw-r--r-- | hw/vfio/pci.h | 1 |
7 files changed, 252 insertions, 72 deletions
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index b8c24cf..b7f107e 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -274,6 +274,13 @@ uint8_t pcie_cap_get_type(const PCIDevice *dev) PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT; } +uint8_t pcie_cap_get_version(const PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + assert(pos > 0); + return pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & PCI_EXP_FLAGS_VERS; +} + /* MSI/MSI-X */ /* pci express interrupt message number */ /* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */ diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index e0dd561..6e21d1d 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -18,6 +18,8 @@ #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" +#include "qemu/event_notifier.h" +#include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -33,6 +35,7 @@ struct VFIOAPDevice { APDevice apdev; VFIODevice vdev; + EventNotifier req_notifier; }; OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) @@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) return vfio_get_group(groupid, &address_space_memory, errp); } +static void vfio_ap_req_notifier_handler(void *opaque) +{ + VFIOAPDevice *vapdev = opaque; + Error *err = NULL; + + if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { + return; + } + + qdev_unplug(DEVICE(vapdev), &err); + + if (err) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); + } +} + +static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, + unsigned int irq, Error **errp) +{ + int fd; + size_t argsz; + IOHandler *fd_read; + EventNotifier *notifier; + struct vfio_irq_info *irq_info; + VFIODevice *vdev = &vapdev->vdev; + + switch (irq) { + case VFIO_AP_REQ_IRQ_INDEX: + notifier = &vapdev->req_notifier; + fd_read = vfio_ap_req_notifier_handler; + break; + default: + error_setg(errp, "vfio: Unsupported device irq(%d)", irq); + return; + } + + if (vdev->num_irqs < irq + 1) { + error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", + irq, vdev->num_irqs); + return; + } + + argsz = sizeof(*irq_info); + irq_info = g_malloc0(argsz); + irq_info->index = irq; + irq_info->argsz = argsz; + + if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, + irq_info) < 0 || irq_info->count < 1) { + error_setg_errno(errp, errno, "vfio: Error getting irq info"); + goto out_free_info; + } + + if (event_notifier_init(notifier, 0)) { + error_setg_errno(errp, errno, + "vfio: Unable to init event notifier for irq (%d)", + irq); + goto out_free_info; + } + + fd = event_notifier_get_fd(notifier); + qemu_set_fd_handler(fd, fd_read, NULL, vapdev); + + if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, + errp)) { + qemu_set_fd_handler(fd, NULL, NULL, vapdev); + event_notifier_cleanup(notifier); + } + +out_free_info: + g_free(irq_info); + +} + +static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, + unsigned int irq) +{ + Error *err = NULL; + EventNotifier *notifier; + + switch (irq) { + case VFIO_AP_REQ_IRQ_INDEX: + notifier = &vapdev->req_notifier; + break; + default: + error_report("vfio: Unsupported device irq(%d)", irq); + return; + } + + if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, + VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); + } + + qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, vapdev); + event_notifier_cleanup(notifier); +} + static void vfio_ap_realize(DeviceState *dev, Error **errp) { int ret; char *mdevid; + Error *err = NULL; VFIOGroup *vfio_group; APDevice *apdev = AP_DEVICE(dev); VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); @@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) goto out_get_dev_err; } + vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); + if (err) { + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. + */ + error_report_err(err); + } + return; out_get_dev_err: @@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); VFIOGroup *group = vapdev->vdev.group; + vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_put_device(vapdev); vfio_put_group(group); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 77e2ee0..9aac21a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -362,7 +362,6 @@ bool vfio_mig_active(void) } static Error *multiple_devices_migration_blocker; -static Error *giommu_migration_blocker; static unsigned int vfio_migratable_device_num(void) { @@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) multiple_devices_migration_blocker = NULL; } -static bool vfio_viommu_preset(void) +bool vfio_viommu_preset(VFIODevice *vbasedev) { - VFIOAddressSpace *space; - - QLIST_FOREACH(space, &vfio_address_spaces, list) { - if (space->as != &address_space_memory) { - return true; - } - } - - return false; -} - -int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) -{ - int ret; - - if (giommu_migration_blocker || - !vfio_viommu_preset()) { - return 0; - } - - if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { - error_setg(errp, - "Migration is currently not supported with vIOMMU enabled"); - return -EINVAL; - } - - error_setg(&giommu_migration_blocker, - "Migration is currently not supported with vIOMMU enabled"); - ret = migrate_add_blocker(giommu_migration_blocker, errp); - if (ret < 0) { - error_free(giommu_migration_blocker); - giommu_migration_blocker = NULL; - } - - return ret; -} - -void vfio_migration_finalize(void) -{ - if (!giommu_migration_blocker || - vfio_viommu_preset()) { - return; - } - - migrate_del_blocker(giommu_migration_blocker); - error_free(giommu_migration_blocker); - giommu_migration_blocker = NULL; + return vbasedev->group->container->space->as != &address_space_memory; } static void vfio_set_migration_error(int err) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 1db7d52..2674f4b 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) return 0; } +static void vfio_migration_deinit(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + + remove_migration_state_change_notifier(&migration->migration_state); + qemu_del_vm_change_state_handler(migration->vm_state); + unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); + vfio_migration_free(vbasedev); + vfio_unblock_multiple_devices_migration(); +} + static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) { int ret; @@ -835,7 +846,12 @@ void vfio_reset_bytes_transferred(void) bytes_transferred = 0; } -int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) +/* + * Return true when either migration initialized or blocker registered. + * Currently only return false when adding blocker fails which will + * de-register vfio device. + */ +bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) { Error *err = NULL; int ret; @@ -843,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { error_setg(&err, "%s: Migration is disabled for VFIO device", vbasedev->name); - return vfio_block_migration(vbasedev, err, errp); + return !vfio_block_migration(vbasedev, err, errp); } ret = vfio_migration_init(vbasedev); @@ -858,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) vbasedev->name, ret, strerror(-ret)); } - return vfio_block_migration(vbasedev, err, errp); + return !vfio_block_migration(vbasedev, err, errp); } if (!vbasedev->dirty_pages_supported) { @@ -866,7 +882,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) error_setg(&err, "%s: VFIO device doesn't support device dirty tracking", vbasedev->name); - return vfio_block_migration(vbasedev, err, errp); + goto add_blocker; } warn_report("%s: VFIO device doesn't support device dirty tracking", @@ -875,28 +891,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) ret = vfio_block_multiple_devices_migration(vbasedev, errp); if (ret) { - return ret; + goto out_deinit; } - ret = vfio_block_giommu_migration(vbasedev, errp); - if (ret) { - return ret; + if (vfio_viommu_preset(vbasedev)) { + error_setg(&err, "%s: Migration is currently not supported " + "with vIOMMU enabled", vbasedev->name); + goto add_blocker; } trace_vfio_migration_realize(vbasedev->name); - return 0; + return true; + +add_blocker: + ret = vfio_block_migration(vbasedev, err, errp); +out_deinit: + if (ret) { + vfio_migration_deinit(vbasedev); + } + return !ret; } void vfio_migration_exit(VFIODevice *vbasedev) { if (vbasedev->migration) { - VFIOMigration *migration = vbasedev->migration; - - remove_migration_state_change_notifier(&migration->migration_state); - qemu_del_vm_change_state_handler(migration->vm_state); - unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); - vfio_migration_free(vbasedev); - vfio_unblock_multiple_devices_migration(); + vfio_migration_deinit(vbasedev); } if (vbasedev->migration_blocker) { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 0ed2fcd..f4ff836 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { .set = set_nv_gpudirect_clique_id, }; +static bool is_valid_std_cap_offset(uint8_t pos) +{ + return (pos >= PCI_STD_HEADER_SIZEOF && + pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); +} + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) */ ret = pread(vdev->vbasedev.fd, &tmp, 1, vdev->config_offset + PCI_CAPABILITY_LIST); - if (ret != 1 || !tmp) { + if (ret != 1 || !is_valid_std_cap_offset(tmp)) { error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); return -EINVAL; } @@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) d4_conflict = true; } tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; - } while (tmp); + } while (is_valid_std_cap_offset(tmp)); if (!c8_conflict) { pos = 0xC8; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ab6645b..a205c6b 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1752,9 +1752,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) vfio_bar_quirk_finalize(vdev, i); vfio_region_finalize(&bar->region); - if (bar->size) { + if (bar->mr) { + assert(bar->size); object_unparent(OBJECT(bar->mr)); g_free(bar->mr); + bar->mr = NULL; } } @@ -1826,6 +1828,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos, vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); } +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev) +{ + struct vfio_device_info_cap_pci_atomic_comp *cap; + g_autofree struct vfio_device_info *info = NULL; + PCIBus *bus = pci_get_bus(&vdev->pdev); + PCIDevice *parent = bus->parent_dev; + struct vfio_info_cap_header *hdr; + uint32_t mask = 0; + uint8_t *pos; + + /* + * PCIe Atomic Ops completer support is only added automatically for single + * function devices downstream of a root port supporting DEVCAP2. Support + * is added during realize and, if added, removed during device exit. The + * single function requirement avoids conflicting requirements should a + * slot be composed of multiple devices with differing capabilities. + */ + if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap || + pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT || + pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 || + vdev->pdev.devfn || + vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + return; + } + + pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; + + /* Abort if there'a already an Atomic Ops configuration on the root port */ + if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64 | + PCI_EXP_DEVCAP2_ATOMIC_COMP128)) { + return; + } + + info = vfio_get_device_info(vdev->vbasedev.fd); + if (!info) { + return; + } + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP); + if (!hdr) { + return; + } + + cap = (void *)hdr; + if (cap->flags & VFIO_PCI_ATOMIC_COMP32) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32; + } + if (cap->flags & VFIO_PCI_ATOMIC_COMP64) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64; + } + if (cap->flags & VFIO_PCI_ATOMIC_COMP128) { + mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128; + } + + if (!mask) { + return; + } + + pci_long_test_and_set_mask(pos, mask); + vdev->clear_parent_atomics_on_exit = true; +} + +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev) +{ + if (vdev->clear_parent_atomics_on_exit) { + PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev; + uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; + + pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64 | + PCI_EXP_DEVCAP2_ATOMIC_COMP128); + } +} + static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, Error **errp) { @@ -1929,6 +2006,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } + + vfio_pci_enable_rp_atomics(vdev); } /* @@ -3207,9 +3286,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } if (!pdev->failover_pair_id) { - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { - error_report("%s: Migration disabled", vbasedev->name); + if (!vfio_migration_realize(vbasedev, errp)) { + goto out_deregister; } } @@ -3220,6 +3298,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) return; out_deregister: + if (vdev->interrupt == VFIO_INT_INTx) { + vfio_intx_disable(vdev); + } pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); if (vdev->irqchip_change_notifier.notify) { kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); @@ -3252,7 +3333,6 @@ static void vfio_instance_finalize(Object *obj) */ vfio_put_device(vdev); vfio_put_group(group); - vfio_migration_finalize(); } static void vfio_exitfn(PCIDevice *pdev) @@ -3270,6 +3350,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); + vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); vfio_migration_exit(&vdev->vbasedev); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 2674476..a2771b9 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -174,6 +174,7 @@ struct VFIOPCIDevice { bool no_vfio_ioeventfd; bool enable_ramfb; bool defer_kvm_irq_routing; + bool clear_parent_atomics_on_exit; VFIODisplay *dpy; Notifier irqchip_change_notifier; }; |