diff options
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r-- | hw/vfio/pci.c | 294 |
1 files changed, 178 insertions, 116 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a1bfdfe..fa25bde 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -30,6 +30,7 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" +#include "migration/cpr.h" #include "qobject/qdict.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -56,6 +57,23 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr, Error **errp) +{ + int ret = event_notifier_init(e, 0); + + if (ret) { + error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + } + return !ret; +} + +static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr) +{ + event_notifier_cleanup(e); +} + /* * Disabling BAR mmaping can be slow, but toggling it around INTx can * also be a huge overhead. We try to get the best of both worlds by @@ -103,7 +121,7 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_intx_eoi(VFIODevice *vbasedev) +void vfio_pci_intx_eoi(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -111,7 +129,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev) return; } - trace_vfio_intx_eoi(vbasedev->name); + trace_vfio_pci_intx_eoi(vbasedev->name); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -136,8 +154,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) pci_irq_deassert(&vdev->pdev); /* Get an eventfd for resample/unmask */ - if (event_notifier_init(&vdev->intx.unmask, 0)) { - error_setg(errp, "event_notifier_init failed eoi"); + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { goto fail; } @@ -169,7 +186,7 @@ fail_vfio: kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, vdev->intx.route.irq); fail_irqfd: - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); fail: qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev); vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); @@ -201,7 +218,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) } /* We only need to close the eventfd for VFIO to cleanup the kernel side */ - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); /* QEMU starts listening for interrupt events. */ qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt), @@ -236,7 +253,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route) } /* Re-enable the interrupt in cased we missed an EOI */ - vfio_intx_eoi(&vdev->vbasedev); + vfio_pci_intx_eoi(&vdev->vbasedev); } static void vfio_intx_routing_notifier(PCIDevice *pdev) @@ -268,7 +285,6 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); Error *err = NULL; int32_t fd; - int ret; if (!pin) { @@ -291,9 +307,8 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) } #endif - ret = event_notifier_init(&vdev->intx.interrupt, 0); - if (ret) { - error_setg_errno(errp, -ret, "event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0, + errp)) { return false; } fd = event_notifier_get_fd(&vdev->intx.interrupt); @@ -302,7 +317,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); return false; } @@ -329,13 +344,18 @@ static void vfio_intx_disable(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); vdev->interrupt = VFIO_INT_NONE; trace_vfio_intx_disable(vdev->vbasedev.name); } +bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp) +{ + return vfio_intx_enable(vdev, errp); +} + /* * MSI/X */ @@ -460,8 +480,8 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) return ret; } -static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - int vector_n, bool msix) +void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int vector_n, bool msix) { if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; @@ -471,13 +491,16 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, vector_n, &vdev->pdev); } -static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr) { + const char *name = "kvm_interrupt"; + if (vector->virq < 0) { return; } - if (event_notifier_init(&vector->kvm_interrupt, 0)) { + if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr, + NULL)) { goto fail_notifier; } @@ -489,19 +512,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) return; fail_kvm: - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr); fail_notifier: kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; } -static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int nr) { kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr); } static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, @@ -511,6 +535,43 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, kvm_irqchip_commit_routes(kvm_state); } +static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector, + unsigned int nr) +{ + Error *err = NULL; + int32_t fd; + + if (vector->virq >= 0) { + fd = event_notifier_get_fd(&vector->kvm_interrupt); + } else { + fd = event_notifier_get_fd(&vector->interrupt); + } + + if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, + VFIO_IRQ_SET_ACTION_TRIGGER, + fd, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); + } +} + +void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + PCIDevice *pdev = &vdev->pdev; + Error *local_err = NULL; + + vector->vdev = vdev; + vector->virq = -1; + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr, + &local_err)) { + error_report_err(local_err); + } + vector->use = true; + if (vdev->interrupt == VFIO_INT_MSIX) { + msix_vector_use(pdev, nr); + } +} + static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { @@ -524,13 +585,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vector = &vdev->msi_vectors[nr]; if (!vector->use) { - vector->vdev = vdev; - vector->virq = -1; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); - } - vector->use = true; - msix_vector_use(pdev, nr); + vfio_pci_vector_init(vdev, nr); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -542,19 +597,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, */ if (vector->virq >= 0) { if (!msg) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, nr); } else { vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { if (msg) { if (vdev->defer_kvm_irq_routing) { - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); } else { vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); kvm_irqchip_commit_route_changes(&vfio_route_change); - vfio_connect_kvm_msi_virq(vector); + vfio_connect_kvm_msi_virq(vector, nr); } } } @@ -583,21 +638,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, strerror(-ret)); } } else { - Error *err = NULL; - int32_t fd; - - if (vector->virq >= 0) { - fd = event_notifier_get_fd(&vector->kvm_interrupt); - } else { - fd = event_notifier_get_fd(&vector->interrupt); - } - - if (!vfio_device_irq_set_signaling(&vdev->vbasedev, - VFIO_PCI_MSIX_IRQ_INDEX, nr, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, - &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - } + set_irq_signalling(&vdev->vbasedev, vector, nr); } } @@ -645,14 +686,14 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } -static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); vdev->defer_kvm_irq_routing = true; vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); } -static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { int i; @@ -662,7 +703,7 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) kvm_irqchip_commit_route_changes(&vfio_route_change); for (i = 0; i < vdev->nr_vectors; i++) { - vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]); + vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i); } } @@ -682,14 +723,14 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) * routes once rather than per vector provides a substantial * performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); if (vdev->nr_vectors) { ret = vfio_enable_vectors(vdev, true); @@ -733,19 +774,21 @@ retry: * Deferring to commit the KVM routes once rather than per vector * provides a substantial performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; + Error *local_err = NULL; vector->vdev = vdev; vector->virq = -1; vector->use = true; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i, + &local_err)) { + error_report_err(local_err); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -755,10 +798,10 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vfio_add_kvm_msi_virq(vdev, vector, i, false); + vfio_pci_add_kvm_msi_virq(vdev, vector, i, false); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); /* Set interrupt type prior to possible interrupts */ vdev->interrupt = VFIO_INT_MSI; @@ -801,11 +844,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) VFIOMSIVector *vector = &vdev->msi_vectors[i]; if (vdev->msi_vectors[i].use) { if (vector->virq >= 0) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, i); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), NULL, NULL, NULL); - event_notifier_cleanup(&vector->interrupt); + vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i); } } @@ -984,7 +1027,7 @@ static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset, { return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, - offset, size, data); + offset, size, data, false); } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) @@ -1738,7 +1781,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) return true; } -static void vfio_teardown_msi(VFIOPCIDevice *vdev) +void vfio_pci_teardown_msi(VFIOPCIDevice *vdev) { msi_uninit(&vdev->pdev); @@ -1788,6 +1831,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size = bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr = (bar->ioport == 0); } static void vfio_bars_prepare(VFIOPCIDevice *vdev) @@ -1834,7 +1880,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev) } } -static void vfio_bars_exit(VFIOPCIDevice *vdev) +void vfio_pci_bars_exit(VFIOPCIDevice *vdev) { int i; @@ -2425,7 +2471,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) g_free(config); } -static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -2701,7 +2747,7 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_intx_eoi, + .vfio_eoi = vfio_pci_intx_eoi, .vfio_get_object = vfio_pci_get_object, .vfio_save_config = vfio_pci_save_config, .vfio_load_config = vfio_pci_load_config, @@ -2772,7 +2818,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) return true; } -static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info = NULL; @@ -2818,7 +2864,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return false; } - trace_vfio_populate_device_config(vdev->vbasedev.name, + trace_vfio_pci_populate_device_config(vdev->vbasedev.name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -2840,7 +2886,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(strerror(-ret)); + trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { @@ -2852,8 +2898,20 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_pci_put_device(VFIOPCIDevice *vdev) +void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* + * XXX Leaking igd_opregion is not an oversight, we can't remove the + * fw_cfg entry therefore leaking this allocation seems like the safest + * option. + * + * g_free(vdev->igd_opregion); + */ + vfio_device_detach(&vdev->vbasedev); g_free(vdev->vbasedev.name); @@ -2888,7 +2946,7 @@ static void vfio_err_notifier_handler(void *opaque) * and continue after disabling error recovery support for the * device. */ -static void vfio_register_err_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) { Error *err = NULL; int32_t fd; @@ -2897,8 +2955,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) return; } - if (event_notifier_init(&vdev->err_notifier, 0)) { - error_report("vfio: Unable to init event notifier for error detection"); + if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0, + &err)) { + error_report_err(err); vdev->pci_aer = false; return; } @@ -2910,7 +2969,7 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); vdev->pci_aer = false; } } @@ -2929,7 +2988,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); } static void vfio_req_notifier_handler(void *opaque) @@ -2947,7 +3006,7 @@ static void vfio_req_notifier_handler(void *opaque) } } -static void vfio_register_req_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) { struct vfio_irq_info irq_info; Error *err = NULL; @@ -2964,8 +3023,9 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) return; } - if (event_notifier_init(&vdev->req_notifier, 0)) { - error_report("vfio: Unable to init event notifier for device request"); + if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0, + &err)) { + error_report_err(err); return; } @@ -2976,7 +3036,7 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); } else { vdev->req_enabled = true; } @@ -2996,15 +3056,28 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); vdev->req_enabled = false; } -static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; VFIODevice *vbasedev = &vdev->vbasedev; + uint32_t config_space_size; + int ret; + + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + + /* Get a copy of config space */ + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? -ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); + return false; + } /* vfio emulates a lot for us, but some bits need extra love */ vdev->emulated_config_bits = g_malloc0(vdev->config_size); @@ -3094,7 +3167,7 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -3126,15 +3199,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_realize(PCIDevice *pdev, Error **errp) +static void vfio_pci_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; + int i; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; - uint32_t config_space_size; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -3185,18 +3257,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - if (!vfio_populate_device(vdev, errp)) { - goto error; - } - - config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); - - /* Get a copy of config space */ - ret = vfio_pci_config_space_read(vdev, 0, config_space_size, - vdev->pdev.config); - if (ret < (int)config_space_size) { - ret = ret < 0 ? -ret : EFAULT; - error_setg_errno(errp, ret, "failed to read device config space"); + if (!vfio_pci_populate_device(vdev, errp)) { goto error; } @@ -3210,7 +3271,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto out_teardown; } - if (!vfio_add_capabilities(vdev, errp)) { + if (!vfio_pci_add_capabilities(vdev, errp)) { goto out_unset_idev; } @@ -3226,7 +3287,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bar_quirk_setup(vdev, i); } - if (!vfio_interrupt_setup(vdev, errp)) { + if (!vfio_pci_interrupt_setup(vdev, errp)) { goto out_unset_idev; } @@ -3270,8 +3331,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - vfio_register_err_notifier(vdev); - vfio_register_req_notifier(vdev); + vfio_pci_register_err_notifier(vdev); + vfio_pci_register_req_notifier(vdev); vfio_setup_resetfn_quirk(vdev); return; @@ -3292,8 +3353,8 @@ out_unset_idev: pci_device_unset_iommu_device(pdev); } out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); + vfio_pci_teardown_msi(vdev); + vfio_pci_bars_exit(vdev); error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); } @@ -3302,17 +3363,6 @@ static void vfio_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* - * XXX Leaking igd_opregion is not an oversight, we can't remove the - * fw_cfg entry therefore leaking this allocation seems like the safest - * option. - * - * g_free(vdev->igd_opregion); - */ vfio_pci_put_device(vdev); } @@ -3331,9 +3381,9 @@ static void vfio_exitfn(PCIDevice *pdev) if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } - vfio_teardown_msi(vdev); + vfio_pci_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); - vfio_bars_exit(vdev); + vfio_pci_bars_exit(vdev); vfio_migration_exit(vbasedev); if (!vbasedev->mdev) { pci_device_unset_iommu_device(pdev); @@ -3344,6 +3394,11 @@ static void vfio_pci_reset(DeviceState *dev) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); + /* Do not reset the device during qemu_system_reset prior to cpr load */ + if (cpr_is_incoming()) { + return; + } + trace_vfio_pci_reset(vdev->vbasedev.name); vfio_pci_pre_reset(vdev); @@ -3401,6 +3456,13 @@ static void vfio_instance_init(Object *obj) /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command * line, therefore, no need to wait to realize like other devices */ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + + /* + * A device that is resuming for cpr is already configured, so do not + * reset it during qemu_system_reset prior to cpr load, else interrupts + * may be lost. + */ + pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR; } static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) @@ -3418,7 +3480,7 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_base_dev_info = { .name = TYPE_VFIO_PCI_BASE, .parent = TYPE_PCI_DEVICE, - .instance_size = 0, + .instance_size = sizeof(VFIOPCIDevice), .abstract = true, .class_init = vfio_pci_base_dev_class_init, .interfaces = (const InterfaceInfo[]) { @@ -3513,8 +3575,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) #ifdef CONFIG_IOMMUFD object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif + dc->vmsd = &vfio_cpr_pci_vmstate; dc->desc = "VFIO-based PCI device assignment"; - pdc->realize = vfio_realize; + pdc->realize = vfio_pci_realize; object_class_property_set_description(klass, /* 1.3 */ "host", @@ -3640,7 +3703,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_dev_info = { .name = TYPE_VFIO_PCI, .parent = TYPE_VFIO_PCI_BASE, - .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, .instance_finalize = vfio_instance_finalize, |