aboutsummaryrefslogtreecommitdiff
path: root/hw/vfio/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/pci.c')
-rw-r--r--hw/vfio/pci.c182
1 files changed, 171 insertions, 11 deletions
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fa25bde..4fa692c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -29,6 +29,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
#include "migration/cpr.h"
#include "qobject/qdict.h"
@@ -48,8 +49,6 @@
#include "vfio-migration-internal.h"
#include "vfio-helpers.h"
-#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
-
/* Protected by BQL */
static KVMRouteChange vfio_route_change;
@@ -57,20 +56,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
+/* Create new or reuse existing eventfd */
static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr, Error **errp)
{
- int ret = event_notifier_init(e, 0);
+ int fd, ret;
+
+ fd = vfio_cpr_load_vector_fd(vdev, name, nr);
+ if (fd >= 0) {
+ event_notifier_init_fd(e, fd);
+ return true;
+ }
+ ret = event_notifier_init(e, 0);
if (ret) {
error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
+ return false;
}
- return !ret;
+
+ fd = event_notifier_get_fd(e);
+ vfio_cpr_save_vector_fd(vdev, name, nr, fd);
+ return true;
}
static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr)
{
+ vfio_cpr_delete_vector_fd(vdev, name, nr);
event_notifier_cleanup(e);
}
@@ -196,6 +208,36 @@ fail:
#endif
}
+static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
+{
+#ifdef CONFIG_KVM
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_resamplefds_enabled()) {
+ return true;
+ }
+
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
+ return false;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &vdev->intx.interrupt,
+ &vdev->intx.unmask,
+ vdev->intx.route.irq)) {
+ error_setg_errno(errp, errno, "failed to setup resample irqfd");
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
+ return false;
+ }
+
+ vdev->intx.kvm_accel = true;
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
+ return true;
+#else
+ return true;
+#endif
+}
+
static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
@@ -291,7 +333,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load.
+ * The incoming state is cleared thereafter.
+ */
+ if (!cpr_is_incoming()) {
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
pci_config_set_interrupt_pin(vdev->pdev.config, pin);
@@ -314,6 +362,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
+
+ if (cpr_is_incoming()) {
+ if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ goto skip_signaling;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
@@ -325,6 +381,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
+skip_signaling:
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
@@ -394,6 +451,14 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ int fd = event_notifier_get_fd(&vector->interrupt);
+
+ qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+}
+
/*
* Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
* fd to kernel.
@@ -656,6 +721,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
unsigned int nr, MSIMessage msg)
{
+ /*
+ * Ignore the callback from msix_set_vector_notifiers during resume.
+ * The necessary subset of these actions is called from
+ * vfio_cpr_claim_vectors during post load.
+ */
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}
@@ -686,6 +760,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
+{
+ msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+ vfio_msix_vector_release, NULL);
+}
+
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
@@ -2744,6 +2824,20 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
return ret;
}
+void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ int page_size = qemu_real_host_page_size();
+ int bar;
+
+ for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+ PCIIORegion *r = &pdev->io_regions[bar];
+ if (r->addr != PCI_BAR_UNMAPPED && r->size > 0 && r->size < page_size) {
+ vfio_sub_page_bar_update_mapping(pdev, bar);
+ }
+ }
+}
+
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
@@ -2811,10 +2905,6 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
"vfio-vga-io@0x3c0",
QEMU_PCI_VGA_IO_HI_SIZE);
- pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
-
return true;
}
@@ -2914,7 +3004,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev)
vfio_device_detach(&vdev->vbasedev);
- g_free(vdev->vbasedev.name);
+ vfio_device_free_name(&vdev->vbasedev);
g_free(vdev->msix);
}
@@ -2965,6 +3055,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->err_notifier);
qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3032,6 +3127,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->req_notifier);
qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ vdev->req_enabled = true;
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3061,6 +3162,15 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
+void vfio_pci_config_register_vga(VFIOPCIDevice *vdev)
+{
+ assert(vdev->vga != NULL);
+
+ pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
+}
+
bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@@ -3135,6 +3245,23 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vdev->sub_device_id);
}
+ /*
+ * Class code is a 24-bit value at config space 0x09. Allow overriding it
+ * with any 24-bit value.
+ */
+ if (vdev->class_code != PCI_ANY_ID) {
+ if (vdev->class_code > 0xffffff) {
+ error_setg(errp, "invalid PCI class code provided");
+ return false;
+ }
+ /* Higher 24 bits of PCI_CLASS_REVISION are class code */
+ vfio_add_emulated_long(vdev, PCI_CLASS_REVISION,
+ vdev->class_code << 8, ~0xff);
+ trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code);
+ } else {
+ vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8;
+ }
+
/* QEMU can change multi-function devices to single function, or reverse */
vdev->emulated_config_bits[PCI_HEADER_TYPE] =
PCI_HEADER_TYPE_MULTI_FUNCTION;
@@ -3164,6 +3291,10 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_bars_register(vdev);
+ if (vdev->vga && vfio_is_vga(vdev)) {
+ vfio_pci_config_register_vga(vdev);
+ }
+
return true;
}
@@ -3189,7 +3320,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+
+ /*
+ * During CPR, do not call vfio_intx_enable at this time. Instead,
+ * call it from vfio_pci_post_load after the intx routing data has
+ * been loaded from vmstate.
+ */
+ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
timer_free(vdev->intx.mmap_timer);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
@@ -3524,6 +3661,11 @@ static const Property vfio_pci_dev_properties[] = {
vbasedev.migration_multifd_transfer,
vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
.set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice,
+ vbasedev.migration_load_config_after_iter,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+ vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3544,6 +3686,8 @@ static const Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice,
+ class_code, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
nv_gpudirect_clique,
@@ -3698,6 +3842,20 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
"x-migration-multifd-transfer",
"Transfer this device state via "
"multifd channels when live migrating it");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-load-config-after-iter",
+ "Start the config load only after "
+ "all iterables were loaded (during "
+ "non-iterables loading phase) when "
+ "doing live migration of device state "
+ "via multifd channels");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-max-queued-buffers-size",
+ "Maximum size of in-flight VFIO "
+ "device state buffers queued at the "
+ "destination when doing live "
+ "migration of device state via "
+ "multifd channels");
}
static const TypeInfo vfio_pci_dev_info = {
@@ -3710,6 +3868,8 @@ static const TypeInfo vfio_pci_dev_info = {
static const Property vfio_pci_dev_nohotplug_properties[] = {
DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
+ DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice,
+ use_legacy_x86_rom, false),
DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate,
ON_OFF_AUTO_AUTO),
};