aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
author: Stefan Hajnoczi <stefanha@redhat.com>, 2025-03-07 07:39:21 +0800
committer: Stefan Hajnoczi <stefanha@redhat.com>, 2025-03-07 07:39:21 +0800
commit: 2400fad572906127e9d453b92f90806d66583dc7 (patch)
tree: c2fe81ac6cbce922a1b4e4e27c17cdbac82723f0 /hw
parent: e17f08b5fbaeb71daee175f835b1b00d841391b6 (diff)
parent: 59a67e70950bcc2002d3a8d22a17743e0f70da96 (diff)
download: qemu-2400fad572906127e9d453b92f90806d66583dc7.zip
download: qemu-2400fad572906127e9d453b92f90806d66583dc7.tar.gz
download: qemu-2400fad572906127e9d453b92f90806d66583dc7.tar.bz2
Merge tag 'pull-vfio-20250306' of https://github.com/legoater/qemu into staging
vfio queue: * Added property documentation * Added Minor fixes * Implemented basic PCI PM capability backing * Promoted new IGD maintainer * Deprecated vfio-plaform * Extended VFIO migration with multifd support # -----BEGIN PGP SIGNATURE----- # # iQIyBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmfJrZoACgkQUaNDx8/7 # 7KFE2A/0Dmief9u/dDJIKGIDa+iawcf4hu8iX4v5pB0DlGniT3rgK8WMGnhDpPxq # Q4wsKfo+JJ2q6msInrT7Ckqyydu9nQztI3vwmfMuWxLhTMyH28K96ptwPqIZBjOx # rPTEXfnVX4W3tpn1+48S+vefWVa/gkBkIvv7RpK18rMBXv1kDeyOvc/d2dbAt7ft # zJc4f8gH3jfQzGwmnYVZU1yPrZN7p6zhYR/AD3RQOY97swgZIEyYxXhOuTPiCuEC # zC+2AMKi9nmnCG6x/mnk7l2yJXSlv7lJdqcjYZhJ9EOIYfiUGTREYIgQbARcafE/ # 4KSg2QR35BoUd4YrmEWxXJCRf3XnyWXDY36dDKVhC0OHng1F/U44HuL4QxwoTIay # s1SP/DHcvDiPAewVTvdgt7Iwfn9xGhcQO2pkrxBoNLB5JYwW+R6mG7WXeDv1o3GT # QosTu1fXZezQqFd4v6+q5iRNS2KtBZLTspwAmVdywEFUs+ZLBRlC+bodYlinZw6B # Yl/z0LfAEh4J55QmX2espbp8MH1+mALuW2H2tgSGSrTBX1nwxZFI5veFzPepgF2S # eTx69BMjiNMwzIjq1T7e9NpDCceiW0fXDu7IK1MzYhqg1nM9lX9AidhFTeiF2DB2 # EPb3ljy/8fyxcPKa1T9X47hQaSjbMwofaO8Snoh0q0jokY246Q== # =hIBw # -----END PGP SIGNATURE----- # gpg: Signature made Thu 06 Mar 2025 22:13:46 HKT # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [full] # gpg: aka "Cédric Le Goater <clg@kaod.org>" [full] # Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1 * tag 'pull-vfio-20250306' of https://github.com/legoater/qemu: (42 commits) hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property vfio/migration: Make x-migration-multifd-transfer VFIO property mutable vfio/migration: Add x-migration-multifd-transfer VFIO property vfio/migration: Multifd device state transfer support - send side vfio/migration: Multifd device state transfer support - config loading support migration/qemu-file: Define g_autoptr() cleanup function for QEMUFile vfio/migration: Multifd device state transfer support - load thread vfio/migration: Multifd device state 
transfer support - received buffers queuing vfio/migration: Setup and cleanup multifd transfer in these general methods vfio/migration: Multifd setup/cleanup functions and associated VFIOMultifd vfio/migration: Multifd device state transfer - add support checking function vfio/migration: Multifd device state transfer support - basic types vfio/migration: Move migration channel flags to vfio-common.h header file vfio/migration: Add vfio_add_bytes_transferred() vfio/migration: Convert bytes_transferred counter to atomic vfio/migration: Add load_device_config_state_start trace event migration: Add save_live_complete_precopy_thread handler migration/multifd: Add multifd_device_state_supported() migration/multifd: Make MultiFDSendData a struct migration/multifd: Device state transfer support - send side ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r-- hw/core/machine.c 2
-rw-r--r-- hw/net/e1000e.c 3
-rw-r--r-- hw/net/eepro100.c 4
-rw-r--r-- hw/net/igb.c 3
-rw-r--r-- hw/nvme/ctrl.c 3
-rw-r--r-- hw/pci-bridge/pcie_pci_bridge.c 3
-rw-r--r-- hw/pci/pci.c 93
-rw-r--r-- hw/pci/trace-events 2
-rw-r--r-- hw/vfio/amd-xgbe.c 2
-rw-r--r-- hw/vfio/ap.c 9
-rw-r--r-- hw/vfio/calxeda-xgmac.c 2
-rw-r--r-- hw/vfio/ccw.c 27
-rw-r--r-- hw/vfio/meson.build 1
-rw-r--r-- hw/vfio/migration-multifd.c 679
-rw-r--r-- hw/vfio/migration-multifd.h 34
-rw-r--r-- hw/vfio/migration.c 106
-rw-r--r-- hw/vfio/pci.c 180
-rw-r--r-- hw/vfio/pci.h 1
-rw-r--r-- hw/vfio/platform.c 25
-rw-r--r-- hw/vfio/trace-events 13
-rw-r--r-- hw/virtio/virtio-pci.c 11
21 files changed, 1125 insertions, 78 deletions
diff --git a/hw/core/machine.c b/hw/core/machine.c
index b68b8b9..f52a4f2 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -44,6 +44,8 @@ GlobalProperty hw_compat_9_2[] = {
{ "virtio-balloon-pci-non-transitional", "vectors", "0" },
{ "virtio-mem-pci", "vectors", "0" },
{ "migration", "multifd-clean-tls-termination", "false" },
+ { "migration", "send-switchover-start", "off"},
+ { "vfio-pci", "x-migration-multifd-transfer", "off" },
};
const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index f637853..b72cbab 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -372,8 +372,7 @@ static int
e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
Error *local_err = NULL;
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
+ int ret = pci_pm_init(pdev, offset, &local_err);
if (local_err) {
error_report_err(local_err);
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 6d85322..29a3986 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -551,9 +551,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
if (info->power_management) {
/* Power Management Capabilities */
int cfg_offset = 0xdc;
- int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
- cfg_offset, PCI_PM_SIZEOF,
- errp);
+ int r = pci_pm_init(&s->dev, cfg_offset, errp);
if (r < 0) {
return;
}
diff --git a/hw/net/igb.c b/hw/net/igb.c
index c965fc2..e318df4 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -356,8 +356,7 @@ static int
igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
Error *local_err = NULL;
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
+ int ret = pci_pm_init(pdev, offset, &local_err);
if (local_err) {
error_report_err(local_err);
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e62c6a3..518d02d 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8600,8 +8600,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
Error *err = NULL;
int ret;
- ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &err);
+ ret = pci_pm_init(pci_dev, offset, &err);
if (err) {
error_report_err(err);
return ret;
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index fd4514a..2429503 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -52,11 +52,10 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
goto cap_error;
}
- pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
+ pos = pci_pm_init(d, 0, errp);
if (pos < 0) {
goto pm_error;
}
- d->exp.pm_cap = pos;
pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
pcie_cap_arifwd_init(d);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e3c28668..2844ec5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -435,6 +435,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+/*
+ * Add a PCI Power Management capability at @offset and start tracking it.
+ *
+ * On success the capability offset is cached in d->pm_cap and
+ * QEMU_PCI_CAP_PM is set in d->cap_present.  If wmask is also enabled for
+ * the power state field of the pmcsr register, guest writes may change the
+ * device PM state; BAR access is only enabled while the device is in the
+ * D0 state.
+ *
+ * Returns the capability offset, or the negative value reported by
+ * pci_add_capability() on failure, in which case d->pm_cap and
+ * d->cap_present are left unmodified.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+    int pos = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF,
+                                 errp);
+
+    if (pos >= 0) {
+        d->cap_present |= QEMU_PCI_CAP_PM;
+        d->pm_cap = pos;
+    }
+
+    return pos;
+}
+
+/*
+ * Return the power state field of the PMCSR register, or 0 (D0) when the
+ * device has no PM capability registered through pci_pm_init().
+ */
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+    if (d->cap_present & QEMU_PCI_CAP_PM) {
+        uint16_t ctrl = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+        return ctrl & PCI_PM_CTRL_STATE_MASK;
+    }
+
+    return 0;
+}
+
+/*
+ * Validate a PM state change after a config space write.
+ *
+ * @addr/@l describe the write that just landed in config space, @old is the
+ * PM state sampled before that write.  Writes that do not cover the PMCSR
+ * byte, or devices without a tracked PM capability, leave the state as is.
+ * A rejected request (D1/D2 not advertised in PMC, or a move to a lower
+ * non-D0 state) is undone by writing @old back into the state field.
+ *
+ * Returns the PM state in effect once the write has been processed.
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+    uint8_t requested;
+    uint16_t pmc;
+    bool rejected;
+
+    if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+        return old;
+    }
+
+    if (!range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+        return old;
+    }
+
+    requested = pci_pm_state(d);
+    if (requested == old) {
+        return old;
+    }
+
+    pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+    /*
+     * D1 and D2 need to be advertised in PMC.  Apart from that a device may
+     * only go to a higher D-state or straight back to D0.
+     */
+    rejected = (requested == 1 && !(pmc & PCI_PM_CAP_D1)) ||
+               (requested == 2 && !(pmc & PCI_PM_CAP_D2)) ||
+               (old && requested && requested < old);
+
+    if (!rejected) {
+        trace_pci_pm_transition(d->name, pci_dev_bus_num(d),
+                                PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+                                old, requested);
+        return requested;
+    }
+
+    /* Put the previous state back into the PMCSR state field */
+    pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+                                 PCI_PM_CTRL_STATE_MASK);
+    pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL, old);
+    trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+                                PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+                                old, requested);
+
+    return old;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -474,6 +552,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -1606,7 +1689,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->enabled) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1672,6 +1755,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1684,11 +1768,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index e98f575..6a99689 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,6 +1,8 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
index aaa9690..5927503 100644
--- a/hw/vfio/amd-xgbe.c
+++ b/hw/vfio/amd-xgbe.c
@@ -15,12 +15,14 @@
#include "hw/vfio/vfio-amd-xgbe.h"
#include "migration/vmstate.h"
#include "qemu/module.h"
+#include "qemu/error-report.h"
static void amd_xgbe_realize(DeviceState *dev, Error **errp)
{
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev);
+ warn_report("-device vfio-amd-xgbe is deprecated");
vdev->compat = g_strdup("amd,xgbe-seattle-v1a");
vdev->num_compat = 1;
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 30b08ad..c7ab4ff 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -257,6 +257,15 @@ static void vfio_ap_class_init(ObjectClass *klass, void *data)
dc->hotpluggable = true;
device_class_set_legacy_reset(dc, vfio_ap_reset);
dc->bus_type = TYPE_AP_BUS;
+
+ object_class_property_set_description(klass, /* 3.1 */
+ "sysfsdev",
+ "Host sysfs path of assigned device");
+#ifdef CONFIG_IOMMUFD
+ object_class_property_set_description(klass, /* 9.0 */
+ "iommufd",
+ "Set host IOMMUFD backend device");
+#endif
}
static const TypeInfo vfio_ap_info = {
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
index b016d42..a5ef262 100644
--- a/hw/vfio/calxeda-xgmac.c
+++ b/hw/vfio/calxeda-xgmac.c
@@ -15,12 +15,14 @@
#include "hw/vfio/vfio-calxeda-xgmac.h"
#include "migration/vmstate.h"
#include "qemu/module.h"
+#include "qemu/error-report.h"
static void calxeda_xgmac_realize(DeviceState *dev, Error **errp)
{
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev);
+ warn_report("-device vfio-calxeda-xgmac is deprecated");
vdev->compat = g_strdup("calxeda,hb-xgmac");
vdev->num_compat = 1;
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 67bc137..e5e0d9e 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -51,17 +51,8 @@ struct VFIOCCWDevice {
EventNotifier crw_notifier;
EventNotifier req_notifier;
bool force_orb_pfch;
- bool warned_orb_pfch;
};
-static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
- const char *msg)
-{
- warn_report_once_cond(&vcdev->warned_orb_pfch,
- "vfio-ccw (devno %x.%x.%04x): %s",
- sch->cssid, sch->ssid, sch->devno, msg);
-}
-
static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
{
vdev->needs_reset = false;
@@ -83,7 +74,8 @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) {
sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
- warn_once_pfch(vcdev, sch, "PFCH flag forced");
+ warn_report_once("vfio-ccw (devno %x.%x.%04x): PFCH flag forced",
+ sch->cssid, sch->ssid, sch->devno);
}
QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
@@ -717,6 +709,21 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data)
cdc->handle_halt = vfio_ccw_handle_halt;
cdc->handle_clear = vfio_ccw_handle_clear;
cdc->handle_store = vfio_ccw_handle_store;
+
+ object_class_property_set_description(klass, /* 2.10 */
+ "sysfsdev",
+ "Host sysfs path of assigned device");
+ object_class_property_set_description(klass, /* 3.0 */
+ "force-orb-pfch",
+ "Force unlimited prefetch");
+#ifdef CONFIG_IOMMUFD
+ object_class_property_set_description(klass, /* 9.0 */
+ "iommufd",
+ "Set host IOMMUFD backend device");
+#endif
+ object_class_property_set_description(klass, /* 9.2 */
+ "loadparm",
+ "Define which devices that can be used for booting");
}
static const TypeInfo vfio_ccw_info = {
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index bba776f..260d65f 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -5,6 +5,7 @@ vfio_ss.add(files(
'container-base.c',
'container.c',
'migration.c',
+ 'migration-multifd.c',
'cpr.c',
))
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
new file mode 100644
index 0000000..2337247
--- /dev/null
+++ b/hw/vfio/migration-multifd.c
@@ -0,0 +1,679 @@
+/*
+ * Multifd VFIO migration
+ *
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/vfio/vfio-common.h"
+#include "migration/misc.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/main-loop.h"
+#include "qemu/thread.h"
+#include "io/channel-buffer.h"
+#include "migration/qemu-file.h"
+#include "migration-multifd.h"
+#include "trace.h"
+
+#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
+
+#define VFIO_DEVICE_STATE_PACKET_VER_CURRENT (0)
+
+/*
+ * Header prepended to every VFIO device state chunk sent over multifd.
+ *
+ * @version: packet format, must be VFIO_DEVICE_STATE_PACKET_VER_CURRENT
+ * @idx:     position of this chunk in the device state sequence;
+ *           UINT32_MAX is rejected by the receiver as invalid
+ * @flags:   VFIO_DEVICE_STATE_CONFIG_STATE marks the device config packet
+ * @data:    payload; its length is the packet size minus sizeof(header)
+ *
+ * NOTE(review): the header fields are written and read in this file without
+ * any byte-order conversion.
+ *
+ * The payload is a C99 flexible array member rather than a GNU zero-length
+ * array; sizeof() of the struct is unchanged.
+ */
+typedef struct VFIODeviceStatePacket {
+    uint32_t version;
+    uint32_t idx;
+    uint32_t flags;
+    uint8_t data[];
+} QEMU_PACKED VFIODeviceStatePacket;
+
+/* type safety: thin wrapper around the GArray holding VFIOStateBuffer elements */
+typedef struct VFIOStateBuffers {
+ GArray *array; /* created by vfio_state_buffers_init(), indexed by packet idx */
+} VFIOStateBuffers;
+
+typedef struct VFIOStateBuffer { /* one received device state chunk */
+ bool is_present; /* set once a packet with this index has been inserted */
+ char *data; /* heap copy of the packet payload; stolen (NULL) once written to the device */
+ size_t len; /* payload length in bytes */
+} VFIOStateBuffer;
+
+typedef struct VFIOMultifd { /* per-device multifd load state, see vfio_multifd_new() */
+ bool load_bufs_thread_running; /* set by vfio_multifd_switchover_start(), cleared when the load thread exits */
+ bool load_bufs_thread_want_exit; /* set by vfio_load_cleanup_load_bufs_thread() to stop the load thread */
+
+ VFIOStateBuffers load_bufs; /* received buffers, indexed by packet idx */
+ QemuCond load_bufs_buffer_ready_cond; /* signalled after a buffer insert and on cleanup */
+ QemuCond load_bufs_thread_finished_cond; /* signalled by the load thread right before it returns */
+ QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
+ uint32_t load_buf_idx; /* index of the next buffer the load thread will consume */
+ uint32_t load_buf_idx_last; /* index of the config state packet; UINT32_MAX until it is received */
+} VFIOMultifd;
+
+/*
+ * GArray clear callback for a VFIOStateBuffer element: frees the payload of
+ * a populated slot and marks it empty.  Slots never filled are left alone.
+ */
+static void vfio_state_buffer_clear(gpointer data)
+{
+    VFIOStateBuffer *slot = data;
+
+    if (slot->is_present) {
+        g_clear_pointer(&slot->data, g_free);
+        slot->is_present = false;
+    }
+}
+
+/*
+ * Create the backing array: not zero-terminated, new elements are zeroed,
+ * and each element is released through vfio_state_buffer_clear().
+ */
+static void vfio_state_buffers_init(VFIOStateBuffers *bufs)
+{
+    GArray *arr = g_array_new(FALSE, TRUE, sizeof(VFIOStateBuffer));
+
+    g_array_set_clear_func(arr, vfio_state_buffer_clear);
+    bufs->array = arr;
+}
+
+/* Drop the reference on the backing array (if any) and reset the wrapper. */
+static void vfio_state_buffers_destroy(VFIOStateBuffers *bufs)
+{
+    GArray *arr = g_steal_pointer(&bufs->array);
+
+    if (arr) {
+        g_array_unref(arr);
+    }
+}
+
+static void vfio_state_buffers_assert_init(VFIOStateBuffers *bufs)
+{
+ assert(bufs->array); /* array is non-NULL between vfio_state_buffers_init() and _destroy() */
+}
+
+/* Number of buffer slots currently allocated in @bufs. */
+static unsigned int vfio_state_buffers_size_get(VFIOStateBuffers *bufs)
+{
+    const GArray *arr = bufs->array;
+
+    return arr->len;
+}
+
+/* Resize @bufs to hold exactly @size slots. */
+static void vfio_state_buffers_size_set(VFIOStateBuffers *bufs,
+                                        unsigned int size)
+{
+    GArray *arr = bufs->array;
+
+    g_array_set_size(arr, size);
+}
+
+/* Pointer to slot @idx of @bufs; no bounds check is performed here. */
+static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs,
+                                              unsigned int idx)
+{
+    VFIOStateBuffer *slots = (void *)bufs->array->data;
+
+    return &slots[idx];
+}
+
+/*
+ * Store a copy of @packet's payload in the slot selected by packet->idx,
+ * growing the buffer array when the index lies past its current end.
+ *
+ * Returns false with @errp set if that slot has already been filled.
+ *
+ * called with load_bufs_mutex locked
+ */
+static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
+                                          VFIODeviceStatePacket *packet,
+                                          size_t packet_total_size,
+                                          Error **errp)
+{
+    VFIOMultifd *multifd = vbasedev->migration->multifd;
+    VFIOStateBuffers *bufs = &multifd->load_bufs;
+    const size_t payload_len = packet_total_size - sizeof(*packet);
+    VFIOStateBuffer *slot;
+
+    vfio_state_buffers_assert_init(bufs);
+
+    if (vfio_state_buffers_size_get(bufs) <= packet->idx) {
+        vfio_state_buffers_size_set(bufs, packet->idx + 1);
+    }
+
+    slot = vfio_state_buffers_at(bufs, packet->idx);
+    if (slot->is_present) {
+        error_setg(errp, "%s: state buffer %" PRIu32 " already filled",
+                   vbasedev->name, packet->idx);
+        return false;
+    }
+
+    assert(packet->idx >= multifd->load_buf_idx);
+
+    slot->len = payload_len;
+    slot->data = g_memdup2(&packet->data, payload_len);
+    slot->is_present = true;
+
+    return true;
+}
+
+/*
+ * Accept one device state packet received over a multifd channel.
+ *
+ * @opaque:    the VFIODevice the packet is addressed to
+ * @data:      raw packet, starting with a VFIODeviceStatePacket header
+ * @data_size: total size of @data in bytes
+ *
+ * The packet is validated (multifd transfer enabled, minimum size, version,
+ * index), its payload is queued in multifd->load_bufs and the load thread is
+ * woken up.  Must be called without the BQL held because load_bufs_mutex is
+ * taken here and the lock order is load_bufs_mutex -> BQL.
+ *
+ * Returns true on success, false with @errp set otherwise.
+ */
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
+                                    Error **errp)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        error_setg(errp,
+                   "%s: got device state packet but not doing multifd transfer",
+                   vbasedev->name);
+        return false;
+    }
+
+    assert(multifd);
+
+    if (data_size < sizeof(*packet)) {
+        error_setg(errp, "%s: packet too short at %zu (min is %zu)",
+                   vbasedev->name, data_size, sizeof(*packet));
+        return false;
+    }
+
+    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
+        error_setg(errp, "%s: packet has unknown version %" PRIu32,
+                   vbasedev->name, packet->version);
+        return false;
+    }
+
+    /* UINT32_MAX is the "not yet known" value of load_buf_idx_last */
+    if (packet->idx == UINT32_MAX) {
+        error_setg(errp, "%s: packet index is invalid", vbasedev->name);
+        return false;
+    }
+
+    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
+
+    /*
+     * Holding BQL here would violate the lock order and can cause
+     * a deadlock once we attempt to lock load_bufs_mutex below.
+     */
+    assert(!bql_locked());
+
+    WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+        if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size,
+                                           errp)) {
+            return false;
+        }
+
+        /*
+         * config state packet should be the last one in the stream
+         *
+         * Record its index only once the packet has been accepted: a
+         * rejected (duplicate) packet must not move load_buf_idx_last below
+         * load_buf_idx, which the load thread asserts against.
+         */
+        if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
+            multifd->load_buf_idx_last = packet->idx;
+        }
+
+        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+    }
+
+    return true;
+}
+
+/*
+ * Load the device config state from the last received buffer.
+ *
+ * The payload of buffer load_buf_idx_last is copied into an in-memory
+ * QIOChannelBuffer, read back through a QEMUFile, checked for the
+ * VFIO_MIG_FLAG_DEV_CONFIG_STATE header and then handed to
+ * vfio_load_device_config_state() with the BQL held.
+ *
+ * Returns true on success, false with @errp set otherwise.
+ */
+static bool vfio_load_bufs_thread_load_config(VFIODevice *vbasedev,
+                                              Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIOStateBuffer *lb;
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f_out = NULL, f_in = NULL;
+    uint64_t mig_header;
+    int ret;
+
+    assert(multifd->load_buf_idx == multifd->load_buf_idx_last);
+    lb = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
+    assert(lb->is_present);
+
+    bioc = qio_channel_buffer_new(lb->len);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-load");
+
+    f_out = qemu_file_new_output(QIO_CHANNEL(bioc));
+    qemu_put_buffer(f_out, (uint8_t *)lb->data, lb->len);
+
+    ret = qemu_fflush(f_out);
+    if (ret) {
+        error_setg(errp, "%s: load config state flush failed: %d",
+                   vbasedev->name, ret);
+        return false;
+    }
+
+    /* Rewind so the input side reads what was just written */
+    if (qio_channel_io_seek(QIO_CHANNEL(bioc), 0, SEEK_SET, errp) < 0) {
+        return false;
+    }
+    f_in = qemu_file_new_input(QIO_CHANNEL(bioc));
+
+    mig_header = qemu_get_be64(f_in);
+    if (mig_header != VFIO_MIG_FLAG_DEV_CONFIG_STATE) {
+        error_setg(errp, "%s: expected FLAG_DEV_CONFIG_STATE but got %" PRIx64,
+                   vbasedev->name, mig_header);
+        return false;
+    }
+
+    /* Config load runs under the BQL; lock order is load_bufs_mutex -> BQL */
+    bql_lock();
+    ret = vfio_load_device_config_state(f_in, vbasedev);
+    bql_unlock();
+
+    if (ret < 0) {
+        error_setg(errp, "%s: vfio_load_device_config_state() failed: %d",
+                   vbasedev->name, ret);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Return the buffer at multifd->load_buf_idx if it has already been
+ * received, NULL otherwise (slot not allocated yet or not filled yet).
+ */
+static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
+{
+    unsigned int bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
+    VFIOStateBuffer *next;
+
+    if (multifd->load_buf_idx >= bufs_len) {
+        /* Can be at most one past the end of what has been allocated */
+        assert(multifd->load_buf_idx == bufs_len);
+        return NULL;
+    }
+
+    next = vfio_state_buffers_at(&multifd->load_bufs, multifd->load_buf_idx);
+
+    return next->is_present ? next : NULL;
+}
+
+/*
+ * Write the payload of @lb to the device's migration data_fd.
+ *
+ * Entered and left with load_bufs_mutex held; the mutex is released around
+ * every write() call.  Ownership of the payload is taken from @lb up front,
+ * so it is freed when this function returns.
+ *
+ * Returns true on success (including an empty buffer), false with @errp set
+ * if write() fails.
+ */
+static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
+                                         VFIOStateBuffer *lb,
+                                         Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    g_autofree char *payload = NULL;
+    char *cursor;
+    size_t buf_len;
+
+    if (lb->len == 0) {
+        return true;
+    }
+
+    trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
+                                                   multifd->load_buf_idx);
+
+    /* lb might become re-allocated when we drop the lock */
+    payload = g_steal_pointer(&lb->data);
+
+    for (cursor = payload, buf_len = lb->len; buf_len > 0; ) {
+        ssize_t wr_ret;
+        int saved_errno;
+
+        /*
+         * Loading data to the device takes a while,
+         * drop the lock during this process.
+         */
+        qemu_mutex_unlock(&multifd->load_bufs_mutex);
+        wr_ret = write(migration->data_fd, cursor, buf_len);
+        saved_errno = errno;
+        qemu_mutex_lock(&multifd->load_bufs_mutex);
+
+        if (wr_ret < 0) {
+            error_setg(errp,
+                       "%s: writing state buffer %" PRIu32 " failed: %d",
+                       vbasedev->name, multifd->load_buf_idx, saved_errno);
+            return false;
+        }
+
+        assert(wr_ret <= buf_len);
+        cursor += wr_ret;
+        buf_len -= wr_ret;
+    }
+
+    trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
+                                                 multifd->load_buf_idx);
+
+    return true;
+}
+
+/*
+ * True when the load thread has been asked to stop, either through
+ * multifd->load_bufs_thread_want_exit or through *@should_quit.
+ */
+static bool vfio_load_bufs_thread_want_exit(VFIOMultifd *multifd,
+                                            bool *should_quit)
+{
+    if (multifd->load_bufs_thread_want_exit) {
+        return true;
+    }
+
+    return qatomic_read(should_quit);
+}
+
+/*
+ * This thread is spawned by vfio_multifd_switchover_start() which gets
+ * called upon encountering the switchover point marker in main migration
+ * stream.
+ *
+ * It exits after either:
+ * * completing loading the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by it setting should_quit
+ * or by vfio_load_cleanup_load_bufs_thread() setting
+ * multifd->load_bufs_thread_want_exit.
+ *
+ * @opaque: the VFIODevice being loaded
+ * @should_quit: cancellation flag, polled together with
+ * multifd->load_bufs_thread_want_exit at the top of every iteration
+ * @errp: set on every failure path, including cancellation
+ *
+ * Buffers are consumed strictly in index order starting at load_buf_idx;
+ * the buffer at load_buf_idx_last is not written to the device but loaded
+ * as device config state.
+ *
+ * Returns true only if all buffers and the config state were loaded.
+ */
+static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ bool ret = false;
+
+ trace_vfio_load_bufs_thread_start(vbasedev->name);
+
+ assert(multifd);
+ QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); /* held until return; released only inside qemu_cond_wait() and around write() in vfio_load_state_buffer_write() */
+
+ assert(multifd->load_bufs_thread_running); /* set by vfio_multifd_switchover_start() before spawning us */
+
+ while (true) {
+ VFIOStateBuffer *lb;
+
+ /*
+ * Always check cancellation first after the buffer_ready wait below in
+ * case that cond was signalled by vfio_load_cleanup_load_bufs_thread().
+ */
+ if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
+ error_setg(errp, "operation cancelled");
+ goto thread_exit;
+ }
+
+ assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);
+
+ lb = vfio_load_state_buffer_get(multifd);
+ if (!lb) { /* next buffer in sequence has not arrived yet */
+ trace_vfio_load_state_device_buffer_starved(vbasedev->name,
+ multifd->load_buf_idx);
+ qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
+ &multifd->load_bufs_mutex);
+ continue;
+ }
+
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last) {
+ break; /* this is the config state buffer, handled after the loop */
+ }
+
+ if (multifd->load_buf_idx == 0) {
+ trace_vfio_load_state_device_buffer_start(vbasedev->name);
+ }
+
+ if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) {
+ goto thread_exit;
+ }
+
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
+ trace_vfio_load_state_device_buffer_end(vbasedev->name);
+ }
+
+ multifd->load_buf_idx++;
+ }
+
+ if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
+ goto thread_exit;
+ }
+
+ ret = true;
+
+thread_exit:
+ /*
+ * Notify possibly waiting vfio_load_cleanup_load_bufs_thread() that
+ * this thread is exiting.
+ */
+ multifd->load_bufs_thread_running = false;
+ qemu_cond_signal(&multifd->load_bufs_thread_finished_cond);
+
+ trace_vfio_load_bufs_thread_end(vbasedev->name);
+
+ return ret;
+}
+
+/*
+ * Allocate a VFIOMultifd with every field initialised: an empty buffer
+ * array, the mutex and both condition variables, the load position at 0,
+ * the config packet index unknown (UINT32_MAX) and no load thread running.
+ */
+static VFIOMultifd *vfio_multifd_new(void)
+{
+    VFIOMultifd *m = g_new(VFIOMultifd, 1);
+
+    /* Synchronisation primitives */
+    qemu_mutex_init(&m->load_bufs_mutex);
+    qemu_cond_init(&m->load_bufs_buffer_ready_cond);
+    qemu_cond_init(&m->load_bufs_thread_finished_cond);
+
+    /* Received buffers and the load position within them */
+    vfio_state_buffers_init(&m->load_bufs);
+    m->load_buf_idx = 0;
+    m->load_buf_idx_last = UINT32_MAX;
+
+    /* Load thread state */
+    m->load_bufs_thread_running = false;
+    m->load_bufs_thread_want_exit = false;
+
+    return m;
+}
+
+/*
+ * Ask vfio_load_bufs_thread to stop and wait until it has done so.
+ *
+ * While the thread is still marked as running, load_bufs_thread_want_exit
+ * is set, the buffer_ready condition it may be blocked on is signalled and
+ * this function waits on the thread_finished condition.
+ *
+ * Called with the BQL held; the BQL is released for the duration of the
+ * wait and re-taken before returning.
+ */
+static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
+{
+    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+    bql_unlock();
+
+    qemu_mutex_lock(&multifd->load_bufs_mutex);
+    for (; multifd->load_bufs_thread_running; ) {
+        multifd->load_bufs_thread_want_exit = true;
+
+        qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+        qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
+                       &multifd->load_bufs_mutex);
+    }
+    qemu_mutex_unlock(&multifd->load_bufs_mutex);
+
+    bql_lock();
+}
+
+/*
+ * Stop the load thread if it is still running, then release everything
+ * owned by @multifd and the structure itself.
+ */
+static void vfio_multifd_free(VFIOMultifd *multifd)
+{
+    /* Nothing below may be torn down while the thread can still use it */
+    vfio_load_cleanup_load_bufs_thread(multifd);
+
+    vfio_state_buffers_destroy(&multifd->load_bufs);
+
+    qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
+    qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
+    qemu_mutex_destroy(&multifd->load_bufs_mutex);
+
+    g_free(multifd);
+}
+
+/*
+ * Free the device's multifd state, if any, and clear the pointer to it.
+ * The pointer is reset before the state is torn down.
+ */
+void vfio_multifd_cleanup(VFIODevice *vbasedev)
+{
+    VFIOMultifd *multifd = g_steal_pointer(&vbasedev->migration->multifd);
+
+    if (multifd) {
+        vfio_multifd_free(multifd);
+    }
+}
+
+/*
+ * Multifd device state transfer needs both multifd device state support
+ * and the send-switchover-start migration setting.
+ */
+bool vfio_multifd_transfer_supported(void)
+{
+    if (!multifd_device_state_supported()) {
+        return false;
+    }
+
+    return migrate_send_switchover_start();
+}
+
+/*
+ * Whether this migration of @vbasedev uses multifd device state transfer,
+ * as latched by vfio_multifd_setup().
+ */
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev)
+{
+    return vbasedev->migration->multifd_transfer;
+}
+
+/*
+ * Decide whether this migration uses multifd device state transfer and,
+ * if so, optionally allocate the load-side state.
+ *
+ * @alloc_multifd: when true and multifd transfer is enabled, allocate
+ *                 migration->multifd (which must not exist yet)
+ *
+ * Returns true on success, including the case where multifd transfer is
+ * simply not used; false with @errp set if it was requested but is not
+ * supported in the current configuration.
+ */
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    /*
+     * Make a copy of this setting at the start in case it is changed
+     * mid-migration.
+     */
+    migration->multifd_transfer =
+        vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO ?
+        vfio_multifd_transfer_supported() :
+        vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
+
+    /* Nothing further to check or do */
+    if (!migration->multifd_transfer) {
+        return true;
+    }
+
+    if (!vfio_multifd_transfer_supported()) {
+        error_setg(errp,
+                   "%s: Multifd device transfer requested but unsupported in the current config",
+                   vbasedev->name);
+        return false;
+    }
+
+    if (!alloc_multifd) {
+        return true;
+    }
+
+    assert(!migration->multifd);
+    migration->multifd = vfio_multifd_new();
+
+    return true;
+}
+
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+ assert(vfio_multifd_transfer_enabled(vbasedev)); /* only valid when multifd transfer is in use for this device */
+
+ /*
+ * Emit dummy NOP data on the main migration channel since the actual
+ * device state transfer is done via multifd channels.
+ *
+ * The only thing written to @f is a single VFIO_MIG_FLAG_END_OF_STATE
+ * marker.
+ */
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+/*
+ * Serialise the device config state into a memory buffer and queue it on
+ * multifd as the packet with index @idx, flagged
+ * VFIO_DEVICE_STATE_CONFIG_STATE.
+ *
+ * @idstr/@instance_id identify the device towards
+ * multifd_queue_device_state().
+ *
+ * Returns true on success, false with @errp set otherwise.
+ */
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+                                               char *idstr,
+                                               uint32_t instance_id,
+                                               uint32_t idx,
+                                               Error **errp)
+{
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f = NULL;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    size_t total_len;
+    int flush_ret;
+
+    bioc = qio_channel_buffer_new(0);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+    f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+    if (vfio_save_device_config_state(f, vbasedev, errp)) {
+        return false;
+    }
+
+    flush_ret = qemu_fflush(f);
+    if (flush_ret) {
+        error_setg(errp, "%s: save config state flush failed: %d",
+                   vbasedev->name, flush_ret);
+        return false;
+    }
+
+    /* Header followed by whatever the config save put into the buffer */
+    total_len = sizeof(*packet) + bioc->usage;
+    packet = g_malloc0(total_len);
+    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+    packet->idx = idx;
+    memcpy(&packet->data, bioc->data, bioc->usage);
+
+    if (!multifd_queue_device_state(idstr, instance_id,
+                                    (char *)packet, total_len)) {
+        error_setg(errp, "%s: multifd config data queuing failed",
+                   vbasedev->name);
+        return false;
+    }
+
+    vfio_mig_add_bytes_transferred(total_len);
+
+    return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ * multifd_device_state_save_thread_should_exit() returning true.
+ *
+ * @d: thread data; handler_opaque is the VFIODevice, idstr and instance_id
+ * are passed through to multifd_queue_device_state()
+ * @errp: set on every failure path, including cancellation
+ *
+ * Device state is read from migration->data_fd in chunks of at most
+ * data_buffer_size bytes and queued as packets with consecutive indexes
+ * starting at 0; the config state packet is queued last with the next index.
+ *
+ * Returns true on success, and also when multifd transfer is not enabled
+ * for this device (nothing to do); false otherwise.
+ */
+bool
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp)
+{
+ VFIODevice *vbasedev = d->handler_opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ bool ret = false;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ uint32_t idx;
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+ return true;
+ }
+
+ trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+ d->idstr, d->instance_id);
+
+ /* We reach here with device state STOP or STOP_COPY only */
+ if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+ VFIO_DEVICE_STATE_STOP, errp)) {
+ goto thread_exit;
+ }
+
+ packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); /* one packet reused for every data chunk */
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+
+ for (idx = 0; ; idx++) {
+ ssize_t data_size;
+ size_t packet_size;
+
+ if (multifd_device_state_save_thread_should_exit()) {
+ error_setg(errp, "operation cancelled");
+ goto thread_exit;
+ }
+
+ data_size = read(migration->data_fd, &packet->data,
+ migration->data_buffer_size);
+ if (data_size < 0) {
+ error_setg(errp, "%s: reading state buffer %" PRIu32 " failed: %d",
+ vbasedev->name, idx, errno);
+ goto thread_exit;
+ } else if (data_size == 0) {
+ break; /* a zero-length read ends the data chunk loop */
+ }
+
+ packet->idx = idx;
+ packet_size = sizeof(*packet) + data_size; /* only the bytes actually read are queued */
+
+ if (!multifd_queue_device_state(d->idstr, d->instance_id,
+ (char *)packet, packet_size)) {
+ error_setg(errp, "%s: multifd data queuing failed", vbasedev->name);
+ goto thread_exit;
+ }
+
+ vfio_mig_add_bytes_transferred(packet_size);
+ }
+
+ if (!vfio_save_complete_precopy_thread_config_state(vbasedev,
+ d->idstr,
+ d->instance_id,
+ idx, errp)) {
+ goto thread_exit;
+ }
+
+ ret = true;
+
+thread_exit:
+ trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+ return ret;
+}
+
+/*
+ * Mark the load thread as running and ask the migration core to start
+ * vfio_load_bufs_thread() for @vbasedev.
+ *
+ * Called with the BQL held; it is released while load_bufs_mutex is taken
+ * and re-acquired before the thread is started.  Always returns 0.
+ */
+int vfio_multifd_switchover_start(VFIODevice *vbasedev)
+{
+    VFIOMultifd *multifd = vbasedev->migration->multifd;
+
+    assert(multifd);
+
+    /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+    bql_unlock();
+
+    qemu_mutex_lock(&multifd->load_bufs_mutex);
+    assert(!multifd->load_bufs_thread_running);
+    multifd->load_bufs_thread_running = true;
+    qemu_mutex_unlock(&multifd->load_bufs_mutex);
+
+    bql_lock();
+
+    qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev);
+
+    return 0;
+}
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
new file mode 100644
index 0000000..a664051
--- /dev/null
+++ b/hw/vfio/migration-multifd.h
@@ -0,0 +1,34 @@
+/*
+ * Multifd VFIO migration
+ *
+ * Copyright (C) 2024,2025 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_MIGRATION_MULTIFD_H
+#define HW_VFIO_MIGRATION_MULTIFD_H
+
+#include "hw/vfio/vfio-common.h"
+
+/*
+ * Prepare multifd transfer for @vbasedev.  migration.c passes
+ * alloc_multifd=false from its save setup handler and alloc_multifd=true
+ * from its load setup handler.  A false return is treated as failure by
+ * both callers (they return -EINVAL).
+ */
+bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp);
+
+/*
+ * Counterpart of vfio_multifd_setup().  Called from both the save and the
+ * load cleanup handlers in migration.c.
+ */
+void vfio_multifd_cleanup(VFIODevice *vbasedev);
+
+/*
+ * NOTE(review): no caller is visible in migration.c; presumably reports
+ * whether multifd device state transfer is available at all - confirm
+ * against the definition.
+ */
+bool vfio_multifd_transfer_supported(void);
+
+/*
+ * Tells whether @vbasedev uses multifd transfer.  migration.c uses the
+ * result to choose between the multifd and the main migration channel
+ * code paths.
+ */
+bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
+
+/* Installed as the SaveVMHandlers .load_state_buffer callback. */
+bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
+ Error **errp);
+
+/*
+ * Called by the main channel save handlers, instead of writing the device
+ * state to @f, when multifd transfer is enabled for @vbasedev.
+ */
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+/*
+ * Installed as the SaveVMHandlers .save_live_complete_precopy_thread
+ * callback.  Returns true on success, false on failure.
+ */
+bool
+vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp);
+
+/*
+ * Called from the .switchover_start handler when multifd transfer is
+ * enabled; starts the thread that loads the received state buffers.
+ */
+int vfio_multifd_switchover_start(VFIODevice *vbasedev);
+
+#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index adfa752..416643d 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -23,6 +23,7 @@
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
+#include "migration-multifd.h"
#include "qapi/error.h"
#include "qapi/qapi-events-vfio.h"
#include "exec/ramlist.h"
@@ -32,30 +33,13 @@
#include "hw/hw.h"
/*
- * Flags to be used as unique delimiters for VFIO devices in the migration
- * stream. These flags are composed as:
- * 0xffffffff => MSB 32-bit all 1s
- * 0xef10 => Magic ID, represents emulated (virtual) function IO
- * 0x0000 => 16-bits reserved for flags
- *
- * The beginning of state information is marked by _DEV_CONFIG_STATE,
- * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
- * certain state information is marked by _END_OF_STATE.
- */
-#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
-#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
-#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
-#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
-#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
-
-/*
* This is an arbitrary size based on migration of mlx5 devices, where typically
* total device migration size is on the order of 100s of MB. Testing with
* larger values, e.g. 128MB and 1GB, did not show a performance improvement.
*/
#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
-static int64_t bytes_transferred;
+static unsigned long bytes_transferred;
static const char *mig_state_to_str(enum vfio_device_mig_state state)
{
@@ -136,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
vfio_migration_send_event(vbasedev);
}
-static int vfio_migration_set_state(VFIODevice *vbasedev,
- enum vfio_device_mig_state new_state,
- enum vfio_device_mig_state recover_state,
- Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp)
{
VFIOMigration *migration = vbasedev->migration;
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -254,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
return ret;
}
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
- Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@@ -280,11 +263,13 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
return ret;
}
-static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+int vfio_load_device_config_state(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
uint64_t data;
+ trace_vfio_load_device_config_state_start(vbasedev->name);
+
if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
int ret;
@@ -303,7 +288,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
return -EINVAL;
}
- trace_vfio_load_device_config_state(vbasedev->name);
+ trace_vfio_load_device_config_state_end(vbasedev->name);
return qemu_file_get_error(f);
}
@@ -389,7 +374,7 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
qemu_put_be64(f, data_size);
qemu_put_buffer(f, migration->data_buffer, data_size);
- bytes_transferred += data_size;
+ vfio_mig_add_bytes_transferred(data_size);
trace_vfio_save_block(migration->vbasedev->name, data_size);
@@ -467,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
int ret;
+ if (!vfio_multifd_setup(vbasedev, false, errp)) {
+ return -EINVAL;
+ }
+
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -523,6 +512,9 @@ static void vfio_save_cleanup(void *opaque)
Error *local_err = NULL;
int ret;
+ /* Currently a NOP, done for symmetry with load_cleanup() */
+ vfio_multifd_cleanup(vbasedev);
+
/*
* Changing device state from STOP_COPY to STOP can take time. Do it here,
* after migration has completed, so it won't increase downtime.
@@ -645,6 +637,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return 0;
+ }
+
trace_vfio_save_complete_precopy_start(vbasedev->name);
/* We reach here with device state STOP or STOP_COPY only */
@@ -676,6 +673,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
Error *local_err = NULL;
int ret;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return;
+ }
+
ret = vfio_save_device_config_state(f, opaque, &local_err);
if (ret) {
error_prepend(&local_err,
@@ -688,15 +690,28 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret;
+
+ if (!vfio_multifd_setup(vbasedev, true, errp)) {
+ return -EINVAL;
+ }
+
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+ migration->device_state, errp);
+ if (ret) {
+ return ret;
+ }
- return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
- vbasedev->migration->device_state, errp);
+ return 0;
}
static int vfio_load_cleanup(void *opaque)
{
VFIODevice *vbasedev = opaque;
+ vfio_multifd_cleanup(vbasedev);
+
vfio_migration_cleanup(vbasedev);
trace_vfio_load_cleanup(vbasedev->name);
@@ -717,6 +732,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
switch (data) {
case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
{
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_STATE in main migration "
+ "channel but doing multifd transfer",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
return vfio_load_device_config_state(f, opaque);
}
case VFIO_MIG_FLAG_DEV_SETUP_STATE:
@@ -782,6 +804,17 @@ static bool vfio_switchover_ack_needed(void *opaque)
return vfio_precopy_supported(vbasedev);
}
+/*
+ * SaveVMHandlers .switchover_start hook.
+ *
+ * Nothing needs doing unless this device transfers its state via multifd,
+ * in which case the work is delegated to vfio_multifd_switchover_start()
+ * and its result is returned.
+ */
+static int vfio_switchover_start(void *opaque)
+{
+    VFIODevice *dev = opaque;
+
+    if (!vfio_multifd_transfer_enabled(dev)) {
+        return 0;
+    }
+
+    return vfio_multifd_switchover_start(dev);
+}
+
static const SaveVMHandlers savevm_vfio_handlers = {
.save_prepare = vfio_save_prepare,
.save_setup = vfio_save_setup,
@@ -796,6 +829,12 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
.switchover_ack_needed = vfio_switchover_ack_needed,
+ /*
+ * Multifd support
+ */
+ .load_state_buffer = vfio_multifd_load_state_buffer,
+ .switchover_start = vfio_switchover_start,
+ .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
};
/* ---------------------------------------------------------------------- */
@@ -1011,12 +1050,17 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
int64_t vfio_mig_bytes_transferred(void)
{
- return bytes_transferred;
+ return MIN(qatomic_read(&bytes_transferred), INT64_MAX);
}
void vfio_reset_bytes_transferred(void)
{
- bytes_transferred = 0;
+ qatomic_set(&bytes_transferred, 0);
+}
+
+/*
+ * Atomically add @val to the file-wide bytes_transferred counter, i.e. the
+ * value reported by vfio_mig_bytes_transferred() and zeroed by
+ * vfio_reset_bytes_transferred().
+ */
+void vfio_mig_add_bytes_transferred(unsigned long val)
+{
+ qatomic_add(&bytes_transferred, val);
}
/*
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 89d900e..fdbc158 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2215,8 +2215,12 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
break;
case PCI_CAP_ID_PM:
vfio_check_pm_reset(vdev, pos);
- vdev->pm_cap = pos;
- ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0;
+ ret = pci_pm_init(pdev, pos, errp) >= 0;
+ /*
+ * PCI-core config space emulation needs write access to the power
+ * state enabled for tracking BAR mapping relative to PM state.
+ */
+ pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
break;
case PCI_CAP_ID_AF:
vfio_check_af_flr(vdev, pos);
@@ -2406,18 +2410,27 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
vfio_disable_interrupts(vdev);
+ /*
+ * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
+ * Also put INTx Disable in known state.
+ */
+ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+ vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
+
/* Make sure the device is in D0 */
- if (vdev->pm_cap) {
+ if (pdev->pm_cap) {
uint16_t pmcsr;
uint8_t state;
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
if (state) {
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
- vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
+ vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
/* vfio handles the necessary delay here */
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
if (state) {
error_report("vfio: Unable to power on device, stuck in D%d",
@@ -2425,15 +2438,6 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
}
}
}
-
- /*
- * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
- * Also put INTx Disable in known state.
- */
- cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
- cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
- PCI_COMMAND_INTX_DISABLE);
- vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
}
void vfio_pci_post_reset(VFIOPCIDevice *vdev)
@@ -3353,6 +3357,8 @@ static void vfio_instance_init(Object *obj)
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
+static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
+
static const Property vfio_pci_dev_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
@@ -3377,6 +3383,10 @@ static const Property vfio_pci_dev_properties[] = {
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice,
vbasedev.enable_migration, ON_OFF_AUTO_AUTO),
+ DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice,
+ vbasedev.migration_multifd_transfer,
+ vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
+ .set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3433,6 +3443,126 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
pdc->exit = vfio_exitfn;
pdc->config_read = vfio_pci_read_config;
pdc->config_write = vfio_pci_write_config;
+
+ object_class_property_set_description(klass, /* 1.3 */
+ "host",
+ "Host PCI address [domain:]<bus:slot.function> of assigned device");
+ object_class_property_set_description(klass, /* 1.3 */
+ "x-intx-mmap-timeout-ms",
+ "When EOI is not provided by KVM/QEMU, wait time "
+ "(milliseconds) to re-enable device direct access "
+ "after INTx (DEBUG)");
+ object_class_property_set_description(klass, /* 1.5 */
+ "x-vga",
+ "Expose VGA address spaces for device");
+ object_class_property_set_description(klass, /* 2.3 */
+ "x-req",
+ "Disable device request notification support (DEBUG)");
+ object_class_property_set_description(klass, /* 2.4 and 2.5 */
+ "x-no-mmap",
+ "Disable MMAP for device. Allows to trace MMIO "
+ "accesses (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-no-kvm-intx",
+ "Disable direct VFIO->KVM INTx injection. Allows to "
+ "trace INTx interrupts (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-no-kvm-msi",
+ "Disable direct VFIO->KVM MSI injection. Allows to "
+ "trace MSI interrupts (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-no-kvm-msix",
+ "Disable direct VFIO->KVM MSIx injection. Allows to "
+ "trace MSIx interrupts (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-pci-vendor-id",
+ "Override PCI Vendor ID with provided value (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-pci-device-id",
+ "Override PCI device ID with provided value (DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-pci-sub-vendor-id",
+ "Override PCI Subsystem Vendor ID with provided value "
+ "(DEBUG)");
+ object_class_property_set_description(klass, /* 2.5 */
+ "x-pci-sub-device-id",
+ "Override PCI Subsystem Device ID with provided value "
+ "(DEBUG)");
+ object_class_property_set_description(klass, /* 2.6 */
+ "sysfsdev",
+ "Host sysfs path of assigned device");
+ object_class_property_set_description(klass, /* 2.7 */
+ "x-igd-opregion",
+ "Expose host IGD OpRegion to guest");
+ object_class_property_set_description(klass, /* 2.7 (See c4c45e943e51) */
+ "x-igd-gms",
+ "Override IGD data stolen memory size (32MiB units)");
+ object_class_property_set_description(klass, /* 2.11 */
+ "x-nv-gpudirect-clique",
+ "Add NVIDIA GPUDirect capability indicating P2P DMA "
+ "clique for device [0-15]");
+ object_class_property_set_description(klass, /* 2.12 */
+ "x-no-geforce-quirks",
+ "Disable GeForce quirks (for NVIDIA Quadro/GRID/Tesla). "
+ "Improves performance");
+ object_class_property_set_description(klass, /* 2.12 */
+ "display",
+ "Enable display support for device, ex. vGPU");
+ object_class_property_set_description(klass, /* 2.12 */
+ "x-msix-relocation",
+ "Specify MSI-X MMIO relocation to the end of specified "
+ "existing BAR or new BAR to avoid virtualization overhead "
+ "due to adjacent device registers");
+ object_class_property_set_description(klass, /* 3.0 */
+ "x-no-kvm-ioeventfd",
+ "Disable registration of ioeventfds with KVM (DEBUG)");
+ object_class_property_set_description(klass, /* 3.0 */
+ "x-no-vfio-ioeventfd",
+ "Disable linking of KVM ioeventfds to VFIO ioeventfds "
+ "(DEBUG)");
+ object_class_property_set_description(klass, /* 3.1 */
+ "x-balloon-allowed",
+ "Override allowing ballooning with device (DEBUG, DANGER)");
+ object_class_property_set_description(klass, /* 3.2 */
+ "xres",
+ "Set X display resolution the vGPU should use");
+ object_class_property_set_description(klass, /* 3.2 */
+ "yres",
+ "Set Y display resolution the vGPU should use");
+ object_class_property_set_description(klass, /* 5.2 */
+ "x-pre-copy-dirty-page-tracking",
+ "Disable dirty pages tracking during iterative phase "
+ "(DEBUG)");
+ object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */
+ "enable-migration",
+ "Enable device migration. Also requires a host VFIO PCI "
+ "variant or mdev driver with migration support enabled");
+ object_class_property_set_description(klass, /* 8.1 */
+ "vf-token",
+ "Specify UUID VF token. Required for VF when PF is owned "
+ "by another VFIO driver");
+#ifdef CONFIG_IOMMUFD
+ object_class_property_set_description(klass, /* 9.0 */
+ "iommufd",
+ "Set host IOMMUFD backend device");
+#endif
+ object_class_property_set_description(klass, /* 9.1 */
+ "x-device-dirty-page-tracking",
+ "Disable device dirty page tracking and use "
+ "container-based dirty page tracking (DEBUG)");
+ object_class_property_set_description(klass, /* 9.1 */
+ "migration-events",
+ "Emit VFIO migration QAPI event when a VFIO device "
+ "changes its migration state. For management applications");
+ object_class_property_set_description(klass, /* 9.1 */
+ "skip-vsc-check",
+ "Skip config space check for Vendor Specific Capability. "
+ "Setting to false will enforce strict checking of VSC content "
+ "(DEBUG)");
+ object_class_property_set_description(klass, /* 10.0 */
+ "x-migration-multifd-transfer",
+ "Transfer this device state via "
+ "multifd channels when live migrating it");
}
static const TypeInfo vfio_pci_dev_info = {
@@ -3461,6 +3591,15 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data)
device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
dc->hotpluggable = false;
+
+ object_class_property_set_description(klass, /* 3.1 */
+ "ramfb",
+ "Enable ramfb to provide pre-boot graphics for devices "
+ "enabling display option");
+ object_class_property_set_description(klass, /* 8.2 */
+ "x-ramfb-migrate",
+ "Override default migration support for ramfb support "
+ "(DEBUG)");
}
static const TypeInfo vfio_pci_nohotplug_dev_info = {
@@ -3472,6 +3611,17 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = {
static void register_vfio_pci_dev_type(void)
{
+ /*
+ * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can
+ * run for a long time before being migrated so it is desirable to have a
+ * fallback mechanism to the old way of transferring VFIO device state if
+ * it turns out to be necessary.
+ * The following makes this type of property have the same mutability level
+ * as ordinary migration parameters.
+ */
+ vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
+ vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
+
type_register_static(&vfio_pci_dev_info);
type_register_static(&vfio_pci_nohotplug_dev_info);
}
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 43c1666..d638c78 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -160,7 +160,6 @@ struct VFIOPCIDevice {
int32_t bootindex;
uint32_t igd_gms;
OffAutoPCIBAR msix_relo;
- uint8_t pm_cap;
uint8_t nv_gpudirect_clique;
bool pci_aer;
bool req_enabled;
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index f491f4d..67bc574 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -575,6 +575,7 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
VFIODevice *vbasedev = &vdev->vbasedev;
int i;
+ warn_report("-device vfio-platform is deprecated");
qemu_mutex_init(&vdev->intp_mutex);
trace_vfio_platform_realize(vbasedev->sysfsdev ?
@@ -672,6 +673,30 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data)
dc->desc = "VFIO-based platform device assignment";
sbc->connect_irq_notifier = vfio_start_irqfd_injection;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+
+ object_class_property_set_description(klass, /* 2.4 */
+ "host",
+ "Host device name of assigned device");
+ object_class_property_set_description(klass, /* 2.4 and 2.5 */
+ "x-no-mmap",
+ "Disable MMAP for device. Allows to trace MMIO "
+ "accesses (DEBUG)");
+ object_class_property_set_description(klass, /* 2.4 */
+ "mmap-timeout-ms",
+ "When EOI is not provided by KVM/QEMU, wait time "
+ "(milliseconds) to re-enable device direct access "
+ "after level interrupt (DEBUG)");
+ object_class_property_set_description(klass, /* 2.4 */
+ "x-irqfd",
+ "Allow disabling irqfd support (DEBUG)");
+ object_class_property_set_description(klass, /* 2.6 */
+ "sysfsdev",
+ "Host sysfs path of assigned device");
+#ifdef CONFIG_IOMMUFD
+ object_class_property_set_description(klass, /* 9.0 */
+ "iommufd",
+ "Set host IOMMUFD backend device");
+#endif
}
static const TypeInfo vfio_platform_dev_info = {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index c5385e1..9347e3a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -149,10 +149,19 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u"
vfio_display_edid_write_error(void) ""
# migration.c
+vfio_load_bufs_thread_start(const char *name) " (%s)"
+vfio_load_bufs_thread_end(const char *name) " (%s)"
vfio_load_cleanup(const char *name) " (%s)"
-vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_device_config_state_start(const char *name) " (%s)"
+vfio_load_device_config_state_end(const char *name) " (%s)"
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
+vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_start(const char *name) " (%s)"
+vfio_load_state_device_buffer_starved(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_load_start(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_load_end(const char *name, uint32_t idx) " (%s) idx %"PRIu32
+vfio_load_state_device_buffer_end(const char *name) " (%s)"
vfio_migration_realize(const char *name) " (%s)"
vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
@@ -162,6 +171,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
vfio_save_complete_precopy_start(const char *name) " (%s)"
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_save_iterate_start(const char *name) " (%s)"
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c773a91..3ca3f84 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2204,14 +2204,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
pos = pcie_endpoint_cap_init(pci_dev, 0);
assert(pos > 0);
- pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
- PCI_PM_SIZEOF, errp);
+ pos = pci_pm_init(pci_dev, 0, errp);
if (pos < 0) {
return;
}
- pci_dev->exp.pm_cap = pos;
-
/*
* Indicates that this function complies with revision 1.2 of the
* PCI Power Management Interface Specification.
@@ -2310,11 +2307,11 @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev)
{
uint16_t pmcsr;
- if (!pci_is_express(dev) || !dev->exp.pm_cap) {
+ if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
return false;
}
- pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL);
+ pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);
/*
* When No_Soft_Reset bit is set and the device
@@ -2343,7 +2340,7 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
pci_word_test_and_clear_mask(
- dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
+ dev->config + dev->pm_cap + PCI_PM_CTRL,
PCI_PM_CTRL_STATE_MASK);
}
}