about | summary | refs | log | tree | commit | diff
path: root/hw/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio')
-rw-r--r-- hw/vfio/device.c 6
-rw-r--r-- hw/vfio/helpers.c 17
-rw-r--r-- hw/vfio/migration-multifd.c 101
-rw-r--r-- hw/vfio/migration-multifd.h 3
-rw-r--r-- hw/vfio/migration.c 10
-rw-r--r-- hw/vfio/pci.c 48
-rw-r--r-- hw/vfio/pci.h 6
-rw-r--r-- hw/vfio/trace-events 1
-rw-r--r-- hw/vfio/vfio-helpers.h 2
-rw-r--r-- hw/vfio/vfio-migration-internal.h 1
10 files changed, 182 insertions, 13 deletions
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 96cf214..52a1996 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -463,6 +463,8 @@ void vfio_device_detach(VFIODevice *vbasedev)
void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
struct vfio_device_info *info)
{
+ int i;
+
vbasedev->num_irqs = info->num_irqs;
vbasedev->num_regions = info->num_regions;
vbasedev->flags = info->flags;
@@ -477,6 +479,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->num_regions);
if (vbasedev->use_region_fds) {
vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ vbasedev->region_fds[i] = -1;
+ }
}
}
@@ -489,7 +494,6 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
close(vbasedev->region_fds[i]);
}
-
}
g_clear_pointer(&vbasedev->reginfo, g_free);
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 9a5f621..23d13e5 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -209,3 +209,20 @@ retry:
return info;
}
+
+bool vfio_arch_wants_loading_config_after_iter(void)
+{
+ /*
+ * Returns true when built with TARGET_ARM defined, false otherwise.
+ * Starting the config load only after all iterables were loaded (during
+ * the non-iterables loading phase) is required for ARM64 because this
+ * platform's VFIO depends on the interrupt controller being loaded first.
+ * See commit d329f5032e17 ("vfio: Move the saving of the config space to
+ * the right place in VFIO migration").
+ */
+#if defined(TARGET_ARM)
+ return true;
+#else
+ return false;
+#endif
+}
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 5563548..e478503 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -13,7 +13,6 @@
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
#include "qapi/error.h"
-#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
@@ -23,6 +22,7 @@
#include "migration-multifd.h"
#include "vfio-migration-internal.h"
#include "trace.h"
+#include "vfio-helpers.h"
#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
@@ -35,6 +35,18 @@ typedef struct VFIODeviceStatePacket {
uint8_t data[0];
} QEMU_PACKED VFIODeviceStatePacket;
+bool vfio_load_config_after_iter(VFIODevice *vbasedev)
+{ /* Resolve the device's on/off/auto migration_load_config_after_iter setting to a bool. */
+ if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) {
+ return true; /* explicitly enabled for this device */
+ } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) {
+ return false; /* explicitly disabled for this device */
+ }
+ /* Neither ON nor OFF: the only remaining value is AUTO, so use the per-architecture default. */
+ assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO);
+ return vfio_arch_wants_loading_config_after_iter();
+}
+
/* type safety */
typedef struct VFIOStateBuffers {
GArray *array;
@@ -50,12 +62,16 @@ typedef struct VFIOMultifd {
bool load_bufs_thread_running;
bool load_bufs_thread_want_exit;
+ bool load_bufs_iter_done;
+ QemuCond load_bufs_iter_done_cond;
+
VFIOStateBuffers load_bufs;
QemuCond load_bufs_buffer_ready_cond;
QemuCond load_bufs_thread_finished_cond;
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
uint32_t load_buf_idx;
uint32_t load_buf_idx_last;
+ size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;
static void vfio_state_buffer_clear(gpointer data)
@@ -112,6 +128,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
VFIOMigration *migration = vbasedev->migration;
VFIOMultifd *multifd = migration->multifd;
VFIOStateBuffer *lb;
+ size_t data_size = packet_total_size - sizeof(*packet);
vfio_state_buffers_assert_init(&multifd->load_bufs);
if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
@@ -127,8 +144,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
assert(packet->idx >= multifd->load_buf_idx);
- lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
- lb->len = packet_total_size - sizeof(*packet);
+ multifd->load_buf_queued_pending_buffers_size += data_size;
+ if (multifd->load_buf_queued_pending_buffers_size >
+ vbasedev->migration_max_queued_buffers_size) {
+ error_setg(errp,
+ "%s: queuing state buffer %" PRIu32
+ " would exceed the size max of %" PRIu64,
+ vbasedev->name, packet->idx,
+ vbasedev->migration_max_queued_buffers_size);
+ return false;
+ }
+
+ lb->data = g_memdup2(&packet->data, data_size);
+ lb->len = data_size;
lb->is_present = true;
return true;
@@ -312,6 +340,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
assert(wr_ret <= buf_len);
buf_len -= wr_ret;
buf_cur += wr_ret;
+
+ assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
+ multifd->load_buf_queued_pending_buffers_size -= wr_ret;
}
trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
@@ -394,6 +425,22 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
multifd->load_buf_idx++;
}
+ if (vfio_load_config_after_iter(vbasedev)) {
+ while (!multifd->load_bufs_iter_done) {
+ qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
+ &multifd->load_bufs_mutex);
+
+ /*
+ * Need to re-check cancellation immediately after wait in case
+ * cond was signalled by vfio_load_cleanup_load_bufs_thread().
+ */
+ if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
+ error_setg(errp, "operation cancelled");
+ goto thread_exit;
+ }
+ }
+ }
+
if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
goto thread_exit;
}
@@ -413,6 +460,48 @@ thread_exit:
return ret;
}
+int vfio_load_state_config_load_ready(VFIODevice *vbasedev)
+{ /* Handle DEV_CONFIG_LOAD_READY: set load_bufs_iter_done and signal load_bufs_iter_done_cond. */
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ int ret = 0;
+ /* Returns 0 on success, -EINVAL when the flag is unexpected or seen twice. */
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
+ if (!vfio_load_config_after_iter(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled",
+ vbasedev->name);
+ return -EINVAL;
+ }
+ /* Multifd transfer is enabled at this point, so the multifd state is expected to exist. */
+ assert(multifd);
+
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+ bql_unlock();
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+ if (multifd->load_bufs_iter_done) {
+ /* Can't print error here as we're outside BQL */
+ ret = -EINVAL;
+ break;
+ }
+
+ multifd->load_bufs_iter_done = true;
+ qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
+ }
+ bql_lock();
+ /* BQL is held again here, so the error deferred from inside the lock guard can be reported. */
+ if (ret) {
+ error_report("%s: duplicate DEV_CONFIG_LOAD_READY",
+ vbasedev->name);
+ }
+
+ return ret;
+}
+
static VFIOMultifd *vfio_multifd_new(void)
{
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
@@ -423,8 +512,12 @@ static VFIOMultifd *vfio_multifd_new(void)
multifd->load_buf_idx = 0;
multifd->load_buf_idx_last = UINT32_MAX;
+ multifd->load_buf_queued_pending_buffers_size = 0;
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+ multifd->load_bufs_iter_done = false;
+ qemu_cond_init(&multifd->load_bufs_iter_done_cond);
+
multifd->load_bufs_thread_running = false;
multifd->load_bufs_thread_want_exit = false;
qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
@@ -448,6 +541,7 @@ static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
multifd->load_bufs_thread_want_exit = true;
qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+ qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
&multifd->load_bufs_mutex);
}
@@ -460,6 +554,7 @@ static void vfio_multifd_free(VFIOMultifd *multifd)
vfio_load_cleanup_load_bufs_thread(multifd);
qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
+ qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
vfio_state_buffers_destroy(&multifd->load_bufs);
qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
qemu_mutex_destroy(&multifd->load_bufs_mutex);
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index ebf22a7..82d2d3a 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -20,9 +20,12 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev);
bool vfio_multifd_transfer_supported(void);
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
+bool vfio_load_config_after_iter(VFIODevice *vbasedev);
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
+int vfio_load_state_config_load_ready(VFIODevice *vbasedev);
+
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
bool
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c329578..4c06e3d 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -675,7 +675,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
int ret;
if (vfio_multifd_transfer_enabled(vbasedev)) {
- vfio_multifd_emit_dummy_eos(vbasedev, f);
+ if (vfio_load_config_after_iter(vbasedev)) {
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY);
+ } else {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ }
return;
}
@@ -784,6 +788,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
return ret;
}
+ case VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY:
+ {
+ return vfio_load_state_config_load_ready(vbasedev);
+ }
default:
error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
return -EINVAL;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1093b28..be05002 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2893,10 +2893,6 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
"vfio-vga-io@0x3c0",
QEMU_PCI_VGA_IO_HI_SIZE);
- pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
-
return true;
}
@@ -3228,6 +3224,23 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vdev->sub_device_id);
}
+ /*
+ * Class code is a 24-bit value at config space 0x09. Allow overriding it
+ * with any 24-bit value.
+ */
+ if (vdev->class_code != PCI_ANY_ID) {
+ if (vdev->class_code > 0xffffff) {
+ error_setg(errp, "invalid PCI class code provided");
+ return false;
+ }
+ /* Higher 24 bits of PCI_CLASS_REVISION are class code */
+ vfio_add_emulated_long(vdev, PCI_CLASS_REVISION,
+ vdev->class_code << 8, ~0xff);
+ trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code);
+ } else {
+ vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8;
+ }
+
/* QEMU can change multi-function devices to single function, or reverse */
vdev->emulated_config_bits[PCI_HEADER_TYPE] =
PCI_HEADER_TYPE_MULTI_FUNCTION;
@@ -3257,6 +3270,12 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_bars_register(vdev);
+ if (vdev->vga && vfio_is_vga(vdev)) {
+ pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
+ }
+
return true;
}
@@ -3623,6 +3642,11 @@ static const Property vfio_pci_dev_properties[] = {
vbasedev.migration_multifd_transfer,
vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
.set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice,
+ vbasedev.migration_load_config_after_iter,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+ vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3643,6 +3667,8 @@ static const Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice,
+ class_code, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
nv_gpudirect_clique,
@@ -3797,6 +3823,20 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
"x-migration-multifd-transfer",
"Transfer this device state via "
"multifd channels when live migrating it");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-load-config-after-iter",
+ "Start the config load only after "
+ "all iterables were loaded (during "
+ "non-iterables loading phase) when "
+ "doing live migration of device state "
+ "via multifd channels");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-max-queued-buffers-size",
+ "Maximum size of in-flight VFIO "
+ "device state buffers queued at the "
+ "destination when doing live "
+ "migration of device state via "
+ "multifd channels");
}
static const TypeInfo vfio_pci_dev_info = {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 495fae7..4aa6461 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -157,6 +157,7 @@ struct VFIOPCIDevice {
uint32_t device_id;
uint32_t sub_vendor_id;
uint32_t sub_device_id;
+ uint32_t class_code;
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@@ -205,10 +206,7 @@ static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t de
static inline bool vfio_is_vga(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
- uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
-
- return class == PCI_CLASS_DISPLAY_VGA;
+ return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA; /* drop the low byte of the stored class code before comparing */
}
/* MSI/MSI-X/INTx */
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 8ec0ad0..fc6ed23 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -48,6 +48,7 @@ vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x"
+vfio_pci_emulated_class_code(const char *name, uint32_t val) "%s 0x%06x"
# pci-quirks.c
vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
diff --git a/hw/vfio/vfio-helpers.h b/hw/vfio/vfio-helpers.h
index 54a327f..ce31758 100644
--- a/hw/vfio/vfio-helpers.h
+++ b/hw/vfio/vfio-helpers.h
@@ -32,4 +32,6 @@ struct vfio_device_info *vfio_get_device_info(int fd);
int vfio_kvm_device_add_fd(int fd, Error **errp);
int vfio_kvm_device_del_fd(int fd, Error **errp);
+bool vfio_arch_wants_loading_config_after_iter(void);
+
#endif /* HW_VFIO_VFIO_HELPERS_H */
diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h
index a8b456b..54141e2 100644
--- a/hw/vfio/vfio-migration-internal.h
+++ b/hw/vfio/vfio-migration-internal.h
@@ -32,6 +32,7 @@
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY (0xffffffffef100006ULL)
typedef struct VFIODevice VFIODevice;
typedef struct VFIOMultifd VFIOMultifd;