aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaciej S. Szmigiero <maciej.szmigiero@oracle.com>2025-07-15 16:37:37 +0200
committerCédric Le Goater <clg@redhat.com>2025-07-15 17:11:12 +0200
commit300dcf58b72fa1635190b19f102231b0775e93cb (patch)
treeef712b0fe23af924c43bbab81ec40948d9027aa2
parent6380b0a02fbdac253b8a98b300398319ab655237 (diff)
downloadqemu-300dcf58b72fa1635190b19f102231b0775e93cb.zip
qemu-300dcf58b72fa1635190b19f102231b0775e93cb.tar.gz
qemu-300dcf58b72fa1635190b19f102231b0775e93cb.tar.bz2
vfio/migration: Max in-flight VFIO device state buffers size limit
Allow capping the maximum total size of in-flight VFIO device state buffers queued at the destination, otherwise a malicious QEMU source could theoretically cause the target QEMU to allocate unlimited amounts of memory for buffers-in-flight. Since this is not expected to be a realistic threat in most of VFIO live migration use cases and the right value depends on the particular setup disable this limit by default by setting it to UINT64_MAX. Reviewed-by: Fabiano Rosas <farosas@suse.de> Reviewed-by: Avihai Horon <avihaih@nvidia.com> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Link: https://lore.kernel.org/qemu-devel/4f7cad490988288f58e36b162d7a888ed7e7fd17.1752589295.git.maciej.szmigiero@oracle.com Signed-off-by: Cédric Le Goater <clg@redhat.com>
-rw-r--r--docs/devel/migration/vfio.rst13
-rw-r--r--hw/vfio/migration-multifd.c21
-rw-r--r--hw/vfio/pci.c9
-rw-r--r--include/hw/vfio/vfio-device.h1
4 files changed, 42 insertions, 2 deletions
diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst
index dae3a98..0790e50 100644
--- a/docs/devel/migration/vfio.rst
+++ b/docs/devel/migration/vfio.rst
@@ -248,6 +248,19 @@ The multifd VFIO device state transfer is controlled by
AUTO, which means that VFIO device state transfer via multifd channels is
attempted in configurations that otherwise support it.
+Since the target QEMU needs to load device state buffers in-order it needs to
+queue incoming buffers until they can be loaded into the device.
+This means that a malicious QEMU source could theoretically cause the target
+QEMU to allocate unlimited amounts of memory for such buffers-in-flight.
+
+The "x-migration-max-queued-buffers-size" property allows capping the total size
+of these VFIO device state buffers queued at the destination.
+
+Because a malicious QEMU source causing OOM on the target is not expected to be
+a realistic threat in most of VFIO live migration use cases and the right value
+depends on the particular setup by default this queued buffers size limit is
+disabled by setting it to UINT64_MAX.
+
Some host platforms (like ARM64) require that VFIO device config is loaded only
after all iterables were loaded, during non-iterables loading phase.
Such interlocking is controlled by "x-migration-load-config-after-iter" VFIO
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index e539bef..d522671 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -72,6 +72,7 @@ typedef struct VFIOMultifd {
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
uint32_t load_buf_idx;
uint32_t load_buf_idx_last;
+ size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;
static void vfio_state_buffer_clear(gpointer data)
@@ -128,6 +129,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
VFIOMigration *migration = vbasedev->migration;
VFIOMultifd *multifd = migration->multifd;
VFIOStateBuffer *lb;
+ size_t data_size = packet_total_size - sizeof(*packet);
vfio_state_buffers_assert_init(&multifd->load_bufs);
if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
@@ -143,8 +145,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
assert(packet->idx >= multifd->load_buf_idx);
- lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
- lb->len = packet_total_size - sizeof(*packet);
+ multifd->load_buf_queued_pending_buffers_size += data_size;
+ if (multifd->load_buf_queued_pending_buffers_size >
+ vbasedev->migration_max_queued_buffers_size) {
+ error_setg(errp,
+ "%s: queuing state buffer %" PRIu32
+ " would exceed the size max of %" PRIu64,
+ vbasedev->name, packet->idx,
+ vbasedev->migration_max_queued_buffers_size);
+ return false;
+ }
+
+ lb->data = g_memdup2(&packet->data, data_size);
+ lb->len = data_size;
lb->is_present = true;
return true;
@@ -328,6 +341,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
assert(wr_ret <= buf_len);
buf_len -= wr_ret;
buf_cur += wr_ret;
+
+ assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
+ multifd->load_buf_queued_pending_buffers_size -= wr_ret;
}
trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
@@ -497,6 +513,7 @@ static VFIOMultifd *vfio_multifd_new(void)
multifd->load_buf_idx = 0;
multifd->load_buf_idx_last = UINT32_MAX;
+ multifd->load_buf_queued_pending_buffers_size = 0;
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
multifd->load_bufs_iter_done = false;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 09acad0..be05002 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3645,6 +3645,8 @@ static const Property vfio_pci_dev_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice,
vbasedev.migration_load_config_after_iter,
ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+ vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3828,6 +3830,13 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
"non-iterables loading phase) when "
"doing live migration of device state "
"via multifd channels");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-max-queued-buffers-size",
+ "Maximum size of in-flight VFIO "
+ "device state buffers queued at the "
+ "destination when doing live "
+ "migration of device state via "
+ "multifd channels");
}
static const TypeInfo vfio_pci_dev_info = {
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index dac3fdc..6e4d5cc 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -68,6 +68,7 @@ typedef struct VFIODevice {
OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer;
OnOffAuto migration_load_config_after_iter;
+ uint64_t migration_max_queued_buffers_size;
bool migration_events;
bool use_region_fds;
VFIODeviceOps *ops;