about | summary | refs | log | tree | commit | diff
path: root/hw/vfio/migration.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/migration.c')
-rw-r--r-- hw/vfio/migration.c 249
1 files changed, 203 insertions, 46 deletions
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 34d4be2..b76697bd 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -15,38 +15,23 @@
#include <linux/vfio.h>
#include <sys/ioctl.h>
-#include "sysemu/runstate.h"
-#include "hw/vfio/vfio-common.h"
+#include "system/runstate.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-migration.h"
#include "migration/misc.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
+#include "migration-multifd.h"
#include "qapi/error.h"
#include "qapi/qapi-events-vfio.h"
#include "exec/ramlist.h"
-#include "exec/ram_addr.h"
#include "pci.h"
#include "trace.h"
#include "hw/hw.h"
-
-/*
- * Flags to be used as unique delimiters for VFIO devices in the migration
- * stream. These flags are composed as:
- * 0xffffffff => MSB 32-bit all 1s
- * 0xef10 => Magic ID, represents emulated (virtual) function IO
- * 0x0000 => 16-bits reserved for flags
- *
- * The beginning of state information is marked by _DEV_CONFIG_STATE,
- * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
- * certain state information is marked by _END_OF_STATE.
- */
-#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
-#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
-#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
-#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
-#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
+#include "vfio-migration-internal.h"
/*
* This is an arbitrary size based on migration of mlx5 devices, where typically
@@ -55,7 +40,7 @@
*/
#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)
-static int64_t bytes_transferred;
+static unsigned long bytes_transferred;
static const char *mig_state_to_str(enum vfio_device_mig_state state)
{
@@ -81,7 +66,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state)
}
}
-static VfioMigrationState
+static QapiVfioMigrationState
mig_state_to_qapi_state(enum vfio_device_mig_state state)
{
switch (state) {
@@ -136,10 +121,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
vfio_migration_send_event(vbasedev);
}
-static int vfio_migration_set_state(VFIODevice *vbasedev,
- enum vfio_device_mig_state new_state,
- enum vfio_device_mig_state recover_state,
- Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp)
{
VFIOMigration *migration = vbasedev->migration;
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -254,8 +239,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
return ret;
}
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
- Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@@ -280,11 +264,13 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
return ret;
}
-static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+int vfio_load_device_config_state(QEMUFile *f, void *opaque)
{
VFIODevice *vbasedev = opaque;
uint64_t data;
+ trace_vfio_load_device_config_state_start(vbasedev->name);
+
if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
int ret;
@@ -303,7 +289,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
return -EINVAL;
}
- trace_vfio_load_device_config_state(vbasedev->name);
+ trace_vfio_load_device_config_state_end(vbasedev->name);
return qemu_file_get_error(f);
}
@@ -370,6 +356,10 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
* please refer to the Linux kernel VFIO uAPI.
*/
if (errno == ENOMSG) {
+ if (!migration->event_precopy_empty_hit) {
+ trace_vfio_save_block_precopy_empty_hit(migration->vbasedev->name);
+ migration->event_precopy_empty_hit = true;
+ }
return 0;
}
@@ -379,10 +369,13 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
return 0;
}
+ /* Non-empty read: re-arm the trace event */
+ migration->event_precopy_empty_hit = false;
+
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
qemu_put_be64(f, data_size);
qemu_put_buffer(f, migration->data_buffer, data_size);
- bytes_transferred += data_size;
+ vfio_migration_add_bytes_transferred(data_size);
trace_vfio_save_block(migration->vbasedev->name, data_size);
@@ -460,6 +453,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
int ret;
+ if (!vfio_multifd_setup(vbasedev, false, errp)) {
+ return -EINVAL;
+ }
+
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -472,6 +469,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
return -ENOMEM;
}
+ migration->event_save_iterate_started = false;
+ migration->event_precopy_empty_hit = false;
+
if (vfio_precopy_supported(vbasedev)) {
switch (migration->device_state) {
case VFIO_DEVICE_STATE_RUNNING:
@@ -513,6 +513,9 @@ static void vfio_save_cleanup(void *opaque)
Error *local_err = NULL;
int ret;
+ /* Currently a NOP, done for symmetry with load_cleanup() */
+ vfio_multifd_cleanup(vbasedev);
+
/*
* Changing device state from STOP_COPY to STOP can take time. Do it here,
* after migration has completed, so it won't increase downtime.
@@ -576,9 +579,6 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
if (vfio_device_state_is_precopy(vbasedev)) {
vfio_query_precopy_size(migration);
-
- *must_precopy +=
- migration->precopy_init_size + migration->precopy_dirty_size;
}
trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
@@ -605,6 +605,11 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
VFIOMigration *migration = vbasedev->migration;
ssize_t data_size;
+ if (!migration->event_save_iterate_started) {
+ trace_vfio_save_iterate_start(vbasedev->name);
+ migration->event_save_iterate_started = true;
+ }
+
data_size = vfio_save_block(f, migration);
if (data_size < 0) {
return data_size;
@@ -633,6 +638,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return 0;
+ }
+
+ trace_vfio_save_complete_precopy_start(vbasedev->name);
+
/* We reach here with device state STOP or STOP_COPY only */
ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
VFIO_DEVICE_STATE_STOP, &local_err);
@@ -662,6 +674,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
Error *local_err = NULL;
int ret;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return;
+ }
+
ret = vfio_save_device_config_state(f, opaque, &local_err);
if (ret) {
error_prepend(&local_err,
@@ -674,15 +691,28 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret;
+
+ if (!vfio_multifd_setup(vbasedev, true, errp)) {
+ return -EINVAL;
+ }
- return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
- vbasedev->migration->device_state, errp);
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+ migration->device_state, errp);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
}
static int vfio_load_cleanup(void *opaque)
{
VFIODevice *vbasedev = opaque;
+ vfio_multifd_cleanup(vbasedev);
+
vfio_migration_cleanup(vbasedev);
trace_vfio_load_cleanup(vbasedev->name);
@@ -703,6 +733,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
switch (data) {
case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
{
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_STATE in main migration "
+ "channel but doing multifd transfer",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
return vfio_load_device_config_state(f, opaque);
}
case VFIO_MIG_FLAG_DEV_SETUP_STATE:
@@ -768,6 +805,17 @@ static bool vfio_switchover_ack_needed(void *opaque)
return vfio_precopy_supported(vbasedev);
}
+static int vfio_switchover_start(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ return vfio_multifd_switchover_start(vbasedev);
+ }
+
+ return 0;
+}
+
static const SaveVMHandlers savevm_vfio_handlers = {
.save_prepare = vfio_save_prepare,
.save_setup = vfio_save_setup,
@@ -782,6 +830,12 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
.switchover_ack_needed = vfio_switchover_ack_needed,
+ /*
+ * Multifd support
+ */
+ .load_state_buffer = vfio_multifd_load_state_buffer,
+ .switchover_start = vfio_switchover_start,
+ .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
};
/* ---------------------------------------------------------------------- */
@@ -962,13 +1016,72 @@ static int vfio_migration_init(VFIODevice *vbasedev)
vfio_vmstate_change_prepare :
NULL;
migration->vm_state = qdev_add_vm_change_state_handler_full(
- vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev);
+ vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev);
migration_add_notifier(&migration->migration_state,
vfio_migration_state_notifier);
return 0;
}
+static Error *multiple_devices_migration_blocker;
+
+/*
+ * Multiple devices migration is allowed only if all devices support P2P
+ * migration. Single device migration is allowed regardless of P2P migration
+ * support.
+ */
+static bool vfio_multiple_devices_migration_is_supported(void)
+{
+ VFIODevice *vbasedev;
+ unsigned int device_num = 0;
+ bool all_support_p2p = true;
+
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
+ if (vbasedev->migration) {
+ device_num++;
+
+ if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) {
+ all_support_p2p = false;
+ }
+ }
+ }
+
+ return all_support_p2p || device_num <= 1;
+}
+
+static int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp)
+{
+ if (vfio_multiple_devices_migration_is_supported()) {
+ return 0;
+ }
+
+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
+ error_setg(errp, "Multiple VFIO devices migration is supported only if "
+ "all of them support P2P migration");
+ return -EINVAL;
+ }
+
+ if (multiple_devices_migration_blocker) {
+ return 0;
+ }
+
+ error_setg(&multiple_devices_migration_blocker,
+ "Multiple VFIO devices migration is supported only if all of "
+ "them support P2P migration");
+ return migrate_add_blocker_normal(&multiple_devices_migration_blocker,
+ errp);
+}
+
+static void vfio_unblock_multiple_devices_migration(void)
+{
+ if (!multiple_devices_migration_blocker ||
+ !vfio_multiple_devices_migration_is_supported()) {
+ return;
+ }
+
+ migrate_del_blocker(&multiple_devices_migration_blocker);
+}
+
static void vfio_migration_deinit(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;
@@ -995,14 +1108,40 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
/* ---------------------------------------------------------------------- */
-int64_t vfio_mig_bytes_transferred(void)
+int64_t vfio_migration_bytes_transferred(void)
{
- return bytes_transferred;
+ return MIN(qatomic_read(&bytes_transferred), INT64_MAX);
}
-void vfio_reset_bytes_transferred(void)
+void vfio_migration_reset_bytes_transferred(void)
{
- bytes_transferred = 0;
+ qatomic_set(&bytes_transferred, 0);
+}
+
+void vfio_migration_add_bytes_transferred(unsigned long val)
+{
+ qatomic_add(&bytes_transferred, val);
+}
+
+bool vfio_migration_active(void)
+{
+ VFIODevice *vbasedev;
+
+ if (QLIST_EMPTY(&vfio_device_list)) {
+ return false;
+ }
+
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
+ if (vbasedev->migration_blocker) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool vfio_viommu_preset(VFIODevice *vbasedev)
+{
+ return vbasedev->bcontainer->space->as != &address_space_memory;
}
/*
@@ -1036,16 +1175,18 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
return !vfio_block_migration(vbasedev, err, errp);
}
- if (!vbasedev->dirty_pages_supported) {
+ if ((!vbasedev->dirty_pages_supported ||
+ vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+ !vbasedev->iommu_dirty_tracking) {
if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
error_setg(&err,
- "%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ "%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
goto add_blocker;
}
- warn_report("%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ warn_report("%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
}
ret = vfio_block_multiple_devices_migration(vbasedev, errp);
@@ -1079,3 +1220,19 @@ void vfio_migration_exit(VFIODevice *vbasedev)
migrate_del_blocker(&vbasedev->migration_blocker);
}
+
+bool vfio_device_state_is_running(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+
+ return migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
+ migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P;
+}
+
+bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+
+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ||
+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P;
+}