diff options
Diffstat (limited to 'hw/vfio/migration.c')
-rw-r--r-- | hw/vfio/migration.c | 249 |
1 files changed, 203 insertions, 46 deletions
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 34d4be2..b76697bd 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -15,38 +15,23 @@ #include <linux/vfio.h> #include <sys/ioctl.h> -#include "sysemu/runstate.h" -#include "hw/vfio/vfio-common.h" +#include "system/runstate.h" +#include "hw/vfio/vfio-device.h" +#include "hw/vfio/vfio-migration.h" #include "migration/misc.h" #include "migration/savevm.h" #include "migration/vmstate.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "migration/blocker.h" +#include "migration-multifd.h" #include "qapi/error.h" #include "qapi/qapi-events-vfio.h" #include "exec/ramlist.h" -#include "exec/ram_addr.h" #include "pci.h" #include "trace.h" #include "hw/hw.h" - -/* - * Flags to be used as unique delimiters for VFIO devices in the migration - * stream. These flags are composed as: - * 0xffffffff => MSB 32-bit all 1s - * 0xef10 => Magic ID, represents emulated (virtual) function IO - * 0x0000 => 16-bits reserved for flags - * - * The beginning of state information is marked by _DEV_CONFIG_STATE, - * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a - * certain state information is marked by _END_OF_STATE. - */ -#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) -#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) -#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) -#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) -#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) +#include "vfio-migration-internal.h" /* * This is an arbitrary size based on migration of mlx5 devices, where typically @@ -55,7 +40,7 @@ */ #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB) -static int64_t bytes_transferred; +static unsigned long bytes_transferred; static const char *mig_state_to_str(enum vfio_device_mig_state state) { @@ -81,7 +66,7 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) } } -static VfioMigrationState +static QapiVfioMigrationState mig_state_to_qapi_state(enum vfio_device_mig_state state) { switch (state) { @@ -136,10 +121,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev, vfio_migration_send_event(vbasedev); } -static int vfio_migration_set_state(VFIODevice *vbasedev, - enum vfio_device_mig_state new_state, - enum vfio_device_mig_state recover_state, - Error **errp) +int vfio_migration_set_state(VFIODevice *vbasedev, + enum vfio_device_mig_state new_state, + enum vfio_device_mig_state recover_state, + Error **errp) { VFIOMigration *migration = vbasedev->migration; uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + @@ -254,8 +239,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, return ret; } -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, - Error **errp) +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) { VFIODevice *vbasedev = opaque; int ret; @@ -280,11 +264,13 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque, return ret; } -static int vfio_load_device_config_state(QEMUFile *f, void *opaque) +int vfio_load_device_config_state(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; uint64_t data; + trace_vfio_load_device_config_state_start(vbasedev->name); + if (vbasedev->ops && vbasedev->ops->vfio_load_config) { int ret; @@ -303,7 +289,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) return -EINVAL; } - trace_vfio_load_device_config_state(vbasedev->name); + trace_vfio_load_device_config_state_end(vbasedev->name); return qemu_file_get_error(f); } @@ -370,6 +356,10 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) * please refer to the Linux kernel VFIO uAPI. */ if (errno == ENOMSG) { + if (!migration->event_precopy_empty_hit) { + trace_vfio_save_block_precopy_empty_hit(migration->vbasedev->name); + migration->event_precopy_empty_hit = true; + } return 0; } @@ -379,10 +369,13 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) return 0; } + /* Non-empty read: re-arm the trace event */ + migration->event_precopy_empty_hit = false; + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); qemu_put_be64(f, data_size); qemu_put_buffer(f, migration->data_buffer, data_size); - bytes_transferred += data_size; + vfio_migration_add_bytes_transferred(data_size); trace_vfio_save_block(migration->vbasedev->name, data_size); @@ -460,6 +453,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; int ret; + if (!vfio_multifd_setup(vbasedev, false, errp)) { + return -EINVAL; + } + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); vfio_query_stop_copy_size(vbasedev, &stop_copy_size); @@ -472,6 +469,9 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) return -ENOMEM; } + migration->event_save_iterate_started = false; + migration->event_precopy_empty_hit = false; + if (vfio_precopy_supported(vbasedev)) { switch (migration->device_state) { case VFIO_DEVICE_STATE_RUNNING: @@ -513,6 +513,9 @@ static void vfio_save_cleanup(void *opaque) Error *local_err = NULL; int ret; + /* Currently a NOP, done for symmetry with load_cleanup() */ + vfio_multifd_cleanup(vbasedev); + /* * Changing device state from STOP_COPY to STOP can take time. Do it here, * after migration has completed, so it won't increase downtime. @@ -576,9 +579,6 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, if (vfio_device_state_is_precopy(vbasedev)) { vfio_query_precopy_size(migration); - - *must_precopy += - migration->precopy_init_size + migration->precopy_dirty_size; } trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, @@ -605,6 +605,11 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) VFIOMigration *migration = vbasedev->migration; ssize_t data_size; + if (!migration->event_save_iterate_started) { + trace_vfio_save_iterate_start(vbasedev->name); + migration->event_save_iterate_started = true; + } + data_size = vfio_save_block(f, migration); if (data_size < 0) { return data_size; @@ -633,6 +638,13 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) int ret; Error *local_err = NULL; + if (vfio_multifd_transfer_enabled(vbasedev)) { + vfio_multifd_emit_dummy_eos(vbasedev, f); + return 0; + } + + trace_vfio_save_complete_precopy_start(vbasedev->name); + /* We reach here with device state STOP or STOP_COPY only */ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, VFIO_DEVICE_STATE_STOP, &local_err); @@ -662,6 +674,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) Error *local_err = NULL; int ret; + if (vfio_multifd_transfer_enabled(vbasedev)) { + vfio_multifd_emit_dummy_eos(vbasedev, f); + return; + } + ret = vfio_save_device_config_state(f, opaque, &local_err); if (ret) { error_prepend(&local_err, @@ -674,15 +691,28 @@ static void vfio_save_state(QEMUFile *f, void *opaque) static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp) { VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + int ret; + + if (!vfio_multifd_setup(vbasedev, true, errp)) { + return -EINVAL; + } - return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, - vbasedev->migration->device_state, errp); + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, + migration->device_state, errp); + if (ret) { + return ret; + } + + return 0; } static int vfio_load_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; + vfio_multifd_cleanup(vbasedev); + vfio_migration_cleanup(vbasedev); trace_vfio_load_cleanup(vbasedev->name); @@ -703,6 +733,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) switch (data) { case VFIO_MIG_FLAG_DEV_CONFIG_STATE: { + if (vfio_multifd_transfer_enabled(vbasedev)) { + error_report("%s: got DEV_CONFIG_STATE in main migration " + "channel but doing multifd transfer", + vbasedev->name); + return -EINVAL; + } + return vfio_load_device_config_state(f, opaque); } case VFIO_MIG_FLAG_DEV_SETUP_STATE: @@ -768,6 +805,17 @@ static bool vfio_switchover_ack_needed(void *opaque) return vfio_precopy_supported(vbasedev); } +static int vfio_switchover_start(void *opaque) +{ + VFIODevice *vbasedev = opaque; + + if (vfio_multifd_transfer_enabled(vbasedev)) { + return vfio_multifd_switchover_start(vbasedev); + } + + return 0; +} + static const SaveVMHandlers savevm_vfio_handlers = { .save_prepare = vfio_save_prepare, .save_setup = vfio_save_setup, @@ -782,6 +830,12 @@ static const SaveVMHandlers savevm_vfio_handlers = { .load_cleanup = vfio_load_cleanup, .load_state = vfio_load_state, .switchover_ack_needed = vfio_switchover_ack_needed, + /* + * Multifd support + */ + .load_state_buffer = vfio_multifd_load_state_buffer, + .switchover_start = vfio_switchover_start, + .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, }; /* ---------------------------------------------------------------------- */ @@ -962,13 +1016,72 @@ static int vfio_migration_init(VFIODevice *vbasedev) vfio_vmstate_change_prepare : NULL; migration->vm_state = qdev_add_vm_change_state_handler_full( - vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); + vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev); migration_add_notifier(&migration->migration_state, vfio_migration_state_notifier); return 0; } +static Error *multiple_devices_migration_blocker; + +/* + * Multiple devices migration is allowed only if all devices support P2P + * migration. Single device migration is allowed regardless of P2P migration + * support. + */ +static bool vfio_multiple_devices_migration_is_supported(void) +{ + VFIODevice *vbasedev; + unsigned int device_num = 0; + bool all_support_p2p = true; + + QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration) { + device_num++; + + if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) { + all_support_p2p = false; + } + } + } + + return all_support_p2p || device_num <= 1; +} + +static int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) +{ + if (vfio_multiple_devices_migration_is_supported()) { + return 0; + } + + if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { + error_setg(errp, "Multiple VFIO devices migration is supported only if " + "all of them support P2P migration"); + return -EINVAL; + } + + if (multiple_devices_migration_blocker) { + return 0; + } + + error_setg(&multiple_devices_migration_blocker, + "Multiple VFIO devices migration is supported only if all of " + "them support P2P migration"); + return migrate_add_blocker_normal(&multiple_devices_migration_blocker, + errp); +} + +static void vfio_unblock_multiple_devices_migration(void) +{ + if (!multiple_devices_migration_blocker || + !vfio_multiple_devices_migration_is_supported()) { + return; + } + + migrate_del_blocker(&multiple_devices_migration_blocker); +} + static void vfio_migration_deinit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -995,14 +1108,40 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) /* ---------------------------------------------------------------------- */ -int64_t vfio_mig_bytes_transferred(void) +int64_t vfio_migration_bytes_transferred(void) { - return bytes_transferred; + return MIN(qatomic_read(&bytes_transferred), INT64_MAX); } -void vfio_reset_bytes_transferred(void) +void vfio_migration_reset_bytes_transferred(void) { - bytes_transferred = 0; + qatomic_set(&bytes_transferred, 0); +} + +void vfio_migration_add_bytes_transferred(unsigned long val) +{ + qatomic_add(&bytes_transferred, val); +} + +bool vfio_migration_active(void) +{ + VFIODevice *vbasedev; + + if (QLIST_EMPTY(&vfio_device_list)) { + return false; + } + + QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration_blocker) { + return false; + } + } + return true; +} + +static bool vfio_viommu_preset(VFIODevice *vbasedev) +{ + return vbasedev->bcontainer->space->as != &address_space_memory; } /* @@ -1036,16 +1175,18 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) return !vfio_block_migration(vbasedev, err, errp); } - if (!vbasedev->dirty_pages_supported) { + if ((!vbasedev->dirty_pages_supported || + vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) && + !vbasedev->iommu_dirty_tracking) { if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { error_setg(&err, - "%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); + "%s: VFIO device doesn't support device and " + "IOMMU dirty tracking", vbasedev->name); goto add_blocker; } - warn_report("%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); + warn_report("%s: VFIO device doesn't support device and " + "IOMMU dirty tracking", vbasedev->name); } ret = vfio_block_multiple_devices_migration(vbasedev, errp); @@ -1079,3 +1220,19 @@ void vfio_migration_exit(VFIODevice *vbasedev) migrate_del_blocker(&vbasedev->migration_blocker); } + +bool vfio_device_state_is_running(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + + return migration->device_state == VFIO_DEVICE_STATE_RUNNING || + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P; +} + +bool vfio_device_state_is_precopy(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + + return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY || + migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; +} |