From 5c4dbcb7489463b8862d3e4fa2490f5fd3d683fe Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:21 +0200 Subject: vfio/migration: Fix NULL pointer dereference bug As part of its error flow, vfio_vmstate_change() accesses MigrationState->to_dst_file without any checks. This can cause a NULL pointer dereference if the error flow is taken and MigrationState->to_dst_file is not set. For example, this can happen if VM is started or stopped not during migration and vfio_vmstate_change() error flow is taken, as MigrationState->to_dst_file is not set at that time. Fix it by checking that MigrationState->to_dst_file is set before using it. Fixes: 02a7e71b1e5b ("vfio: Add VM state change handler to know state of VM") Signed-off-by: Avihai Horon Reviewed-by: Juan Quintela Reviewed-by: Vladimir Sementsov-Ogievskiy Link: https://lore.kernel.org/r/20230216143630.25610-3-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/migration.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 83d2d44..65f3f3b 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -741,7 +741,9 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) */ error_report("%s: Failed to set device state 0x%x", vbasedev->name, (migration->device_state & mask) | value); - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + if (migrate_get_current()->to_dst_file) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + } } vbasedev->migration->vm_running = running; trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), -- cgit v1.1 From b051a3f640e2efc95e9e23c5cb1bb5a4c07731e2 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:22 +0200 Subject: vfio/migration: Allow migration without VFIO IOMMU dirty tracking support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if IOMMU of a VFIO container doesn't support dirty page tracking, migration is blocked. This is because a DMA-able VFIO device can dirty RAM pages without updating QEMU about it, thus breaking the migration. However, this doesn't mean that migration can't be done at all. In such case, allow migration and let QEMU VFIO code mark all pages dirty. This guarantees that all pages that might have gotten dirty are reported back, and thus guarantees a valid migration even without VFIO IOMMU dirty tracking support. The motivation for this patch is the introduction of iommufd [1]. iommufd can directly implement the /dev/vfio/vfio container IOCTLs by mapping them into its internal ops, allowing the usage of these IOCTLs over iommufd. However, VFIO IOMMU dirty tracking is not supported by this VFIO compatibility API. This patch will allow migration by hosts that use the VFIO compatibility API and prevent migration regressions caused by the lack of VFIO IOMMU dirty tracking support. [1] https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com/ Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-4-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 20 ++++++++++++++++++-- hw/vfio/migration.c | 3 +-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 130e5d1..f6dd571 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container, return -errno; } + if (iotlb && vfio_devices_all_running_and_saving(container)) { + cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); + } + return 0; } @@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) .argsz = sizeof(dirty), }; + if (!container->dirty_pages_supported) { + return; + } + if (start) { dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; } else { @@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, uint64_t pages; int ret; + if (!container->dirty_pages_supported) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); + return 0; + } + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); @@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, { VFIOContainer *container = container_of(listener, VFIOContainer, listener); - if (vfio_listener_skipped_section(section) || - !container->dirty_pages_supported) { + if (vfio_listener_skipped_section(section)) { return; } diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 65f3f3b..e56eef1 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -858,11 +858,10 @@ int64_t vfio_mig_bytes_transferred(void) int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { - VFIOContainer *container = vbasedev->group->container; struct vfio_region_info *info = NULL; int ret = -ENOTSUP; - if (!vbasedev->enable_migration || !container->dirty_pages_supported) { + if (!vbasedev->enable_migration) { goto add_blocker; } -- cgit v1.1 From 8b942af393a2d9f822aea4e5e0d241e668146bf2 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:23 +0200 Subject: vfio/common: Change vfio_devices_all_running_and_saving() logic to equivalent one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vfio_devices_all_running_and_saving() is used to check if migration is in pre-copy phase. This is done by checking if migration is in setup or active states and if all VFIO devices are in pre-copy state, i.e. _SAVING | _RUNNING. In VFIO migration protocol v2 pre-copy support is made optional. Hence, a matching v2 protocol pre-copy state can't be used here. As preparation for adding v2 protocol, change vfio_devices_all_running_and_saving() logic such that it doesn't use the VFIO pre-copy state. The new equivalent logic checks if migration is in active state and if all VFIO devices are in running state [1]. No functional changes intended. [1] Note that checking if migration is in setup or active states and if all VFIO devices are in running state doesn't guarantee that we are in pre-copy phase, thus we check if migration is only in active state. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-5-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f6dd571..3a35f4a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,7 @@ #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "migration/misc.h" #include "sysemu/tpm.h" VFIOGroupList vfio_group_list = @@ -363,13 +364,16 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return true; } -static bool vfio_devices_all_running_and_saving(VFIOContainer *container) +/* + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. + */ +static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) { VFIOGroup *group; VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - if (!migration_is_setup_or_active(ms->state)) { + if (!migration_is_active(migrate_get_current())) { return false; } @@ -381,8 +385,7 @@ static bool vfio_devices_all_running_and_saving(VFIOContainer *container) return false; } - if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) && - (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING) { continue; } else { return false; @@ -461,7 +464,7 @@ static int vfio_dma_unmap(VFIOContainer *container, }; if (iotlb && container->dirty_pages_supported && - vfio_devices_all_running_and_saving(container)) { + vfio_devices_all_running_and_mig_active(container)) { return vfio_dma_unmap_bitmap(container, iova, size, iotlb); } @@ -488,7 +491,7 @@ static int vfio_dma_unmap(VFIOContainer *container, return -errno; } - if (iotlb && vfio_devices_all_running_and_saving(container)) { + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size, tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE); -- cgit v1.1 From 29d81b71aa2ac0f594d881460e22e291a9417a74 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:24 +0200 Subject: vfio/migration: Block multiple devices migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently VFIO migration doesn't implement some kind of intermediate quiescent state in which P2P DMAs are quiesced before stopping or running the device. This can cause problems in multi-device migration where the devices are doing P2P DMAs, since the devices are not stopped together at the same time. Until such support is added, block migration of multiple devices. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-6-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/vfio/migration.c | 6 ++++++ 2 files changed, 59 insertions(+) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 3a35f4a..fe80ccf 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -41,6 +41,7 @@ #include "qapi/error.h" #include "migration/migration.h" #include "migration/misc.h" +#include "migration/blocker.h" #include "sysemu/tpm.h" VFIOGroupList vfio_group_list = @@ -337,6 +338,58 @@ bool vfio_mig_active(void) return true; } +static Error *multiple_devices_migration_blocker; + +static unsigned int vfio_migratable_device_num(void) +{ + VFIOGroup *group; + VFIODevice *vbasedev; + unsigned int device_num = 0; + + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev, &group->device_list, next) { + if (vbasedev->migration) { + device_num++; + } + } + } + + return device_num; +} + +int vfio_block_multiple_devices_migration(Error **errp) +{ + int ret; + + if (multiple_devices_migration_blocker || + vfio_migratable_device_num() <= 1) { + return 0; + } + + error_setg(&multiple_devices_migration_blocker, + "Migration is currently not supported with multiple " + "VFIO devices"); + ret = migrate_add_blocker(multiple_devices_migration_blocker, errp); + if (ret < 0) { + error_free(multiple_devices_migration_blocker); + multiple_devices_migration_blocker = NULL; + } + + return ret; +} + +void vfio_unblock_multiple_devices_migration(void) +{ + if (!multiple_devices_migration_blocker || + vfio_migratable_device_num() > 1) { + return; + } + + migrate_del_blocker(multiple_devices_migration_blocker); + error_free(multiple_devices_migration_blocker); + multiple_devices_migration_blocker = NULL; +} + static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) { VFIOGroup *group; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index e56eef1..8e96999 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -878,6 +878,11 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) goto add_blocker; } + ret = vfio_block_multiple_devices_migration(errp); + if (ret) { + return ret; + } + trace_vfio_migration_probe(vbasedev->name, info->index); g_free(info); return 0; @@ -904,6 +909,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) qemu_del_vm_change_state_handler(migration->vm_state); unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_exit(vbasedev); + vfio_unblock_multiple_devices_migration(); } if (vbasedev->migration_blocker) { -- cgit v1.1 From 16fe4e8ab7588896f67ffc8a1d0dc1b0c698b064 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:25 +0200 Subject: vfio/migration: Move migration v1 logic to vfio_migration_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move vfio_dev_get_region_info() logic from vfio_migration_probe() to vfio_migration_init(). This logic is specific to v1 protocol and moving it will make it easier to add the v2 protocol implementation later. No functional changes intended. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-7-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/migration.c | 30 +++++++++++++++--------------- hw/vfio/trace-events | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'hw') diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 8e96999..a3bf732 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -786,14 +786,14 @@ static void vfio_migration_exit(VFIODevice *vbasedev) vbasedev->migration = NULL; } -static int vfio_migration_init(VFIODevice *vbasedev, - struct vfio_region_info *info) +static int vfio_migration_init(VFIODevice *vbasedev) { int ret; Object *obj; VFIOMigration *migration; char id[256] = ""; g_autofree char *path = NULL, *oid = NULL; + struct vfio_region_info *info; if (!vbasedev->ops->vfio_get_object) { return -EINVAL; @@ -804,6 +804,14 @@ static int vfio_migration_init(VFIODevice *vbasedev, return -EINVAL; } + ret = vfio_get_dev_region_info(vbasedev, + VFIO_REGION_TYPE_MIGRATION_DEPRECATED, + VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, + &info); + if (ret) { + return ret; + } + vbasedev->migration = g_new0(VFIOMigration, 1); vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING; vbasedev->migration->vm_running = runstate_is_running(); @@ -823,6 +831,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, goto err; } + g_free(info); + migration = vbasedev->migration; migration->vbasedev = vbasedev; @@ -845,6 +855,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, return 0; err: + g_free(info); vfio_migration_exit(vbasedev); return ret; } @@ -858,22 +869,13 @@ int64_t vfio_mig_bytes_transferred(void) int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { - struct vfio_region_info *info = NULL; int ret = -ENOTSUP; if (!vbasedev->enable_migration) { goto add_blocker; } - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_MIGRATION_DEPRECATED, - VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, - &info); - if (ret) { - goto add_blocker; - } - - ret = vfio_migration_init(vbasedev, info); + ret = vfio_migration_init(vbasedev); if (ret) { goto add_blocker; } @@ -883,14 +885,12 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) return ret; } - trace_vfio_migration_probe(vbasedev->name, info->index); - g_free(info); + trace_vfio_migration_probe(vbasedev->name); return 0; add_blocker: error_setg(&vbasedev->migration_blocker, "VFIO device doesn't support migration"); - g_free(info); ret = migrate_add_blocker(vbasedev->migration_blocker, errp); if (ret < 0) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 90a8aec..6be5381 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -148,7 +148,7 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" vfio_display_edid_write_error(void) "" # migration.c -vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" +vfio_migration_probe(const char *name) " (%s)" vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -- cgit v1.1 From 6eeb2909104664af4c3488232f3c3cd8471c38c3 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:26 +0200 Subject: vfio/migration: Rename functions/structs related to v1 protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid name collisions, rename functions and structs related to VFIO migration protocol v1. This will allow the two protocols to co-exist when v2 protocol is added, until v1 is removed. No functional changes intended. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-8-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 6 +-- hw/vfio/migration.c | 102 +++++++++++++++++++++++++-------------------------- hw/vfio/trace-events | 12 +++--- 3 files changed, 60 insertions(+), 60 deletions(-) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fe80ccf..1c974e9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -408,8 +408,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) - && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) && + (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) { return false; } } @@ -438,7 +438,7 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return false; } - if (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING) { + if (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) { continue; } else { return false; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index a3bf732..a5fe285 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -107,8 +107,8 @@ static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, * an error is returned. */ -static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, - uint32_t value) +static int vfio_migration_v1_set_state(VFIODevice *vbasedev, uint32_t mask, + uint32_t value) { VFIOMigration *migration = vbasedev->migration; VFIORegion *region = &migration->region; @@ -145,8 +145,8 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, return ret; } - migration->device_state = device_state; - trace_vfio_migration_set_state(vbasedev->name, device_state); + migration->device_state_v1 = device_state; + trace_vfio_migration_v1_set_state(vbasedev->name, device_state); return 0; } @@ -260,8 +260,8 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) return ret; } -static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, - uint64_t data_size) +static int vfio_v1_load_buffer(QEMUFile *f, VFIODevice *vbasedev, + uint64_t data_size) { VFIORegion *region = &vbasedev->migration->region; uint64_t data_offset = 0, size, report_size; @@ -288,7 +288,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, data_size = 0; } - trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); + trace_vfio_v1_load_state_device_data(vbasedev->name, data_offset, size); while (size) { void *buf; @@ -394,7 +394,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) return qemu_file_get_error(f); } -static void vfio_migration_cleanup(VFIODevice *vbasedev) +static void vfio_migration_v1_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -405,13 +405,13 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev) /* ---------------------------------------------------------------------- */ -static int vfio_save_setup(QEMUFile *f, void *opaque) +static int vfio_v1_save_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; int ret; - trace_vfio_save_setup(vbasedev->name); + trace_vfio_v1_save_setup(vbasedev->name); qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); @@ -431,8 +431,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) } } - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_SAVING); + ret = vfio_migration_v1_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, + VFIO_DEVICE_STATE_V1_SAVING); if (ret) { error_report("%s: Failed to set state SAVING", vbasedev->name); return ret; @@ -448,16 +448,16 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) return 0; } -static void vfio_save_cleanup(void *opaque) +static void vfio_v1_save_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; - vfio_migration_cleanup(vbasedev); + vfio_migration_v1_cleanup(vbasedev); trace_vfio_save_cleanup(vbasedev->name); } -static void vfio_state_pending(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) +static void vfio_v1_state_pending(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; @@ -470,7 +470,7 @@ static void vfio_state_pending(void *opaque, uint64_t *must_precopy, *must_precopy += migration->pending_bytes; - trace_vfio_state_pending(vbasedev->name, *must_precopy, *can_postcopy); + trace_vfio_v1_state_pending(vbasedev->name, *must_precopy, *can_postcopy); } static int vfio_save_iterate(QEMUFile *f, void *opaque) @@ -520,15 +520,15 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) return 0; } -static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) +static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; uint64_t data_size; int ret; - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, - VFIO_DEVICE_STATE_V1_SAVING); + ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, + VFIO_DEVICE_STATE_V1_SAVING); if (ret) { error_report("%s: Failed to set state STOP and SAVING", vbasedev->name); @@ -565,13 +565,14 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) return ret; } - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0); + ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, + 0); if (ret) { error_report("%s: Failed to set state STOPPED", vbasedev->name); return ret; } - trace_vfio_save_complete_precopy(vbasedev->name); + trace_vfio_v1_save_complete_precopy(vbasedev->name); return ret; } @@ -588,7 +589,7 @@ static void vfio_save_state(QEMUFile *f, void *opaque) } } -static int vfio_load_setup(QEMUFile *f, void *opaque) +static int vfio_v1_load_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; @@ -604,8 +605,8 @@ static int vfio_load_setup(QEMUFile *f, void *opaque) } } - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_RESUMING); + ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, + VFIO_DEVICE_STATE_V1_RESUMING); if (ret) { error_report("%s: Failed to set state RESUMING", vbasedev->name); if (migration->region.mmaps) { @@ -615,11 +616,11 @@ static int vfio_load_setup(QEMUFile *f, void *opaque) return ret; } -static int vfio_load_cleanup(void *opaque) +static int vfio_v1_load_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; - vfio_migration_cleanup(vbasedev); + vfio_migration_v1_cleanup(vbasedev); trace_vfio_load_cleanup(vbasedev->name); return 0; } @@ -657,7 +658,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) uint64_t data_size = qemu_get_be64(f); if (data_size) { - ret = vfio_load_buffer(f, vbasedev, data_size); + ret = vfio_v1_load_buffer(f, vbasedev, data_size); if (ret < 0) { return ret; } @@ -678,22 +679,22 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } -static SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, - .state_pending_exact = vfio_state_pending, - .state_pending_estimate = vfio_state_pending, +static SaveVMHandlers savevm_vfio_v1_handlers = { + .save_setup = vfio_v1_save_setup, + .save_cleanup = vfio_v1_save_cleanup, + .state_pending_exact = vfio_v1_state_pending, + .state_pending_estimate = vfio_v1_state_pending, .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, + .save_live_complete_precopy = vfio_v1_save_complete_precopy, .save_state = vfio_save_state, - .load_setup = vfio_load_setup, - .load_cleanup = vfio_load_cleanup, + .load_setup = vfio_v1_load_setup, + .load_cleanup = vfio_v1_load_cleanup, .load_state = vfio_load_state, }; /* ---------------------------------------------------------------------- */ -static void vfio_vmstate_change(void *opaque, bool running, RunState state) +static void vfio_v1_vmstate_change(void *opaque, bool running, RunState state) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; @@ -733,21 +734,21 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) } } - ret = vfio_migration_set_state(vbasedev, mask, value); + ret = vfio_migration_v1_set_state(vbasedev, mask, value); if (ret) { /* * Migration should be aborted in this case, but vm_state_notify() * currently does not support reporting failures. */ error_report("%s: Failed to set device state 0x%x", vbasedev->name, - (migration->device_state & mask) | value); + (migration->device_state_v1 & mask) | value); if (migrate_get_current()->to_dst_file) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); } } vbasedev->migration->vm_running = running; - trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), - (migration->device_state & mask) | value); + trace_vfio_v1_vmstate_change(vbasedev->name, running, RunState_str(state), + (migration->device_state_v1 & mask) | value); } static void vfio_migration_state_notifier(Notifier *notifier, void *data) @@ -766,10 +767,10 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_FAILED: bytes_transferred = 0; - ret = vfio_migration_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_V1_SAVING | - VFIO_DEVICE_STATE_V1_RESUMING), - VFIO_DEVICE_STATE_V1_RUNNING); + ret = vfio_migration_v1_set_state(vbasedev, + ~(VFIO_DEVICE_STATE_V1_SAVING | + VFIO_DEVICE_STATE_V1_RESUMING), + VFIO_DEVICE_STATE_V1_RUNNING); if (ret) { error_report("%s: Failed to set state RUNNING", vbasedev->name); } @@ -813,7 +814,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) } vbasedev->migration = g_new0(VFIOMigration, 1); - vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING; + vbasedev->migration->device_state_v1 = VFIO_DEVICE_STATE_V1_RUNNING; vbasedev->migration->vm_running = runstate_is_running(); ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, @@ -844,12 +845,11 @@ static int vfio_migration_init(VFIODevice *vbasedev) } strpadcpy(id, sizeof(id), path, '\0'); - register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, - vbasedev); + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_vfio_v1_handlers, vbasedev); - migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, - vfio_vmstate_change, - vbasedev); + migration->vm_state = qdev_add_vm_change_state_handler( + vbasedev->dev, vfio_v1_vmstate_change, vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); return 0; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 6be5381..c873944 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -149,20 +149,20 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(const char *name) " (%s)" -vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" -vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" +vfio_migration_v1_set_state(const char *name, uint32_t state) " (%s) state %d" +vfio_v1_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -vfio_save_setup(const char *name) " (%s)" +vfio_v1_save_setup(const char *name) " (%s)" vfio_save_cleanup(const char *name) " (%s)" vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 vfio_save_device_config_state(const char *name) " (%s)" -vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64 +vfio_v1_state_pending(const char *name, uint64_t precopy, uint64_t postcopy) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64 vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" -vfio_save_complete_precopy(const char *name) " (%s)" +vfio_v1_save_complete_precopy(const char *name) " (%s)" vfio_load_device_config_state(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 +vfio_v1_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 vfio_load_cleanup(const char *name) " (%s)" vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 -- cgit v1.1 From 31bcbbb5be04c7036223ce680a12927f5e51dc77 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:27 +0200 Subject: vfio/migration: Implement VFIO migration protocol v2 Implement the basic mandatory part of VFIO migration protocol v2. This includes all functionality that is necessary to support VFIO_MIGRATION_STOP_COPY part of the v2 protocol. The two protocols, v1 and v2, will co-exist and in the following patches v1 protocol code will be removed. There are several main differences between v1 and v2 protocols: - VFIO device state is now represented as a finite state machine instead of a bitmap. - Migration interface with kernel is now done using VFIO_DEVICE_FEATURE ioctl and normal read() and write() instead of the migration region. - Pre-copy is made optional in v2 protocol. Support for pre-copy will be added later on. Detailed information about VFIO migration protocol v2 and its difference compared to v1 protocol can be found here [1]. [1] https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com/ Signed-off-by: Avihai Horon Reviewed-by: Juan Quintela . Link: https://lore.kernel.org/r/20230216143630.25610-9-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 17 +- hw/vfio/migration.c | 479 +++++++++++++++++++++++++++++++++++++++++++++++---- hw/vfio/trace-events | 7 + 3 files changed, 464 insertions(+), 39 deletions(-) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 1c974e9..54fee2d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -408,10 +408,17 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) && + if (!migration->v2 && + (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) && (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) { return false; } + + if (migration->v2 && + vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && + migration->device_state == VFIO_DEVICE_STATE_RUNNING) { + return false; + } } } return true; @@ -438,7 +445,13 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return false; } - if (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) { + if (!migration->v2 && + migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) { + continue; + } + + if (migration->v2 && + migration->device_state == VFIO_DEVICE_STATE_RUNNING) { continue; } else { return false; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index a5fe285..452d96a 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/cutils.h" +#include "qemu/units.h" #include #include @@ -44,8 +45,114 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) +/* + * This is an arbitrary size based on migration of mlx5 devices, where typically + * total device migration size is on the order of 100s of MB. Testing with + * larger values, e.g. 128MB and 1GB, did not show a performance improvement. + */ +#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB) + static int64_t bytes_transferred; +static const char *mig_state_to_str(enum vfio_device_mig_state state) +{ + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + return "ERROR"; + case VFIO_DEVICE_STATE_STOP: + return "STOP"; + case VFIO_DEVICE_STATE_RUNNING: + return "RUNNING"; + case VFIO_DEVICE_STATE_STOP_COPY: + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; + default: + return "UNKNOWN STATE"; + } +} + +static int vfio_migration_set_state(VFIODevice *vbasedev, + enum vfio_device_mig_state new_state, + enum vfio_device_mig_state recover_state) +{ + VFIOMigration *migration = vbasedev->migration; + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_mig_state), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_mig_state *mig_state = + (struct vfio_device_feature_mig_state *)feature->data; + int ret; + + feature->argsz = sizeof(buf); + feature->flags = + VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE; + mig_state->device_state = new_state; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + /* Try to set the device in some good state */ + ret = -errno; + + if (recover_state == VFIO_DEVICE_STATE_ERROR) { + error_report("%s: Failed setting device state to %s, err: %s. " + "Recover state is ERROR. Resetting device", + vbasedev->name, mig_state_to_str(new_state), + strerror(errno)); + + goto reset_device; + } + + error_report( + "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s", + vbasedev->name, mig_state_to_str(new_state), + strerror(errno), mig_state_to_str(recover_state)); + + mig_state->device_state = recover_state; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = -errno; + error_report( + "%s: Failed setting device in recover state, err: %s. Resetting device", + vbasedev->name, strerror(errno)); + + goto reset_device; + } + + migration->device_state = recover_state; + + return ret; + } + + migration->device_state = new_state; + if (mig_state->data_fd != -1) { + if (migration->data_fd != -1) { + /* + * This can happen if the device is asynchronously reset and + * terminates a data transfer. + */ + error_report("%s: data_fd out of sync", vbasedev->name); + close(mig_state->data_fd); + + return -EBADF; + } + + migration->data_fd = mig_state->data_fd; + } + + trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state)); + + return 0; + +reset_device: + if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) { + hw_error("%s: Failed resetting device, err: %s", vbasedev->name, + strerror(errno)); + } + + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + + return ret; +} + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, off_t off, bool iswrite) { @@ -260,6 +367,18 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) return ret; } +static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, + uint64_t data_size) +{ + VFIOMigration *migration = vbasedev->migration; + int ret; + + ret = qemu_file_get_to_fd(f, migration->data_fd, data_size); + trace_vfio_load_state_device_data(vbasedev->name, data_size, ret); + + return ret; +} + static int vfio_v1_load_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t data_size) { @@ -394,6 +513,14 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) return qemu_file_get_error(f); } +static void vfio_migration_cleanup(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + + close(migration->data_fd); + migration->data_fd = -1; +} + static void vfio_migration_v1_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; @@ -403,8 +530,80 @@ static void vfio_migration_v1_cleanup(VFIODevice *vbasedev) } } +static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + uint64_t *stop_copy_size) +{ + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_mig_data_size), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_mig_data_size *mig_data_size = + (struct vfio_device_feature_mig_data_size *)feature->data; + + feature->argsz = sizeof(buf); + feature->flags = + VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + return -errno; + } + + *stop_copy_size = mig_data_size->stop_copy_length; + + return 0; +} + +/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ +static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) +{ + ssize_t data_size; + + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { + return -errno; + } + if (data_size == 0) { + return 1; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); + qemu_put_be64(f, data_size); + qemu_put_buffer(f, migration->data_buffer, data_size); + bytes_transferred += data_size; + + trace_vfio_save_block(migration->vbasedev->name, data_size); + + return qemu_file_get_error(f); +} + /* ---------------------------------------------------------------------- */ +static int vfio_save_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); + + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE, + stop_copy_size); + migration->data_buffer = g_try_malloc0(migration->data_buffer_size); + if (!migration->data_buffer) { + error_report("%s: Failed to allocate migration data buffer", + vbasedev->name); + return -ENOMEM; + } + + trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + return qemu_file_get_error(f); +} + static int vfio_v1_save_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -448,6 +647,17 @@ static int vfio_v1_save_setup(QEMUFile *f, void *opaque) return 0; } +static void vfio_save_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + + g_free(migration->data_buffer); + migration->data_buffer = NULL; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); +} + static void vfio_v1_save_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; @@ -456,6 +666,35 @@ static void vfio_v1_save_cleanup(void *opaque) trace_vfio_save_cleanup(vbasedev->name); } +/* + * Migration size of VFIO devices can be as little as a few KBs or as big as + * many GBs. This value should be big enough to cover the worst case. + */ +#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) + +/* + * Only exact function is implemented and not estimate function. The reason is + * that during pre-copy phase of migration the estimate function is called + * repeatedly while pending RAM size is over the threshold, thus migration + * can't converge and querying the VFIO device pending data size is useless. + */ +static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) +{ + VFIODevice *vbasedev = opaque; + uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; + + /* + * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is + * reported so downtime limit won't be violated. + */ + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + *must_precopy += stop_copy_size; + + trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, + stop_copy_size); +} + static void vfio_v1_state_pending(void *opaque, uint64_t *must_precopy, uint64_t *can_postcopy) { @@ -520,6 +759,42 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) return 0; } +static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + int ret; + + /* We reach here with device state STOP only */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + VFIO_DEVICE_STATE_STOP); + if (ret) { + return ret; + } + + do { + ret = vfio_save_block(f, vbasedev->migration); + if (ret < 0) { + return ret; + } + } while (!ret); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + ret = qemu_file_get_error(f); + if (ret) { + return ret; + } + + /* + * If setting the device in STOP state fails, the device should be reset. + * To do so, use ERROR state as a recover state. + */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, + VFIO_DEVICE_STATE_ERROR); + trace_vfio_save_complete_precopy(vbasedev->name, ret); + + return ret; +} + static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -589,6 +864,14 @@ static void vfio_save_state(QEMUFile *f, void *opaque) } } +static int vfio_load_setup(QEMUFile *f, void *opaque) +{ + VFIODevice *vbasedev = opaque; + + return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, + vbasedev->migration->device_state); +} + static int vfio_v1_load_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -616,6 +899,16 @@ static int vfio_v1_load_setup(QEMUFile *f, void *opaque) return ret; } +static int vfio_load_cleanup(void *opaque) +{ + VFIODevice *vbasedev = opaque; + + vfio_migration_cleanup(vbasedev); + trace_vfio_load_cleanup(vbasedev->name); + + return 0; +} + static int vfio_v1_load_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; @@ -658,7 +951,11 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) uint64_t data_size = qemu_get_be64(f); if (data_size) { - ret = vfio_v1_load_buffer(f, vbasedev, data_size); + if (vbasedev->migration->v2) { + ret = vfio_load_buffer(f, vbasedev, data_size); + } else { + ret = vfio_v1_load_buffer(f, vbasedev, data_size); + } if (ret < 0) { return ret; } @@ -679,6 +976,17 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } +static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, + .state_pending_exact = vfio_state_pending_exact, + .save_live_complete_precopy = vfio_save_complete_precopy, + .save_state = vfio_save_state, + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, +}; + static SaveVMHandlers savevm_vfio_v1_handlers = { .save_setup = vfio_v1_save_setup, .save_cleanup = vfio_v1_save_cleanup, @@ -694,6 +1002,38 @@ static SaveVMHandlers savevm_vfio_v1_handlers = { /* ---------------------------------------------------------------------- */ +static void vfio_vmstate_change(void *opaque, bool running, RunState state) +{ + VFIODevice *vbasedev = opaque; + enum vfio_device_mig_state new_state; + int ret; + + if (running) { + new_state = VFIO_DEVICE_STATE_RUNNING; + } else { + new_state = VFIO_DEVICE_STATE_STOP; + } + + /* + * If setting the device in new_state fails, the device should be reset. + * To do so, use ERROR state as a recover state. + */ + ret = vfio_migration_set_state(vbasedev, new_state, + VFIO_DEVICE_STATE_ERROR); + if (ret) { + /* + * Migration should be aborted in this case, but vm_state_notify() + * currently does not support reporting failures. + */ + if (migrate_get_current()->to_dst_file) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + } + } + + trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), + mig_state_to_str(new_state)); +} + static void vfio_v1_vmstate_change(void *opaque, bool running, RunState state) { VFIODevice *vbasedev = opaque; @@ -767,12 +1107,21 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_FAILED: bytes_transferred = 0; - ret = vfio_migration_v1_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_V1_SAVING | - VFIO_DEVICE_STATE_V1_RESUMING), - VFIO_DEVICE_STATE_V1_RUNNING); - if (ret) { - error_report("%s: Failed to set state RUNNING", vbasedev->name); + if (migration->v2) { + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. + */ + vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, + VFIO_DEVICE_STATE_ERROR); + } else { + ret = vfio_migration_v1_set_state(vbasedev, + ~(VFIO_DEVICE_STATE_V1_SAVING | + VFIO_DEVICE_STATE_V1_RESUMING), + VFIO_DEVICE_STATE_V1_RUNNING); + if (ret) { + error_report("%s: Failed to set state RUNNING", vbasedev->name); + } } } } @@ -781,12 +1130,42 @@ static void vfio_migration_exit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - vfio_region_exit(&migration->region); - vfio_region_finalize(&migration->region); + if (!migration->v2) { + vfio_region_exit(&migration->region); + vfio_region_finalize(&migration->region); + } g_free(vbasedev->migration); vbasedev->migration = NULL; } +static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) +{ + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_migration), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; + struct vfio_device_feature_migration *mig = + (struct vfio_device_feature_migration *)feature->data; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + if (errno == ENOTTY) { + error_report("%s: VFIO migration is not supported in kernel", + vbasedev->name); + } else { + error_report("%s: Failed to query VFIO migration support, err: %s", + vbasedev->name, strerror(errno)); + } + + return -errno; + } + + *mig_flags = mig->flags; + + return 0; +} + static int vfio_migration_init(VFIODevice *vbasedev) { int ret; @@ -795,6 +1174,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) char id[256] = ""; g_autofree char *path = NULL, *oid = NULL; struct vfio_region_info *info; + uint64_t mig_flags = 0; if (!vbasedev->ops->vfio_get_object) { return -EINVAL; @@ -805,34 +1185,50 @@ static int vfio_migration_init(VFIODevice *vbasedev) return -EINVAL; } - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_MIGRATION_DEPRECATED, - VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, - &info); - if (ret) { - return ret; - } + ret = vfio_migration_query_flags(vbasedev, &mig_flags); + if (!ret) { + /* Migration v2 */ + /* Basic migration functionality must be supported */ + if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { + return -EOPNOTSUPP; + } + vbasedev->migration = g_new0(VFIOMigration, 1); + vbasedev->migration->device_state = VFIO_DEVICE_STATE_RUNNING; + vbasedev->migration->data_fd = -1; + vbasedev->migration->v2 = true; + } else if (ret == -ENOTTY) { + /* Migration v1 */ + ret = vfio_get_dev_region_info(vbasedev, + VFIO_REGION_TYPE_MIGRATION_DEPRECATED, + VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, + &info); + if (ret) { + return ret; + } - vbasedev->migration = g_new0(VFIOMigration, 1); - vbasedev->migration->device_state_v1 = VFIO_DEVICE_STATE_V1_RUNNING; - vbasedev->migration->vm_running = runstate_is_running(); + vbasedev->migration = g_new0(VFIOMigration, 1); + vbasedev->migration->device_state_v1 = VFIO_DEVICE_STATE_V1_RUNNING; + vbasedev->migration->vm_running = runstate_is_running(); - ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, - info->index, "migration"); - if (ret) { - error_report("%s: Failed to setup VFIO migration region %d: %s", - vbasedev->name, info->index, strerror(-ret)); - goto err; - } + ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, + info->index, "migration"); + if (ret) { + error_report("%s: Failed to setup VFIO migration region %d: %s", + vbasedev->name, info->index, strerror(-ret)); + goto err; + } - if (!vbasedev->migration->region.size) { - error_report("%s: Invalid zero-sized VFIO migration region %d", - vbasedev->name, info->index); - ret = -EINVAL; - goto err; - } + if (!vbasedev->migration->region.size) { + error_report("%s: Invalid zero-sized VFIO migration region %d", + vbasedev->name, info->index); + ret = -EINVAL; + goto err; + } - g_free(info); + g_free(info); + } else { + return ret; + } migration = vbasedev->migration; migration->vbasedev = vbasedev; @@ -845,11 +1241,20 @@ static int vfio_migration_init(VFIODevice *vbasedev) } strpadcpy(id, sizeof(id), path, '\0'); - register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_vfio_v1_handlers, vbasedev); + if (migration->v2) { + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_vfio_handlers, vbasedev); + + migration->vm_state = qdev_add_vm_change_state_handler( + vbasedev->dev, vfio_vmstate_change, vbasedev); + } else { + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_vfio_v1_handlers, vbasedev); + + migration->vm_state = qdev_add_vm_change_state_handler( + vbasedev->dev, vfio_v1_vmstate_change, vbasedev); + } - migration->vm_state = qdev_add_vm_change_state_handler( - vbasedev->dev, vfio_v1_vmstate_change, vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); return 0; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index c873944..b24e448 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -149,20 +149,27 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(const char *name) " (%s)" +vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" vfio_migration_v1_set_state(const char *name, uint32_t state) " (%s) state %d" +vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" vfio_v1_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" +vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 vfio_v1_save_setup(const char *name) " (%s)" vfio_save_cleanup(const char *name) " (%s)" vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 vfio_save_device_config_state(const char *name) " (%s)" +vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 vfio_v1_state_pending(const char *name, uint64_t precopy, uint64_t postcopy) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64 vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" +vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" vfio_v1_save_complete_precopy(const char *name) " (%s)" vfio_load_device_config_state(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 vfio_v1_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 +vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" vfio_load_cleanup(const char *name) " (%s)" vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 +vfio_save_block(const char *name, int data_size) " (%s) data_size %d" -- cgit v1.1 From 7429aebe1cff4bd2143fe5a690bf2892c5467932 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:28 +0200 Subject: vfio/migration: Remove VFIO migration protocol v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that v2 protocol implementation has been added, remove the deprecated v1 implementation. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-10-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 17 +- hw/vfio/migration.c | 700 ++------------------------------------------------- hw/vfio/trace-events | 9 - 3 files changed, 24 insertions(+), 702 deletions(-) (limited to 'hw') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 54fee2d..bab83c0 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -408,14 +408,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - if (!migration->v2 && - (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) && - (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) { - return false; - } - - if (migration->v2 && - vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && migration->device_state == VFIO_DEVICE_STATE_RUNNING) { return false; } @@ -445,13 +438,7 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return false; } - if (!migration->v2 && - migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) { - continue; - } - - if (migration->v2 && - migration->device_state == VFIO_DEVICE_STATE_RUNNING) { + if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { continue; } else { return false; diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 452d96a..a2c3d9b 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -153,220 +153,6 @@ reset_device: return ret; } -static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, - off_t off, bool iswrite) -{ - int ret; - - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : - pread(vbasedev->fd, val, count, off); - if (ret < count) { - error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" - HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, - vbasedev->name, off, strerror(errno)); - return (ret < 0) ? ret : -EINVAL; - } - return 0; -} - -static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, - off_t off, bool iswrite) -{ - int ret, done = 0; - __u8 *tbuf = buf; - - while (count) { - int bytes = 0; - - if (count >= 8 && !(off % 8)) { - bytes = 8; - } else if (count >= 4 && !(off % 4)) { - bytes = 4; - } else if (count >= 2 && !(off % 2)) { - bytes = 2; - } else { - bytes = 1; - } - - ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); - if (ret) { - return ret; - } - - count -= bytes; - done += bytes; - off += bytes; - tbuf += bytes; - } - return done; -} - -#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) -#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) - -#define VFIO_MIG_STRUCT_OFFSET(f) \ - offsetof(struct vfio_device_migration_info, f) -/* - * Change the device_state register for device @vbasedev. Bits set in @mask - * are preserved, bits set in @value are set, and bits not set in either @mask - * or @value are cleared in device_state. If the register cannot be accessed, - * the resulting state would be invalid, or the device enters an error state, - * an error is returned. - */ - -static int vfio_migration_v1_set_state(VFIODevice *vbasedev, uint32_t mask, - uint32_t value) -{ - VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - off_t dev_state_off = region->fd_offset + - VFIO_MIG_STRUCT_OFFSET(device_state); - uint32_t device_state; - int ret; - - ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - return ret; - } - - device_state = (device_state & mask) | value; - - if (!VFIO_DEVICE_STATE_VALID(device_state)) { - return -EINVAL; - } - - ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - int rret; - - rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - - if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { - hw_error("%s: Device in error state 0x%x", vbasedev->name, - device_state); - return rret ? rret : -EIO; - } - return ret; - } - - migration->device_state_v1 = device_state; - trace_vfio_migration_v1_set_state(vbasedev->name, device_state); - return 0; -} - -static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, - uint64_t data_size, uint64_t *size) -{ - void *ptr = NULL; - uint64_t limit = 0; - int i; - - if (!region->mmaps) { - if (size) { - *size = MIN(data_size, region->size - data_offset); - } - return ptr; - } - - for (i = 0; i < region->nr_mmaps; i++) { - VFIOMmap *map = region->mmaps + i; - - if ((data_offset >= map->offset) && - (data_offset < map->offset + map->size)) { - - /* check if data_offset is within sparse mmap areas */ - ptr = map->mmap + data_offset - map->offset; - if (size) { - *size = MIN(data_size, map->offset + map->size - data_offset); - } - break; - } else if ((data_offset < map->offset) && - (!limit || limit > map->offset)) { - /* - * data_offset is not within sparse mmap areas, find size of - * non-mapped area. Check through all list since region->mmaps list - * is not sorted. - */ - limit = map->offset; - } - } - - if (!ptr && size) { - *size = limit ? MIN(data_size, limit - data_offset) : data_size; - } - return ptr; -} - -static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) -{ - VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t data_offset = 0, data_size = 0, sz; - int ret; - - ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } - - ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; - } - - trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, - migration->pending_bytes); - - qemu_put_be64(f, data_size); - sz = data_size; - - while (sz) { - void *buf; - uint64_t sec_size; - bool buf_allocated = false; - - buf = get_data_section_size(region, data_offset, sz, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_allocated = true; - - ret = vfio_mig_read(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - if (ret < 0) { - g_free(buf); - return ret; - } - } - - qemu_put_buffer(f, buf, sec_size); - - if (buf_allocated) { - g_free(buf); - } - sz -= sec_size; - data_offset += sec_size; - } - - ret = qemu_file_get_error(f); - - if (!ret && size) { - *size = data_size; - } - - bytes_transferred += data_size; - return ret; -} - static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t data_size) { @@ -379,96 +165,6 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, return ret; } -static int vfio_v1_load_buffer(QEMUFile *f, VFIODevice *vbasedev, - uint64_t data_size) -{ - VFIORegion *region = &vbasedev->migration->region; - uint64_t data_offset = 0, size, report_size; - int ret; - - do { - ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } - - if (data_offset + data_size > region->size) { - /* - * If data_size is greater than the data section of migration region - * then iterate the write buffer operation. This case can occur if - * size of migration region at destination is smaller than size of - * migration region at source. - */ - report_size = size = region->size - data_offset; - data_size -= size; - } else { - report_size = size = data_size; - data_size = 0; - } - - trace_vfio_v1_load_state_device_data(vbasedev->name, data_offset, size); - - while (size) { - void *buf; - uint64_t sec_size; - bool buf_alloc = false; - - buf = get_data_section_size(region, data_offset, size, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_alloc = true; - } - - qemu_get_buffer(f, buf, sec_size); - - if (buf_alloc) { - ret = vfio_mig_write(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - g_free(buf); - - if (ret < 0) { - return ret; - } - } - size -= sec_size; - data_offset += sec_size; - } - - ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; - } - } while (data_size); - - return 0; -} - -static int vfio_update_pending(VFIODevice *vbasedev) -{ - VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t pending_bytes = 0; - int ret; - - ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); - if (ret < 0) { - migration->pending_bytes = 0; - return ret; - } - - migration->pending_bytes = pending_bytes; - trace_vfio_update_pending(vbasedev->name, pending_bytes); - return 0; -} - static int vfio_save_device_config_state(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -521,15 +217,6 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev) migration->data_fd = -1; } -static void vfio_migration_v1_cleanup(VFIODevice *vbasedev) -{ - VFIOMigration *migration = vbasedev->migration; - - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } -} - static int vfio_query_stop_copy_size(VFIODevice *vbasedev, uint64_t *stop_copy_size) { @@ -604,49 +291,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) return qemu_file_get_error(f); } -static int vfio_v1_save_setup(QEMUFile *f, void *opaque) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret; - - trace_vfio_v1_save_setup(vbasedev->name); - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); - - if (migration->region.mmaps) { - /* - * Calling vfio_region_mmap() from migration thread. Memory API called - * from this function require locking the iothread when called from - * outside the main loop thread. - */ - qemu_mutex_lock_iothread(); - ret = vfio_region_mmap(&migration->region); - qemu_mutex_unlock_iothread(); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region: %s", - vbasedev->name, strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } - - ret = vfio_migration_v1_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_SAVING); - if (ret) { - error_report("%s: Failed to set state SAVING", vbasedev->name); - return ret; - } - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } - - return 0; -} - static void vfio_save_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; @@ -658,14 +302,6 @@ static void vfio_save_cleanup(void *opaque) trace_vfio_save_cleanup(vbasedev->name); } -static void vfio_v1_save_cleanup(void *opaque) -{ - VFIODevice *vbasedev = opaque; - - vfio_migration_v1_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); -} - /* * Migration size of VFIO devices can be as little as a few KBs or as big as * many GBs. This value should be big enough to cover the worst case. @@ -695,70 +331,6 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, stop_copy_size); } -static void vfio_v1_state_pending(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret; - - ret = vfio_update_pending(vbasedev); - if (ret) { - return; - } - - *must_precopy += migration->pending_bytes; - - trace_vfio_v1_state_pending(vbasedev->name, *must_precopy, *can_postcopy); -} - -static int vfio_save_iterate(QEMUFile *f, void *opaque) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; - int ret; - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - - if (migration->pending_bytes == 0) { - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } - - if (migration->pending_bytes == 0) { - qemu_put_be64(f, 0); - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - /* indicates data finished, goto complete phase */ - return 1; - } - } - - ret = vfio_save_buffer(f, vbasedev, &data_size); - if (ret) { - error_report("%s: vfio_save_buffer failed %s", vbasedev->name, - strerror(errno)); - return ret; - } - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } - - /* - * Reset pending_bytes as state_pending* are not called during - * savevm or snapshot case, in such case vfio_update_pending() at - * the start of this function updates pending_bytes. - */ - migration->pending_bytes = 0; - trace_vfio_save_iterate(vbasedev->name, data_size); - return 0; -} - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -795,62 +367,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) return ret; } -static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; - int ret; - - ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, - VFIO_DEVICE_STATE_V1_SAVING); - if (ret) { - error_report("%s: Failed to set state STOP and SAVING", - vbasedev->name); - return ret; - } - - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } - - while (migration->pending_bytes > 0) { - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - ret = vfio_save_buffer(f, vbasedev, &data_size); - if (ret < 0) { - error_report("%s: Failed to save buffer", vbasedev->name); - return ret; - } - - if (data_size == 0) { - break; - } - - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } - } - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } - - ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, - 0); - if (ret) { - error_report("%s: Failed to set state STOPPED", vbasedev->name); - return ret; - } - - trace_vfio_v1_save_complete_precopy(vbasedev->name); - return ret; -} - static void vfio_save_state(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -872,33 +388,6 @@ static int vfio_load_setup(QEMUFile *f, void *opaque) vbasedev->migration->device_state); } -static int vfio_v1_load_setup(QEMUFile *f, void *opaque) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret = 0; - - if (migration->region.mmaps) { - ret = vfio_region_mmap(&migration->region); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region %d: %s", - vbasedev->name, migration->region.nr, - strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } - - ret = vfio_migration_v1_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_RESUMING); - if (ret) { - error_report("%s: Failed to set state RESUMING", vbasedev->name); - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } - } - return ret; -} - static int vfio_load_cleanup(void *opaque) { VFIODevice *vbasedev = opaque; @@ -909,15 +398,6 @@ static int vfio_load_cleanup(void *opaque) return 0; } -static int vfio_v1_load_cleanup(void *opaque) -{ - VFIODevice *vbasedev = opaque; - - vfio_migration_v1_cleanup(vbasedev); - trace_vfio_load_cleanup(vbasedev->name); - return 0; -} - static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) { VFIODevice *vbasedev = opaque; @@ -951,11 +431,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) uint64_t data_size = qemu_get_be64(f); if (data_size) { - if (vbasedev->migration->v2) { - ret = vfio_load_buffer(f, vbasedev, data_size); - } else { - ret = vfio_v1_load_buffer(f, vbasedev, data_size); - } + ret = vfio_load_buffer(f, vbasedev, data_size); if (ret < 0) { return ret; } @@ -987,19 +463,6 @@ static const SaveVMHandlers savevm_vfio_handlers = { .load_state = vfio_load_state, }; -static SaveVMHandlers savevm_vfio_v1_handlers = { - .save_setup = vfio_v1_save_setup, - .save_cleanup = vfio_v1_save_cleanup, - .state_pending_exact = vfio_v1_state_pending, - .state_pending_estimate = vfio_v1_state_pending, - .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_v1_save_complete_precopy, - .save_state = vfio_save_state, - .load_setup = vfio_v1_load_setup, - .load_cleanup = vfio_v1_load_cleanup, - .load_state = vfio_load_state, -}; - /* ---------------------------------------------------------------------- */ static void vfio_vmstate_change(void *opaque, bool running, RunState state) @@ -1034,70 +497,12 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) mig_state_to_str(new_state)); } -static void vfio_v1_vmstate_change(void *opaque, bool running, RunState state) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint32_t value, mask; - int ret; - - if (vbasedev->migration->vm_running == running) { - return; - } - - if (running) { - /* - * Here device state can have one of _SAVING, _RESUMING or _STOP bit. - * Transition from _SAVING to _RUNNING can happen if there is migration - * failure, in that case clear _SAVING bit. - * Transition from _RESUMING to _RUNNING occurs during resuming - * phase, in that case clear _RESUMING bit. - * In both the above cases, set _RUNNING bit. - */ - mask = ~VFIO_DEVICE_STATE_MASK; - value = VFIO_DEVICE_STATE_V1_RUNNING; - } else { - /* - * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset - * _RUNNING bit - */ - mask = ~VFIO_DEVICE_STATE_V1_RUNNING; - - /* - * When VM state transition to stop for savevm command, device should - * start saving data. - */ - if (state == RUN_STATE_SAVE_VM) { - value = VFIO_DEVICE_STATE_V1_SAVING; - } else { - value = 0; - } - } - - ret = vfio_migration_v1_set_state(vbasedev, mask, value); - if (ret) { - /* - * Migration should be aborted in this case, but vm_state_notify() - * currently does not support reporting failures. - */ - error_report("%s: Failed to set device state 0x%x", vbasedev->name, - (migration->device_state_v1 & mask) | value); - if (migrate_get_current()->to_dst_file) { - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); - } - } - vbasedev->migration->vm_running = running; - trace_vfio_v1_vmstate_change(vbasedev->name, running, RunState_str(state), - (migration->device_state_v1 & mask) | value); -} - static void vfio_migration_state_notifier(Notifier *notifier, void *data) { MigrationState *s = data; VFIOMigration *migration = container_of(notifier, VFIOMigration, migration_state); VFIODevice *vbasedev = migration->vbasedev; - int ret; trace_vfio_migration_state_notifier(vbasedev->name, MigrationStatus_str(s->state)); @@ -1107,33 +512,17 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_FAILED: bytes_transferred = 0; - if (migration->v2) { - /* - * If setting the device in RUNNING state fails, the device should - * be reset. To do so, use ERROR state as a recover state. - */ - vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, - VFIO_DEVICE_STATE_ERROR); - } else { - ret = vfio_migration_v1_set_state(vbasedev, - ~(VFIO_DEVICE_STATE_V1_SAVING | - VFIO_DEVICE_STATE_V1_RESUMING), - VFIO_DEVICE_STATE_V1_RUNNING); - if (ret) { - error_report("%s: Failed to set state RUNNING", vbasedev->name); - } - } + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. + */ + vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, + VFIO_DEVICE_STATE_ERROR); } } static void vfio_migration_exit(VFIODevice *vbasedev) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration->v2) { - vfio_region_exit(&migration->region); - vfio_region_finalize(&migration->region); - } g_free(vbasedev->migration); vbasedev->migration = NULL; } @@ -1173,7 +562,6 @@ static int vfio_migration_init(VFIODevice *vbasedev) VFIOMigration *migration; char id[256] = ""; g_autofree char *path = NULL, *oid = NULL; - struct vfio_region_info *info; uint64_t mig_flags = 0; if (!vbasedev->ops->vfio_get_object) { @@ -1186,52 +574,20 @@ static int vfio_migration_init(VFIODevice *vbasedev) } ret = vfio_migration_query_flags(vbasedev, &mig_flags); - if (!ret) { - /* Migration v2 */ - /* Basic migration functionality must be supported */ - if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { - return -EOPNOTSUPP; - } - vbasedev->migration = g_new0(VFIOMigration, 1); - vbasedev->migration->device_state = VFIO_DEVICE_STATE_RUNNING; - vbasedev->migration->data_fd = -1; - vbasedev->migration->v2 = true; - } else if (ret == -ENOTTY) { - /* Migration v1 */ - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_MIGRATION_DEPRECATED, - VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, - &info); - if (ret) { - return ret; - } - - vbasedev->migration = g_new0(VFIOMigration, 1); - vbasedev->migration->device_state_v1 = VFIO_DEVICE_STATE_V1_RUNNING; - vbasedev->migration->vm_running = runstate_is_running(); - - ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, - info->index, "migration"); - if (ret) { - error_report("%s: Failed to setup VFIO migration region %d: %s", - vbasedev->name, info->index, strerror(-ret)); - goto err; - } - - if (!vbasedev->migration->region.size) { - error_report("%s: Invalid zero-sized VFIO migration region %d", - vbasedev->name, info->index); - ret = -EINVAL; - goto err; - } - - g_free(info); - } else { + if (ret) { return ret; } + /* Basic migration functionality must be supported */ + if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { + return -EOPNOTSUPP; + } + + vbasedev->migration = g_new0(VFIOMigration, 1); migration = vbasedev->migration; migration->vbasedev = vbasedev; + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + migration->data_fd = -1; oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); if (oid) { @@ -1241,28 +597,16 @@ static int vfio_migration_init(VFIODevice *vbasedev) } strpadcpy(id, sizeof(id), path, '\0'); - if (migration->v2) { - register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_vfio_handlers, vbasedev); - - migration->vm_state = qdev_add_vm_change_state_handler( - vbasedev->dev, vfio_vmstate_change, vbasedev); - } else { - register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_vfio_v1_handlers, vbasedev); - - migration->vm_state = qdev_add_vm_change_state_handler( - vbasedev->dev, vfio_v1_vmstate_change, vbasedev); - } + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, + vbasedev); + migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, + vfio_vmstate_change, + vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); - return 0; -err: - g_free(info); - vfio_migration_exit(vbasedev); - return ret; + return 0; } /* ---------------------------------------------------------------------- */ diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index b24e448..9d65c2d 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -150,24 +150,15 @@ vfio_display_edid_write_error(void) "" # migration.c vfio_migration_probe(const char *name) " (%s)" vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" -vfio_migration_v1_set_state(const char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -vfio_v1_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 -vfio_v1_save_setup(const char *name) " (%s)" vfio_save_cleanup(const char *name) " (%s)" -vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 -vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 vfio_save_device_config_state(const char *name) " (%s)" vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 -vfio_v1_state_pending(const char *name, uint64_t precopy, uint64_t postcopy) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64 -vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" -vfio_v1_save_complete_precopy(const char *name) " (%s)" vfio_load_device_config_state(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -vfio_v1_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" vfio_load_cleanup(const char *name) " (%s)" vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 -- cgit v1.1 From 48e4d8289f7bf90947ec5621e3ad05aa594a36ea Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Thu, 16 Feb 2023 16:36:29 +0200 Subject: vfio: Alphabetize migration section of VFIO trace-events file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort the migration section of VFIO trace events file alphabetically and move two misplaced traces to common.c section. Signed-off-by: Avihai Horon Reviewed-by: Cédric Le Goater Reviewed-by: Juan Quintela Link: https://lore.kernel.org/r/20230216143630.25610-11-avihaih@nvidia.com Signed-off-by: Alex Williamson --- hw/vfio/trace-events | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'hw') diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 9d65c2d..669d9fe 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -119,6 +119,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" vfio_dma_unmap_overflow_workaround(void) "" +vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 +vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 # platform.c vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" @@ -148,19 +150,17 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" vfio_display_edid_write_error(void) "" # migration.c +vfio_load_cleanup(const char *name) " (%s)" +vfio_load_device_config_state(const char *name) " (%s)" +vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 +vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" vfio_migration_probe(const char *name) " (%s)" vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" -vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" -vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 +vfio_save_block(const char *name, int data_size) " (%s) data_size %d" vfio_save_cleanup(const char *name) " (%s)" +vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" vfio_save_device_config_state(const char *name) " (%s)" +vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 -vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" -vfio_load_device_config_state(const char *name) " (%s)" -vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" -vfio_load_cleanup(const char *name) " (%s)" -vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 -vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 -vfio_save_block(const char *name, int data_size) " (%s) data_size %d" +vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -- cgit v1.1