diff options
Diffstat (limited to 'hw/vfio')
-rw-r--r-- | hw/vfio/device.c | 6 | ||||
-rw-r--r-- | hw/vfio/helpers.c | 17 | ||||
-rw-r--r-- | hw/vfio/migration-multifd.c | 101 | ||||
-rw-r--r-- | hw/vfio/migration-multifd.h | 3 | ||||
-rw-r--r-- | hw/vfio/migration.c | 10 | ||||
-rw-r--r-- | hw/vfio/pci.c | 48 | ||||
-rw-r--r-- | hw/vfio/pci.h | 6 | ||||
-rw-r--r-- | hw/vfio/trace-events | 1 | ||||
-rw-r--r-- | hw/vfio/vfio-helpers.h | 2 | ||||
-rw-r--r-- | hw/vfio/vfio-migration-internal.h | 1 |
10 files changed, 182 insertions, 13 deletions
diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 96cf214..52a1996 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -463,6 +463,8 @@ void vfio_device_detach(VFIODevice *vbasedev) void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, struct vfio_device_info *info) { + int i; + vbasedev->num_irqs = info->num_irqs; vbasedev->num_regions = info->num_regions; vbasedev->flags = info->flags; @@ -477,6 +479,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, vbasedev->num_regions); if (vbasedev->use_region_fds) { vbasedev->region_fds = g_new0(int, vbasedev->num_regions); + for (i = 0; i < vbasedev->num_regions; i++) { + vbasedev->region_fds[i] = -1; + } } } @@ -489,7 +494,6 @@ void vfio_device_unprepare(VFIODevice *vbasedev) if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) { close(vbasedev->region_fds[i]); } - } g_clear_pointer(&vbasedev->reginfo, g_free); diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 9a5f621..23d13e5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -209,3 +209,20 @@ retry: return info; } + +bool vfio_arch_wants_loading_config_after_iter(void) +{ + /* + * Starting the config load only after all iterables were loaded (during + * non-iterables loading phase) is required for ARM64 due to this platform + * VFIO dependency on interrupt controller being loaded first. + * + * See commit d329f5032e17 ("vfio: Move the saving of the config space to + * the right place in VFIO migration"). + */ +#if defined(TARGET_ARM) + return true; +#else + return false; +#endif +} diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index 5563548..e478503 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -13,7 +13,6 @@ #include "hw/vfio/vfio-device.h" #include "migration/misc.h" #include "qapi/error.h" -#include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/lockable.h" #include "qemu/main-loop.h" @@ -23,6 +22,7 @@ #include "migration-multifd.h" #include "vfio-migration-internal.h" #include "trace.h" +#include "vfio-helpers.h" #define VFIO_DEVICE_STATE_CONFIG_STATE (1) @@ -35,6 +35,18 @@ typedef struct VFIODeviceStatePacket { uint8_t data[0]; } QEMU_PACKED VFIODeviceStatePacket; +bool vfio_load_config_after_iter(VFIODevice *vbasedev) +{ + if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) { + return true; + } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) { + return false; + } + + assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO); + return vfio_arch_wants_loading_config_after_iter(); +} + /* type safety */ typedef struct VFIOStateBuffers { GArray *array; @@ -50,12 +62,16 @@ typedef struct VFIOMultifd { bool load_bufs_thread_running; bool load_bufs_thread_want_exit; + bool load_bufs_iter_done; + QemuCond load_bufs_iter_done_cond; + VFIOStateBuffers load_bufs; QemuCond load_bufs_buffer_ready_cond; QemuCond load_bufs_thread_finished_cond; QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ uint32_t load_buf_idx; uint32_t load_buf_idx_last; + size_t load_buf_queued_pending_buffers_size; } VFIOMultifd; static void vfio_state_buffer_clear(gpointer data) @@ -112,6 +128,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, VFIOMigration *migration = vbasedev->migration; VFIOMultifd *multifd = migration->multifd; VFIOStateBuffer *lb; + size_t data_size = packet_total_size - sizeof(*packet); vfio_state_buffers_assert_init(&multifd->load_bufs); if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { @@ -127,8 +144,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, assert(packet->idx >= multifd->load_buf_idx); - lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); - lb->len = packet_total_size - sizeof(*packet); + multifd->load_buf_queued_pending_buffers_size += data_size; + if (multifd->load_buf_queued_pending_buffers_size > + vbasedev->migration_max_queued_buffers_size) { + error_setg(errp, + "%s: queuing state buffer %" PRIu32 + " would exceed the size max of %" PRIu64, + vbasedev->name, packet->idx, + vbasedev->migration_max_queued_buffers_size); + return false; + } + + lb->data = g_memdup2(&packet->data, data_size); + lb->len = data_size; lb->is_present = true; return true; @@ -312,6 +340,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev, assert(wr_ret <= buf_len); buf_len -= wr_ret; buf_cur += wr_ret; + + assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret); + multifd->load_buf_queued_pending_buffers_size -= wr_ret; } trace_vfio_load_state_device_buffer_load_end(vbasedev->name, @@ -394,6 +425,22 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp) multifd->load_buf_idx++; } + if (vfio_load_config_after_iter(vbasedev)) { + while (!multifd->load_bufs_iter_done) { + qemu_cond_wait(&multifd->load_bufs_iter_done_cond, + &multifd->load_bufs_mutex); + + /* + * Need to re-check cancellation immediately after wait in case + * cond was signalled by vfio_load_cleanup_load_bufs_thread(). + */ + if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) { + error_setg(errp, "operation cancelled"); + goto thread_exit; + } + } + } + if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) { goto thread_exit; } @@ -413,6 +460,48 @@ thread_exit: return ret; } +int vfio_load_state_config_load_ready(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + int ret = 0; + + if (!vfio_multifd_transfer_enabled(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer", + vbasedev->name); + return -EINVAL; + } + + if (!vfio_load_config_after_iter(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled", + vbasedev->name); + return -EINVAL; + } + + assert(multifd); + + /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */ + bql_unlock(); + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { + if (multifd->load_bufs_iter_done) { + /* Can't print error here as we're outside BQL */ + ret = -EINVAL; + break; + } + + multifd->load_bufs_iter_done = true; + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); + } + bql_lock(); + + if (ret) { + error_report("%s: duplicate DEV_CONFIG_LOAD_READY", + vbasedev->name); + } + + return ret; +} + static VFIOMultifd *vfio_multifd_new(void) { VFIOMultifd *multifd = g_new(VFIOMultifd, 1); @@ -423,8 +512,12 @@ static VFIOMultifd *vfio_multifd_new(void) multifd->load_buf_idx = 0; multifd->load_buf_idx_last = UINT32_MAX; + multifd->load_buf_queued_pending_buffers_size = 0; qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); + multifd->load_bufs_iter_done = false; + qemu_cond_init(&multifd->load_bufs_iter_done_cond); + multifd->load_bufs_thread_running = false; multifd->load_bufs_thread_want_exit = false; qemu_cond_init(&multifd->load_bufs_thread_finished_cond); @@ -448,6 +541,7 @@ static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd) multifd->load_bufs_thread_want_exit = true; qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); qemu_cond_wait(&multifd->load_bufs_thread_finished_cond, &multifd->load_bufs_mutex); } @@ -460,6 +554,7 @@ static void vfio_multifd_free(VFIOMultifd *multifd) vfio_load_cleanup_load_bufs_thread(multifd); qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond); + qemu_cond_destroy(&multifd->load_bufs_iter_done_cond); vfio_state_buffers_destroy(&multifd->load_bufs); qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); qemu_mutex_destroy(&multifd->load_bufs_mutex); diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h index ebf22a7..82d2d3a 100644 --- a/hw/vfio/migration-multifd.h +++ b/hw/vfio/migration-multifd.h @@ -20,9 +20,12 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); bool vfio_multifd_transfer_supported(void); bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); +bool vfio_load_config_after_iter(VFIODevice *vbasedev); bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, Error **errp); +int vfio_load_state_config_load_ready(VFIODevice *vbasedev); + void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); bool diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index c329578..4c06e3d 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -675,7 +675,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) int ret; if (vfio_multifd_transfer_enabled(vbasedev)) { - vfio_multifd_emit_dummy_eos(vbasedev, f); + if (vfio_load_config_after_iter(vbasedev)) { + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY); + } else { + vfio_multifd_emit_dummy_eos(vbasedev, f); + } return; } @@ -784,6 +788,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } + case VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY: + { + return vfio_load_state_config_load_ready(vbasedev); + } default: error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); return -EINVAL; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 1093b28..be05002 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2893,10 +2893,6 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); - return true; } @@ -3228,6 +3224,23 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vdev->sub_device_id); } + /* + * Class code is a 24-bit value at config space 0x09. Allow overriding it + * with any 24-bit value. + */ + if (vdev->class_code != PCI_ANY_ID) { + if (vdev->class_code > 0xffffff) { + error_setg(errp, "invalid PCI class code provided"); + return false; + } + /* Higher 24 bits of PCI_CLASS_REVISION are class code */ + vfio_add_emulated_long(vdev, PCI_CLASS_REVISION, + vdev->class_code << 8, ~0xff); + trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code); + } else { + vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8; + } + /* QEMU can change multi-function devices to single function, or reverse */ vdev->emulated_config_bits[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_MULTI_FUNCTION; @@ -3257,6 +3270,12 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vfio_bars_register(vdev); + if (vdev->vga && vfio_is_vga(vdev)) { + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); + } + return true; } @@ -3623,6 +3642,11 @@ static const Property vfio_pci_dev_properties[] = { vbasedev.migration_multifd_transfer, vfio_pci_migration_multifd_transfer_prop, OnOffAuto, .set_default = true, .defval.i = ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice, + vbasedev.migration_load_config_after_iter, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice, + vbasedev.migration_max_queued_buffers_size, UINT64_MAX), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3643,6 +3667,8 @@ static const Property vfio_pci_dev_properties[] = { sub_vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice, + class_code, PCI_ANY_ID), DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0), DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, nv_gpudirect_clique, @@ -3797,6 +3823,20 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) "x-migration-multifd-transfer", "Transfer this device state via " "multifd channels when live migrating it"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-load-config-after-iter", + "Start the config load only after " + "all iterables were loaded (during " + "non-iterables loading phase) when " + "doing live migration of device state " + "via multifd channels"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-max-queued-buffers-size", + "Maximum size of in-flight VFIO " + "device state buffers queued at the " + "destination when doing live " + "migration of device state via " + "multifd channels"); } static const TypeInfo vfio_pci_dev_info = { diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 495fae7..4aa6461 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -157,6 +157,7 @@ struct VFIOPCIDevice { uint32_t device_id; uint32_t sub_vendor_id; uint32_t sub_device_id; + uint32_t class_code; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) @@ -205,10 +206,7 @@ static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t de static inline bool vfio_is_vga(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; - uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); - - return class == PCI_CLASS_DISPLAY_VGA; + return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA; } /* MSI/MSI-X/INTx */ diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 8ec0ad0..fc6ed23 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -48,6 +48,7 @@ vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x" +vfio_pci_emulated_class_code(const char *name, uint32_t val) "%s 0x%06x" # pci-quirks.c vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" diff --git a/hw/vfio/vfio-helpers.h b/hw/vfio/vfio-helpers.h index 54a327f..ce31758 100644 --- a/hw/vfio/vfio-helpers.h +++ b/hw/vfio/vfio-helpers.h @@ -32,4 +32,6 @@ struct vfio_device_info *vfio_get_device_info(int fd); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); +bool vfio_arch_wants_loading_config_after_iter(void); + #endif /* HW_VFIO_VFIO_HELPERS_H */ diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h index a8b456b..54141e2 100644 --- a/hw/vfio/vfio-migration-internal.h +++ b/hw/vfio/vfio-migration-internal.h @@ -32,6 +32,7 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) #define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY (0xffffffffef100006ULL) typedef struct VFIODevice VFIODevice; typedef struct VFIOMultifd VFIOMultifd; |