aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2023-02-07 15:16:51 +0000
committerPeter Maydell <peter.maydell@linaro.org>2023-02-07 15:16:51 +0000
commitb86307ecef9222c335ebd0ed4da2b243e86f779e (patch)
tree5612d36ebf94229640ede079655dca134a0a378e /hw
parent285ee77f5b58237c972bbe82aa0e1dc489c147d6 (diff)
parent1b1f4ab69c41279a45ccd0d3178e83471e6e4ec1 (diff)
downloadqemu-b86307ecef9222c335ebd0ed4da2b243e86f779e.zip
qemu-b86307ecef9222c335ebd0ed4da2b243e86f779e.tar.gz
qemu-b86307ecef9222c335ebd0ed4da2b243e86f779e.tar.bz2
Merge tag 'migration-20230206-pull-request' of https://gitlab.com/juan.quintela/qemu into staging
Migration Pull request In this try - rebase to latest upstream - same as previous patch - fix compilation on non-Linux (userfaultfd.h) (me) - query-migrationthreads (jiang) - fix race on reading MultiFDPages_t.block (zhenzhong) - fix flush of zero copy page send request (zhenzhong) Please apply. Previous try: It includes: - David Hildenbrand fixes for virtio-mem - David Gilbert canary to detect problems - Fix for rdma return values (Fiona) - Peter Xu uffd_open fixes - Peter Xu show right downtime for postcopy - manish.mishra msg fix fixes - my vfio changes. Please apply. # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmPhobYACgkQ9IfvGFhy # 1yMNaA/9EHDPqrI1HL/VkJG4nNOOsQR7RbburXEberZOzvLjnqpjUD3Ls9qV6rx+ # ieHa5T4imYJFk72Wa5vx4r1/dCjtJD2W6jg5+/0nTvYAHrs1U1VRqpuTr0HiXdbJ # ZLLCnW5eDyO3eMaOX0MUkgHgL0FNkc/Lq5ViCTFsMu9O9xMuDLLdAC3cdvslKuOu # X1gKByr9jT817Y9e36amYmRaJKC6Cr/PIekNVFu12HBW79pPusLX8KWEf4RBw4HR # sPwTvMCR/BwZ0+2Lppan60G5rt/ZxDu40oU7y+RHlfWqevl4hDM84/nhjMvEgzc5 # a4Ahe2ERGLwwnC8z3l7v9+pEzSGzDoPcnRGvZcpUpk68wTDtxd5Bdq8CwmNUfL07 # VzWcYpH0yvmwjBba9jfn9fAVgnG5rVp558XcYLIII3wEToty3UDtm43wSdj2CGr6 # cu+IPAp+n/I5G9SRYBTU9ozJz45ttnEe0hxUtZ4I3MuhzHi1VEDAqTWM/X0LyS41 # TB3Y5B2KKpJYbPyZEH4nyTeetR2k7alTFzahCgKqVfOgL0nJx54petjS1K+B1P72 # g6lhP9WnQ33W+M8S7J/aGEaDJd1lFyFB2Rdjn2ZZnASH/fR9j0mFmXWvulXtjFNp # Sfim3887+Iv4Uzw4VWEe3mM5Ypi/Ba2CmuTjy/pM08Ey8X1Qs5o= # =ZQbR # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 Feb 2023 00:56:22 GMT # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * tag 'migration-20230206-pull-request' of https://gitlab.com/juan.quintela/qemu: (30 commits) migration: save/delete migration thread info migration: Introduce interface query-migrationthreads multifd: Fix flush of zero copy page send 
request multifd: Fix a race on reading MultiFDPages_t.block migration: check magic value for deciding the mapping of channels io: Add support for MSG_PEEK for socket channel migration/dirtyrate: Show sample pages only in page-sampling mode migration: Perform vmsd structure check during tests migration: Add canary to VMSTATE_END_OF_LIST migration/rdma: fix return value for qio_channel_rdma_{readv,writev} migration: Show downtime during postcopy phase virtio-mem: Proper support for preallocation with migration virtio-mem: Migrate immutable properties early virtio-mem: Fail if a memory backend with "prealloc=on" is specified migration/ram: Factor out check for advised postcopy migration/vmstate: Introduce VMSTATE_WITH_TMP_TEST() and VMSTATE_BITMAP_TEST() migration/savevm: Allow immutable device state to be migrated early (i.e., before RAM) migration/savevm: Prepare vmdesc json writer in qemu_savevm_state_setup() migration/savevm: Move more savevm handling into vmstate_save() migration/ram: Optimize ram_write_tracking_start() for RamDiscardManager ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/core/machine.c4
-rw-r--r--hw/s390x/s390-stattrib.c11
-rw-r--r--hw/vfio/migration.c20
-rw-r--r--hw/vfio/trace-events2
-rw-r--r--hw/virtio/virtio-mem.c144
5 files changed, 162 insertions, 19 deletions
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f7761ba..b5cd42c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,7 +41,9 @@
#include "hw/virtio/virtio-pci.h"
#include "qom/object_interfaces.h"
-GlobalProperty hw_compat_7_2[] = {};
+GlobalProperty hw_compat_7_2[] = {
+ { "virtio-mem", "x-early-migration", "false" },
+};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
GlobalProperty hw_compat_7_1[] = {
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 9eda1c3..3e32002 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -182,10 +182,10 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
return 0;
}
-static void cmma_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void cmma_state_pending(void *opaque,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
S390StAttribState *sas = S390_STATTRIB(opaque);
S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
@@ -371,7 +371,8 @@ static SaveVMHandlers savevm_s390_stattrib_handlers = {
.save_setup = cmma_save_setup,
.save_live_iterate = cmma_save_iterate,
.save_live_complete_precopy = cmma_save_complete,
- .save_live_pending = cmma_save_pending,
+ .state_pending_exact = cmma_state_pending,
+ .state_pending_estimate = cmma_state_pending,
.save_cleanup = cmma_save_cleanup,
.load_state = cmma_load,
.is_active = cmma_active,
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c74453e..b3318f0 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -456,11 +456,10 @@ static void vfio_save_cleanup(void *opaque)
trace_vfio_save_cleanup(vbasedev->name);
}
-static void vfio_save_pending(QEMUFile *f, void *opaque,
- uint64_t threshold_size,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void vfio_state_pending(void *opaque,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
{
VFIODevice *vbasedev = opaque;
VFIOMigration *migration = vbasedev->migration;
@@ -473,7 +472,7 @@ static void vfio_save_pending(QEMUFile *f, void *opaque,
*res_precopy_only += migration->pending_bytes;
- trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
+ trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
*res_postcopy_only, *res_compatible);
}
@@ -515,9 +514,9 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
}
/*
- * Reset pending_bytes as .save_live_pending is not called during savevm or
- * snapshot case, in such case vfio_update_pending() at the start of this
- * function updates pending_bytes.
+ * Reset pending_bytes as state_pending* are not called during
+ * savevm or snapshot case, in such case vfio_update_pending() at
+ * the start of this function updates pending_bytes.
*/
migration->pending_bytes = 0;
trace_vfio_save_iterate(vbasedev->name, data_size);
@@ -685,7 +684,8 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
static SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
- .save_live_pending = vfio_save_pending,
+ .state_pending_exact = vfio_state_pending,
+ .state_pending_estimate = vfio_state_pending,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
.save_state = vfio_save_state,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 73dffe9..52de1c8 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -157,7 +157,7 @@ vfio_save_cleanup(const char *name) " (%s)"
vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
vfio_save_device_config_state(const char *name) " (%s)"
-vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
+vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
vfio_save_complete_precopy(const char *name) " (%s)"
vfio_load_device_config_state(const char *name) " (%s)"
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 1ed1f5a..957fe77 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -31,6 +31,8 @@
#include CONFIG_DEVICES
#include "trace.h"
+static const VMStateDescription vmstate_virtio_mem_device_early;
+
/*
* We only had legacy x86 guests that did not support
* VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
@@ -202,6 +204,30 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
return ret;
}
+static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+ virtio_mem_range_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ int ret = 0;
+
+ first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
+ while (first_bit < vmem->bitmap_size) {
+ offset = first_bit * vmem->block_size;
+ last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * vmem->block_size;
+
+ ret = cb(vmem, arg, offset, size);
+ if (ret) {
+ break;
+ }
+ first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
+ last_bit + 2);
+ }
+ return ret;
+}
+
/*
* Adjust the memory section to cover the intersection with the given range.
*
@@ -772,6 +798,12 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
error_setg(errp, "'%s' property specifies an unsupported memdev",
VIRTIO_MEM_MEMDEV_PROP);
return;
+ } else if (vmem->memdev->prealloc) {
+ error_setg(errp, "'%s' property specifies a memdev with preallocation"
+ " enabled: %s. Instead, specify 'prealloc=on' for the"
+ " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP,
+ object_get_canonical_path_component(OBJECT(vmem->memdev)));
+ return;
}
if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
@@ -872,6 +904,10 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
host_memory_backend_set_mapped(vmem->memdev, true);
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
+ if (vmem->early_migration) {
+ vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY,
+ &vmstate_virtio_mem_device_early, vmem);
+ }
qemu_register_reset(virtio_mem_system_reset, vmem);
/*
@@ -893,6 +929,10 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
*/
memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
qemu_unregister_reset(virtio_mem_system_reset, vmem);
+ if (vmem->early_migration) {
+ vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
+ vmem);
+ }
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
host_memory_backend_set_mapped(vmem->memdev, false);
virtio_del_queue(vdev, 0);
@@ -922,6 +962,10 @@ static int virtio_mem_post_load(void *opaque, int version_id)
RamDiscardListener *rdl;
int ret;
+ if (vmem->prealloc && !vmem->early_migration) {
+ warn_report("Proper preallocation with migration requires a newer QEMU machine");
+ }
+
/*
* We started out with all memory discarded and our memory region is mapped
* into an address space. Replay, now that we updated the bitmap.
@@ -941,6 +985,64 @@ static int virtio_mem_post_load(void *opaque, int version_id)
return virtio_mem_restore_unplugged(vmem);
}
+static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
+ uint64_t offset, uint64_t size)
+{
+ void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
+ int fd = memory_region_get_fd(&vmem->memdev->mr);
+ Error *local_err = NULL;
+
+ qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int virtio_mem_post_load_early(void *opaque, int version_id)
+{
+ VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+ RAMBlock *rb = vmem->memdev->mr.ram_block;
+ int ret;
+
+ if (!vmem->prealloc) {
+ return 0;
+ }
+
+ /*
+ * We restored the bitmap and verified that the basic properties
+ * match on source and destination, so we can go ahead and preallocate
+ * memory for all plugged memory blocks, before actual RAM migration starts
+ * touching this memory.
+ */
+ ret = virtio_mem_for_each_plugged_range(vmem, NULL,
+ virtio_mem_prealloc_range_cb);
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * This is tricky: postcopy wants to start with a clean slate. On
+ * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
+ * preallocated) RAM such that postcopy will work as expected later.
+ *
+ * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
+ * RAM migration. So let's discard all memory again. This looks like an
+ * expensive NOP, but actually serves a purpose: we made sure that we
+ * were able to allocate all required backend memory once. We cannot
+ * guarantee that the backend memory we will free will remain free
+ * until we need it during postcopy, but at least we can catch the
+ * obvious setup issues this way.
+ */
+ if (migration_incoming_postcopy_advised()) {
+ if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
typedef struct VirtIOMEMMigSanityChecks {
VirtIOMEM *parent;
uint64_t addr;
@@ -1009,6 +1111,14 @@ static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
},
};
+static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
+{
+ const VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+
+ /* With early migration, these fields were already migrated. */
+ return !vmem->early_migration;
+}
+
static const VMStateDescription vmstate_virtio_mem_device = {
.name = "virtio-mem-device",
.minimum_version_id = 1,
@@ -1016,11 +1126,39 @@ static const VMStateDescription vmstate_virtio_mem_device = {
.priority = MIG_PRI_VIRTIO_MEM,
.post_load = virtio_mem_post_load,
.fields = (VMStateField[]) {
+ VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
+ VirtIOMEMMigSanityChecks,
+ vmstate_virtio_mem_sanity_checks),
+ VMSTATE_UINT64(usable_region_size, VirtIOMEM),
+ VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
+ VMSTATE_UINT64(requested_size, VirtIOMEM),
+ VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
+ 0, bitmap_size),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Transfer properties that are immutable while migration is active early,
+ * such that we have this information around before migrating any RAM
+ * content.
+ *
+ * Note that virtio_mem_is_busy() makes sure these properties can no longer
+ * change on the migration source until migration completed.
+ *
+ * With QEMU compat machines, we transmit these properties later, via
+ * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
+ */
+static const VMStateDescription vmstate_virtio_mem_device_early = {
+ .name = "virtio-mem-device-early",
+ .minimum_version_id = 1,
+ .version_id = 1,
+ .early_setup = true,
+ .post_load = virtio_mem_post_load_early,
+ .fields = (VMStateField[]) {
VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
vmstate_virtio_mem_sanity_checks),
- VMSTATE_UINT64(usable_region_size, VirtIOMEM),
VMSTATE_UINT64(size, VirtIOMEM),
- VMSTATE_UINT64(requested_size, VirtIOMEM),
VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
VMSTATE_END_OF_LIST()
},
@@ -1205,6 +1343,8 @@ static Property virtio_mem_properties[] = {
DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
+ DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
+ early_migration, true),
DEFINE_PROP_END_OF_LIST(),
};