aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-06-24 10:38:33 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2025-06-24 10:38:33 -0400
commit24c00b754121f3569ea9e68f5f188747cf5b8439 (patch)
treead53b951fc205a16ac4af03b4a2a365bc0e38324
parent3d40db0efc22520fa6c399cf73960dced423b048 (diff)
parent2fde3fb916079ee0ff0fc26d9446c813b1d5cc28 (diff)
downloadqemu-24c00b754121f3569ea9e68f5f188747cf5b8439.zip
qemu-24c00b754121f3569ea9e68f5f188747cf5b8439.tar.gz
qemu-24c00b754121f3569ea9e68f5f188747cf5b8439.tar.bz2
Merge tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu into staging
Migration / Memory pull - Yanfei's optimization to skip log_clear during completion - Fabiano's cleanup to remove leftover migration-helpers.c file - Juraj's vnc fix on display pause after migration - Jaehoon's cpr test fix on possible race of server establishment - Chenyi's initial support on vfio enablement for guest-memfd # -----BEGIN PGP SIGNATURE----- # # iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCaFmzWhIccGV0ZXJ4QHJl # ZGhhdC5jb20ACgkQO1/MzfOr1wbWYQD/dz08tyaL2J4EHESfBsW4Z1rEggVOM0cB # hlXnvzf/Pb4A/0X3Hn18bOxfPAZOr8NggS5AKgzCCYVeQEWQA2Jj8hwC # =kcTN # -----END PGP SIGNATURE----- # gpg: Signature made Mon 23 Jun 2025 16:04:42 EDT # gpg: using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706 # gpg: issuer "peterx@redhat.com" # gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [full] # gpg: aka "Peter Xu <peterx@redhat.com>" [full] # Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D D1A9 3B5F CCCD F3AB D706 * tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu: physmem: Support coordinated discarding of RAM with guest_memfd ram-block-attributes: Introduce RamBlockAttributes to manage RAMBlock with guest_memfd memory: Unify the definiton of ReplayRamPopulate() and ReplayRamDiscard() memory: Change memory_region_set_ram_discard_manager() to return the result memory: Export a helper to get intersection of a MemoryRegionSection with a given range migration: Don't sync volatile memory after migration completes tests/migration: Setup pre-listened cpr.sock to remove race-condition. migration: Support fd-based socket address in cpr_transfer_input ui/vnc: Update display update interval when VM state changes to RUNNING tests/qtest: Remove migration-helpers.c migration/ram: avoid to do log clear in the last round Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r--MAINTAINERS1
-rw-r--r--accel/kvm/kvm-all.c9
-rw-r--r--hw/virtio/virtio-mem.c83
-rw-r--r--include/system/memory.h110
-rw-r--r--include/system/ramblock.h22
-rw-r--r--migration/cpr-transfer.c7
-rw-r--r--migration/ram.c31
-rw-r--r--system/memory.c22
-rw-r--r--system/meson.build1
-rw-r--r--system/physmem.c23
-rw-r--r--system/ram-block-attributes.c444
-rw-r--r--system/trace-events3
-rw-r--r--tests/qtest/migration-helpers.c530
-rw-r--r--tests/qtest/migration/cpr-tests.c14
-rw-r--r--ui/vnc.c12
-rw-r--r--ui/vnc.h2
16 files changed, 688 insertions, 626 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 94c4076..27f4fe3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3175,6 +3175,7 @@ F: system/memory.c
F: system/memory_mapping.c
F: system/physmem.c
F: system/memory-internal.h
+F: system/ram-block-attributes.c
F: scripts/coccinelle/memory-region-housekeeping.cocci
Memory devices
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a317783..d095d1b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3091,6 +3091,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
+ ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm),
+ offset, size, to_private);
+ if (ret) {
+ error_report("Failed to notify the listener the state change of "
+ "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ goto out_unref;
+ }
+
if (to_private) {
if (rb->page_size != qemu_real_host_page_size()) {
/*
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index a3d1a67..c46f6f9 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
return ret;
}
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
- uint64_t offset, uint64_t size)
-{
- uint64_t start = MAX(s->offset_within_region, offset);
- uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
- offset + size);
-
- if (end <= start) {
- return false;
- }
-
- s->offset_within_address_space += start - s->offset_within_region;
- s->offset_within_region = start;
- s->size = int128_make64(end - start);
- return true;
-}
-
typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl->notify_discard(rdl, &tmp);
@@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
ret = rdl->notify_populate(rdl, &tmp);
@@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
if (rdl2 == rdl) {
break;
}
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl2->notify_discard(rdl2, &tmp);
@@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
}
/*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem))) {
+ error_setg(errp, "Failed to set RamDiscardManager");
+ ram_block_coordinated_discard_require(false);
+ return;
+ }
+
+ /*
* We don't know at this point whether shared RAM is migrated using
* QEMU or migrated using the file content. "x-ignore-shared" will be
* configured after realizing the device. So in case we have an
@@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
return;
}
@@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
vmem->system_reset->vmem = vmem;
qemu_register_resettable(obj);
-
- /*
- * Set ourselves as RamDiscardManager before the plug handler maps the
- * memory region and exposes it via an address space.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr,
- RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
- /*
- * The unplug handler unmapped the memory region, it cannot be
- * found via an address space anymore. Unset ourselves.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
-
qemu_unregister_resettable(OBJECT(vmem->system_reset));
object_unref(OBJECT(vmem->system_reset));
@@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
+ /*
+ * The unplug handler unmapped the memory region, it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
}
@@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
}
struct VirtIOMEMReplayData {
- void *fn;
+ ReplayRamDiscardState fn;
void *opaque;
};
@@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
struct VirtIOMEMReplayData *data = arg;
- return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+ return data->fn(s, data->opaque);
}
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *s,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
{
struct VirtIOMEMReplayData *data = arg;
- ((ReplayRamDiscard)data->fn)(s, data->opaque);
- return 0;
+ return data->fn(s, data->opaque);
}
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *s,
- ReplayRamDiscard replay_fn,
- void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
struct VirtIOMEMReplayData data = {
@@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
};
g_assert(s->mr == &vmem->memdev->mr);
- virtio_mem_for_each_unplugged_section(vmem, s, &data,
- virtio_mem_rdm_replay_discarded_cb);
+ return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_discarded_cb);
}
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
diff --git a/include/system/memory.h b/include/system/memory.h
index 0848690..46248d4 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -576,8 +576,20 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl,
rdl->double_discard_supported = double_discard_supported;
}
-typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
-typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
+/**
+ * typedef ReplayRamDiscardState:
+ *
+ * The callback handler for #RamDiscardManagerClass.replay_populated/
+ * #RamDiscardManagerClass.replay_discarded to invoke on populated/discarded
+ * parts.
+ *
+ * @section: the #MemoryRegionSection of populated/discarded part
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if failed.
+ */
+typedef int (*ReplayRamDiscardState)(MemoryRegionSection *section,
+ void *opaque);
/*
* RamDiscardManagerClass:
@@ -651,36 +663,38 @@ struct RamDiscardManagerClass {
/**
* @replay_populated:
*
- * Call the #ReplayRamPopulate callback for all populated parts within the
- * #MemoryRegionSection via the #RamDiscardManager.
+ * Call the #ReplayRamDiscardState callback for all populated parts within
+ * the #MemoryRegionSection via the #RamDiscardManager.
*
* In case any call fails, no further calls are made.
*
* @rdm: the #RamDiscardManager
* @section: the #MemoryRegionSection
- * @replay_fn: the #ReplayRamPopulate callback
+ * @replay_fn: the #ReplayRamDiscardState callback
* @opaque: pointer to forward to the callback
*
* Returns 0 on success, or a negative error if any notification failed.
*/
int (*replay_populated)(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn, void *opaque);
+ ReplayRamDiscardState replay_fn, void *opaque);
/**
* @replay_discarded:
*
- * Call the #ReplayRamDiscard callback for all discarded parts within the
- * #MemoryRegionSection via the #RamDiscardManager.
+ * Call the #ReplayRamDiscardState callback for all discarded parts within
+ * the #MemoryRegionSection via the #RamDiscardManager.
*
* @rdm: the #RamDiscardManager
* @section: the #MemoryRegionSection
- * @replay_fn: the #ReplayRamDiscard callback
+ * @replay_fn: the #ReplayRamDiscardState callback
* @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
*/
- void (*replay_discarded)(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn, void *opaque);
+ int (*replay_discarded)(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn, void *opaque);
/**
* @register_listener:
@@ -721,15 +735,41 @@ uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
const MemoryRegionSection *section);
+/**
+ * ram_discard_manager_replay_populated:
+ *
+ * A wrapper to call the #RamDiscardManagerClass.replay_populated callback
+ * of the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamDiscardState callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque);
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn,
- void *opaque);
+/**
+ * ram_discard_manager_replay_discarded:
+ *
+ * A wrapper to call the #RamDiscardManagerClass.replay_discarded callback
+ * of the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamDiscardState callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque);
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl,
@@ -1212,6 +1252,36 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
void memory_region_section_free_copy(MemoryRegionSection *s);
/**
+ * memory_region_section_intersect_range: Adjust the memory section to cover
+ * the intersection with the given range.
+ *
+ * @s: the #MemoryRegionSection to be adjusted
+ * @offset: the offset of the given range in the memory region
+ * @size: the size of the given range
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static inline bool memory_region_section_intersect_range(MemoryRegionSection *s,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t start = MAX(s->offset_within_region, offset);
+ Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region),
+ s->size),
+ int128_add(int128_make64(offset),
+ int128_make64(size)));
+
+ if (int128_le(end, int128_make64(start))) {
+ return false;
+ }
+
+ s->offset_within_address_space += start - s->offset_within_region;
+ s->offset_within_region = start;
+ s->size = int128_sub(end, int128_make64(start));
+ return true;
+}
+
+/**
* memory_region_init: Initialize a memory region
*
* The region typically acts as a container for other memory regions. Use
@@ -2469,13 +2539,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
*
* This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
* that does not cover RAM, or a #MemoryRegion that already has a
- * #RamDiscardManager assigned.
+ * #RamDiscardManager assigned. Return 0 if the rdm is set successfully.
*
* @mr: the #MemoryRegion
* @rdm: #RamDiscardManager to set
*/
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
- RamDiscardManager *rdm);
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm);
/**
* memory_region_find: translate an address/size relative to a
diff --git a/include/system/ramblock.h b/include/system/ramblock.h
index d8a116b..87e847e 100644
--- a/include/system/ramblock.h
+++ b/include/system/ramblock.h
@@ -22,6 +22,10 @@
#include "exec/cpu-common.h"
#include "qemu/rcu.h"
#include "exec/ramlist.h"
+#include "system/hostmem.h"
+
+#define TYPE_RAM_BLOCK_ATTRIBUTES "ram-block-attributes"
+OBJECT_DECLARE_SIMPLE_TYPE(RamBlockAttributes, RAM_BLOCK_ATTRIBUTES)
struct RAMBlock {
struct rcu_head rcu;
@@ -42,6 +46,7 @@ struct RAMBlock {
int fd;
uint64_t fd_offset;
int guest_memfd;
+ RamBlockAttributes *attributes;
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;
@@ -91,4 +96,21 @@ struct RAMBlock {
ram_addr_t postcopy_length;
};
+struct RamBlockAttributes {
+ Object parent;
+
+ RAMBlock *ram_block;
+
+ /* 1-setting of the bitmap represents ram is populated (shared) */
+ unsigned bitmap_size;
+ unsigned long *bitmap;
+
+ QLIST_HEAD(, RamDiscardListener) rdl_list;
+};
+
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block);
+void ram_block_attributes_destroy(RamBlockAttributes *attr);
+int ram_block_attributes_state_change(RamBlockAttributes *attr, uint64_t offset,
+ uint64_t size, bool to_discard);
+
#endif
diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c
index e1f1403..00371d1 100644
--- a/migration/cpr-transfer.c
+++ b/migration/cpr-transfer.c
@@ -46,7 +46,8 @@ QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp)
MigrationAddress *addr = channel->addr;
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET &&
- addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) {
+ (addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX ||
+ addr->u.socket.type == SOCKET_ADDRESS_TYPE_FD)) {
g_autoptr(QIOChannelSocket) sioc = NULL;
SocketAddress *saddr = &addr->u.socket;
@@ -60,7 +61,9 @@ QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp)
sioc = qio_net_listener_wait_client(listener);
ioc = QIO_CHANNEL(sioc);
- trace_cpr_transfer_input(addr->u.socket.u.q_unix.path);
+ trace_cpr_transfer_input(
+ addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX ?
+ addr->u.socket.u.q_unix.path : addr->u.socket.u.fd.str);
qio_channel_set_name(ioc, "cpr-in");
return qemu_file_new_input(ioc);
diff --git a/migration/ram.c b/migration/ram.c
index d26dbd3..2140785 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -831,14 +831,22 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
bool ret;
/*
- * Clear dirty bitmap if needed. This _must_ be called before we
- * send any of the page in the chunk because we need to make sure
- * we can capture further page content changes when we sync dirty
- * log the next time. So as long as we are going to send any of
- * the page in the chunk we clear the remote dirty bitmap for all.
- * Clearing it earlier won't be a problem, but too late will.
+ * During the last stage (after source VM stopped), resetting the write
+ * protections isn't needed as we know there will be either (1) no
+ * further writes if migration will complete, or (2) migration fails
+ * at last then tracking isn't needed either.
*/
- migration_clear_memory_region_dirty_bitmap(rb, page);
+ if (!rs->last_stage) {
+ /*
+ * Clear dirty bitmap if needed. This _must_ be called before we
+ * send any of the page in the chunk because we need to make sure
+ * we can capture further page content changes when we sync dirty
+ * log the next time. So as long as we are going to send any of
+ * the page in the chunk we clear the remote dirty bitmap for all.
+ * Clearing it earlier won't be a problem, but too late will.
+ */
+ migration_clear_memory_region_dirty_bitmap(rb, page);
+ }
ret = test_and_clear_bit(page, rb->bmap);
if (ret) {
@@ -848,8 +856,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
return ret;
}
-static void dirty_bitmap_clear_section(MemoryRegionSection *section,
- void *opaque)
+static int dirty_bitmap_clear_section(MemoryRegionSection *section,
+ void *opaque)
{
const hwaddr offset = section->offset_within_region;
const hwaddr size = int128_get64(section->size);
@@ -868,6 +876,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section,
}
*cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
bitmap_clear(rb->bmap, start, npages);
+ return 0;
}
/*
@@ -3672,7 +3681,9 @@ static int ram_load_cleanup(void *opaque)
RAMBlock *rb;
RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
- qemu_ram_block_writeback(rb);
+ if (memory_region_is_nonvolatile(rb->mr)) {
+ qemu_ram_block_writeback(rb);
+ }
}
xbzrle_load_cleanup();
diff --git a/system/memory.c b/system/memory.c
index 306e9ff..76b44b8 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2106,12 +2106,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
return mr->rdm;
}
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
- RamDiscardManager *rdm)
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm)
{
g_assert(memory_region_is_ram(mr));
- g_assert(!rdm || !mr->rdm);
+ if (mr->rdm && rdm) {
+ return -EBUSY;
+ }
+
mr->rdm = rdm;
+ return 0;
}
uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
@@ -2134,7 +2138,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
@@ -2143,15 +2147,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
return rdmc->replay_populated(rdm, section, replay_fn, opaque);
}
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn,
- void *opaque)
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
g_assert(rdmc->replay_discarded);
- rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+ return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
}
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
diff --git a/system/meson.build b/system/meson.build
index 7514bf3..6d21ff9 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -17,6 +17,7 @@ system_ss.add(files(
'dma-helpers.c',
'globals.c',
'ioport.c',
+ 'ram-block-attributes.c',
'memory_mapping.c',
'memory.c',
'physmem.c',
diff --git a/system/physmem.c b/system/physmem.c
index a8a9ca3..ff0ca40 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1916,7 +1916,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
assert(new_block->guest_memfd < 0);
- ret = ram_block_discard_require(true);
+ ret = ram_block_coordinated_discard_require(true);
if (ret < 0) {
error_setg_errno(errp, -ret,
"cannot set up private guest memory: discard currently blocked");
@@ -1932,6 +1932,24 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
/*
+ * The attribute bitmap of the RamBlockAttributes is default to
+ * discarded, which mimics the behavior of kvm_set_phys_mem() when it
+ * calls kvm_set_memory_attributes_private(). This leads to a brief
+ * period of inconsistency between the creation of the RAMBlock and its
+ * mapping into the physical address space. However, this is not
+ * problematic, as no users rely on the attribute status to perform
+ * any actions during this interval.
+ */
+ new_block->attributes = ram_block_attributes_create(new_block);
+ if (!new_block->attributes) {
+ error_setg(errp, "Failed to create ram block attribute");
+ close(new_block->guest_memfd);
+ ram_block_coordinated_discard_require(false);
+ qemu_mutex_unlock_ramlist();
+ goto out_free;
+ }
+
+ /*
* Add a specific guest_memfd blocker if a generic one would not be
* added by ram_block_add_cpr_blocker.
*/
@@ -2287,8 +2305,9 @@ static void reclaim_ramblock(RAMBlock *block)
}
if (block->guest_memfd >= 0) {
+ ram_block_attributes_destroy(block->attributes);
close(block->guest_memfd);
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
}
g_free(block);
diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c
new file mode 100644
index 0000000..68e8a02
--- /dev/null
+++ b/system/ram-block-attributes.c
@@ -0,0 +1,444 @@
+/*
+ * QEMU ram block attributes
+ *
+ * Copyright Intel
+ *
+ * Author:
+ * Chenyi Qiang <chenyi.qiang@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/ramblock.h"
+#include "trace.h"
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
+ ram_block_attributes,
+ RAM_BLOCK_ATTRIBUTES,
+ OBJECT,
+ { TYPE_RAM_DISCARD_MANAGER },
+ { })
+
+static size_t
+ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
+{
+ /*
+ * Because page conversion could be manipulated in the size of at least 4K
+ * or 4K aligned, Use the host page size as the granularity to track the
+ * memory attribute.
+ */
+ g_assert(attr && attr->ram_block);
+ g_assert(attr->ram_block->page_size == qemu_real_host_page_size());
+ return attr->ram_block->page_size;
+}
+
+
+static bool
+ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section)
+{
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ const uint64_t first_bit = section->offset_within_region / block_size;
+ const uint64_t last_bit =
+ first_bit + int128_get64(section->size) / block_size - 1;
+ unsigned long first_discarded_bit;
+
+ first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1,
+ first_bit);
+ return first_discarded_bit > last_bit;
+}
+
+typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
+ void *arg);
+
+static int
+ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ return rdl->notify_populate(rdl, section);
+}
+
+static int
+ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ rdl->notify_discard(rdl, section);
+ return 0;
+}
+
+static int
+ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
+ MemoryRegionSection *section,
+ void *arg,
+ ram_block_attributes_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ int ret = 0;
+
+ first_bit = section->offset_within_region / block_size;
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit);
+
+ while (first_bit < attr->bitmap_size) {
+ MemoryRegionSection tmp = *section;
+
+ offset = first_bit * block_size;
+ last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * block_size;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ break;
+ }
+
+ ret = cb(&tmp, arg);
+ if (ret) {
+ error_report("%s: Failed to notify RAM discard listener: %s",
+ __func__, strerror(-ret));
+ break;
+ }
+
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ last_bit + 2);
+ }
+
+ return ret;
+}
+
+static int
+ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
+ MemoryRegionSection *section,
+ void *arg,
+ ram_block_attributes_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ int ret = 0;
+
+ first_bit = section->offset_within_region / block_size;
+ first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+ first_bit);
+
+ while (first_bit < attr->bitmap_size) {
+ MemoryRegionSection tmp = *section;
+
+ offset = first_bit * block_size;
+ last_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * block_size;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ break;
+ }
+
+ ret = cb(&tmp, arg);
+ if (ret) {
+ error_report("%s: Failed to notify RAM discard listener: %s",
+ __func__, strerror(-ret));
+ break;
+ }
+
+ first_bit = find_next_zero_bit(attr->bitmap,
+ attr->bitmap_size,
+ last_bit + 2);
+ }
+
+ return ret;
+}
+
+static uint64_t
+ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+
+ g_assert(mr == attr->ram_block->mr);
+ return ram_block_attributes_get_block_size(attr);
+}
+
+static void
+ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ int ret;
+
+ g_assert(section->mr == attr->ram_block->mr);
+ rdl->section = memory_region_section_new_copy(section);
+
+ QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
+
+ ret = ram_block_attributes_for_each_populated_section(attr, section, rdl,
+ ram_block_attributes_notify_populate_cb);
+ if (ret) {
+ error_report("%s: Failed to register RAM discard listener: %s",
+ __func__, strerror(-ret));
+ exit(1);
+ }
+}
+
+static void
+ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ int ret;
+
+ g_assert(rdl->section);
+ g_assert(rdl->section->mr == attr->ram_block->mr);
+
+ if (rdl->double_discard_supported) {
+ rdl->notify_discard(rdl, rdl->section);
+ } else {
+ ret = ram_block_attributes_for_each_populated_section(attr,
+ rdl->section, rdl, ram_block_attributes_notify_discard_cb);
+ if (ret) {
+ error_report("%s: Failed to unregister RAM discard listener: %s",
+ __func__, strerror(-ret));
+ exit(1);
+ }
+ }
+
+ memory_region_section_free_copy(rdl->section);
+ rdl->section = NULL;
+ QLIST_REMOVE(rdl, next);
+}
+
+typedef struct RamBlockAttributesReplayData {
+ ReplayRamDiscardState fn;
+ void *opaque;
+} RamBlockAttributesReplayData;
+
+static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamBlockAttributesReplayData *data = arg;
+
+ return data->fn(section, data->opaque);
+}
+
+static int
+ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+ g_assert(section->mr == attr->ram_block->mr);
+ return ram_block_attributes_for_each_populated_section(attr, section, &data,
+ ram_block_attributes_rdm_replay_cb);
+}
+
+static int
+ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+ g_assert(section->mr == attr->ram_block->mr);
+ return ram_block_attributes_for_each_discarded_section(attr, section, &data,
+ ram_block_attributes_rdm_replay_cb);
+}
+
+static bool
+ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
+ uint64_t size)
+{
+ MemoryRegion *mr = attr->ram_block->mr;
+
+ g_assert(mr);
+
+ uint64_t region_size = memory_region_size(mr);
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+
+ if (!QEMU_IS_ALIGNED(offset, block_size) ||
+ !QEMU_IS_ALIGNED(size, block_size)) {
+ return false;
+ }
+ if (offset + size <= offset) {
+ return false;
+ }
+ if (offset + size > region_size) {
+ return false;
+ }
+ return true;
+}
+
+static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
+ uint64_t offset,
+ uint64_t size)
+{
+ RamDiscardListener *rdl;
+
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ continue;
+ }
+ rdl->notify_discard(rdl, &tmp);
+ }
+}
+
+static int
+ram_block_attributes_notify_populate(RamBlockAttributes *attr,
+ uint64_t offset, uint64_t size)
+{
+ RamDiscardListener *rdl;
+ int ret = 0;
+
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ continue;
+ }
+ ret = rdl->notify_populate(rdl, &tmp);
+ if (ret) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int ram_block_attributes_state_change(RamBlockAttributes *attr,
+ uint64_t offset, uint64_t size,
+ bool to_discard)
+{
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ const unsigned long first_bit = offset / block_size;
+ const unsigned long nbits = size / block_size;
+ const unsigned long last_bit = first_bit + nbits - 1;
+ const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit) > last_bit;
+ const bool is_populated = find_next_zero_bit(attr->bitmap,
+ attr->bitmap_size, first_bit) > last_bit;
+ unsigned long bit;
+ int ret = 0;
+
+ if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
+ error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
+ "0x%" PRIx64, __func__, offset, size);
+ return -EINVAL;
+ }
+
+ trace_ram_block_attributes_state_change(offset, size,
+ is_discarded ? "discarded" :
+ is_populated ? "populated" :
+ "mixture",
+ to_discard ? "discarded" :
+ "populated");
+ if (to_discard) {
+ if (is_discarded) {
+ /* Already private */
+ } else if (is_populated) {
+ /* Completely shared */
+ bitmap_clear(attr->bitmap, first_bit, nbits);
+ ram_block_attributes_notify_discard(attr, offset, size);
+ } else {
+ /* Unexpected mixture: process individual blocks */
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
+ if (!test_bit(bit, attr->bitmap)) {
+ continue;
+ }
+ clear_bit(bit, attr->bitmap);
+ ram_block_attributes_notify_discard(attr, bit * block_size,
+ block_size);
+ }
+ }
+ } else {
+ if (is_populated) {
+ /* Already shared */
+ } else if (is_discarded) {
+ /* Completely private */
+ bitmap_set(attr->bitmap, first_bit, nbits);
+ ret = ram_block_attributes_notify_populate(attr, offset, size);
+ } else {
+ /* Unexpected mixture: process individual blocks */
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
+ if (test_bit(bit, attr->bitmap)) {
+ continue;
+ }
+ set_bit(bit, attr->bitmap);
+ ret = ram_block_attributes_notify_populate(attr,
+ bit * block_size,
+ block_size);
+ if (ret) {
+ break;
+ }
+ }
+ }
+ }
+
+ return ret;
+}
+
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
+{
+ const int block_size = qemu_real_host_page_size();
+ RamBlockAttributes *attr;
+ MemoryRegion *mr = ram_block->mr;
+
+ attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
+
+ attr->ram_block = ram_block;
+ if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) {
+ object_unref(OBJECT(attr));
+ return NULL;
+ }
+ attr->bitmap_size =
+ ROUND_UP(int128_get64(mr->size), block_size) / block_size;
+ attr->bitmap = bitmap_new(attr->bitmap_size);
+
+ return attr;
+}
+
+void ram_block_attributes_destroy(RamBlockAttributes *attr)
+{
+ g_assert(attr);
+
+ g_free(attr->bitmap);
+ memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL);
+ object_unref(OBJECT(attr));
+}
+
+static void ram_block_attributes_init(Object *obj)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj);
+
+ QLIST_INIT(&attr->rdl_list);
+}
+
+static void ram_block_attributes_finalize(Object *obj)
+{
+}
+
+static void ram_block_attributes_class_init(ObjectClass *klass,
+ const void *data)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+
+ rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;
+ rdmc->register_listener = ram_block_attributes_rdm_register_listener;
+ rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;
+ rdmc->is_populated = ram_block_attributes_rdm_is_populated;
+ rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
+ rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
+}
diff --git a/system/trace-events b/system/trace-events
index be12ebf..82856e4 100644
--- a/system/trace-events
+++ b/system/trace-events
@@ -52,3 +52,6 @@ dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+
+# ram-block-attributes.c
+ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'"
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
deleted file mode 100644
index b08b49b..0000000
--- a/tests/qtest/migration-helpers.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- * QTest migration helpers
- *
- * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates
- * based on the vhost-user-test.c that is:
- * Copyright (c) 2014 Virtual Open Systems Sarl.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu/ctype.h"
-#include "qobject/qjson.h"
-#include "qapi/qapi-visit-sockets.h"
-#include "qapi/qobject-input-visitor.h"
-#include "qapi/error.h"
-#include "qobject/qlist.h"
-#include "qemu/cutils.h"
-#include "qemu/memalign.h"
-
-#include "migration-helpers.h"
-
-/*
- * Number of seconds we wait when looking for migration
- * status changes, to avoid test suite hanging forever
- * when things go wrong. Needs to be higher enough to
- * avoid false positives on loaded hosts.
- */
-#define MIGRATION_STATUS_WAIT_TIMEOUT 120
-
-static char *SocketAddress_to_str(SocketAddress *addr)
-{
- switch (addr->type) {
- case SOCKET_ADDRESS_TYPE_INET:
- return g_strdup_printf("tcp:%s:%s",
- addr->u.inet.host,
- addr->u.inet.port);
- case SOCKET_ADDRESS_TYPE_UNIX:
- return g_strdup_printf("unix:%s",
- addr->u.q_unix.path);
- case SOCKET_ADDRESS_TYPE_FD:
- return g_strdup_printf("fd:%s", addr->u.fd.str);
- case SOCKET_ADDRESS_TYPE_VSOCK:
- return g_strdup_printf("vsock:%s:%s",
- addr->u.vsock.cid,
- addr->u.vsock.port);
- default:
- return g_strdup("unknown address type");
- }
-}
-
-static QDict *SocketAddress_to_qdict(SocketAddress *addr)
-{
- QDict *dict = qdict_new();
-
- switch (addr->type) {
- case SOCKET_ADDRESS_TYPE_INET:
- qdict_put_str(dict, "type", "inet");
- qdict_put_str(dict, "host", addr->u.inet.host);
- qdict_put_str(dict, "port", addr->u.inet.port);
- break;
- case SOCKET_ADDRESS_TYPE_UNIX:
- qdict_put_str(dict, "type", "unix");
- qdict_put_str(dict, "path", addr->u.q_unix.path);
- break;
- case SOCKET_ADDRESS_TYPE_FD:
- qdict_put_str(dict, "type", "fd");
- qdict_put_str(dict, "str", addr->u.fd.str);
- break;
- case SOCKET_ADDRESS_TYPE_VSOCK:
- qdict_put_str(dict, "type", "vsock");
- qdict_put_str(dict, "cid", addr->u.vsock.cid);
- qdict_put_str(dict, "port", addr->u.vsock.port);
- break;
- default:
- g_assert_not_reached();
- }
-
- return dict;
-}
-
-static SocketAddressList *migrate_get_socket_address(QTestState *who)
-{
- QDict *rsp;
- SocketAddressList *addrs;
- Visitor *iv = NULL;
- QObject *object;
-
- rsp = migrate_query(who);
- object = qdict_get(rsp, "socket-address");
-
- iv = qobject_input_visitor_new(object);
- visit_type_SocketAddressList(iv, NULL, &addrs, &error_abort);
- visit_free(iv);
-
- qobject_unref(rsp);
- return addrs;
-}
-
-static char *
-migrate_get_connect_uri(QTestState *who)
-{
- SocketAddressList *addrs;
- char *connect_uri;
-
- addrs = migrate_get_socket_address(who);
- connect_uri = SocketAddress_to_str(addrs->value);
-
- qapi_free_SocketAddressList(addrs);
- return connect_uri;
-}
-
-static QDict *
-migrate_get_connect_qdict(QTestState *who)
-{
- SocketAddressList *addrs;
- QDict *connect_qdict;
-
- addrs = migrate_get_socket_address(who);
- connect_qdict = SocketAddress_to_qdict(addrs->value);
-
- qapi_free_SocketAddressList(addrs);
- return connect_qdict;
-}
-
-static void migrate_set_ports(QTestState *to, QList *channel_list)
-{
- QDict *addr;
- QListEntry *entry;
- const char *addr_port = NULL;
-
- addr = migrate_get_connect_qdict(to);
-
- QLIST_FOREACH_ENTRY(channel_list, entry) {
- QDict *channel = qobject_to(QDict, qlist_entry_obj(entry));
- QDict *addrdict = qdict_get_qdict(channel, "addr");
-
- if (qdict_haskey(addrdict, "port") &&
- qdict_haskey(addr, "port") &&
- (strcmp(qdict_get_str(addrdict, "port"), "0") == 0)) {
- addr_port = qdict_get_str(addr, "port");
- qdict_put_str(addrdict, "port", addr_port);
- }
- }
-
- qobject_unref(addr);
-}
-
-bool migrate_watch_for_events(QTestState *who, const char *name,
- QDict *event, void *opaque)
-{
- QTestMigrationState *state = opaque;
-
- if (g_str_equal(name, "STOP")) {
- state->stop_seen = true;
- return true;
- } else if (g_str_equal(name, "SUSPEND")) {
- state->suspend_seen = true;
- return true;
- } else if (g_str_equal(name, "RESUME")) {
- state->resume_seen = true;
- return true;
- }
-
- return false;
-}
-
-void migrate_qmp_fail(QTestState *who, const char *uri,
- const char *channels, const char *fmt, ...)
-{
- va_list ap;
- QDict *args, *err;
-
- va_start(ap, fmt);
- args = qdict_from_vjsonf_nofail(fmt, ap);
- va_end(ap);
-
- g_assert(!qdict_haskey(args, "uri"));
- if (uri) {
- qdict_put_str(args, "uri", uri);
- }
-
- g_assert(!qdict_haskey(args, "channels"));
- if (channels) {
- QObject *channels_obj = qobject_from_json(channels, &error_abort);
- qdict_put_obj(args, "channels", channels_obj);
- }
-
- err = qtest_qmp_assert_failure_ref(
- who, "{ 'execute': 'migrate', 'arguments': %p}", args);
-
- g_assert(qdict_haskey(err, "desc"));
-
- qobject_unref(err);
-}
-
-/*
- * Send QMP command "migrate".
- * Arguments are built from @fmt... (formatted like
- * qobject_from_jsonf_nofail()) with "uri": @uri spliced in.
- */
-void migrate_qmp(QTestState *who, QTestState *to, const char *uri,
- const char *channels, const char *fmt, ...)
-{
- va_list ap;
- QDict *args;
- g_autofree char *connect_uri = NULL;
-
- va_start(ap, fmt);
- args = qdict_from_vjsonf_nofail(fmt, ap);
- va_end(ap);
-
- g_assert(!qdict_haskey(args, "uri"));
- if (uri) {
- qdict_put_str(args, "uri", uri);
- } else if (!channels) {
- connect_uri = migrate_get_connect_uri(to);
- qdict_put_str(args, "uri", connect_uri);
- }
-
- g_assert(!qdict_haskey(args, "channels"));
- if (channels) {
- QObject *channels_obj = qobject_from_json(channels, &error_abort);
- QList *channel_list = qobject_to(QList, channels_obj);
- migrate_set_ports(to, channel_list);
- qdict_put_obj(args, "channels", channels_obj);
- }
-
- qtest_qmp_assert_success(who,
- "{ 'execute': 'migrate', 'arguments': %p}", args);
-}
-
-void migrate_set_capability(QTestState *who, const char *capability,
- bool value)
-{
- qtest_qmp_assert_success(who,
- "{ 'execute': 'migrate-set-capabilities',"
- "'arguments': { "
- "'capabilities': [ { "
- "'capability': %s, 'state': %i } ] } }",
- capability, value);
-}
-
-void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...)
-{
- va_list ap;
- QDict *args, *rsp;
-
- va_start(ap, fmt);
- args = qdict_from_vjsonf_nofail(fmt, ap);
- va_end(ap);
-
- g_assert(!qdict_haskey(args, "uri"));
- qdict_put_str(args, "uri", uri);
-
- /* This function relies on the event to work, make sure it's enabled */
- migrate_set_capability(to, "events", true);
-
- rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
- args);
-
- if (!qdict_haskey(rsp, "return")) {
- g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(rsp), true);
- g_test_message("%s", s->str);
- }
-
- g_assert(qdict_haskey(rsp, "return"));
- qobject_unref(rsp);
-
- migration_event_wait(to, "setup");
-}
-
-/*
- * Note: caller is responsible to free the returned object via
- * qobject_unref() after use
- */
-QDict *migrate_query(QTestState *who)
-{
- return qtest_qmp_assert_success_ref(who, "{ 'execute': 'query-migrate' }");
-}
-
-QDict *migrate_query_not_failed(QTestState *who)
-{
- const char *status;
- QDict *rsp = migrate_query(who);
- status = qdict_get_str(rsp, "status");
- if (g_str_equal(status, "failed")) {
- g_printerr("query-migrate shows failed migration: %s\n",
- qdict_get_str(rsp, "error-desc"));
- }
- g_assert(!g_str_equal(status, "failed"));
- return rsp;
-}
-
-/*
- * Note: caller is responsible to free the returned object via
- * g_free() after use
- */
-static gchar *migrate_query_status(QTestState *who)
-{
- QDict *rsp_return = migrate_query(who);
- gchar *status = g_strdup(qdict_get_str(rsp_return, "status"));
-
- g_assert(status);
- qobject_unref(rsp_return);
-
- return status;
-}
-
-static bool check_migration_status(QTestState *who, const char *goal,
- const char **ungoals)
-{
- bool ready;
- char *current_status;
- const char **ungoal;
-
- current_status = migrate_query_status(who);
- ready = strcmp(current_status, goal) == 0;
- if (!ungoals) {
- g_assert_cmpstr(current_status, !=, "failed");
- /*
- * If looking for a state other than completed,
- * completion of migration would cause the test to
- * hang.
- */
- if (strcmp(goal, "completed") != 0) {
- g_assert_cmpstr(current_status, !=, "completed");
- }
- } else {
- for (ungoal = ungoals; *ungoal; ungoal++) {
- g_assert_cmpstr(current_status, !=, *ungoal);
- }
- }
- g_free(current_status);
- return ready;
-}
-
-void wait_for_migration_status(QTestState *who,
- const char *goal, const char **ungoals)
-{
- g_test_timer_start();
- while (!check_migration_status(who, goal, ungoals)) {
- usleep(1000);
-
- g_assert(g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT);
- }
-}
-
-void wait_for_migration_complete(QTestState *who)
-{
- wait_for_migration_status(who, "completed", NULL);
-}
-
-void wait_for_migration_fail(QTestState *from, bool allow_active)
-{
- g_test_timer_start();
- QDict *rsp_return;
- char *status;
- bool failed;
-
- do {
- status = migrate_query_status(from);
- bool result = !strcmp(status, "setup") || !strcmp(status, "failed") ||
- (allow_active && !strcmp(status, "active"));
- if (!result) {
- fprintf(stderr, "%s: unexpected status status=%s allow_active=%d\n",
- __func__, status, allow_active);
- }
- g_assert(result);
- failed = !strcmp(status, "failed");
- g_free(status);
-
- g_assert(g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT);
- } while (!failed);
-
- /* Is the machine currently running? */
- rsp_return = qtest_qmp_assert_success_ref(from,
- "{ 'execute': 'query-status' }");
- g_assert(qdict_haskey(rsp_return, "running"));
- g_assert(qdict_get_bool(rsp_return, "running"));
- qobject_unref(rsp_return);
-}
-
-char *find_common_machine_version(const char *mtype, const char *var1,
- const char *var2)
-{
- g_autofree char *type1 = qtest_resolve_machine_alias(var1, mtype);
- g_autofree char *type2 = qtest_resolve_machine_alias(var2, mtype);
-
- g_assert(type1 && type2);
-
- if (g_str_equal(type1, type2)) {
- /* either can be used */
- return g_strdup(type1);
- }
-
- if (qtest_has_machine_with_env(var2, type1)) {
- return g_strdup(type1);
- }
-
- if (qtest_has_machine_with_env(var1, type2)) {
- return g_strdup(type2);
- }
-
- g_test_message("No common machine version for machine type '%s' between "
- "binaries %s and %s", mtype, getenv(var1), getenv(var2));
- g_assert_not_reached();
-}
-
-char *resolve_machine_version(const char *alias, const char *var1,
- const char *var2)
-{
- const char *mname = g_getenv("QTEST_QEMU_MACHINE_TYPE");
- g_autofree char *machine_name = NULL;
-
- if (mname) {
- const char *dash = strrchr(mname, '-');
- const char *dot = strrchr(mname, '.');
-
- machine_name = g_strdup(mname);
-
- if (dash && dot) {
- assert(qtest_has_machine(machine_name));
- return g_steal_pointer(&machine_name);
- }
- /* else: probably an alias, let it be resolved below */
- } else {
- /* use the hardcoded alias */
- machine_name = g_strdup(alias);
- }
-
- return find_common_machine_version(machine_name, var1, var2);
-}
-
-typedef struct {
- char *name;
- void (*func)(void);
-} MigrationTest;
-
-static void migration_test_destroy(gpointer data)
-{
- MigrationTest *test = (MigrationTest *)data;
-
- g_free(test->name);
- g_free(test);
-}
-
-static void migration_test_wrapper(const void *data)
-{
- MigrationTest *test = (MigrationTest *)data;
-
- g_test_message("Running /%s%s", qtest_get_arch(), test->name);
- test->func();
-}
-
-void migration_test_add(const char *path, void (*fn)(void))
-{
- MigrationTest *test = g_new0(MigrationTest, 1);
-
- test->func = fn;
- test->name = g_strdup(path);
-
- qtest_add_data_func_full(path, test, migration_test_wrapper,
- migration_test_destroy);
-}
-
-#ifdef O_DIRECT
-/*
- * Probe for O_DIRECT support on the filesystem. Since this is used
- * for tests, be conservative, if anything fails, assume it's
- * unsupported.
- */
-bool probe_o_direct_support(const char *tmpfs)
-{
- g_autofree char *filename = g_strdup_printf("%s/probe-o-direct", tmpfs);
- int fd, flags = O_CREAT | O_RDWR | O_TRUNC | O_DIRECT;
- void *buf;
- ssize_t ret, len;
- uint64_t offset;
-
- fd = open(filename, flags, 0660);
- if (fd < 0) {
- unlink(filename);
- return false;
- }
-
- /*
- * Using 1MB alignment as conservative choice to satisfy any
- * plausible architecture default page size, and/or filesystem
- * alignment restrictions.
- */
- len = 0x100000;
- offset = 0x100000;
-
- buf = qemu_try_memalign(len, len);
- g_assert(buf);
-
- ret = pwrite(fd, buf, len, offset);
- unlink(filename);
- g_free(buf);
-
- if (ret < 0) {
- return false;
- }
-
- return true;
-}
-#endif
-
-/*
- * Wait for a "MIGRATION" event. This is what Libvirt uses to track
- * migration status changes.
- */
-void migration_event_wait(QTestState *s, const char *target)
-{
- QDict *response, *data;
- const char *status;
- bool found;
-
- do {
- response = qtest_qmp_eventwait_ref(s, "MIGRATION");
- data = qdict_get_qdict(response, "data");
- g_assert(data);
- status = qdict_get_str(data, "status");
- found = (strcmp(status, target) == 0);
- qobject_unref(response);
- } while (!found);
-}
diff --git a/tests/qtest/migration/cpr-tests.c b/tests/qtest/migration/cpr-tests.c
index 5536e14..5e764a6 100644
--- a/tests/qtest/migration/cpr-tests.c
+++ b/tests/qtest/migration/cpr-tests.c
@@ -60,13 +60,12 @@ static void test_mode_transfer_common(bool incoming_defer)
g_autofree char *cpr_path = g_strdup_printf("%s/cpr.sock", tmpfs);
g_autofree char *mig_path = g_strdup_printf("%s/migsocket", tmpfs);
g_autofree char *uri = g_strdup_printf("unix:%s", mig_path);
+ g_autofree char *opts_target = NULL;
const char *opts = "-machine aux-ram-share=on -nodefaults";
g_autofree const char *cpr_channel = g_strdup_printf(
"cpr,addr.transport=socket,addr.type=unix,addr.path=%s",
cpr_path);
- g_autofree char *opts_target = g_strdup_printf("-incoming %s %s",
- cpr_channel, opts);
g_autofree char *connect_channels = g_strdup_printf(
"[ { 'channel-type': 'main',"
@@ -75,6 +74,17 @@ static void test_mode_transfer_common(bool incoming_defer)
" 'path': '%s' } } ]",
mig_path);
+ /*
+ * Set up a UNIX domain socket for the CPR channel before
+ * launching the destination VM, to avoid timing issues
+ * during connection setup.
+ */
+ int cpr_sockfd = qtest_socket_server(cpr_path);
+ g_assert(cpr_sockfd >= 0);
+
+ opts_target = g_strdup_printf("-incoming cpr,addr.transport=socket,"
+ "addr.type=fd,addr.str=%d %s",
+ cpr_sockfd, opts);
MigrateCommon args = {
.start.opts_source = opts,
.start.opts_target = opts_target,
diff --git a/ui/vnc.c b/ui/vnc.c
index d095cd7..e9c30aa 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3385,6 +3385,16 @@ static const DisplayChangeListenerOps dcl_ops = {
.dpy_cursor_define = vnc_dpy_cursor_define,
};
+static void vmstate_change_handler(void *opaque, bool running, RunState state)
+{
+ VncDisplay *vd = opaque;
+
+ if (state != RUN_STATE_RUNNING) {
+ return;
+ }
+ update_displaychangelistener(&vd->dcl, VNC_REFRESH_INTERVAL_BASE);
+}
+
void vnc_display_init(const char *id, Error **errp)
{
VncDisplay *vd;
@@ -3421,6 +3431,8 @@ void vnc_display_init(const char *id, Error **errp)
vd->dcl.ops = &dcl_ops;
register_displaychangelistener(&vd->dcl);
vd->kbd = qkbd_state_init(vd->dcl.con);
+ vd->vmstate_handler_entry = qemu_add_vm_change_state_handler(
+ &vmstate_change_handler, vd);
}
diff --git a/ui/vnc.h b/ui/vnc.h
index 02613aa..b3e0726 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -185,6 +185,8 @@ struct VncDisplay
#endif
AudioState *audio_state;
+
+ VMChangeStateEntry *vmstate_handler_entry;
};
typedef struct VncTight {