| author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-06-24 10:38:33 -0400 |
|---|---|---|
| committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-06-24 10:38:33 -0400 |
| commit | 24c00b754121f3569ea9e68f5f188747cf5b8439 | |
| tree | ad53b951fc205a16ac4af03b4a2a365bc0e38324 | |
| parent | 3d40db0efc22520fa6c399cf73960dced423b048 | |
| parent | 2fde3fb916079ee0ff0fc26d9446c813b1d5cc28 | |
Merge tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu into staging
Migration / Memory pull
- Yanfei's optimization to skip log_clear during completion
- Fabiano's cleanup to remove the leftover migration-helpers.c file
- Juraj's VNC fix for the display pausing after migration
- Jaehoon's CPR test fix for a possible race in server establishment
- Chenyi's initial support for VFIO enablement with guest-memfd
# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCaFmzWhIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wbWYQD/dz08tyaL2J4EHESfBsW4Z1rEggVOM0cB
# hlXnvzf/Pb4A/0X3Hn18bOxfPAZOr8NggS5AKgzCCYVeQEWQA2Jj8hwC
# =kcTN
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 23 Jun 2025 16:04:42 EDT
# gpg: using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg: issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [full]
# gpg: aka "Peter Xu <peterx@redhat.com>" [full]
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D D1A9 3B5F CCCD F3AB D706
* tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu:
physmem: Support coordinated discarding of RAM with guest_memfd
ram-block-attributes: Introduce RamBlockAttributes to manage RAMBlock with guest_memfd
memory: Unify the definiton of ReplayRamPopulate() and ReplayRamDiscard()
memory: Change memory_region_set_ram_discard_manager() to return the result
memory: Export a helper to get intersection of a MemoryRegionSection with a given range
migration: Don't sync volatile memory after migration completes
tests/migration: Setup pre-listened cpr.sock to remove race-condition.
migration: Support fd-based socket address in cpr_transfer_input
ui/vnc: Update display update interval when VM state changes to RUNNING
tests/qtest: Remove migration-helpers.c
migration/ram: avoid to do log clear in the last round
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
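
One of the refactorings in this pull replaces virtio-mem's private virtio_mem_intersect_memory_section() with a generic memory_region_section_intersect_range() helper exported from include/system/memory.h. The helper simply clamps a section to the window [offset, offset + size) inside its memory region. The sketch below models that clamping with a simplified stand-in struct rather than QEMU's MemoryRegionSection; the real helper does the end-of-range arithmetic in Int128 to avoid overflow, while plain 64-bit math is assumed to be safe here.

```c
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Simplified stand-in for QEMU's MemoryRegionSection. */
typedef struct {
    uint64_t offset_within_region;
    uint64_t offset_within_address_space;
    uint64_t size;
} Section;

/*
 * Clamp the section to its intersection with [offset, offset + size)
 * within the same memory region; returns false if the intersection is
 * empty.
 */
static bool section_intersect_range(Section *s, uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + s->size, offset + size);

    if (end <= start) {
        return false;
    }
    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = end - start;
    return true;
}

int main(void)
{
    Section s = {
        .offset_within_region = 0x1000,
        .offset_within_address_space = 0x100000,
        .size = 0x4000,
    };

    /* Intersect the section with the range [0x2000, 0x3000) of its region. */
    if (section_intersect_range(&s, 0x2000, 0x1000)) {
        printf("region off 0x%" PRIx64 ", AS off 0x%" PRIx64
               ", size 0x%" PRIx64 "\n",
               s.offset_within_region, s.offset_within_address_space, s.size);
    }
    return 0;
}
```

With one definition in a shared header, virtio-mem and the new ram-block-attributes code can reuse the same iteration pattern over populated and discarded ranges.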
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | MAINTAINERS | 1 |
| -rw-r--r-- | accel/kvm/kvm-all.c | 9 |
| -rw-r--r-- | hw/virtio/virtio-mem.c | 83 |
| -rw-r--r-- | include/system/memory.h | 110 |
| -rw-r--r-- | include/system/ramblock.h | 22 |
| -rw-r--r-- | migration/cpr-transfer.c | 7 |
| -rw-r--r-- | migration/ram.c | 31 |
| -rw-r--r-- | system/memory.c | 22 |
| -rw-r--r-- | system/meson.build | 1 |
| -rw-r--r-- | system/physmem.c | 23 |
| -rw-r--r-- | system/ram-block-attributes.c | 444 |
| -rw-r--r-- | system/trace-events | 3 |
| -rw-r--r-- | tests/qtest/migration-helpers.c | 530 |
| -rw-r--r-- | tests/qtest/migration/cpr-tests.c | 14 |
| -rw-r--r-- | ui/vnc.c | 12 |
| -rw-r--r-- | ui/vnc.h | 2 |
16 files changed, 688 insertions, 626 deletions
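
Most of the new code is in system/ram-block-attributes.c, which records the shared (populated) versus private (discarded) state of a guest_memfd-backed RAMBlock in a host-page-granular bitmap and notifies registered RamDiscardListeners when ranges are converted. The toy model below sketches only that bookkeeping idea under simplified assumptions: Attributes, NBLOCKS and the notify_* helpers are made-up stand-ins rather than QEMU APIs, and error handling for partial failures is omitted.

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NBLOCKS 8   /* hypothetical number of host-page-sized blocks */

typedef struct {
    /* true = shared (populated), false = private (discarded) */
    bool populated[NBLOCKS];
} Attributes;

/* Stand-ins for the RamDiscardListener notifications. */
static void notify_discard(unsigned block)
{
    printf("discard  block %u\n", block);
}

static void notify_populate(unsigned block)
{
    printf("populate block %u\n", block);
}

/*
 * Convert blocks [first, first + count) to private (to_discard = true)
 * or shared (to_discard = false).  Blocks already in the requested
 * state are skipped, mirroring the "mixture: process individual
 * blocks" path of the real state-change handler.
 */
static void state_change(Attributes *attr, unsigned first, unsigned count,
                         bool to_discard)
{
    bool want_populated = !to_discard;

    for (unsigned b = first; b < first + count; b++) {
        if (attr->populated[b] == want_populated) {
            continue;   /* already in the requested state */
        }
        attr->populated[b] = want_populated;
        if (to_discard) {
            notify_discard(b);
        } else {
            notify_populate(b);
        }
    }
}

int main(void)
{
    Attributes attr;

    memset(&attr, 0, sizeof(attr));     /* everything starts private */

    state_change(&attr, 0, 4, false);   /* guest shares blocks 0..3 */
    state_change(&attr, 2, 4, true);    /* then converts 2..5 back to private */
    return 0;
}
```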
diff --git a/MAINTAINERS b/MAINTAINERS index 94c4076..27f4fe3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3175,6 +3175,7 @@ F: system/memory.c F: system/memory_mapping.c F: system/physmem.c F: system/memory-internal.h +F: system/ram-block-attributes.c F: scripts/coccinelle/memory-region-housekeeping.cocci Memory devices diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index a317783..d095d1b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -3091,6 +3091,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; rb = qemu_ram_block_from_host(addr, false, &offset); + ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm), + offset, size, to_private); + if (ret) { + error_report("Failed to notify the listener the state change of " + "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + if (to_private) { if (rb->page_size != qemu_real_host_page_size()) { /* diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index a3d1a67..c46f6f9 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, return ret; } -/* - * Adjust the memory section to cover the intersection with the given range. - * - * Returns false if the intersection is empty, otherwise returns true. - */ -static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, - uint64_t offset, uint64_t size) -{ - uint64_t start = MAX(s->offset_within_region, offset); - uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), - offset + size); - - if (end <= start) { - return false; - } - - s->offset_within_address_space += start - s->offset_within_region; - s->offset_within_region = start; - s->size = int128_make64(end - start); - return true; -} - typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, @@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl->notify_discard(rdl, &tmp); @@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } ret = rdl->notify_populate(rdl, &tmp); @@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, if (rdl2 == 
rdl) { break; } - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl2->notify_discard(rdl2, &tmp); @@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) } /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; + } + + /* * We don't know at this point whether shared RAM is migrated using * QEMU or migrated using the file content. "x-ignore-shared" will be * configured after realizing the device. So in case we have an @@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); return; } @@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj); vmem->system_reset->vmem = vmem; qemu_register_resettable(obj); - - /* - * Set ourselves as RamDiscardManager before the plug handler maps the - * memory region and exposes it via an address space. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, - RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - /* - * The unplug handler unmapped the memory region, it cannot be - * found via an address space anymore. Unset ourselves. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); - qemu_unregister_resettable(OBJECT(vmem->system_reset)); object_unref(OBJECT(vmem->system_reset)); @@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. 
+ */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); } @@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, } struct VirtIOMEMReplayData { - void *fn; + ReplayRamDiscardState fn; void *opaque; }; @@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) { struct VirtIOMEMReplayData *data = arg; - return ((ReplayRamPopulate)data->fn)(s, data->opaque); + return data->fn(s, data->opaque); } static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *s, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); @@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, { struct VirtIOMEMReplayData *data = arg; - ((ReplayRamDiscard)data->fn)(s, data->opaque); - return 0; + return data->fn(s, data->opaque); } -static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *s, - ReplayRamDiscard replay_fn, - void *opaque) +static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscardState replay_fn, + void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); struct VirtIOMEMReplayData data = { @@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, }; g_assert(s->mr == &vmem->memdev->mr); - virtio_mem_for_each_unplugged_section(vmem, s, &data, - virtio_mem_rdm_replay_discarded_cb); + return virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); } static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, diff --git a/include/system/memory.h b/include/system/memory.h index 0848690..46248d4 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -576,8 +576,20 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl, rdl->double_discard_supported = double_discard_supported; } -typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); -typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); +/** + * typedef ReplayRamDiscardState: + * + * The callback handler for #RamDiscardManagerClass.replay_populated/ + * #RamDiscardManagerClass.replay_discarded to invoke on populated/discarded + * parts. + * + * @section: the #MemoryRegionSection of populated/discarded part + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if failed. + */ +typedef int (*ReplayRamDiscardState)(MemoryRegionSection *section, + void *opaque); /* * RamDiscardManagerClass: @@ -651,36 +663,38 @@ struct RamDiscardManagerClass { /** * @replay_populated: * - * Call the #ReplayRamPopulate callback for all populated parts within the - * #MemoryRegionSection via the #RamDiscardManager. + * Call the #ReplayRamDiscardState callback for all populated parts within + * the #MemoryRegionSection via the #RamDiscardManager. * * In case any call fails, no further calls are made. * * @rdm: the #RamDiscardManager * @section: the #MemoryRegionSection - * @replay_fn: the #ReplayRamPopulate callback + * @replay_fn: the #ReplayRamDiscardState callback * @opaque: pointer to forward to the callback * * Returns 0 on success, or a negative error if any notification failed. 
*/ int (*replay_populated)(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, void *opaque); + ReplayRamDiscardState replay_fn, void *opaque); /** * @replay_discarded: * - * Call the #ReplayRamDiscard callback for all discarded parts within the - * #MemoryRegionSection via the #RamDiscardManager. + * Call the #ReplayRamDiscardState callback for all discarded parts within + * the #MemoryRegionSection via the #RamDiscardManager. * * @rdm: the #RamDiscardManager * @section: the #MemoryRegionSection - * @replay_fn: the #ReplayRamDiscard callback + * @replay_fn: the #ReplayRamDiscardState callback * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. */ - void (*replay_discarded)(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, void *opaque); + int (*replay_discarded)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, void *opaque); /** * @register_listener: @@ -721,15 +735,41 @@ uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, const MemoryRegionSection *section); +/** + * ram_discard_manager_replay_populated: + * + * A wrapper to call the #RamDiscardManagerClass.replay_populated callback + * of the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscardState callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque); -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque); +/** + * ram_discard_manager_replay_discarded: + * + * A wrapper to call the #RamDiscardManagerClass.replay_discarded callback + * of the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscardState callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ +int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque); void ram_discard_manager_register_listener(RamDiscardManager *rdm, RamDiscardListener *rdl, @@ -1212,6 +1252,36 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s); void memory_region_section_free_copy(MemoryRegionSection *s); /** + * memory_region_section_intersect_range: Adjust the memory section to cover + * the intersection with the given range. + * + * @s: the #MemoryRegionSection to be adjusted + * @offset: the offset of the given range in the memory region + * @size: the size of the given range + * + * Returns false if the intersection is empty, otherwise returns true. 
+ */ +static inline bool memory_region_section_intersect_range(MemoryRegionSection *s, + uint64_t offset, + uint64_t size) +{ + uint64_t start = MAX(s->offset_within_region, offset); + Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region), + s->size), + int128_add(int128_make64(offset), + int128_make64(size))); + + if (int128_le(end, int128_make64(start))) { + return false; + } + + s->offset_within_address_space += start - s->offset_within_region; + s->offset_within_region = start; + s->size = int128_sub(end, int128_make64(start)); + return true; +} + +/** * memory_region_init: Initialize a memory region * * The region typically acts as a container for other memory regions. Use @@ -2469,13 +2539,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) * * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion * that does not cover RAM, or a #MemoryRegion that already has a - * #RamDiscardManager assigned. + * #RamDiscardManager assigned. Return 0 if the rdm is set successfully. * * @mr: the #MemoryRegion * @rdm: #RamDiscardManager to set */ -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm); +int memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm); /** * memory_region_find: translate an address/size relative to a diff --git a/include/system/ramblock.h b/include/system/ramblock.h index d8a116b..87e847e 100644 --- a/include/system/ramblock.h +++ b/include/system/ramblock.h @@ -22,6 +22,10 @@ #include "exec/cpu-common.h" #include "qemu/rcu.h" #include "exec/ramlist.h" +#include "system/hostmem.h" + +#define TYPE_RAM_BLOCK_ATTRIBUTES "ram-block-attributes" +OBJECT_DECLARE_SIMPLE_TYPE(RamBlockAttributes, RAM_BLOCK_ATTRIBUTES) struct RAMBlock { struct rcu_head rcu; @@ -42,6 +46,7 @@ struct RAMBlock { int fd; uint64_t fd_offset; int guest_memfd; + RamBlockAttributes *attributes; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; @@ -91,4 +96,21 @@ struct RAMBlock { ram_addr_t postcopy_length; }; +struct RamBlockAttributes { + Object parent; + + RAMBlock *ram_block; + + /* 1-setting of the bitmap represents ram is populated (shared) */ + unsigned bitmap_size; + unsigned long *bitmap; + + QLIST_HEAD(, RamDiscardListener) rdl_list; +}; + +RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block); +void ram_block_attributes_destroy(RamBlockAttributes *attr); +int ram_block_attributes_state_change(RamBlockAttributes *attr, uint64_t offset, + uint64_t size, bool to_discard); + #endif diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c index e1f1403..00371d1 100644 --- a/migration/cpr-transfer.c +++ b/migration/cpr-transfer.c @@ -46,7 +46,8 @@ QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp) MigrationAddress *addr = channel->addr; if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET && - addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) { + (addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX || + addr->u.socket.type == SOCKET_ADDRESS_TYPE_FD)) { g_autoptr(QIOChannelSocket) sioc = NULL; SocketAddress *saddr = &addr->u.socket; @@ -60,7 +61,9 @@ QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp) sioc = qio_net_listener_wait_client(listener); ioc = QIO_CHANNEL(sioc); - trace_cpr_transfer_input(addr->u.socket.u.q_unix.path); + trace_cpr_transfer_input( + addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX ? 
+ addr->u.socket.u.q_unix.path : addr->u.socket.u.fd.str); qio_channel_set_name(ioc, "cpr-in"); return qemu_file_new_input(ioc); diff --git a/migration/ram.c b/migration/ram.c index d26dbd3..2140785 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -831,14 +831,22 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, bool ret; /* - * Clear dirty bitmap if needed. This _must_ be called before we - * send any of the page in the chunk because we need to make sure - * we can capture further page content changes when we sync dirty - * log the next time. So as long as we are going to send any of - * the page in the chunk we clear the remote dirty bitmap for all. - * Clearing it earlier won't be a problem, but too late will. + * During the last stage (after source VM stopped), resetting the write + * protections isn't needed as we know there will be either (1) no + * further writes if migration will complete, or (2) migration fails + * at last then tracking isn't needed either. */ - migration_clear_memory_region_dirty_bitmap(rb, page); + if (!rs->last_stage) { + /* + * Clear dirty bitmap if needed. This _must_ be called before we + * send any of the page in the chunk because we need to make sure + * we can capture further page content changes when we sync dirty + * log the next time. So as long as we are going to send any of + * the page in the chunk we clear the remote dirty bitmap for all. + * Clearing it earlier won't be a problem, but too late will. + */ + migration_clear_memory_region_dirty_bitmap(rb, page); + } ret = test_and_clear_bit(page, rb->bmap); if (ret) { @@ -848,8 +856,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, return ret; } -static void dirty_bitmap_clear_section(MemoryRegionSection *section, - void *opaque) +static int dirty_bitmap_clear_section(MemoryRegionSection *section, + void *opaque) { const hwaddr offset = section->offset_within_region; const hwaddr size = int128_get64(section->size); @@ -868,6 +876,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section, } *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages); bitmap_clear(rb->bmap, start, npages); + return 0; } /* @@ -3672,7 +3681,9 @@ static int ram_load_cleanup(void *opaque) RAMBlock *rb; RAMBLOCK_FOREACH_NOT_IGNORED(rb) { - qemu_ram_block_writeback(rb); + if (memory_region_is_nonvolatile(rb->mr)) { + qemu_ram_block_writeback(rb); + } } xbzrle_load_cleanup(); diff --git a/system/memory.c b/system/memory.c index 306e9ff..76b44b8 100644 --- a/system/memory.c +++ b/system/memory.c @@ -2106,12 +2106,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) return mr->rdm; } -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm) +int memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm) { g_assert(memory_region_is_ram(mr)); - g_assert(!rdm || !mr->rdm); + if (mr->rdm && rdm) { + return -EBUSY; + } + mr->rdm = rdm; + return 0; } uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, @@ -2134,7 +2138,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); @@ -2143,15 +2147,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, return rdmc->replay_populated(rdm, 
section, replay_fn, opaque); } -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque) +int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) { RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); g_assert(rdmc->replay_discarded); - rdmc->replay_discarded(rdm, section, replay_fn, opaque); + return rdmc->replay_discarded(rdm, section, replay_fn, opaque); } void ram_discard_manager_register_listener(RamDiscardManager *rdm, diff --git a/system/meson.build b/system/meson.build index 7514bf3..6d21ff9 100644 --- a/system/meson.build +++ b/system/meson.build @@ -17,6 +17,7 @@ system_ss.add(files( 'dma-helpers.c', 'globals.c', 'ioport.c', + 'ram-block-attributes.c', 'memory_mapping.c', 'memory.c', 'physmem.c', diff --git a/system/physmem.c b/system/physmem.c index a8a9ca3..ff0ca40 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1916,7 +1916,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } assert(new_block->guest_memfd < 0); - ret = ram_block_discard_require(true); + ret = ram_block_coordinated_discard_require(true); if (ret < 0) { error_setg_errno(errp, -ret, "cannot set up private guest memory: discard currently blocked"); @@ -1932,6 +1932,24 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } /* + * The attribute bitmap of the RamBlockAttributes is default to + * discarded, which mimics the behavior of kvm_set_phys_mem() when it + * calls kvm_set_memory_attributes_private(). This leads to a brief + * period of inconsistency between the creation of the RAMBlock and its + * mapping into the physical address space. However, this is not + * problematic, as no users rely on the attribute status to perform + * any actions during this interval. + */ + new_block->attributes = ram_block_attributes_create(new_block); + if (!new_block->attributes) { + error_setg(errp, "Failed to create ram block attribute"); + close(new_block->guest_memfd); + ram_block_coordinated_discard_require(false); + qemu_mutex_unlock_ramlist(); + goto out_free; + } + + /* * Add a specific guest_memfd blocker if a generic one would not be * added by ram_block_add_cpr_blocker. */ @@ -2287,8 +2305,9 @@ static void reclaim_ramblock(RAMBlock *block) } if (block->guest_memfd >= 0) { + ram_block_attributes_destroy(block->attributes); close(block->guest_memfd); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); } g_free(block); diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c new file mode 100644 index 0000000..68e8a02 --- /dev/null +++ b/system/ram-block-attributes.c @@ -0,0 +1,444 @@ +/* + * QEMU ram block attributes + * + * Copyright Intel + * + * Author: + * Chenyi Qiang <chenyi.qiang@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "system/ramblock.h" +#include "trace.h" + +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes, + ram_block_attributes, + RAM_BLOCK_ATTRIBUTES, + OBJECT, + { TYPE_RAM_DISCARD_MANAGER }, + { }) + +static size_t +ram_block_attributes_get_block_size(const RamBlockAttributes *attr) +{ + /* + * Because page conversion could be manipulated in the size of at least 4K + * or 4K aligned, Use the host page size as the granularity to track the + * memory attribute. 
+ */ + g_assert(attr && attr->ram_block); + g_assert(attr->ram_block->page_size == qemu_real_host_page_size()); + return attr->ram_block->page_size; +} + + +static bool +ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *section) +{ + const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + const size_t block_size = ram_block_attributes_get_block_size(attr); + const uint64_t first_bit = section->offset_within_region / block_size; + const uint64_t last_bit = + first_bit + int128_get64(section->size) / block_size - 1; + unsigned long first_discarded_bit; + + first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1, + first_bit); + return first_discarded_bit > last_bit; +} + +typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s, + void *arg); + +static int +ram_block_attributes_notify_populate_cb(MemoryRegionSection *section, + void *arg) +{ + RamDiscardListener *rdl = arg; + + return rdl->notify_populate(rdl, section); +} + +static int +ram_block_attributes_notify_discard_cb(MemoryRegionSection *section, + void *arg) +{ + RamDiscardListener *rdl = arg; + + rdl->notify_discard(rdl, section); + return 0; +} + +static int +ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr, + MemoryRegionSection *section, + void *arg, + ram_block_attributes_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + const size_t block_size = ram_block_attributes_get_block_size(attr); + int ret = 0; + + first_bit = section->offset_within_region / block_size; + first_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit); + + while (first_bit < attr->bitmap_size) { + MemoryRegionSection tmp = *section; + + offset = first_bit * block_size; + last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * block_size; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + + ret = cb(&tmp, arg); + if (ret) { + error_report("%s: Failed to notify RAM discard listener: %s", + __func__, strerror(-ret)); + break; + } + + first_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + last_bit + 2); + } + + return ret; +} + +static int +ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr, + MemoryRegionSection *section, + void *arg, + ram_block_attributes_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + const size_t block_size = ram_block_attributes_get_block_size(attr); + int ret = 0; + + first_bit = section->offset_within_region / block_size; + first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size, + first_bit); + + while (first_bit < attr->bitmap_size) { + MemoryRegionSection tmp = *section; + + offset = first_bit * block_size; + last_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * block_size; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + + ret = cb(&tmp, arg); + if (ret) { + error_report("%s: Failed to notify RAM discard listener: %s", + __func__, strerror(-ret)); + break; + } + + first_bit = find_next_zero_bit(attr->bitmap, + attr->bitmap_size, + last_bit + 2); + } + + return ret; +} + +static uint64_t +ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr) +{ + const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + + g_assert(mr == attr->ram_block->mr); + return 
ram_block_attributes_get_block_size(attr); +} + +static void +ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + int ret; + + g_assert(section->mr == attr->ram_block->mr); + rdl->section = memory_region_section_new_copy(section); + + QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next); + + ret = ram_block_attributes_for_each_populated_section(attr, section, rdl, + ram_block_attributes_notify_populate_cb); + if (ret) { + error_report("%s: Failed to register RAM discard listener: %s", + __func__, strerror(-ret)); + exit(1); + } +} + +static void +ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + int ret; + + g_assert(rdl->section); + g_assert(rdl->section->mr == attr->ram_block->mr); + + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + ret = ram_block_attributes_for_each_populated_section(attr, + rdl->section, rdl, ram_block_attributes_notify_discard_cb); + if (ret) { + error_report("%s: Failed to unregister RAM discard listener: %s", + __func__, strerror(-ret)); + exit(1); + } + } + + memory_region_section_free_copy(rdl->section); + rdl->section = NULL; + QLIST_REMOVE(rdl, next); +} + +typedef struct RamBlockAttributesReplayData { + ReplayRamDiscardState fn; + void *opaque; +} RamBlockAttributesReplayData; + +static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section, + void *arg) +{ + RamBlockAttributesReplayData *data = arg; + + return data->fn(section, data->opaque); +} + +static int +ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque }; + + g_assert(section->mr == attr->ram_block->mr); + return ram_block_attributes_for_each_populated_section(attr, section, &data, + ram_block_attributes_rdm_replay_cb); +} + +static int +ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque }; + + g_assert(section->mr == attr->ram_block->mr); + return ram_block_attributes_for_each_discarded_section(attr, section, &data, + ram_block_attributes_rdm_replay_cb); +} + +static bool +ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset, + uint64_t size) +{ + MemoryRegion *mr = attr->ram_block->mr; + + g_assert(mr); + + uint64_t region_size = memory_region_size(mr); + const size_t block_size = ram_block_attributes_get_block_size(attr); + + if (!QEMU_IS_ALIGNED(offset, block_size) || + !QEMU_IS_ALIGNED(size, block_size)) { + return false; + } + if (offset + size <= offset) { + return false; + } + if (offset + size > region_size) { + return false; + } + return true; +} + +static void ram_block_attributes_notify_discard(RamBlockAttributes *attr, + uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl; + + QLIST_FOREACH(rdl, &attr->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + rdl->notify_discard(rdl, &tmp); + } +} + +static int 
+ram_block_attributes_notify_populate(RamBlockAttributes *attr, + uint64_t offset, uint64_t size) +{ + RamDiscardListener *rdl; + int ret = 0; + + QLIST_FOREACH(rdl, &attr->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + ret = rdl->notify_populate(rdl, &tmp); + if (ret) { + break; + } + } + + return ret; +} + +int ram_block_attributes_state_change(RamBlockAttributes *attr, + uint64_t offset, uint64_t size, + bool to_discard) +{ + const size_t block_size = ram_block_attributes_get_block_size(attr); + const unsigned long first_bit = offset / block_size; + const unsigned long nbits = size / block_size; + const unsigned long last_bit = first_bit + nbits - 1; + const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit) > last_bit; + const bool is_populated = find_next_zero_bit(attr->bitmap, + attr->bitmap_size, first_bit) > last_bit; + unsigned long bit; + int ret = 0; + + if (!ram_block_attributes_is_valid_range(attr, offset, size)) { + error_report("%s, invalid range: offset 0x%" PRIx64 ", size " + "0x%" PRIx64, __func__, offset, size); + return -EINVAL; + } + + trace_ram_block_attributes_state_change(offset, size, + is_discarded ? "discarded" : + is_populated ? "populated" : + "mixture", + to_discard ? "discarded" : + "populated"); + if (to_discard) { + if (is_discarded) { + /* Already private */ + } else if (is_populated) { + /* Completely shared */ + bitmap_clear(attr->bitmap, first_bit, nbits); + ram_block_attributes_notify_discard(attr, offset, size); + } else { + /* Unexpected mixture: process individual blocks */ + for (bit = first_bit; bit < first_bit + nbits; bit++) { + if (!test_bit(bit, attr->bitmap)) { + continue; + } + clear_bit(bit, attr->bitmap); + ram_block_attributes_notify_discard(attr, bit * block_size, + block_size); + } + } + } else { + if (is_populated) { + /* Already shared */ + } else if (is_discarded) { + /* Completely private */ + bitmap_set(attr->bitmap, first_bit, nbits); + ret = ram_block_attributes_notify_populate(attr, offset, size); + } else { + /* Unexpected mixture: process individual blocks */ + for (bit = first_bit; bit < first_bit + nbits; bit++) { + if (test_bit(bit, attr->bitmap)) { + continue; + } + set_bit(bit, attr->bitmap); + ret = ram_block_attributes_notify_populate(attr, + bit * block_size, + block_size); + if (ret) { + break; + } + } + } + } + + return ret; +} + +RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block) +{ + const int block_size = qemu_real_host_page_size(); + RamBlockAttributes *attr; + MemoryRegion *mr = ram_block->mr; + + attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES)); + + attr->ram_block = ram_block; + if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) { + object_unref(OBJECT(attr)); + return NULL; + } + attr->bitmap_size = + ROUND_UP(int128_get64(mr->size), block_size) / block_size; + attr->bitmap = bitmap_new(attr->bitmap_size); + + return attr; +} + +void ram_block_attributes_destroy(RamBlockAttributes *attr) +{ + g_assert(attr); + + g_free(attr->bitmap); + memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL); + object_unref(OBJECT(attr)); +} + +static void ram_block_attributes_init(Object *obj) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj); + + QLIST_INIT(&attr->rdl_list); +} + +static void ram_block_attributes_finalize(Object *obj) +{ +} + +static void ram_block_attributes_class_init(ObjectClass *klass, + const void 
*data) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); + + rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity; + rdmc->register_listener = ram_block_attributes_rdm_register_listener; + rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener; + rdmc->is_populated = ram_block_attributes_rdm_is_populated; + rdmc->replay_populated = ram_block_attributes_rdm_replay_populated; + rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded; +} diff --git a/system/trace-events b/system/trace-events index be12ebf..82856e4 100644 --- a/system/trace-events +++ b/system/trace-events @@ -52,3 +52,6 @@ dirtylimit_state_finalize(void) dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64 dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us" + +# ram-block-attributes.c +ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'" diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c deleted file mode 100644 index b08b49b..0000000 --- a/tests/qtest/migration-helpers.c +++ /dev/null @@ -1,530 +0,0 @@ -/* - * QTest migration helpers - * - * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates - * based on the vhost-user-test.c that is: - * Copyright (c) 2014 Virtual Open Systems Sarl. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qemu/ctype.h" -#include "qobject/qjson.h" -#include "qapi/qapi-visit-sockets.h" -#include "qapi/qobject-input-visitor.h" -#include "qapi/error.h" -#include "qobject/qlist.h" -#include "qemu/cutils.h" -#include "qemu/memalign.h" - -#include "migration-helpers.h" - -/* - * Number of seconds we wait when looking for migration - * status changes, to avoid test suite hanging forever - * when things go wrong. Needs to be higher enough to - * avoid false positives on loaded hosts. 
- */ -#define MIGRATION_STATUS_WAIT_TIMEOUT 120 - -static char *SocketAddress_to_str(SocketAddress *addr) -{ - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - return g_strdup_printf("tcp:%s:%s", - addr->u.inet.host, - addr->u.inet.port); - case SOCKET_ADDRESS_TYPE_UNIX: - return g_strdup_printf("unix:%s", - addr->u.q_unix.path); - case SOCKET_ADDRESS_TYPE_FD: - return g_strdup_printf("fd:%s", addr->u.fd.str); - case SOCKET_ADDRESS_TYPE_VSOCK: - return g_strdup_printf("vsock:%s:%s", - addr->u.vsock.cid, - addr->u.vsock.port); - default: - return g_strdup("unknown address type"); - } -} - -static QDict *SocketAddress_to_qdict(SocketAddress *addr) -{ - QDict *dict = qdict_new(); - - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - qdict_put_str(dict, "type", "inet"); - qdict_put_str(dict, "host", addr->u.inet.host); - qdict_put_str(dict, "port", addr->u.inet.port); - break; - case SOCKET_ADDRESS_TYPE_UNIX: - qdict_put_str(dict, "type", "unix"); - qdict_put_str(dict, "path", addr->u.q_unix.path); - break; - case SOCKET_ADDRESS_TYPE_FD: - qdict_put_str(dict, "type", "fd"); - qdict_put_str(dict, "str", addr->u.fd.str); - break; - case SOCKET_ADDRESS_TYPE_VSOCK: - qdict_put_str(dict, "type", "vsock"); - qdict_put_str(dict, "cid", addr->u.vsock.cid); - qdict_put_str(dict, "port", addr->u.vsock.port); - break; - default: - g_assert_not_reached(); - } - - return dict; -} - -static SocketAddressList *migrate_get_socket_address(QTestState *who) -{ - QDict *rsp; - SocketAddressList *addrs; - Visitor *iv = NULL; - QObject *object; - - rsp = migrate_query(who); - object = qdict_get(rsp, "socket-address"); - - iv = qobject_input_visitor_new(object); - visit_type_SocketAddressList(iv, NULL, &addrs, &error_abort); - visit_free(iv); - - qobject_unref(rsp); - return addrs; -} - -static char * -migrate_get_connect_uri(QTestState *who) -{ - SocketAddressList *addrs; - char *connect_uri; - - addrs = migrate_get_socket_address(who); - connect_uri = SocketAddress_to_str(addrs->value); - - qapi_free_SocketAddressList(addrs); - return connect_uri; -} - -static QDict * -migrate_get_connect_qdict(QTestState *who) -{ - SocketAddressList *addrs; - QDict *connect_qdict; - - addrs = migrate_get_socket_address(who); - connect_qdict = SocketAddress_to_qdict(addrs->value); - - qapi_free_SocketAddressList(addrs); - return connect_qdict; -} - -static void migrate_set_ports(QTestState *to, QList *channel_list) -{ - QDict *addr; - QListEntry *entry; - const char *addr_port = NULL; - - addr = migrate_get_connect_qdict(to); - - QLIST_FOREACH_ENTRY(channel_list, entry) { - QDict *channel = qobject_to(QDict, qlist_entry_obj(entry)); - QDict *addrdict = qdict_get_qdict(channel, "addr"); - - if (qdict_haskey(addrdict, "port") && - qdict_haskey(addr, "port") && - (strcmp(qdict_get_str(addrdict, "port"), "0") == 0)) { - addr_port = qdict_get_str(addr, "port"); - qdict_put_str(addrdict, "port", addr_port); - } - } - - qobject_unref(addr); -} - -bool migrate_watch_for_events(QTestState *who, const char *name, - QDict *event, void *opaque) -{ - QTestMigrationState *state = opaque; - - if (g_str_equal(name, "STOP")) { - state->stop_seen = true; - return true; - } else if (g_str_equal(name, "SUSPEND")) { - state->suspend_seen = true; - return true; - } else if (g_str_equal(name, "RESUME")) { - state->resume_seen = true; - return true; - } - - return false; -} - -void migrate_qmp_fail(QTestState *who, const char *uri, - const char *channels, const char *fmt, ...) 
-{ - va_list ap; - QDict *args, *err; - - va_start(ap, fmt); - args = qdict_from_vjsonf_nofail(fmt, ap); - va_end(ap); - - g_assert(!qdict_haskey(args, "uri")); - if (uri) { - qdict_put_str(args, "uri", uri); - } - - g_assert(!qdict_haskey(args, "channels")); - if (channels) { - QObject *channels_obj = qobject_from_json(channels, &error_abort); - qdict_put_obj(args, "channels", channels_obj); - } - - err = qtest_qmp_assert_failure_ref( - who, "{ 'execute': 'migrate', 'arguments': %p}", args); - - g_assert(qdict_haskey(err, "desc")); - - qobject_unref(err); -} - -/* - * Send QMP command "migrate". - * Arguments are built from @fmt... (formatted like - * qobject_from_jsonf_nofail()) with "uri": @uri spliced in. - */ -void migrate_qmp(QTestState *who, QTestState *to, const char *uri, - const char *channels, const char *fmt, ...) -{ - va_list ap; - QDict *args; - g_autofree char *connect_uri = NULL; - - va_start(ap, fmt); - args = qdict_from_vjsonf_nofail(fmt, ap); - va_end(ap); - - g_assert(!qdict_haskey(args, "uri")); - if (uri) { - qdict_put_str(args, "uri", uri); - } else if (!channels) { - connect_uri = migrate_get_connect_uri(to); - qdict_put_str(args, "uri", connect_uri); - } - - g_assert(!qdict_haskey(args, "channels")); - if (channels) { - QObject *channels_obj = qobject_from_json(channels, &error_abort); - QList *channel_list = qobject_to(QList, channels_obj); - migrate_set_ports(to, channel_list); - qdict_put_obj(args, "channels", channels_obj); - } - - qtest_qmp_assert_success(who, - "{ 'execute': 'migrate', 'arguments': %p}", args); -} - -void migrate_set_capability(QTestState *who, const char *capability, - bool value) -{ - qtest_qmp_assert_success(who, - "{ 'execute': 'migrate-set-capabilities'," - "'arguments': { " - "'capabilities': [ { " - "'capability': %s, 'state': %i } ] } }", - capability, value); -} - -void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...) 
-{ - va_list ap; - QDict *args, *rsp; - - va_start(ap, fmt); - args = qdict_from_vjsonf_nofail(fmt, ap); - va_end(ap); - - g_assert(!qdict_haskey(args, "uri")); - qdict_put_str(args, "uri", uri); - - /* This function relies on the event to work, make sure it's enabled */ - migrate_set_capability(to, "events", true); - - rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}", - args); - - if (!qdict_haskey(rsp, "return")) { - g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(rsp), true); - g_test_message("%s", s->str); - } - - g_assert(qdict_haskey(rsp, "return")); - qobject_unref(rsp); - - migration_event_wait(to, "setup"); -} - -/* - * Note: caller is responsible to free the returned object via - * qobject_unref() after use - */ -QDict *migrate_query(QTestState *who) -{ - return qtest_qmp_assert_success_ref(who, "{ 'execute': 'query-migrate' }"); -} - -QDict *migrate_query_not_failed(QTestState *who) -{ - const char *status; - QDict *rsp = migrate_query(who); - status = qdict_get_str(rsp, "status"); - if (g_str_equal(status, "failed")) { - g_printerr("query-migrate shows failed migration: %s\n", - qdict_get_str(rsp, "error-desc")); - } - g_assert(!g_str_equal(status, "failed")); - return rsp; -} - -/* - * Note: caller is responsible to free the returned object via - * g_free() after use - */ -static gchar *migrate_query_status(QTestState *who) -{ - QDict *rsp_return = migrate_query(who); - gchar *status = g_strdup(qdict_get_str(rsp_return, "status")); - - g_assert(status); - qobject_unref(rsp_return); - - return status; -} - -static bool check_migration_status(QTestState *who, const char *goal, - const char **ungoals) -{ - bool ready; - char *current_status; - const char **ungoal; - - current_status = migrate_query_status(who); - ready = strcmp(current_status, goal) == 0; - if (!ungoals) { - g_assert_cmpstr(current_status, !=, "failed"); - /* - * If looking for a state other than completed, - * completion of migration would cause the test to - * hang. - */ - if (strcmp(goal, "completed") != 0) { - g_assert_cmpstr(current_status, !=, "completed"); - } - } else { - for (ungoal = ungoals; *ungoal; ungoal++) { - g_assert_cmpstr(current_status, !=, *ungoal); - } - } - g_free(current_status); - return ready; -} - -void wait_for_migration_status(QTestState *who, - const char *goal, const char **ungoals) -{ - g_test_timer_start(); - while (!check_migration_status(who, goal, ungoals)) { - usleep(1000); - - g_assert(g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT); - } -} - -void wait_for_migration_complete(QTestState *who) -{ - wait_for_migration_status(who, "completed", NULL); -} - -void wait_for_migration_fail(QTestState *from, bool allow_active) -{ - g_test_timer_start(); - QDict *rsp_return; - char *status; - bool failed; - - do { - status = migrate_query_status(from); - bool result = !strcmp(status, "setup") || !strcmp(status, "failed") || - (allow_active && !strcmp(status, "active")); - if (!result) { - fprintf(stderr, "%s: unexpected status status=%s allow_active=%d\n", - __func__, status, allow_active); - } - g_assert(result); - failed = !strcmp(status, "failed"); - g_free(status); - - g_assert(g_test_timer_elapsed() < MIGRATION_STATUS_WAIT_TIMEOUT); - } while (!failed); - - /* Is the machine currently running? 
*/ - rsp_return = qtest_qmp_assert_success_ref(from, - "{ 'execute': 'query-status' }"); - g_assert(qdict_haskey(rsp_return, "running")); - g_assert(qdict_get_bool(rsp_return, "running")); - qobject_unref(rsp_return); -} - -char *find_common_machine_version(const char *mtype, const char *var1, - const char *var2) -{ - g_autofree char *type1 = qtest_resolve_machine_alias(var1, mtype); - g_autofree char *type2 = qtest_resolve_machine_alias(var2, mtype); - - g_assert(type1 && type2); - - if (g_str_equal(type1, type2)) { - /* either can be used */ - return g_strdup(type1); - } - - if (qtest_has_machine_with_env(var2, type1)) { - return g_strdup(type1); - } - - if (qtest_has_machine_with_env(var1, type2)) { - return g_strdup(type2); - } - - g_test_message("No common machine version for machine type '%s' between " - "binaries %s and %s", mtype, getenv(var1), getenv(var2)); - g_assert_not_reached(); -} - -char *resolve_machine_version(const char *alias, const char *var1, - const char *var2) -{ - const char *mname = g_getenv("QTEST_QEMU_MACHINE_TYPE"); - g_autofree char *machine_name = NULL; - - if (mname) { - const char *dash = strrchr(mname, '-'); - const char *dot = strrchr(mname, '.'); - - machine_name = g_strdup(mname); - - if (dash && dot) { - assert(qtest_has_machine(machine_name)); - return g_steal_pointer(&machine_name); - } - /* else: probably an alias, let it be resolved below */ - } else { - /* use the hardcoded alias */ - machine_name = g_strdup(alias); - } - - return find_common_machine_version(machine_name, var1, var2); -} - -typedef struct { - char *name; - void (*func)(void); -} MigrationTest; - -static void migration_test_destroy(gpointer data) -{ - MigrationTest *test = (MigrationTest *)data; - - g_free(test->name); - g_free(test); -} - -static void migration_test_wrapper(const void *data) -{ - MigrationTest *test = (MigrationTest *)data; - - g_test_message("Running /%s%s", qtest_get_arch(), test->name); - test->func(); -} - -void migration_test_add(const char *path, void (*fn)(void)) -{ - MigrationTest *test = g_new0(MigrationTest, 1); - - test->func = fn; - test->name = g_strdup(path); - - qtest_add_data_func_full(path, test, migration_test_wrapper, - migration_test_destroy); -} - -#ifdef O_DIRECT -/* - * Probe for O_DIRECT support on the filesystem. Since this is used - * for tests, be conservative, if anything fails, assume it's - * unsupported. - */ -bool probe_o_direct_support(const char *tmpfs) -{ - g_autofree char *filename = g_strdup_printf("%s/probe-o-direct", tmpfs); - int fd, flags = O_CREAT | O_RDWR | O_TRUNC | O_DIRECT; - void *buf; - ssize_t ret, len; - uint64_t offset; - - fd = open(filename, flags, 0660); - if (fd < 0) { - unlink(filename); - return false; - } - - /* - * Using 1MB alignment as conservative choice to satisfy any - * plausible architecture default page size, and/or filesystem - * alignment restrictions. - */ - len = 0x100000; - offset = 0x100000; - - buf = qemu_try_memalign(len, len); - g_assert(buf); - - ret = pwrite(fd, buf, len, offset); - unlink(filename); - g_free(buf); - - if (ret < 0) { - return false; - } - - return true; -} -#endif - -/* - * Wait for a "MIGRATION" event. This is what Libvirt uses to track - * migration status changes. 
- */ -void migration_event_wait(QTestState *s, const char *target) -{ - QDict *response, *data; - const char *status; - bool found; - - do { - response = qtest_qmp_eventwait_ref(s, "MIGRATION"); - data = qdict_get_qdict(response, "data"); - g_assert(data); - status = qdict_get_str(data, "status"); - found = (strcmp(status, target) == 0); - qobject_unref(response); - } while (!found); -} diff --git a/tests/qtest/migration/cpr-tests.c b/tests/qtest/migration/cpr-tests.c index 5536e14..5e764a6 100644 --- a/tests/qtest/migration/cpr-tests.c +++ b/tests/qtest/migration/cpr-tests.c @@ -60,13 +60,12 @@ static void test_mode_transfer_common(bool incoming_defer) g_autofree char *cpr_path = g_strdup_printf("%s/cpr.sock", tmpfs); g_autofree char *mig_path = g_strdup_printf("%s/migsocket", tmpfs); g_autofree char *uri = g_strdup_printf("unix:%s", mig_path); + g_autofree char *opts_target = NULL; const char *opts = "-machine aux-ram-share=on -nodefaults"; g_autofree const char *cpr_channel = g_strdup_printf( "cpr,addr.transport=socket,addr.type=unix,addr.path=%s", cpr_path); - g_autofree char *opts_target = g_strdup_printf("-incoming %s %s", - cpr_channel, opts); g_autofree char *connect_channels = g_strdup_printf( "[ { 'channel-type': 'main'," @@ -75,6 +74,17 @@ static void test_mode_transfer_common(bool incoming_defer) " 'path': '%s' } } ]", mig_path); + /* + * Set up a UNIX domain socket for the CPR channel before + * launching the destination VM, to avoid timing issues + * during connection setup. + */ + int cpr_sockfd = qtest_socket_server(cpr_path); + g_assert(cpr_sockfd >= 0); + + opts_target = g_strdup_printf("-incoming cpr,addr.transport=socket," + "addr.type=fd,addr.str=%d %s", + cpr_sockfd, opts); MigrateCommon args = { .start.opts_source = opts, .start.opts_target = opts_target, @@ -3385,6 +3385,16 @@ static const DisplayChangeListenerOps dcl_ops = { .dpy_cursor_define = vnc_dpy_cursor_define, }; +static void vmstate_change_handler(void *opaque, bool running, RunState state) +{ + VncDisplay *vd = opaque; + + if (state != RUN_STATE_RUNNING) { + return; + } + update_displaychangelistener(&vd->dcl, VNC_REFRESH_INTERVAL_BASE); +} + void vnc_display_init(const char *id, Error **errp) { VncDisplay *vd; @@ -3421,6 +3431,8 @@ void vnc_display_init(const char *id, Error **errp) vd->dcl.ops = &dcl_ops; register_displaychangelistener(&vd->dcl); vd->kbd = qkbd_state_init(vd->dcl.con); + vd->vmstate_handler_entry = qemu_add_vm_change_state_handler( + &vmstate_change_handler, vd); } @@ -185,6 +185,8 @@ struct VncDisplay #endif AudioState *audio_state; + + VMChangeStateEntry *vmstate_handler_entry; }; typedef struct VncTight { |
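
The cpr-tests change above removes a race by creating the listening UNIX socket before the destination VM is launched (via qtest_socket_server()) and passing the already-listening descriptor with addr.type=fd, so the source's connect() can never arrive before a listener exists. A rough standalone illustration of that ordering follows; the socket path and the command-line hint in the comment are only illustrative, while the test itself listens on the cpr.sock path under its tmpfs directory.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

/* Create, bind and listen on a UNIX stream socket; returns the fd or -1. */
static int listen_unix(const char *path)
{
    struct sockaddr_un addr;
    int fd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (fd < 0) {
        return -1;
    }
    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
    unlink(path);                     /* tolerate a stale socket file */
    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        listen(fd, 1) < 0) {
        close(fd);
        return -1;
    }
    return fd;
}

int main(void)
{
    /* Hypothetical path; the qtest uses "<tmpfs>/cpr.sock". */
    const char *path = "/tmp/cpr-example.sock";
    int fd = listen_unix(path);

    if (fd < 0) {
        perror("listen_unix");
        return EXIT_FAILURE;
    }
    /*
     * Only now would the destination be launched, e.g. with
     * "-incoming cpr,addr.transport=socket,addr.type=fd,addr.str=<fd>",
     * so the source's connect() is guaranteed to find a listener.
     */
    printf("listening on %s (fd %d)\n", path, fd);
    close(fd);
    unlink(path);
    return 0;
}
```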