diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-11 09:32:07 +0800 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-11 09:32:07 +0800 |
commit | 825b96dbcee23d134b691fc75618b59c5f53da32 (patch) | |
tree | 60d8ca07dab2874e65d6025d765b7bc150865245 | |
parent | 1a5f3d2eee2cd26290506ad3ba7f04086ff37fe5 (diff) | |
parent | baa41af1c083446971feac39b0da845e547ca068 (diff) | |
download | qemu-825b96dbcee23d134b691fc75618b59c5f53da32.zip qemu-825b96dbcee23d134b691fc75618b59c5f53da32.tar.gz qemu-825b96dbcee23d134b691fc75618b59c5f53da32.tar.bz2 |
Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging
Migration pull request
- Fix use-after-free in incoming migration
- Improve cpr migration blocker for volatile ram
- Fix RDMA migration
# -----BEGIN PGP SIGNATURE-----
#
# iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmfPaCAQHGZhcm9zYXNA
# c3VzZS5kZQAKCRDHmNx0G+wxnQy9EADRp/6GaSzoqWgafU8DGM5Q69HyKiZ888DZ
# 7qXqJeH3c95nvOnIw2BMhUYX4t8kkAbUcWlr7L8KCjZT/6N/d1/Z5fimqymRkw4x
# +8kDyADv5FY0339aMLf3qBbIAQj/gvPvg8H+e+hXfokZqoYgLXZ0eqNAz8MjIcyN
# +A+waEBMLNvTgZyTQl2TbCvb+mbRial8u8C9BIoILhn/gNuoMX7lbt0tq41HZwe0
# l3v16jnXlsDvQUXp99bGySomRgkcYqdAt+HWHLje3frT/Ap8dGaUJKlpgJ8DXJiA
# fV1reKihJdj37q9GSG8cR02W+ATBesiecufV4TUPNQYQzTdxn3fOMwdc3Pck074D
# YAQxFT20OPou+NRxjYoHT/GqFUY36/2qBJpt7TY3ramdklHJhXpRyedK4rppTZNn
# pC3lnbpA/LHRmfD1Nh0CRmqZpbV+qW1BWEgMwk4qui46BxYWHxKHFpxAuwlJQmcw
# RxY8qPhIXQM03tiTgIddBNDZLoVqRoUP7YpzR7MMa1rz0T5inNFMcNGm72WpKODE
# rzpw4ezXO7+D4/QmMq3PoPfhFv3QFnH6jaGj8JkJM378KLvh4fQ0woXtDKFl4Tbq
# 1oBZ17WUv6aHr75b+KMyKJNLinvMu5WF5WoRYIt1lNXaqk7I494yvIjtRrimWZIS
# Z5Q0tpUmpw==
# =yEH0
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 11 Mar 2025 06:30:56 HKT
# gpg: using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D
# gpg: issuer "farosas@suse.de"
# gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown]
# gpg: aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3 64CF C798 DC74 1BEC 319D
* tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu:
migration: Prioritize RDMA in ram_save_target_page()
migration: ram block cpr blockers
migration: Fix UAF for incoming migration on MigrationState
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | include/exec/memory.h | 3 | ||||
-rw-r--r-- | include/exec/ramblock.h | 1 | ||||
-rw-r--r-- | migration/migration.c | 40 | ||||
-rw-r--r-- | migration/ram.c | 9 | ||||
-rw-r--r-- | migration/savevm.c | 2 | ||||
-rw-r--r-- | system/physmem.c | 66 |
6 files changed, 115 insertions, 6 deletions
diff --git a/include/exec/memory.h b/include/exec/memory.h index 78c4e0a..d09af58 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -3203,6 +3203,9 @@ bool ram_block_discard_is_disabled(void); */ bool ram_block_discard_is_required(void); +void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp); +void ram_block_del_cpr_blocker(RAMBlock *rb); + #endif #endif diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 0babd10..64484cd 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -39,6 +39,7 @@ struct RAMBlock { /* RCU-enabled, writes protected by the ramlist lock */ QLIST_ENTRY(RAMBlock) next; QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + Error *cpr_blocker; int fd; uint64_t fd_offset; int guest_memfd; diff --git a/migration/migration.c b/migration/migration.c index 1833cfe..d46e776 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -116,6 +116,27 @@ static void migration_downtime_start(MigrationState *s) s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } +/* + * This is unfortunate: incoming migration actually needs the outgoing + * migration state (MigrationState) to be there too, e.g. to query + * capabilities, parameters, using locks, setup errors, etc. + * + * NOTE: when calling this, making sure current_migration exists and not + * been freed yet! Otherwise trying to access the refcount is already + * an use-after-free itself.. + * + * TODO: Move shared part of incoming / outgoing out into separate object. + * Then this is not needed. + */ +static void migrate_incoming_ref_outgoing_state(void) +{ + object_ref(migrate_get_current()); +} +static void migrate_incoming_unref_outgoing_state(void) +{ + object_unref(migrate_get_current()); +} + static void migration_downtime_end(MigrationState *s) { int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); @@ -863,7 +884,7 @@ process_incoming_migration_co(void *opaque) * postcopy thread. */ trace_process_incoming_migration_co_postcopy_end_main(); - return; + goto out; } /* Else if something went wrong then just fall out of the normal exit */ } @@ -879,7 +900,8 @@ process_incoming_migration_co(void *opaque) } migration_bh_schedule(process_incoming_migration_bh, mis); - return; + goto out; + fail: migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); @@ -896,6 +918,9 @@ fail: exit(EXIT_FAILURE); } +out: + /* Pairs with the refcount taken in qmp_migrate_incoming() */ + migrate_incoming_unref_outgoing_state(); } /** @@ -1901,6 +1926,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, return; } + /* + * Making sure MigrationState is available until incoming migration + * completes. + * + * NOTE: QEMU _might_ leak this refcount in some failure paths, but + * that's OK. This is the minimum change we need to at least making + * sure success case is clean on the refcount. We can try harder to + * make it accurate for any kind of failures, but it might be an + * overkill and doesn't bring us much benefit. + */ + migrate_incoming_ref_outgoing_state(); once = false; } diff --git a/migration/ram.c b/migration/ram.c index 589b650..424df6d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1964,6 +1964,11 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; + /* Hand over to RDMA first */ + if (control_save_page(pss, offset, &res)) { + return res; + } + if (!migrate_multifd() || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) { if (save_zero_page(rs, pss, offset)) { @@ -1976,10 +1981,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) return ram_save_multifd_page(block, offset); } - if (control_save_page(pss, offset, &res)) { - return res; - } - return ram_save_page(rs, pss); } diff --git a/migration/savevm.c b/migration/savevm.c index 5c4fdfd..ce158c3 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -3514,12 +3514,14 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev) qemu_ram_set_idstr(mr->ram_block, memory_region_name(mr), dev); qemu_ram_set_migratable(mr->ram_block); + ram_block_add_cpr_blocker(mr->ram_block, &error_fatal); } void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) { qemu_ram_unset_idstr(mr->ram_block); qemu_ram_unset_migratable(mr->ram_block); + ram_block_del_cpr_blocker(mr->ram_block); } void vmstate_register_ram_global(MemoryRegion *mr) diff --git a/system/physmem.c b/system/physmem.c index a6af555..e97de3e 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -71,7 +71,10 @@ #include "qemu/pmem.h" +#include "qapi/qapi-types-migration.h" +#include "migration/blocker.h" #include "migration/cpr.h" +#include "migration/options.h" #include "migration/vmstate.h" #include "qemu/range.h" @@ -1904,6 +1907,14 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) qemu_mutex_unlock_ramlist(); goto out_free; } + + error_setg(&new_block->cpr_blocker, + "Memory region %s uses guest_memfd, " + "which is not supported with CPR.", + memory_region_name(new_block->mr)); + migrate_add_blocker_modes(&new_block->cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, + -1); } ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS; @@ -4095,3 +4106,58 @@ bool ram_block_discard_is_required(void) return qatomic_read(&ram_block_discard_required_cnt) || qatomic_read(&ram_block_coordinated_discard_required_cnt); } + +/* + * Return true if ram is compatible with CPR. Do not exclude rom, + * because the rom file could change in new QEMU. + */ +static bool ram_is_cpr_compatible(RAMBlock *rb) +{ + MemoryRegion *mr = rb->mr; + + if (!mr || !memory_region_is_ram(mr)) { + return true; + } + + /* Ram device is remapped in new QEMU */ + if (memory_region_is_ram_device(mr)) { + return true; + } + + /* + * A file descriptor is passed to new QEMU and remapped, or its backing + * file is reopened and mapped. It must be shared to avoid COW. + */ + if (rb->fd >= 0 && qemu_ram_is_shared(rb)) { + return true; + } + + return false; +} + +/* + * Add a blocker for each volatile ram block. This function should only be + * called after we know that the block is migratable. Non-migratable blocks + * are either re-created in new QEMU, or are handled specially, or are covered + * by a device-level CPR blocker. + */ +void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp) +{ + assert(qemu_ram_is_migratable(rb)); + + if (ram_is_cpr_compatible(rb)) { + return; + } + + error_setg(&rb->cpr_blocker, + "Memory region %s is not compatible with CPR. share=on is " + "required for memory-backend objects, and aux-ram-share=on is " + "required.", memory_region_name(rb->mr)); + migrate_add_blocker_modes(&rb->cpr_blocker, errp, MIG_MODE_CPR_TRANSFER, + -1); +} + +void ram_block_del_cpr_blocker(RAMBlock *rb) +{ + migrate_del_blocker(&rb->cpr_blocker); +} |