aboutsummaryrefslogtreecommitdiff
path: root/migration
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-10-27 10:25:42 +0000
committerPeter Maydell <peter.maydell@linaro.org>2020-10-27 10:25:42 +0000
commitd55450df995d6223486db11c66491cbf6c131523 (patch)
tree2e8ffe0518062f0893b4584e91f438bb83091c00 /migration
parent091e3e3dbc499d84c004e1c50bc9870af37f6e99 (diff)
parenta47295014de56e108f359ec859d5499b851f62b8 (diff)
downloadqemu-d55450df995d6223486db11c66491cbf6c131523.zip
qemu-d55450df995d6223486db11c66491cbf6c131523.tar.gz
qemu-d55450df995d6223486db11c66491cbf6c131523.tar.bz2
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201026a' into staging
migration pull: 2020-10-26 Another go at Peter's postcopy fixes Cleanups from Bihong Yu and Peter Maydell. Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> # gpg: Signature made Mon 26 Oct 2020 16:17:03 GMT # gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7 # gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full] # Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7 * remotes/dgilbert/tags/pull-migration-20201026a: migration-test: Only hide error if !QTEST_LOG migration/postcopy: Release fd before going into 'postcopy-pause' migration: Sync requested pages after postcopy recovery migration: Maintain postcopy faulted addresses migration: Introduce migrate_send_rp_message_req_pages() migration: Pass incoming state into qemu_ufd_copy_ioctl() migration: using trace_ to replace DPRINTF migration: Delete redundant spaces migration: Open brace '{' following function declarations go on the next line migration: Do not initialise statics and globals to 0 or NULL migration: Add braces {} for if statement migration: Open brace '{' following struct go on the same line migration: Add spaces around operator migration: Don't use '#' flag of printf format migration: Do not use C99 // comments migration: Drop unused VMSTATE_FLOAT64 support Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'migration')
-rw-r--r--migration/block.c40
-rw-r--r--migration/migration.c59
-rw-r--r--migration/migration.h24
-rw-r--r--migration/page_cache.c13
-rw-r--r--migration/postcopy-ram.c27
-rw-r--r--migration/ram.c14
-rw-r--r--migration/rdma.c7
-rw-r--r--migration/savevm.c61
-rw-r--r--migration/trace-events16
-rw-r--r--migration/vmstate-types.c26
-rw-r--r--migration/vmstate.c10
11 files changed, 208 insertions, 89 deletions
diff --git a/migration/block.c b/migration/block.c
index 737b649..a950977 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -26,6 +26,7 @@
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"
+#include "trace.h"
#define BLK_MIG_BLOCK_SIZE (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)
@@ -40,7 +41,7 @@
#define MAX_IO_BUFFERS 512
#define MAX_PARALLEL_IO 16
-//#define DEBUG_BLK_MIGRATION
+/* #define DEBUG_BLK_MIGRATION */
#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
@@ -434,10 +435,9 @@ static int init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum += sectors;
if (bmds->shared_base) {
- DPRINTF("Start migration for %s with shared base image\n",
- bdrv_get_device_name(bs));
+ trace_migration_block_init_shared(bdrv_get_device_name(bs));
} else {
- DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
+ trace_migration_block_init_full(bdrv_get_device_name(bs));
}
QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
@@ -592,7 +592,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
return (bmds->cur_dirty >= bmds->total_sectors);
error:
- DPRINTF("Error reading sector %" PRId64 "\n", sector);
+ trace_migration_block_save_device_dirty(sector);
g_free(blk->buf);
g_free(blk);
return ret;
@@ -628,9 +628,9 @@ static int flush_blks(QEMUFile *f)
BlkMigBlock *blk;
int ret = 0;
- DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
- __func__, block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
+ trace_migration_block_flush_blks("Enter", block_mig_state.submitted,
+ block_mig_state.read_done,
+ block_mig_state.transferred);
blk_mig_lock();
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
@@ -656,9 +656,9 @@ static int flush_blks(QEMUFile *f)
}
blk_mig_unlock();
- DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __func__,
- block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
+ trace_migration_block_flush_blks("Exit", block_mig_state.submitted,
+ block_mig_state.read_done,
+ block_mig_state.transferred);
return ret;
}
@@ -727,8 +727,8 @@ static int block_save_setup(QEMUFile *f, void *opaque)
{
int ret;
- DPRINTF("Enter save live setup submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("setup", block_mig_state.submitted,
+ block_mig_state.transferred);
qemu_mutex_lock_iothread();
ret = init_blk_migration(f);
@@ -759,8 +759,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
int64_t last_ftell = qemu_ftell(f);
int64_t delta_ftell;
- DPRINTF("Enter save live iterate submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("iterate", block_mig_state.submitted,
+ block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
@@ -825,8 +825,8 @@ static int block_save_complete(QEMUFile *f, void *opaque)
{
int ret;
- DPRINTF("Enter save live complete submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("complete", block_mig_state.submitted,
+ block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
@@ -851,7 +851,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
/* report completion */
qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
- DPRINTF("Block migration completed\n");
+ trace_migration_block_save_complete();
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
@@ -884,7 +884,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
pending = max_size + BLK_MIG_BLOCK_SIZE;
}
- DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
+ trace_migration_block_save_pending(pending);
/* We don't do postcopy */
*res_precopy_only += pending;
}
@@ -998,7 +998,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
(addr == 100) ? '\n' : '\r');
fflush(stdout);
} else if (!(flags & BLK_MIG_FLAG_EOS)) {
- fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
+ fprintf(stderr, "Unknown block migration flags: 0x%x\n", flags);
return -EINVAL;
}
ret = qemu_file_get_error(f);
diff --git a/migration/migration.c b/migration/migration.c
index deb6005..9bb4fee 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -143,6 +143,13 @@ static int migration_maybe_pause(MigrationState *s,
int new_state);
static void migrate_fd_cancel(MigrationState *s);
+static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
+{
+ uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
+
+ return (a > b) - (a < b);
+}
+
void migration_object_init(void)
{
Error *err = NULL;
@@ -164,6 +171,8 @@ void migration_object_init(void)
qemu_event_init(&current_incoming->main_thread_load_event, false);
qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
+ qemu_mutex_init(&current_incoming->page_request_mutex);
+ current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
if (!migration_object_check(current_migration, &err)) {
error_report_err(err);
@@ -230,6 +239,11 @@ void migration_incoming_state_destroy(void)
qemu_event_reset(&mis->main_thread_load_event);
+ if (mis->page_requested) {
+ g_tree_destroy(mis->page_requested);
+ mis->page_requested = NULL;
+ }
+
if (mis->socket_address_list) {
qapi_free_SocketAddressList(mis->socket_address_list);
mis->socket_address_list = NULL;
@@ -306,8 +320,8 @@ error:
* Start: Address offset within the RB
* Len: Length in bytes required - must be a multiple of pagesize
*/
-int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
- ram_addr_t start)
+int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
+ RAMBlock *rb, ram_addr_t start)
{
uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
size_t msglen = 12; /* start + len */
@@ -343,6 +357,37 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
+int migrate_send_rp_req_pages(MigrationIncomingState *mis,
+ RAMBlock *rb, ram_addr_t start, uint64_t haddr)
+{
+ void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb)));
+ bool received;
+
+ WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
+ received = ramblock_recv_bitmap_test_byte_offset(rb, start);
+ if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
+ /*
+ * The page has not been received, and it's not yet in the page
+ * request list. Queue it. Set the value of element to 1, so that
+ * things like g_tree_lookup() will return TRUE (1) when found.
+ */
+ g_tree_insert(mis->page_requested, aligned, (gpointer)1);
+ mis->page_requested_count++;
+ trace_postcopy_page_req_add(aligned, mis->page_requested_count);
+ }
+ }
+
+ /*
+ * If the page is there, skip sending the message. We don't even need the
+ * lock because as long as the page arrived, it'll be there forever.
+ */
+ if (received) {
+ return 0;
+ }
+
+ return migrate_send_rp_message_req_pages(mis, rb, start);
+}
+
static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
@@ -2468,8 +2513,8 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
* Since we currently insist on matching page sizes, just sanity check
* we're being asked for whole host pages.
*/
- if (start & (our_host_ps-1) ||
- (len & (our_host_ps-1))) {
+ if (start & (our_host_ps - 1) ||
+ (len & (our_host_ps - 1))) {
error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
" len: %zd", __func__, start, len);
mark_source_rp_bad(ms);
@@ -3123,9 +3168,6 @@ static MigThrError postcopy_pause(MigrationState *s)
while (true) {
QEMUFile *file;
- migrate_set_state(&s->state, s->state,
- MIGRATION_STATUS_POSTCOPY_PAUSED);
-
/* Current channel is possibly broken. Release it. */
assert(s->to_dst_file);
qemu_mutex_lock(&s->qemu_file_lock);
@@ -3136,6 +3178,9 @@ static MigThrError postcopy_pause(MigrationState *s)
qemu_file_shutdown(file);
qemu_fclose(file);
+ migrate_set_state(&s->state, s->state,
+ MIGRATION_STATUS_POSTCOPY_PAUSED);
+
error_report("Detected IO failure for postcopy. "
"Migration paused.");
diff --git a/migration/migration.h b/migration/migration.h
index deb411a..d096b77 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -104,6 +104,23 @@ struct MigrationIncomingState {
/* List of listening socket addresses */
SocketAddressList *socket_address_list;
+
+ /* A tree of pages that we requested from the source VM */
+ GTree *page_requested;
+ /* For debugging purpose only, but would be nice to keep */
+ int page_requested_count;
+ /*
+ * The mutex helps to maintain the requested pages that we sent to the
+ * source, IOW, to guarantee coherence between the page_requested tree and
+ * the per-ramblock receivedmap. Note! This does not guarantee consistency
+ * of the real page copy procedures (using UFFDIO_[ZERO]COPY). E.g., even
+ * if one bit in receivedmap is cleared, UFFDIO_COPY could have happened
+ * for that page already. This is intended so that the mutex won't be
+ * serialized with and blocked by slow operations like UFFDIO_* ioctls. However
+ * this should be enough to make sure the page_requested tree always
+ * contains valid information.
+ */
+ QemuMutex page_request_mutex;
};
MigrationIncomingState *migration_incoming_get_current(void);
@@ -124,8 +141,7 @@ struct MigrationClass {
DeviceClass parent_class;
};
-struct MigrationState
-{
+struct MigrationState {
/*< private >*/
DeviceState parent_obj;
@@ -332,7 +348,9 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
void migrate_send_rp_pong(MigrationIncomingState *mis,
uint32_t value);
int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
- ram_addr_t start);
+ ram_addr_t start, uint64_t haddr);
+int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
+ RAMBlock *rb, ram_addr_t start);
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
char *block_name);
void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
diff --git a/migration/page_cache.c b/migration/page_cache.c
index 775582f..098b436 100644
--- a/migration/page_cache.c
+++ b/migration/page_cache.c
@@ -18,14 +18,7 @@
#include "qapi/error.h"
#include "qemu/host-utils.h"
#include "page_cache.h"
-
-#ifdef DEBUG_CACHE
-#define DPRINTF(fmt, ...) \
- do { fprintf(stdout, "cache: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
+#include "trace.h"
/* the page in cache will not be replaced in two cycles */
#define CACHED_PAGE_LIFETIME 2
@@ -75,7 +68,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp)
cache->num_items = 0;
cache->max_num_items = num_pages;
- DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items);
+ trace_migration_pagecache_init(cache->max_num_items);
/* We prefer not to abort if there is no memory */
cache->page_cache = g_try_malloc((cache->max_num_items) *
@@ -169,7 +162,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
if (!it->it_data) {
it->it_data = g_try_malloc(cache->page_size);
if (!it->it_data) {
- DPRINTF("Error allocating page\n");
+ trace_migration_pagecache_insert();
return -1;
}
cache->num_items++;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0a2f88a8..d3bb3a7 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -403,7 +403,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
strerror(errno));
goto out;
}
- g_assert(((size_t)testarea & (pagesize-1)) == 0);
+ g_assert(((size_t)testarea & (pagesize - 1)) == 0);
reg_struct.range.start = (uintptr_t)testarea;
reg_struct.range.len = pagesize;
@@ -684,7 +684,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
qemu_ram_get_idstr(rb), rb_offset);
return postcopy_wake_shared(pcfd, client_addr, rb);
}
- migrate_send_rp_req_pages(mis, rb, aligned_rbo);
+ migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr);
return 0;
}
@@ -979,7 +979,8 @@ retry:
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
- ret = migrate_send_rp_req_pages(mis, rb, rb_offset);
+ ret = migrate_send_rp_req_pages(mis, rb, rb_offset,
+ msg.arg.pagefault.address);
if (ret) {
/* May be network failure, try to wait for recovery */
if (ret == -EIO && postcopy_pause_fault_thread(mis)) {
@@ -1128,10 +1129,12 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
return 0;
}
-static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
+static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr,
void *from_addr, uint64_t pagesize, RAMBlock *rb)
{
+ int userfault_fd = mis->userfault_fd;
int ret;
+
if (from_addr) {
struct uffdio_copy copy_struct;
copy_struct.dst = (uint64_t)(uintptr_t)host_addr;
@@ -1147,10 +1150,20 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr,
ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct);
}
if (!ret) {
+ qemu_mutex_lock(&mis->page_request_mutex);
ramblock_recv_bitmap_set_range(rb, host_addr,
pagesize / qemu_target_page_size());
+ /*
+ * If this page resolves a page fault for a previously recorded faulted
+ * address, take a special note to maintain the requested page list.
+ */
+ if (g_tree_lookup(mis->page_requested, host_addr)) {
+ g_tree_remove(mis->page_requested, host_addr);
+ mis->page_requested_count--;
+ trace_postcopy_page_req_del(host_addr, mis->page_requested_count);
+ }
+ qemu_mutex_unlock(&mis->page_request_mutex);
mark_postcopy_blocktime_end((uintptr_t)host_addr);
-
}
return ret;
}
@@ -1185,7 +1198,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
* which would be slightly cheaper, but we'd have to be careful
* of the order of updating our page state.
*/
- if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) {
+ if (qemu_ufd_copy_ioctl(mis, host, from, pagesize, rb)) {
int e = errno;
error_report("%s: %s copy host: %p from: %p (size: %zd)",
__func__, strerror(e), host, from, pagesize);
@@ -1212,7 +1225,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
* but it's not available for everything (e.g. hugetlbpages)
*/
if (qemu_ram_is_uf_zeroable(rb)) {
- if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) {
+ if (qemu_ufd_copy_ioctl(mis, host, NULL, pagesize, rb)) {
int e = errno;
error_report("%s: %s zero host: %p",
__func__, strerror(e), host);
diff --git a/migration/ram.c b/migration/ram.c
index 433489d..2da2b62 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -101,14 +101,16 @@ static struct {
static void XBZRLE_cache_lock(void)
{
- if (migrate_use_xbzrle())
+ if (migrate_use_xbzrle()) {
qemu_mutex_lock(&XBZRLE.lock);
+ }
}
static void XBZRLE_cache_unlock(void)
{
- if (migrate_use_xbzrle())
+ if (migrate_use_xbzrle()) {
qemu_mutex_unlock(&XBZRLE.lock);
+ }
}
/**
@@ -1563,7 +1565,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
rs->last_req_rb = ramblock;
}
trace_ram_save_queue_pages(ramblock->idstr, start, len);
- if (start+len > ramblock->used_length) {
+ if (start + len > ramblock->used_length) {
error_report("%s request overrun start=" RAM_ADDR_FMT " len="
RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
__func__, start, len, ramblock->used_length);
@@ -2741,7 +2743,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
*/
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
- static RAMBlock *block = NULL;
+ static RAMBlock *block;
char id[256];
uint8_t len;
@@ -3298,7 +3300,7 @@ static int ram_load_postcopy(QEMUFile *f)
multifd_recv_sync_main();
break;
default:
- error_report("Unknown combination of migration flags: %#x"
+ error_report("Unknown combination of migration flags: 0x%x"
" (postcopy mode)", flags);
ret = -EINVAL;
break;
@@ -3576,7 +3578,7 @@ static int ram_load_precopy(QEMUFile *f)
if (flags & RAM_SAVE_FLAG_HOOK) {
ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
} else {
- error_report("Unknown combination of migration flags: %#x",
+ error_report("Unknown combination of migration flags: 0x%x",
flags);
ret = -EINVAL;
}
diff --git a/migration/rdma.c b/migration/rdma.c
index 0340841..00eac34 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -273,7 +273,8 @@ static uint64_t htonll(uint64_t v)
return u.llv;
}
-static uint64_t ntohll(uint64_t v) {
+static uint64_t ntohll(uint64_t v)
+{
union { uint32_t lv[2]; uint64_t llv; } u;
u.llv = v;
return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
@@ -854,7 +855,7 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
*/
if (!verbs) {
int num_devices, x;
- struct ibv_device ** dev_list = ibv_get_device_list(&num_devices);
+ struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
bool roce_found = false;
bool ib_found = false;
@@ -1288,7 +1289,7 @@ const char *print_wrid(int wrid)
* workload information or LRU information is available, do not attempt to use
* this feature except for basic testing.
*/
-//#define RDMA_UNREGISTRATION_EXAMPLE
+/* #define RDMA_UNREGISTRATION_EXAMPLE */
/*
* Perform a non-optimized memory unregistration after every transfer
diff --git a/migration/savevm.c b/migration/savevm.c
index ff33e21..21ccba9 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -63,7 +63,7 @@
#include "qemu/bitmap.h"
#include "net/announce.h"
-const unsigned int postcopy_ram_discard_version = 0;
+const unsigned int postcopy_ram_discard_version;
/* Subcommands for QEMU_VM_COMMAND */
enum qemu_vm_cmd {
@@ -520,7 +520,7 @@ static const VMStateDescription vmstate_configuration = {
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
- .subsections = (const VMStateDescription*[]) {
+ .subsections = (const VMStateDescription *[]) {
&vmstate_target_page_bits,
&vmstate_capabilites,
&vmstate_uuid,
@@ -2010,6 +2010,49 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
return LOADVM_QUIT;
}
+/* Must be called with page_request_mutex held */
+static gboolean postcopy_sync_page_req(gpointer key, gpointer value,
+ gpointer data)
+{
+ MigrationIncomingState *mis = data;
+ void *host_addr = (void *) key;
+ ram_addr_t rb_offset;
+ RAMBlock *rb;
+ int ret;
+
+ rb = qemu_ram_block_from_host(host_addr, true, &rb_offset);
+ if (!rb) {
+ /*
+ * This should _never_ happen. However, be nice to a migrating VM and do
+ * not crash/assert. Post an error (note: intentionally not using *_once
+ * because we do want to see all the illegal addresses; and this can
+ * never be triggered by the guest so we're safe) and move on to the next.
+ */
+ error_report("%s: illegal host addr %p", __func__, host_addr);
+ /* Try the next entry */
+ return FALSE;
+ }
+
+ ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset);
+ if (ret) {
+ /* Please refer to above comment. */
+ error_report("%s: send rp message failed for addr %p",
+ __func__, host_addr);
+ return FALSE;
+ }
+
+ trace_postcopy_page_req_sync(host_addr);
+
+ return FALSE;
+}
+
+static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis)
+{
+ WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
+ g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis);
+ }
+}
+
static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
{
if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
@@ -2032,6 +2075,20 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
/* Tell source that "we are ready" */
migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE);
+ /*
+ * After a postcopy recovery, the source should have lost the postcopy
+ * queue, or potentially the requested pages could have been lost during
+ * the network down phase. Let's re-sync with the source VM by re-sending
+ * all the pending pages that we eagerly need, so these threads won't get
+ * blocked too long due to the recovery.
+ *
+ * Without this procedure, the faulted destination VM threads (waiting for
+ * page requests right before the postcopy is interrupted) can keep hanging
+ * until the pages are sent by the source during the background copying of
+ * pages, or another thread faulted on the same address accidentally.
+ */
+ migrate_send_rp_req_pages_pending(mis);
+
return 0;
}
diff --git a/migration/trace-events b/migration/trace-events
index 338f38b..75de500 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -49,6 +49,7 @@ vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
postcopy_pause_incoming(void) ""
postcopy_pause_incoming_continued(void) ""
+postcopy_page_req_sync(void *host_addr) "sync page req %p"
# vmstate.c
vmstate_load_field_error(const char *field, int ret) "field \"%s\" load failed, ret = %d"
@@ -162,6 +163,7 @@ postcopy_pause_return_path(void) ""
postcopy_pause_return_path_continued(void) ""
postcopy_pause_continued(void) ""
postcopy_start_set_run(void) ""
+postcopy_page_req_add(void *addr, int count) "new page req %p total %d"
source_return_path_thread_bad_end(void) ""
source_return_path_thread_end(void) ""
source_return_path_thread_entry(void) ""
@@ -272,6 +274,7 @@ postcopy_ram_incoming_cleanup_blocktime(uint64_t total) "total blocktime %" PRIu
postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64
postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64
postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s"
+postcopy_page_req_del(void *addr, int count) "resolved page req %p total %d"
get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u"
@@ -325,3 +328,16 @@ get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock n
calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32
skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64
find_page_matched(const char *idstr) "ramblock %s addr or size changed"
+
+# block.c
+migration_block_init_shared(const char *blk_device_name) "Start migration for %s with shared base image"
+migration_block_init_full(const char *blk_device_name) "Start full migration for %s"
+migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId64
+migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d"
+migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
+migration_block_save_complete(void) "Block migration completed"
+migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64
+
+# page_cache.c
+migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
+migration_pagecache_insert(void) "Error allocating page"
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 35e784c..e22d41d 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -420,32 +420,6 @@ const VMStateInfo vmstate_info_uint16_equal = {
.put = put_uint16,
};
-/* floating point */
-
-static int get_float64(QEMUFile *f, void *pv, size_t size,
- const VMStateField *field)
-{
- float64 *v = pv;
-
- *v = make_float64(qemu_get_be64(f));
- return 0;
-}
-
-static int put_float64(QEMUFile *f, void *pv, size_t size,
- const VMStateField *field, QJSON *vmdesc)
-{
- uint64_t *v = pv;
-
- qemu_put_be64(f, float64_val(*v));
- return 0;
-}
-
-const VMStateInfo vmstate_info_float64 = {
- .name = "float64",
- .get = get_float64,
- .put = put_float64,
-};
-
/* CPU_DoubleU type */
static int get_cpudouble(QEMUFile *f, void *pv, size_t size,
diff --git a/migration/vmstate.c b/migration/vmstate.c
index bafa890..e9d2aef 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -32,13 +32,13 @@ static int vmstate_n_elems(void *opaque, const VMStateField *field)
if (field->flags & VMS_ARRAY) {
n_elems = field->num;
} else if (field->flags & VMS_VARRAY_INT32) {
- n_elems = *(int32_t *)(opaque+field->num_offset);
+ n_elems = *(int32_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT32) {
- n_elems = *(uint32_t *)(opaque+field->num_offset);
+ n_elems = *(uint32_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT16) {
- n_elems = *(uint16_t *)(opaque+field->num_offset);
+ n_elems = *(uint16_t *)(opaque + field->num_offset);
} else if (field->flags & VMS_VARRAY_UINT8) {
- n_elems = *(uint8_t *)(opaque+field->num_offset);
+ n_elems = *(uint8_t *)(opaque + field->num_offset);
}
if (field->flags & VMS_MULTIPLY_ELEMENTS) {
@@ -54,7 +54,7 @@ static int vmstate_size(void *opaque, const VMStateField *field)
int size = field->size;
if (field->flags & VMS_VBUFFER) {
- size = *(int32_t *)(opaque+field->size_offset);
+ size = *(int32_t *)(opaque + field->size_offset);
if (field->flags & VMS_MULTIPLY) {
size *= field->size;
}