aboutsummaryrefslogtreecommitdiff
path: root/migration/ram.c
diff options
context:
space:
mode:
Diffstat (limited to 'migration/ram.c')
-rw-r--r--migration/ram.c208
1 files changed, 164 insertions, 44 deletions
diff --git a/migration/ram.c b/migration/ram.c
index 424df6d..2140785 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -48,7 +48,7 @@
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qerror.h"
#include "trace.h"
-#include "exec/ram_addr.h"
+#include "system/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
@@ -91,6 +91,36 @@
XBZRLECacheStats xbzrle_counters;
+/*
+ * This structure locates a specific location of a guest page. In QEMU,
+ * it's described in a tuple of (ramblock, offset).
+ */
+struct PageLocation {
+ RAMBlock *block;
+ unsigned long offset;
+};
+typedef struct PageLocation PageLocation;
+
+/**
+ * PageLocationHint: describes a hint to a page location
+ *
+ * @valid set if the hint is vaild and to be consumed
+ * @location: the hint content
+ *
+ * In postcopy preempt mode, the urgent channel may provide hints to the
+ * background channel, so that QEMU source can try to migrate whatever is
+ * right after the requested urgent pages.
+ *
+ * This is based on the assumption that the VM (already running on the
+ * destination side) tends to access the memory with spatial locality.
+ * This is also the default behavior of vanilla postcopy (preempt off).
+ */
+struct PageLocationHint {
+ bool valid;
+ PageLocation location;
+};
+typedef struct PageLocationHint PageLocationHint;
+
/* used by the search for pages to send */
struct PageSearchStatus {
/* The migration channel used for a specific host page */
@@ -395,6 +425,13 @@ struct RAMState {
* RAM migration.
*/
unsigned int postcopy_bmap_sync_requested;
+ /*
+ * Page hint during postcopy when preempt mode is on. Return path
+ * thread sets it, while background migration thread consumes it.
+ *
+ * Protected by @bitmap_mutex.
+ */
+ PageLocationHint page_hint;
};
typedef struct RAMState RAMState;
@@ -794,14 +831,22 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
bool ret;
/*
- * Clear dirty bitmap if needed. This _must_ be called before we
- * send any of the page in the chunk because we need to make sure
- * we can capture further page content changes when we sync dirty
- * log the next time. So as long as we are going to send any of
- * the page in the chunk we clear the remote dirty bitmap for all.
- * Clearing it earlier won't be a problem, but too late will.
+ * During the last stage (after source VM stopped), resetting the write
+ * protections isn't needed as we know there will be either (1) no
+ * further writes if migration will complete, or (2) migration fails
+ * at last then tracking isn't needed either.
*/
- migration_clear_memory_region_dirty_bitmap(rb, page);
+ if (!rs->last_stage) {
+ /*
+ * Clear dirty bitmap if needed. This _must_ be called before we
+ * send any of the page in the chunk because we need to make sure
+ * we can capture further page content changes when we sync dirty
+ * log the next time. So as long as we are going to send any of
+ * the page in the chunk we clear the remote dirty bitmap for all.
+ * Clearing it earlier won't be a problem, but too late will.
+ */
+ migration_clear_memory_region_dirty_bitmap(rb, page);
+ }
ret = test_and_clear_bit(page, rb->bmap);
if (ret) {
@@ -811,8 +856,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
return ret;
}
-static void dirty_bitmap_clear_section(MemoryRegionSection *section,
- void *opaque)
+static int dirty_bitmap_clear_section(MemoryRegionSection *section,
+ void *opaque)
{
const hwaddr offset = section->offset_within_region;
const hwaddr size = int128_get64(section->size);
@@ -831,6 +876,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section,
}
*cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
bitmap_clear(rb->bmap, start, npages);
+ return 0;
}
/*
@@ -1144,32 +1190,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
}
/*
- * @pages: the number of pages written by the control path,
- * < 0 - error
- * > 0 - number of pages written
- *
- * Return true if the pages has been saved, otherwise false is returned.
- */
-static bool control_save_page(PageSearchStatus *pss,
- ram_addr_t offset, int *pages)
-{
- int ret;
-
- ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
- TARGET_PAGE_SIZE);
- if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
- return false;
- }
-
- if (ret == RAM_SAVE_CONTROL_DELAYED) {
- *pages = 1;
- return true;
- }
- *pages = ret;
- return true;
-}
-
-/*
* directly send the page to the stream
*
* Returns the number of pages written.
@@ -1965,7 +1985,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
int res;
/* Hand over to RDMA first */
- if (control_save_page(pss, offset, &res)) {
+ if (migrate_rdma()) {
+ res = rdma_control_save_page(pss->pss_channel, pss->block->offset,
+ offset, TARGET_PAGE_SIZE);
+
+ if (res == RAM_SAVE_CONTROL_DELAYED) {
+ res = 1;
+ }
return res;
}
@@ -1976,9 +2002,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
}
}
- if (migrate_multifd()) {
- RAMBlock *block = pss->block;
- return ram_save_multifd_page(block, offset);
+ if (migrate_multifd() && !migration_in_postcopy()) {
+ return ram_save_multifd_page(pss->block, offset);
}
return ram_save_page(rs, pss);
@@ -2039,6 +2064,21 @@ static void pss_host_page_finish(PageSearchStatus *pss)
pss->host_page_start = pss->host_page_end = 0;
}
+static void ram_page_hint_update(RAMState *rs, PageSearchStatus *pss)
+{
+ PageLocationHint *hint = &rs->page_hint;
+
+ /* If there's a pending hint not consumed, don't bother */
+ if (hint->valid) {
+ return;
+ }
+
+ /* Provide a hint to the background stream otherwise */
+ hint->location.block = pss->block;
+ hint->location.offset = pss->page;
+ hint->valid = true;
+}
+
/*
* Send an urgent host page specified by `pss'. Need to be called with
* bitmap_mutex held.
@@ -2084,6 +2124,7 @@ out:
/* For urgent requests, flush immediately if sent */
if (sent) {
qemu_fflush(pss->pss_channel);
+ ram_page_hint_update(rs, pss);
}
return ret;
}
@@ -2171,6 +2212,30 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
return (res < 0 ? res : pages);
}
+static bool ram_page_hint_valid(RAMState *rs)
+{
+ /* There's only page hint during postcopy preempt mode */
+ if (!postcopy_preempt_active()) {
+ return false;
+ }
+
+ return rs->page_hint.valid;
+}
+
+static void ram_page_hint_collect(RAMState *rs, RAMBlock **block,
+ unsigned long *page)
+{
+ PageLocationHint *hint = &rs->page_hint;
+
+ assert(hint->valid);
+
+ *block = hint->location.block;
+ *page = hint->location.offset;
+
+ /* Mark the hint consumed */
+ hint->valid = false;
+}
+
/**
* ram_find_and_save_block: finds a dirty page and sends it to f
*
@@ -2187,6 +2252,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
static int ram_find_and_save_block(RAMState *rs)
{
PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
+ unsigned long next_page;
+ RAMBlock *next_block;
int pages = 0;
/* No dirty page as there is zero RAM */
@@ -2206,7 +2273,14 @@ static int ram_find_and_save_block(RAMState *rs)
rs->last_page = 0;
}
- pss_init(pss, rs->last_seen_block, rs->last_page);
+ if (ram_page_hint_valid(rs)) {
+ ram_page_hint_collect(rs, &next_block, &next_page);
+ } else {
+ next_block = rs->last_seen_block;
+ next_page = rs->last_page;
+ }
+
+ pss_init(pss, next_block, next_page);
while (true){
if (!get_queued_page(rs, pss)) {
@@ -2339,6 +2413,13 @@ static void ram_save_cleanup(void *opaque)
ram_state_cleanup(rsp);
}
+static void ram_page_hint_reset(PageLocationHint *hint)
+{
+ hint->location.block = NULL;
+ hint->location.offset = 0;
+ hint->valid = false;
+}
+
static void ram_state_reset(RAMState *rs)
{
int i;
@@ -2351,6 +2432,8 @@ static void ram_state_reset(RAMState *rs)
rs->last_page = 0;
rs->last_version = ram_list.version;
rs->xbzrle_started = false;
+
+ ram_page_hint_reset(&rs->page_hint);
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -3598,7 +3681,9 @@ static int ram_load_cleanup(void *opaque)
RAMBlock *rb;
RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
- qemu_ram_block_writeback(rb);
+ if (memory_region_is_nonvolatile(rb->mr)) {
+ qemu_ram_block_writeback(rb);
+ }
}
xbzrle_load_cleanup();
@@ -3963,8 +4048,6 @@ static void parse_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block,
/* Skip pages array */
qemu_set_offset(f, block->pages_offset + length, SEEK_SET);
-
- return;
}
static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length)
@@ -4420,6 +4503,42 @@ static int ram_resume_prepare(MigrationState *s, void *opaque)
return 0;
}
+static bool ram_save_postcopy_prepare(QEMUFile *f, void *opaque, Error **errp)
+{
+ int ret;
+
+ if (migrate_multifd()) {
+ /*
+ * When multifd is enabled, source QEMU needs to make sure all the
+ * pages queued before postcopy starts have been flushed.
+ *
+ * The load of these pages must happen before switching to postcopy.
+ * It's because loading of guest pages (so far) in multifd recv
+ * threads is still non-atomic, so the load cannot happen with vCPUs
+ * running on the destination side.
+ *
+ * This flush and sync will guarantee that those pages are loaded
+ * _before_ postcopy starts on the destination. The rationale is,
+ * this happens before VM stops (and before source QEMU sends all
+ * the rest of the postcopy messages). So when the destination QEMU
+ * receives the postcopy messages, it must have received the sync
+ * message on the main channel (either RAM_SAVE_FLAG_MULTIFD_FLUSH,
+ * or RAM_SAVE_FLAG_EOS), and such message would guarantee that
+ * all previous guest pages queued in the multifd channels are
+ * completely loaded.
+ */
+ ret = multifd_ram_flush_and_sync(f);
+ if (ret < 0) {
+ error_setg(errp, "%s: multifd flush and sync failed", __func__);
+ return false;
+ }
+ }
+
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+ return true;
+}
+
void postcopy_preempt_shutdown_file(MigrationState *s)
{
qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
@@ -4439,6 +4558,7 @@ static SaveVMHandlers savevm_ram_handlers = {
.load_setup = ram_load_setup,
.load_cleanup = ram_load_cleanup,
.resume_prepare = ram_resume_prepare,
+ .save_postcopy_prepare = ram_save_postcopy_prepare,
};
static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,