Diffstat (limited to 'migration')
-rw-r--r--  migration/colo.c            2
-rw-r--r--  migration/migration.c      62
-rw-r--r--  migration/migration.h       9
-rw-r--r--  migration/postcopy-ram.c   48
-rw-r--r--  migration/ram.c           231
-rw-r--r--  migration/rdma.c           18
-rw-r--r--  migration/savevm.c        152
-rw-r--r--  migration/socket.c         11
8 files changed, 471 insertions(+), 62 deletions(-)
diff --git a/migration/colo.c b/migration/colo.c
index 398b239..5ba610d 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -872,8 +872,8 @@ out:
/* Must be called after failover BH is completed */
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
+ mis->to_src_file = NULL;
}
- migration_incoming_disable_colo();
rcu_unregister_thread();
return NULL;
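
Note on the colo.c hunk: clearing to_src_file right after qemu_fclose() makes the cleanup idempotent, so any later path that tests the pointer before closing cannot double-close the handle. The idiom, in minimal sketch form:

    /* Close-once idiom: test, close, then NULL the pointer. */
    if (mis->to_src_file) {
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;   /* later cleanup sees NULL and skips */
    }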
diff --git a/migration/migration.c b/migration/migration.c
index c39d305..df6fd8e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -31,6 +31,8 @@
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
@@ -126,6 +128,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
int new_state);
+static void migrate_fd_cancel(MigrationState *s);
void migration_object_init(void)
{
@@ -167,8 +170,13 @@ void migration_object_init(void)
}
}
-void migration_object_finalize(void)
+void migration_shutdown(void)
{
+ /*
+ * Cancel the current migration - that will (eventually)
+ * stop the migration using this structure
+ */
+ migrate_fd_cancel(current_migration);
object_unref(OBJECT(current_migration));
}
@@ -207,6 +215,11 @@ void migration_incoming_state_destroy(void)
}
qemu_event_reset(&mis->main_thread_load_event);
+
+ if (mis->socket_address_list) {
+ qapi_free_SocketAddressList(mis->socket_address_list);
+ mis->socket_address_list = NULL;
+ }
}
static void migrate_generate_event(int new_state)
@@ -322,6 +335,17 @@ void migration_incoming_enable_colo(void)
migration_colo_enabled = true;
}
+void migrate_add_address(SocketAddress *address)
+{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ SocketAddressList *addrs;
+
+ addrs = g_new0(SocketAddressList, 1);
+ addrs->next = mis->socket_address_list;
+ mis->socket_address_list = addrs;
+ addrs->value = QAPI_CLONE(SocketAddress, address);
+}
+
void qemu_start_incoming_migration(const char *uri, Error **errp)
{
const char *p;
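
migrate_add_address() prepends a node to the list and stores a deep copy made by the QAPI clone visitor, so the caller keeps ownership of the SocketAddress it passed in. A hedged usage sketch follows; the socket_parse() origin of the address is illustrative, not the actual call site:

    SocketAddress *addr = socket_parse("127.0.0.1:4444", &error_abort);

    migrate_add_address(addr);       /* list stores a QAPI_CLONE()d copy */
    qapi_free_SocketAddress(addr);   /* safe: the caller still owns addr */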
@@ -393,6 +417,9 @@ static void process_incoming_migration_bh(void *opaque)
} else {
runstate_set(RUN_STATE_PAUSED);
}
+ } else if (migration_incoming_colo_enabled()) {
+ migration_incoming_disable_colo();
+ vm_start();
} else {
runstate_set(global_state_get_runstate());
}
@@ -989,6 +1016,11 @@ static bool migrate_caps_check(bool *cap_list,
error_setg(errp, "Postcopy is not supported");
return false;
}
+
+ if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
+ error_setg(errp, "Postcopy is not compatible with ignore-shared");
+ return false;
+ }
}
return true;
@@ -998,6 +1030,12 @@ static void fill_destination_migration_info(MigrationInfo *info)
{
MigrationIncomingState *mis = migration_incoming_get_current();
+ if (mis->socket_address_list) {
+ info->has_socket_address = true;
+ info->socket_address =
+ QAPI_CLONE(SocketAddressList, mis->socket_address_list);
+ }
+
switch (mis->state) {
case MIGRATION_STATUS_NONE:
return;
@@ -2068,6 +2106,15 @@ bool migrate_dirty_bitmaps(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
}
+bool migrate_ignore_shared(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
+}
+
bool migrate_use_events(void)
{
MigrationState *s;
@@ -2911,6 +2958,13 @@ static MigThrError postcopy_pause(MigrationState *s)
static MigThrError migration_detect_error(MigrationState *s)
{
int ret;
+ int state = s->state;
+
+ if (state == MIGRATION_STATUS_CANCELLING ||
+ state == MIGRATION_STATUS_CANCELLED) {
+ /* End the migration, but don't set the state to FAILED */
+ return MIG_THR_ERR_FATAL;
+ }
/* Try to detect any file errors */
ret = qemu_file_get_error(s->to_dst_file);
@@ -2920,7 +2974,7 @@ static MigThrError migration_detect_error(MigrationState *s)
return MIG_THR_ERR_NONE;
}
- if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
+ if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
/*
* For postcopy, we allow the network to be down for a
* while. After that, it can be continued by a
@@ -2932,7 +2986,7 @@ static MigThrError migration_detect_error(MigrationState *s)
* For precopy (or postcopy with error outside IO), we fail
* with no time.
*/
- migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
trace_migration_thread_file_err();
/* Time to stop the migration, now. */
@@ -3127,6 +3181,7 @@ static void *migration_thread(void *opaque)
rcu_register_thread();
+ object_ref(OBJECT(s));
s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
qemu_savevm_state_header(s->to_dst_file);
@@ -3223,6 +3278,7 @@ static void *migration_thread(void *opaque)
trace_migration_thread_after_loop();
migration_iteration_finish(s);
+ object_unref(OBJECT(s));
rcu_unregister_thread();
return NULL;
}
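
The object_ref()/object_unref() pair pins the MigrationState for the whole lifetime of migration_thread, which matters now that migration_shutdown() drops the global reference while a cancelled migration may still be winding down. The pattern, in sketch form (not the full thread body):

    static void *migration_thread_skeleton(void *opaque)
    {
        MigrationState *s = opaque;

        object_ref(OBJECT(s));    /* keep s alive while the thread runs */
        /* ... iterate RAM and device state ... */
        object_unref(OBJECT(s));  /* may drop the last reference if
                                   * migration_shutdown() already ran */
        return NULL;
    }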
diff --git a/migration/migration.h b/migration/migration.h
index c99154d..99e99e5 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -84,6 +84,9 @@ struct MigrationIncomingState {
bool postcopy_recover_triggered;
QemuSemaphore postcopy_pause_sem_dst;
QemuSemaphore postcopy_pause_sem_fault;
+
+ /* List of listening socket addresses */
+ SocketAddressList *socket_address_list;
};
MigrationIncomingState *migration_incoming_get_current(void);
@@ -265,6 +268,7 @@ bool migrate_release_ram(void);
bool migrate_postcopy_ram(void);
bool migrate_zero_blocks(void);
bool migrate_dirty_bitmaps(void);
+bool migrate_ignore_shared(void);
bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
@@ -304,9 +308,12 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
void dirty_bitmap_mig_before_vm_start(void);
void init_dirty_bitmap_incoming_migration(void);
+void migrate_add_address(SocketAddress *address);
+
+int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
#define qemu_ram_foreach_block \
- #warning "Use qemu_ram_foreach_block_migratable in migration code"
+ #warning "Use foreach_not_ignored_block in migration code"
void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
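
Throughout this series the RAMBlock iterator callbacks move from a (name, host_addr, offset, length) signature to receiving the RAMBlock itself, with qemu_ram_get_*() accessors recovering the fields on demand. A minimal sketch of a new-style callback, assuming RAMBlockIterFunc is now int (*)(RAMBlock *, void *):

    /* Count target pages across all not-ignored blocks (sketch). */
    static int count_pages(RAMBlock *rb, void *opaque)
    {
        uint64_t *pages = opaque;

        *pages += qemu_ram_get_used_length(rb) / qemu_ram_pagesize(rb);
        return 0;                 /* non-zero would abort the iteration */
    }

    static uint64_t total_pages(void)
    {
        uint64_t total = 0;

        foreach_not_ignored_block(count_pages, &total);
        return total;
    }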
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index fa09dba..e2aa57a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -319,10 +319,10 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis)
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
+ const char *block_name = qemu_ram_get_idstr(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
size_t pagesize = qemu_ram_pagesize(rb);
if (length % pagesize) {
@@ -374,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
}
/* We don't support postcopy with shared RAM yet */
- if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
+ if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
goto out;
}
@@ -443,9 +443,12 @@ out:
* must be done right at the start prior to pre-copy.
* opaque should be the MIS.
*/
-static int init_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int init_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_init_range(block_name, host_addr, offset, length);
/*
@@ -465,9 +468,12 @@ static int init_range(const char *block_name, void *host_addr,
* At the end of migration, undo the effects of init_range
* opaque should be the MIS.
*/
-static int cleanup_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int cleanup_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
MigrationIncomingState *mis = opaque;
struct uffdio_range range_struct;
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
@@ -502,7 +508,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
*/
int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
+ if (foreach_not_ignored_block(init_range, NULL)) {
return -1;
}
@@ -544,7 +550,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
return -1;
}
- if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
+ if (foreach_not_ignored_block(cleanup_range, mis)) {
return -1;
}
@@ -586,9 +592,12 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
/*
* Disable huge pages on an area
*/
-static int nhp_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int nhp_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
/*
@@ -608,7 +617,7 @@ static int nhp_range(const char *block_name, void *host_addr,
*/
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
+ if (foreach_not_ignored_block(nhp_range, mis)) {
return -1;
}
@@ -619,22 +628,20 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
/*
* Mark the given area of RAM as requiring notification to unwritten areas
- * Used as a callback on qemu_ram_foreach_migratable_block.
+ * Used as a callback on foreach_not_ignored_block.
* rb: RAMBlock to mark (its whole used length is registered)
* opaque: MigrationIncomingState pointer
* Returns 0 on success
*/
-static int ram_block_enable_notify(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length,
- void *opaque)
+static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
{
MigrationIncomingState *mis = opaque;
struct uffdio_register reg_struct;
- reg_struct.range.start = (uintptr_t)host_addr;
- reg_struct.range.len = length;
+ reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
+ reg_struct.range.len = qemu_ram_get_used_length(rb);
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
/* Now tell our userfault_fd that it's responsible for this area */
@@ -647,7 +654,6 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
return -1;
}
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
qemu_ram_set_uf_zeroable(rb);
}
@@ -1116,7 +1122,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
mis->have_fault_thread = true;
/* Mark so that we get notified of accesses to unwritten areas */
- if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
+ if (foreach_not_ignored_block(ram_block_enable_notify, mis)) {
error_report("ram_block_enable_notify failed");
return -1;
}
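
For readers unfamiliar with the kernel side: ram_block_enable_notify() ultimately drives the userfaultfd API sketched below. This is a standalone sketch of that sequence (error handling trimmed, plain Linux API rather than QEMU code):

    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int register_missing(void *start, size_t len)
    {
        int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        struct uffdio_register reg = {
            .range = { .start = (unsigned long)start, .len = len },
            .mode  = UFFDIO_REGISTER_MODE_MISSING, /* fault on first touch */
        };

        if (ufd < 0 || ioctl(ufd, UFFDIO_API, &api) < 0 ||
            ioctl(ufd, UFFDIO_REGISTER, &reg) < 0) {
            return -1;
        }
        /* reg.ioctls now advertises what the range supports; the hunk
         * above tests it for _UFFDIO_ZEROPAGE. */
        return ufd;
    }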
diff --git a/migration/ram.c b/migration/ram.c
index 59191c1..35bd621 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -159,18 +159,44 @@ out:
return ret;
}
+static bool ramblock_is_ignored(RAMBlock *block)
+{
+ return !qemu_ram_is_migratable(block) ||
+ (migrate_ignore_shared() && qemu_ram_is_shared(block));
+}
+
/* Should be holding either ram_list.mutex, or the RCU lock. */
+#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \
+ INTERNAL_RAMBLOCK_FOREACH(block) \
+ if (ramblock_is_ignored(block)) {} else
+
#define RAMBLOCK_FOREACH_MIGRATABLE(block) \
INTERNAL_RAMBLOCK_FOREACH(block) \
if (!qemu_ram_is_migratable(block)) {} else
#undef RAMBLOCK_FOREACH
+int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
+{
+ RAMBlock *block;
+ int ret = 0;
+
+ rcu_read_lock();
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ ret = func(block, opaque);
+ if (ret) {
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static void ramblock_recv_map_init(void)
{
RAMBlock *rb;
- RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
assert(!rb->receivedmap);
rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
}
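
RAMBLOCK_FOREACH_NOT_IGNORED keeps the for-loop look at the call site by chaining a filter through the empty-if/else trick: ignored blocks fall into the empty branch and the caller's body becomes the else. Roughly, assuming INTERNAL_RAMBLOCK_FOREACH is the underlying RCU-safe list walk, a use site expands like this:

    /* RAMBLOCK_FOREACH_NOT_IGNORED(block) { body; }  expands roughly to: */
    INTERNAL_RAMBLOCK_FOREACH(block)
        if (ramblock_is_ignored(block)) {
            /* empty: ignored blocks are skipped */
        } else {
            /* body runs only for migratable, not-ignored blocks */
        }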
@@ -290,6 +316,8 @@ struct RAMState {
uint32_t last_version;
/* We are in the first round */
bool ram_bulk_stage;
+ /* The free page optimization is enabled */
+ bool fpo_enabled;
/* How many times we have dirty too many pages */
int dirty_rate_high_cnt;
/* these variables are used for bitmap sync */
@@ -316,7 +344,7 @@ struct RAMState {
uint64_t target_page_count;
/* number of dirty bits in the bitmap */
uint64_t migration_dirty_pages;
- /* protects modification of the bitmap */
+ /* Protects modification of the bitmap and migration dirty pages */
QemuMutex bitmap_mutex;
/* The RAMBlock used in the last src_page_requests */
RAMBlock *last_req_rb;
@@ -328,6 +356,41 @@ typedef struct RAMState RAMState;
static RAMState *ram_state;
+static NotifierWithReturnList precopy_notifier_list;
+
+void precopy_infrastructure_init(void)
+{
+ notifier_with_return_list_init(&precopy_notifier_list);
+}
+
+void precopy_add_notifier(NotifierWithReturn *n)
+{
+ notifier_with_return_list_add(&precopy_notifier_list, n);
+}
+
+void precopy_remove_notifier(NotifierWithReturn *n)
+{
+ notifier_with_return_remove(n);
+}
+
+int precopy_notify(PrecopyNotifyReason reason, Error **errp)
+{
+ PrecopyNotifyData pnd;
+ pnd.reason = reason;
+ pnd.errp = errp;
+
+ return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
+}
+
+void precopy_enable_free_page_optimization(void)
+{
+ if (!ram_state) {
+ return;
+ }
+
+ ram_state->fpo_enabled = true;
+}
+
uint64_t ram_bytes_remaining(void)
{
return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
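
Subscribers hook the new precopy notifier chain with a NotifierWithReturn; a non-zero return propagates an error through precopy_notify(). A hedged sketch of a subscriber, loosely modelled on how a free-page-hinting device might use it (all names here are illustrative):

    static int my_precopy_cb(NotifierWithReturn *n, void *opaque)
    {
        PrecopyNotifyData *pnd = opaque;

        switch (pnd->reason) {
        case PRECOPY_NOTIFY_SETUP:
            precopy_enable_free_page_optimization();
            break;
        case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
            /* pause free-page reporting while the bitmap syncs */
            break;
        default:
            break;
        }
        return 0;    /* non-zero would be reported via pnd->errp */
    }

    static NotifierWithReturn my_notifier = { .notify = my_precopy_cb };

    static void my_device_realize(void)
    {
        precopy_add_notifier(&my_notifier);
    }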
@@ -1545,11 +1608,15 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
unsigned long *bitmap = rb->bmap;
unsigned long next;
- if (!qemu_ram_is_migratable(rb)) {
+ if (ramblock_is_ignored(rb)) {
return size;
}
- if (rs->ram_bulk_stage && start > 0) {
+ /*
+ * When the free page optimization is enabled, we need to check the bitmap
+ * to send the non-free pages rather than all the pages in the bulk stage.
+ */
+ if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
next = start + 1;
} else {
next = find_next_bit(bitmap, size, start);
@@ -1564,11 +1631,14 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
{
bool ret;
+ qemu_mutex_lock(&rs->bitmap_mutex);
ret = test_and_clear_bit(page, rb->bmap);
if (ret) {
rs->migration_dirty_pages--;
}
+ qemu_mutex_unlock(&rs->bitmap_mutex);
+
return ret;
}
@@ -1594,7 +1664,7 @@ uint64_t ram_pagesize_summary(void)
RAMBlock *block;
uint64_t summary = 0;
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
summary |= block->page_size;
}
@@ -1664,7 +1734,7 @@ static void migration_bitmap_sync(RAMState *rs)
qemu_mutex_lock(&rs->bitmap_mutex);
rcu_read_lock();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
migration_bitmap_sync_range(rs, block, 0, block->used_length);
}
ram_counters.remaining = ram_bytes_remaining();
@@ -1712,6 +1782,25 @@ static void migration_bitmap_sync(RAMState *rs)
}
}
+static void migration_bitmap_sync_precopy(RAMState *rs)
+{
+ Error *local_err = NULL;
+
+ /*
+ * The current notifier usage is just an optimization for migration, so we
+ * don't stop the normal migration process in the error case.
+ */
+ if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
+ error_report_err(local_err);
+ }
+
+ migration_bitmap_sync(rs);
+
+ if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
+ error_report_err(local_err);
+ }
+}
+
/**
* save_zero_page_to_file: send the zero page to the file
*
@@ -2388,7 +2477,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
size_t pagesize_bits =
qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
- if (!qemu_ram_is_migratable(pss->block)) {
+ if (ramblock_is_ignored(pss->block)) {
error_report("block %s should not be migrated !", pss->block->idstr);
return 0;
}
@@ -2486,19 +2575,30 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
}
}
-uint64_t ram_bytes_total(void)
+static uint64_t ram_bytes_total_common(bool count_ignored)
{
RAMBlock *block;
uint64_t total = 0;
rcu_read_lock();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
- total += block->used_length;
+ if (count_ignored) {
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ total += block->used_length;
+ }
+ } else {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ total += block->used_length;
+ }
}
rcu_read_unlock();
return total;
}
+uint64_t ram_bytes_total(void)
+{
+ return ram_bytes_total_common(false);
+}
+
static void xbzrle_load_setup(void)
{
XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
@@ -2547,7 +2647,7 @@ static void ram_save_cleanup(void *opaque)
*/
memory_global_dirty_log_stop();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
g_free(block->bmap);
block->bmap = NULL;
g_free(block->unsentmap);
@@ -2566,6 +2666,7 @@ static void ram_state_reset(RAMState *rs)
rs->last_page = 0;
rs->last_version = ram_list.version;
rs->ram_bulk_stage = true;
+ rs->fpo_enabled = false;
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -2610,7 +2711,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
struct RAMBlock *block;
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
unsigned long *bitmap = block->bmap;
unsigned long range = block->used_length >> TARGET_PAGE_BITS;
unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
@@ -2688,7 +2789,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
struct RAMBlock *block;
int ret;
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
PostcopyDiscardState *pds =
postcopy_discard_send_init(ms, block->idstr);
@@ -2896,7 +2997,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
rs->last_sent_block = NULL;
rs->last_page = 0;
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
unsigned long *bitmap = block->bmap;
unsigned long *unsentmap = block->unsentmap;
@@ -3062,7 +3163,7 @@ static void ram_list_init_bitmaps(void)
/* Skip setting bitmap if there is no RAM */
if (ram_bytes_total()) {
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
pages = block->max_length >> TARGET_PAGE_BITS;
block->bmap = bitmap_new(pages);
bitmap_set(block->bmap, 0, pages);
@@ -3083,7 +3184,7 @@ static void ram_init_bitmaps(RAMState *rs)
ram_list_init_bitmaps();
memory_global_dirty_log_start();
- migration_bitmap_sync(rs);
+ migration_bitmap_sync_precopy(rs);
rcu_read_unlock();
qemu_mutex_unlock_ramlist();
@@ -3117,7 +3218,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
* about dirty page logging as well.
*/
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
pages += bitmap_count_one(block->bmap,
block->used_length >> TARGET_PAGE_BITS);
}
@@ -3142,6 +3243,53 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
}
/*
+ * This function clears bits of the free pages reported by the caller from the
+ * migration dirty bitmap. @addr is the host address corresponding to the
+ * start of the contiguous guest free pages, and @len is the total bytes of
+ * those pages.
+ */
+void qemu_guest_free_page_hint(void *addr, size_t len)
+{
+ RAMBlock *block;
+ ram_addr_t offset;
+ size_t used_len, start, npages;
+ MigrationState *s = migrate_get_current();
+
+ /* This function is currently expected to be used during live migration */
+ if (!migration_is_setup_or_active(s->state)) {
+ return;
+ }
+
+ for (; len > 0; len -= used_len, addr += used_len) {
+ block = qemu_ram_block_from_host(addr, false, &offset);
+ if (unlikely(!block || offset >= block->used_length)) {
+ /*
+ * The implementation might not support RAMBlock resize during
+ * live migration, but it could happen in theory with future
+ * updates. So we add a check here to capture that case.
+ */
+ error_report_once("%s unexpected error", __func__);
+ return;
+ }
+
+ if (len <= block->used_length - offset) {
+ used_len = len;
+ } else {
+ used_len = block->used_length - offset;
+ }
+
+ start = offset >> TARGET_PAGE_BITS;
+ npages = used_len >> TARGET_PAGE_BITS;
+
+ qemu_mutex_lock(&ram_state->bitmap_mutex);
+ ram_state->migration_dirty_pages -=
+ bitmap_count_one_with_offset(block->bmap, start, npages);
+ bitmap_clear(block->bmap, start, npages);
+ qemu_mutex_unlock(&ram_state->bitmap_mutex);
+ }
+}
+
+/*
* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
* start to become numerous it will be necessary to reduce the
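
A worked example of the clearing arithmetic in qemu_guest_free_page_hint(), assuming 4 KiB target pages: a hint that lands at block offset 0x5000 with used_len 0x3000 clears dirty bits 5 through 7, and migration_dirty_pages drops by however many of those bits were still set:

    /* Sketch, assuming TARGET_PAGE_BITS == 12 (4 KiB pages). */
    ram_addr_t offset = 0x5000;          /* hint start within the RAMBlock */
    size_t used_len   = 0x3000;          /* bytes of the hint in this block */
    size_t start      = offset >> 12;    /* first bitmap bit: page 5 */
    size_t npages     = used_len >> 12;  /* pages to clear: 3 (pages 5..7) */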
@@ -3176,7 +3324,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
rcu_read_lock();
- qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+ qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
RAMBLOCK_FOREACH_MIGRATABLE(block) {
qemu_put_byte(f, strlen(block->idstr));
@@ -3185,6 +3333,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
qemu_put_be64(f, block->page_size);
}
+ if (migrate_ignore_shared()) {
+ qemu_put_be64(f, block->mr->addr);
+ qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0);
+ }
}
rcu_read_unlock();
@@ -3312,7 +3464,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
rcu_read_lock();
if (!migration_in_postcopy()) {
- migration_bitmap_sync(rs);
+ migration_bitmap_sync_precopy(rs);
}
ram_control_before_iterate(f, RAM_CONTROL_FINISH);
@@ -3361,7 +3513,7 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
remaining_size < max_size) {
qemu_mutex_lock_iothread();
rcu_read_lock();
- migration_bitmap_sync(rs);
+ migration_bitmap_sync_precopy(rs);
rcu_read_unlock();
qemu_mutex_unlock_iothread();
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
@@ -3443,7 +3595,7 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
return NULL;
}
- if (!qemu_ram_is_migratable(block)) {
+ if (ramblock_is_ignored(block)) {
error_report("block %s should not be migrated!", id);
return NULL;
}
@@ -3698,7 +3850,7 @@ int colo_init_ram_cache(void)
RAMBlock *block;
rcu_read_lock();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
block->colo_cache = qemu_anon_ram_alloc(block->used_length,
NULL,
false);
@@ -3719,7 +3871,7 @@ int colo_init_ram_cache(void)
if (ram_bytes_total()) {
RAMBlock *block;
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
block->bmap = bitmap_new(pages);
@@ -3734,7 +3886,7 @@ int colo_init_ram_cache(void)
out_locked:
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
if (block->colo_cache) {
qemu_anon_ram_free(block->colo_cache, block->used_length);
block->colo_cache = NULL;
@@ -3751,14 +3903,14 @@ void colo_release_ram_cache(void)
RAMBlock *block;
memory_global_dirty_log_stop();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
g_free(block->bmap);
block->bmap = NULL;
}
rcu_read_lock();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
if (block->colo_cache) {
qemu_anon_ram_free(block->colo_cache, block->used_length);
block->colo_cache = NULL;
@@ -3794,7 +3946,7 @@ static int ram_load_cleanup(void *opaque)
{
RAMBlock *rb;
- RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
if (ramblock_is_pmem(rb)) {
pmem_persist(rb->host, rb->used_length);
}
@@ -3803,7 +3955,7 @@ static int ram_load_cleanup(void *opaque)
xbzrle_load_cleanup();
compress_threads_load_cleanup();
- RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
g_free(rb->receivedmap);
rb->receivedmap = NULL;
}
@@ -4003,7 +4155,7 @@ static void colo_flush_ram_cache(void)
memory_global_dirty_log_sync();
rcu_read_lock();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
}
rcu_read_unlock();
@@ -4146,6 +4298,23 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
}
}
+ if (migrate_ignore_shared()) {
+ hwaddr addr = qemu_get_be64(f);
+ bool ignored = qemu_get_byte(f);
+ if (ignored != ramblock_is_ignored(block)) {
+ error_report("RAM block %s should %s be migrated",
+ id, ignored ? "" : "not");
+ ret = -EINVAL;
+ }
+ if (ramblock_is_ignored(block) &&
+ block->mr->addr != addr) {
+ error_report("Mismatched GPAs for block %s "
+ "%" PRId64 "!= %" PRId64,
+ id, (uint64_t)addr,
+ (uint64_t)block->mr->addr);
+ ret = -EINVAL;
+ }
+ }
ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
block->idstr);
} else {
@@ -4216,7 +4385,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
static bool ram_has_postcopy(void *opaque)
{
RAMBlock *rb;
- RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
if (ramblock_is_pmem(rb)) {
info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
"is not supported now!", rb->idstr, rb->host);
@@ -4236,7 +4405,7 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
trace_ram_dirty_bitmap_sync_start();
- RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
qemu_savevm_send_recv_bitmap(file, block->idstr);
trace_ram_dirty_bitmap_request(block->idstr);
ramblock_count++;
diff --git a/migration/rdma.c b/migration/rdma.c
index 54a3c11..63c118a 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -624,9 +624,12 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name,
* in advance, before the migration starts. This tells us where the RAM blocks
* are so that we can register them individually.
*/
-static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
- ram_addr_t block_offset, ram_addr_t length, void *opaque)
+static int qemu_rdma_init_one_block(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t block_offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
}
@@ -641,7 +644,7 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
assert(rdma->blockmap == NULL);
memset(local, 0, sizeof *local);
- qemu_ram_foreach_migratable_block(qemu_rdma_init_one_block, rdma);
+ foreach_not_ignored_block(qemu_rdma_init_one_block, rdma);
trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
rdma->dest_blocks = g_new0(RDMADestBlock,
rdma->local_ram_blocks.nb_blocks);
@@ -2321,7 +2324,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
rdma->connected = false;
}
- qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL);
+ if (rdma->channel) {
+ qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL);
+ }
g_free(rdma->dest_blocks);
rdma->dest_blocks = NULL;
@@ -3611,13 +3616,16 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
}
chunk_start = ram_chunk_start(block, chunk);
chunk_end = ram_chunk_end(block, chunk + reg->chunks);
+ /* avoid "-Waddress-of-packed-member" warning */
+ uint32_t tmp_rkey = 0;
if (qemu_rdma_register_and_get_keys(rdma, block,
- (uintptr_t)host_addr, NULL, &reg_result->rkey,
+ (uintptr_t)host_addr, NULL, &tmp_rkey,
chunk, chunk_start, chunk_end)) {
error_report("cannot get rkey");
ret = -EINVAL;
goto out;
}
+ reg_result->rkey = tmp_rkey;
reg_result->host_addr = (uintptr_t)block->local_host_addr;
diff --git a/migration/savevm.c b/migration/savevm.c
index b3868f7..1415001 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -57,6 +57,7 @@
#include "sysemu/replay.h"
#include "qjson.h"
#include "migration/colo.h"
+#include "qemu/bitmap.h"
#include "net/announce.h"
const unsigned int postcopy_ram_discard_version = 0;
@@ -249,6 +250,8 @@ typedef struct SaveState {
uint32_t len;
const char *name;
uint32_t target_page_bits;
+ uint32_t caps_count;
+ MigrationCapability *capabilities;
} SaveState;
static SaveState savevm_state = {
@@ -256,15 +259,51 @@ static SaveState savevm_state = {
.global_section_id = 0,
};
+static bool should_validate_capability(int capability)
+{
+ assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
+ /* Validate only new capabilities to keep compatibility. */
+ switch (capability) {
+ case MIGRATION_CAPABILITY_X_IGNORE_SHARED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint32_t get_validatable_capabilities_count(void)
+{
+ MigrationState *s = migrate_get_current();
+ uint32_t result = 0;
+ int i;
+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+ if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ result++;
+ }
+ }
+ return result;
+}
+
static int configuration_pre_save(void *opaque)
{
SaveState *state = opaque;
const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+ MigrationState *s = migrate_get_current();
+ int i, j;
state->len = strlen(current_name);
state->name = current_name;
state->target_page_bits = qemu_target_page_bits();
+ state->caps_count = get_validatable_capabilities_count();
+ state->capabilities = g_renew(MigrationCapability, state->capabilities,
+ state->caps_count);
+ for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+ if (should_validate_capability(i) && s->enabled_capabilities[i]) {
+ state->capabilities[j++] = i;
+ }
+ }
+
return 0;
}
@@ -280,6 +319,40 @@ static int configuration_pre_load(void *opaque)
return 0;
}
+static bool configuration_validate_capabilities(SaveState *state)
+{
+ bool ret = true;
+ MigrationState *s = migrate_get_current();
+ unsigned long *source_caps_bm;
+ int i;
+
+ source_caps_bm = bitmap_new(MIGRATION_CAPABILITY__MAX);
+ for (i = 0; i < state->caps_count; i++) {
+ MigrationCapability capability = state->capabilities[i];
+ set_bit(capability, source_caps_bm);
+ }
+
+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+ bool source_state, target_state;
+ if (!should_validate_capability(i)) {
+ continue;
+ }
+ source_state = test_bit(i, source_caps_bm);
+ target_state = s->enabled_capabilities[i];
+ if (source_state != target_state) {
+ error_report("Capability %s is %s, but received capability is %s",
+ MigrationCapability_str(i),
+ target_state ? "on" : "off",
+ source_state ? "on" : "off");
+ ret = false;
+ /* Don't break here to report all failed capabilities */
+ }
+ }
+
+ g_free(source_caps_bm);
+ return ret;
+}
+
static int configuration_post_load(void *opaque, int version_id)
{
SaveState *state = opaque;
@@ -297,9 +370,53 @@ static int configuration_post_load(void *opaque, int version_id)
return -EINVAL;
}
+ if (!configuration_validate_capabilities(state)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int get_capability(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field)
+{
+ MigrationCapability *capability = pv;
+ char capability_str[UINT8_MAX + 1];
+ uint8_t len;
+ int i;
+
+ len = qemu_get_byte(f);
+ qemu_get_buffer(f, (uint8_t *)capability_str, len);
+ capability_str[len] = '\0';
+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
+ if (!strcmp(MigrationCapability_str(i), capability_str)) {
+ *capability = i;
+ return 0;
+ }
+ }
+ error_report("Received unknown capability %s", capability_str);
+ return -EINVAL;
+}
+
+static int put_capability(QEMUFile *f, void *pv, size_t size,
+ const VMStateField *field, QJSON *vmdesc)
+{
+ MigrationCapability *capability = pv;
+ const char *capability_str = MigrationCapability_str(*capability);
+ size_t len = strlen(capability_str);
+ assert(len <= UINT8_MAX);
+
+ qemu_put_byte(f, len);
+ qemu_put_buffer(f, (uint8_t *)capability_str, len);
return 0;
}
+static const VMStateInfo vmstate_info_capability = {
+ .name = "capability",
+ .get = get_capability,
+ .put = put_capability,
+};
+
/* The target-page-bits subsection is present only if the
* target page size is not the same as the default (ie the
* minimum page size for a variable-page-size guest CPU).
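
On the wire, each validated capability is a one-byte length followed by the unterminated QAPI name, per get_capability()/put_capability() above. A sketch of the bytes put_capability() emits for x-ignore-shared (a 15-byte name):

    #include <stdint.h>
    #include <string.h>

    static void encode_capability_example(uint8_t *buf)
    {
        const char *name = "x-ignore-shared"; /* MigrationCapability_str() */
        size_t len = strlen(name);            /* 15 */

        buf[0] = (uint8_t)len;                /* 0x0f */
        memcpy(buf + 1, name, len);           /* 'x' '-' 'i' ... 'd', no NUL */
    }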
@@ -324,6 +441,25 @@ static const VMStateDescription vmstate_target_page_bits = {
}
};
+static bool vmstate_capabilites_needed(void *opaque)
+{
+ return get_validatable_capabilities_count() > 0;
+}
+
+static const VMStateDescription vmstate_capabilites = {
+ .name = "configuration/capabilities",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vmstate_capabilites_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_V(caps_count, SaveState, 1),
+ VMSTATE_VARRAY_UINT32_ALLOC(capabilities, SaveState, caps_count, 1,
+ vmstate_info_capability,
+ MigrationCapability),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_configuration = {
.name = "configuration",
.version_id = 1,
@@ -337,6 +473,7 @@ static const VMStateDescription vmstate_configuration = {
},
.subsections = (const VMStateDescription*[]) {
&vmstate_target_page_bits,
+ &vmstate_capabilites,
NULL
}
};
@@ -951,6 +1088,7 @@ void qemu_savevm_state_header(QEMUFile *f)
void qemu_savevm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
+ Error *local_err = NULL;
int ret;
trace_savevm_state_setup();
@@ -972,6 +1110,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
break;
}
}
+
+ if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
+ error_report_err(local_err);
+ }
}
int qemu_savevm_state_resume_prepare(MigrationState *s)
@@ -1114,6 +1256,11 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
SaveStateEntry *se;
int ret;
bool in_postcopy = migration_in_postcopy();
+ Error *local_err = NULL;
+
+ if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
+ error_report_err(local_err);
+ }
trace_savevm_state_complete_precopy();
@@ -1246,6 +1393,11 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
void qemu_savevm_state_cleanup(void)
{
SaveStateEntry *se;
+ Error *local_err = NULL;
+
+ if (precopy_notify(PRECOPY_NOTIFY_CLEANUP, &local_err)) {
+ error_report_err(local_err);
+ }
trace_savevm_state_cleanup();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
diff --git a/migration/socket.c b/migration/socket.c
index f4c8174..239527f 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -15,6 +15,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
@@ -177,6 +178,7 @@ static void socket_start_incoming_migration(SocketAddress *saddr,
Error **errp)
{
QIONetListener *listener = qio_net_listener_new();
+ size_t i;
qio_net_listener_set_name(listener, "migration-socket-listener");
@@ -189,6 +191,15 @@ static void socket_start_incoming_migration(SocketAddress *saddr,
socket_accept_incoming_migration,
NULL, NULL,
g_main_context_get_thread_default());
+
+ for (i = 0; i < listener->nsioc; i++) {
+ SocketAddress *address =
+ qio_channel_socket_get_local_address(listener->sioc[i], errp);
+ if (!address) {
+ return;
+ }
+ migrate_add_address(address);
+ }
}
void tcp_start_incoming_migration(const char *host_port, Error **errp)