Diffstat (limited to 'migration/migration.c')
 migration/migration.c | 942 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 576 insertions(+), 366 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 3dea06d..4098870 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -14,6 +14,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/ctype.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -22,11 +23,12 @@
#include "fd.h"
#include "file.h"
#include "socket.h"
-#include "sysemu/runstate.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/cpu-throttle.h"
+#include "system/runstate.h"
+#include "system/system.h"
+#include "system/cpu-throttle.h"
#include "rdma.h"
#include "ram.h"
+#include "migration/cpr.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "migration.h"
@@ -43,7 +45,7 @@
#include "qapi/qapi-commands-migration.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qnull.h"
+#include "qobject/qnull.h"
#include "qemu/rcu.h"
#include "postcopy-ram.h"
#include "qemu/thread.h"
@@ -59,13 +61,13 @@
#include "multifd.h"
#include "threadinfo.h"
#include "qemu/yank.h"
-#include "sysemu/cpus.h"
+#include "system/cpus.h"
#include "yank_functions.h"
-#include "sysemu/qtest.h"
+#include "system/qtest.h"
#include "options.h"
-#include "sysemu/dirtylimit.h"
+#include "system/dirtylimit.h"
#include "qemu/sockets.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#define NOTIFIER_ELEM_INIT(array, elem) \
[elem] = NOTIFIER_WITH_RETURN_LIST_INITIALIZER((array)[elem])
@@ -75,6 +77,7 @@
static NotifierWithReturnList migration_state_notifiers[] = {
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL),
NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT),
+ NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER),
};
/* Messages sent on the return path from destination to source */
@@ -92,6 +95,9 @@ enum mig_rp_message_type {
MIG_RP_MSG_MAX
};
+/* Migration channel types */
+enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };
+
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
@@ -102,12 +108,10 @@ static MigrationIncomingState *current_incoming;
static GSList *migration_blockers[MIG_MODE__MAX];
static bool migration_object_check(MigrationState *ms, Error **errp);
-static int migration_maybe_pause(MigrationState *s,
- int *current_active_state,
- int new_state);
-static void migrate_fd_cancel(MigrationState *s);
+static bool migration_switchover_start(MigrationState *s, Error **errp);
static bool close_return_path_on_source(MigrationState *s);
static void migration_completion_end(MigrationState *s);
+static void migrate_hup_delete(MigrationState *s);
static void migration_downtime_start(MigrationState *s)
{
@@ -115,6 +119,27 @@ static void migration_downtime_start(MigrationState *s)
s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
}
+/*
+ * This is unfortunate: incoming migration actually needs the outgoing
+ * migration state (MigrationState) to be there too, e.g. to query
+ * capabilities and parameters, take locks, record setup errors, etc.
+ *
+ * NOTE: when calling this, make sure current_migration exists and has
+ * not been freed yet! Otherwise trying to access the refcount is
+ * already a use-after-free itself.
+ *
+ * TODO: Move the shared part of incoming / outgoing out into a
+ * separate object. Then this is not needed.
+ */
+static void migrate_incoming_ref_outgoing_state(void)
+{
+ object_ref(migrate_get_current());
+}
+static void migrate_incoming_unref_outgoing_state(void)
+{
+ object_unref(migrate_get_current());
+}
+
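The ref/unref pair above is wired up later in this patch: qmp_migrate_incoming() takes the reference and process_incoming_migration_co() drops it. A sketch of the intended lifecycle, in comment form:
/* Lifecycle sketch (call sites appear later in this patch):
 *
 *   qmp_migrate_incoming()
 *       migrate_incoming_ref_outgoing_state();    <- pin MigrationState
 *       ... start listening for the incoming connection ...
 *
 *   process_incoming_migration_co()
 *       ... incoming migration runs to completion or failure ...
 *   out:
 *       migrate_incoming_unref_outgoing_state();  <- release the pin
 */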
static void migration_downtime_end(MigrationState *s)
{
int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
@@ -125,9 +150,19 @@ static void migration_downtime_end(MigrationState *s)
*/
if (!s->downtime) {
s->downtime = now - s->downtime_start;
+ trace_vmstate_downtime_checkpoint("src-downtime-end");
+ }
+}
+
+static void precopy_notify_complete(void)
+{
+ Error *local_err = NULL;
+
+ if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) {
+ error_report_err(local_err);
}
- trace_vmstate_downtime_checkpoint("src-downtime-end");
+ trace_migration_precopy_complete();
}
static bool migration_needs_multiple_sockets(void)
@@ -135,6 +170,21 @@ static bool migration_needs_multiple_sockets(void)
return migrate_multifd() || migrate_postcopy_preempt();
}
+static RunState migration_get_target_runstate(void)
+{
+ /*
+ * When the global state is not migrated, it means we don't know the
+ * runstate of the src QEMU. We have little choice but to assume the
+ * VM is running. NOTE: this is a pretty rare case; so far only Xen
+ * uses it.
+ */
+ if (!global_state_received()) {
+ return RUN_STATE_RUNNING;
+ }
+
+ return global_state_get_runstate();
+}
+
static bool transport_supports_multi_channels(MigrationAddress *addr)
{
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
@@ -203,9 +253,33 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
return false;
}
+ if (migrate_mode() == MIG_MODE_CPR_TRANSFER &&
+ addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
+ error_setg(errp, "Migration requires streamable transport (eg unix)");
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+migration_capabilities_and_transport_compatible(MigrationAddress *addr,
+ Error **errp)
+{
+ if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
+ return migrate_rdma_caps_check(migrate_get_current()->capabilities,
+ errp);
+ }
+
return true;
}
+static bool migration_transport_compatible(MigrationAddress *addr, Error **errp)
+{
+ return migration_channels_and_transport_compatible(addr, errp) &&
+ migration_capabilities_and_transport_compatible(addr, errp);
+}
+
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
@@ -263,6 +337,9 @@ void migration_object_init(void)
ram_mig_init();
dirty_bitmap_mig_init();
+
+ /* Initialize cpu throttle timers */
+ cpu_throttle_init();
}
typedef struct {
@@ -306,17 +383,6 @@ void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
qemu_bh_schedule(bh);
}
-void migration_cancel(const Error *error)
-{
- if (error) {
- migrate_set_error(current_migration, error);
- }
- if (migrate_dirty_limit()) {
- qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
- }
- migrate_fd_cancel(current_migration);
-}
-
void migration_shutdown(void)
{
/*
@@ -329,7 +395,7 @@ void migration_shutdown(void)
* Cancel the current migration - that will (eventually)
* stop the migration using this structure
*/
- migration_cancel(NULL);
+ migration_cancel();
object_unref(OBJECT(current_migration));
/*
@@ -379,6 +445,24 @@ void migration_incoming_state_destroy(void)
multifd_recv_cleanup();
+ /*
+ * RAM state cleanup needs to happen after multifd cleanup, because
+ * multifd threads can use some of its states (receivedmap).
+ * The VFIO load_cleanup() implementation is BQL-sensitive: it requires
+ * that BQL NOT be taken when recycling load threads, so that it won't
+ * block the load threads from making progress on address space
+ * modification operations.
+ *
+ * To make it work, we could try to not take BQL for all load_cleanup(),
+ * or conditionally unlock BQL only if bql_locked() in VFIO.
+ *
+ * Since most existing call sites take BQL for load_cleanup(), make
+ * it simple by taking BQL always as the rule, so that VFIO can unlock
+ * BQL and retake it unconditionally.
+ */
+ assert(bql_locked());
+ qemu_loadvm_state_cleanup(mis);
+
if (mis->to_src_file) {
/* Tell source that we are done */
migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
@@ -410,6 +494,7 @@ void migration_incoming_state_destroy(void)
mis->postcopy_qemufile_dst = NULL;
}
+ cpr_set_incoming_mode(MIG_MODE_NONE);
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
@@ -563,6 +648,16 @@ void migrate_add_address(SocketAddress *address)
QAPI_CLONE(SocketAddress, address));
}
+bool migrate_is_uri(const char *uri)
+{
+ while (*uri && *uri != ':') {
+ if (!qemu_isalpha(*uri++)) {
+ return false;
+ }
+ }
+ return *uri == ':';
+}
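Illustrative inputs for the scheme check above (the values are hypothetical, not from this patch):
/* migrate_is_uri() accepts a run of alphabetic characters terminated
 * by ':', i.e. a URI scheme prefix:
 *
 *   migrate_is_uri("tcp:127.0.0.1:4444")  -> true
 *   migrate_is_uri("unix:/tmp/mig.sock")  -> true
 *   migrate_is_uri("/dev/fd/3")           -> false  ('/' is not alphabetic)
 *   migrate_is_uri("tcp")                 -> false  (no ':')
 */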
+
bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
Error **errp)
{
@@ -660,7 +755,8 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
if (channels) {
/* To verify that the migrate channel list has only one entry */
if (channels->next) {
- error_setg(errp, "Channel list has more than one entries");
+ error_setg(errp, "Channel list must have only one entry, "
+ "for type 'main'");
return;
}
addr = channels->value->addr;
@@ -675,7 +771,7 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
}
/* transport mechanism not suitable for migration? */
- if (!migration_channels_and_transport_compatible(addr, errp)) {
+ if (!migration_transport_compatible(addr, errp)) {
return;
}
@@ -694,14 +790,6 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
}
#ifdef CONFIG_RDMA
} else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
- if (migrate_xbzrle()) {
- error_setg(errp, "RDMA and XBZRLE can't be used together");
- return;
- }
- if (migrate_multifd()) {
- error_setg(errp, "RDMA and multifd can't be used together");
- return;
- }
rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
} else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
@@ -711,34 +799,17 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
} else {
error_setg(errp, "unknown migration protocol: %s", uri);
}
+
+ /* Close cpr socket to tell source that we are listening */
+ cpr_state_close();
}
static void process_incoming_migration_bh(void *opaque)
{
- Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
- /* If capability late_block_activate is set:
- * Only fire up the block code now if we're going to restart the
- * VM, else 'cont' will do it.
- * This causes file locking to happen; so we don't want it to happen
- * unless we really are starting the VM.
- */
- if (!migrate_late_block_activate() ||
- (autostart && (!global_state_received() ||
- runstate_is_live(global_state_get_runstate())))) {
- /* Make sure all file formats throw away their mutable metadata.
- * If we get an error here, just don't restart the VM yet. */
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- local_err = NULL;
- autostart = false;
- }
- }
-
/*
* This must happen after all error conditions are dealt with and
* we're sure the VM is going to be running on this host.
@@ -751,10 +822,23 @@ static void process_incoming_migration_bh(void *opaque)
dirty_bitmap_mig_before_vm_start();
- if (!global_state_received() ||
- runstate_is_live(global_state_get_runstate())) {
+ if (runstate_is_live(migration_get_target_runstate())) {
if (autostart) {
- vm_start();
+ /*
+ * Block activation is always delayed until VM starts, either
+ * here (which means we need to start the dest VM right now..),
+ * or until qmp_cont() later.
+ *
+ * We used to have cap 'late-block-activate' but now we do this
+ * unconditionally, as it does no harm and brings only benefit.
+ * E.g., the timing of disk activation is not part of the
+ * migration ABI.
+ *
+ * Make sure all file formats throw away their mutable
+ * metadata. On error, don't restart the VM yet.
+ */
+ if (migration_block_activate(NULL)) {
+ vm_start();
+ }
} else {
runstate_set(RUN_STATE_PAUSED);
}
@@ -813,7 +897,7 @@ process_incoming_migration_co(void *opaque)
* postcopy thread.
*/
trace_process_incoming_migration_co_postcopy_end_main();
- return;
+ goto out;
}
/* Else if something went wrong then just fall out of the normal exit */
}
@@ -829,7 +913,8 @@ process_incoming_migration_co(void *opaque)
}
migration_bh_schedule(process_incoming_migration_bh, mis);
- return;
+ goto out;
+
fail:
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_FAILED);
@@ -846,6 +931,9 @@ fail:
exit(EXIT_FAILURE);
}
+out:
+ /* Pairs with the refcount taken in qmp_migrate_incoming() */
+ migrate_incoming_unref_outgoing_state();
}
/**
@@ -856,9 +944,8 @@ static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (!mis->from_src_file) {
- mis->from_src_file = f;
- }
+ assert(!mis->from_src_file);
+ mis->from_src_file = f;
qemu_file_set_blocking(f, false);
}
@@ -910,28 +997,19 @@ void migration_fd_process_incoming(QEMUFile *f)
migration_incoming_process();
}
-/*
- * Returns true when we want to start a new incoming migration process,
- * false otherwise.
- */
-static bool migration_should_start_incoming(bool main_channel)
+static bool migration_has_main_and_multifd_channels(void)
{
- /* Multifd doesn't start unless all channels are established */
- if (migrate_multifd()) {
- return migration_has_all_channels();
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (!mis->from_src_file) {
+ /* main channel not established */
+ return false;
}
- /* Preempt channel only starts when the main channel is created */
- if (migrate_postcopy_preempt()) {
- return main_channel;
+ if (migrate_multifd() && !multifd_recv_all_channels_created()) {
+ return false;
}
- /*
- * For all the rest types of migration, we should only reach here when
- * it's the main channel that's being created, and we should always
- * proceed with this channel.
- */
- assert(main_channel);
+ /* main and all multifd channels are established */
return true;
}
@@ -940,59 +1018,81 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
QEMUFile *f;
- bool default_channel = true;
+ uint8_t channel;
uint32_t channel_magic = 0;
int ret = 0;
- if (migrate_multifd() && !migrate_mapped_ram() &&
- !migrate_postcopy_ram() &&
- qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
- /*
- * With multiple channels, it is possible that we receive channels
- * out of order on destination side, causing incorrect mapping of
- * source channels on destination side. Check channel MAGIC to
- * decide type of channel. Please note this is best effort, postcopy
- * preempt channel does not send any magic number so avoid it for
- * postcopy live migration. Also tls live migration already does
- * tls handshake while initializing main channel so with tls this
- * issue is not possible.
- */
- ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
- sizeof(channel_magic), errp);
+ if (!migration_has_main_and_multifd_channels()) {
+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort,
+ * postcopy preempt channel does not send any magic number so
+ * avoid it for postcopy live migration. Also tls live migration
+ * already does tls handshake while initializing main channel so
+ * with tls this issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), errp);
+ if (ret != 0) {
+ return;
+ }
- if (ret != 0) {
+ channel_magic = be32_to_cpu(channel_magic);
+ if (channel_magic == QEMU_VM_FILE_MAGIC) {
+ channel = CH_MAIN;
+ } else if (channel_magic == MULTIFD_MAGIC) {
+ assert(migrate_multifd());
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file &&
+ mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ /* reconnect main channel for postcopy recovery */
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "unknown channel magic: %u", channel_magic);
+ return;
+ }
+ } else if (mis->from_src_file && migrate_multifd()) {
+ /*
+ * Non-peekable channels like tls/file are processed as
+ * multifd channels when multifd is enabled.
+ */
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file) {
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "non-peekable channel used without multifd");
return;
}
-
- default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
} else {
- default_channel = !mis->from_src_file;
+ assert(migrate_postcopy_preempt());
+ channel = CH_POSTCOPY;
}
if (multifd_recv_setup(errp) != 0) {
return;
}
- if (default_channel) {
+ if (channel == CH_MAIN) {
f = qemu_file_new_input(ioc);
migration_incoming_setup(f);
- } else {
+ } else if (channel == CH_MULTIFD) {
/* Multiple connections */
- assert(migration_needs_multiple_sockets());
- if (migrate_multifd()) {
- multifd_recv_new_channel(ioc, &local_err);
- } else {
- assert(migrate_postcopy_preempt());
- f = qemu_file_new_input(ioc);
- postcopy_preempt_new_channel(mis, f);
- }
+ multifd_recv_new_channel(ioc, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
+ } else if (channel == CH_POSTCOPY) {
+ assert(!mis->postcopy_qemufile_dst);
+ f = qemu_file_new_input(ioc);
+ postcopy_preempt_new_channel(mis, f);
+ return;
}
- if (migration_should_start_incoming(default_channel)) {
+ if (migration_has_main_and_multifd_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
@@ -1009,18 +1109,13 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
*/
bool migration_has_all_channels(void)
{
- MigrationIncomingState *mis = migration_incoming_get_current();
-
- if (!mis->from_src_file) {
+ if (!migration_has_main_and_multifd_channels()) {
return false;
}
- if (migrate_multifd()) {
- return multifd_recv_all_channels_created();
- }
-
- if (migrate_postcopy_preempt()) {
- return mis->postcopy_qemufile_dst != NULL;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
+ return false;
}
return true;
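The dispatch above, condensed (a summary sketch; constants as named in the code):
/* Channel classification in migration_ioc_process_incoming():
 *
 *   main/multifd set incomplete, peekable channel:
 *     magic == QEMU_VM_FILE_MAGIC          -> CH_MAIN
 *     magic == MULTIFD_MAGIC               -> CH_MULTIFD
 *     no main channel && POSTCOPY_PAUSED   -> CH_MAIN (recovery reconnect)
 *     anything else                        -> error
 *
 *   main/multifd set incomplete, non-peekable (e.g. tls/file):
 *     main channel exists && multifd on    -> CH_MULTIFD
 *     no main channel yet                  -> CH_MAIN
 *     otherwise                            -> error
 *
 *   main and all multifd channels present  -> CH_POSTCOPY (preempt only)
 */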
@@ -1105,14 +1200,14 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}
-/*
- * Return true if we're already in the middle of a migration
- * (i.e. any of the active or setup states)
- */
-bool migration_is_setup_or_active(void)
+bool migration_is_running(void)
{
MigrationState *s = current_migration;
+ if (!s) {
+ return false;
+ }
+
switch (s->state) {
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
@@ -1123,36 +1218,20 @@ bool migration_is_setup_or_active(void)
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_WAIT_UNPLUG:
+ case MIGRATION_STATUS_CANCELLING:
case MIGRATION_STATUS_COLO:
return true;
-
default:
return false;
-
}
}
-bool migration_is_running(void)
+static bool migration_is_active(void)
{
MigrationState *s = current_migration;
- switch (s->state) {
- case MIGRATION_STATUS_ACTIVE:
- case MIGRATION_STATUS_POSTCOPY_ACTIVE:
- case MIGRATION_STATUS_POSTCOPY_PAUSED:
- case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
- case MIGRATION_STATUS_POSTCOPY_RECOVER:
- case MIGRATION_STATUS_SETUP:
- case MIGRATION_STATUS_PRE_SWITCHOVER:
- case MIGRATION_STATUS_DEVICE:
- case MIGRATION_STATUS_WAIT_UNPLUG:
- case MIGRATION_STATUS_CANCELLING:
- return true;
-
- default:
- return false;
-
- }
+ return (s->state == MIGRATION_STATUS_ACTIVE ||
+ s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}
static bool migrate_show_downtime(MigrationState *s)
@@ -1397,39 +1476,52 @@ void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
}
}
-static void migrate_fd_cleanup(MigrationState *s)
+static void migration_cleanup_json_writer(MigrationState *s)
+{
+ g_clear_pointer(&s->vmdesc, json_writer_free);
+}
+
+static void migration_cleanup(MigrationState *s)
{
MigrationEventType type;
+ QEMUFile *tmp = NULL;
+
+ trace_migration_cleanup();
+
+ migration_cleanup_json_writer(s);
g_free(s->hostname);
s->hostname = NULL;
- json_writer_free(s->vmdesc);
- s->vmdesc = NULL;
qemu_savevm_state_cleanup();
+ cpr_state_close();
+ migrate_hup_delete(s);
close_return_path_on_source(s);
- if (s->to_dst_file) {
- QEMUFile *tmp;
-
- trace_migrate_fd_cleanup();
+ if (s->migration_thread_running) {
bql_unlock();
- if (s->migration_thread_running) {
- qemu_thread_join(&s->thread);
- s->migration_thread_running = false;
- }
+ qemu_thread_join(&s->thread);
+ s->migration_thread_running = false;
bql_lock();
+ }
- multifd_send_shutdown();
- qemu_mutex_lock(&s->qemu_file_lock);
+ WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
+ /*
+ * Close the file handle without the lock to make sure the critical
+ * section won't block for long.
+ */
tmp = s->to_dst_file;
s->to_dst_file = NULL;
- qemu_mutex_unlock(&s->qemu_file_lock);
+ }
+
+ if (tmp) {
/*
- * Close the file handle without the lock to make sure the
- * critical section won't block for long.
+ * We only need to shut down multifd if tmp!=NULL, because if
+ * tmp==NULL, it means the main channel isn't established, while
+ * multifd is only set up after that (in migration_thread()).
*/
+ multifd_send_shutdown();
migration_ioc_unregister_yank_from_file(tmp);
qemu_fclose(tmp);
}
@@ -1451,9 +1543,9 @@ static void migrate_fd_cleanup(MigrationState *s)
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
-static void migrate_fd_cleanup_bh(void *opaque)
+static void migration_cleanup_bh(void *opaque)
{
- migrate_fd_cleanup(opaque);
+ migration_cleanup(opaque);
}
void migrate_set_error(MigrationState *s, const Error *error)
@@ -1483,7 +1575,7 @@ static void migrate_error_free(MigrationState *s)
}
}
-static void migrate_fd_error(MigrationState *s, const Error *error)
+static void migration_connect_set_error(MigrationState *s, const Error *error)
{
MigrationStatus current = s->state;
MigrationStatus next;
@@ -1512,11 +1604,17 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
migrate_set_error(s, error);
}
-static void migrate_fd_cancel(MigrationState *s)
+void migration_cancel(void)
{
+ MigrationState *s = migrate_get_current();
int old_state;
+ bool setup = (s->state == MIGRATION_STATUS_SETUP);
+
+ trace_migration_cancel();
- trace_migrate_fd_cancel();
+ if (migrate_dirty_limit()) {
+ qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
+ }
WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
if (s->rp_state.from_dst_file) {
@@ -1532,7 +1630,7 @@ static void migrate_fd_cancel(MigrationState *s)
}
/* If the migration is paused, kick it out of the pause */
if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
- qemu_sem_post(&s->pause_sem);
+ qemu_event_set(&s->pause_event);
}
migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
} while (s->state != MIGRATION_STATUS_CANCELLING);
@@ -1549,15 +1647,16 @@ static void migrate_fd_cancel(MigrationState *s)
}
}
}
- if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
- Error *local_err = NULL;
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
+ /*
+ * If qmp_migrate_finish has not been called, then there is no path that
+ * will complete the cancellation. Do it now.
+ */
+ if (setup && !s->to_dst_file) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
+ MIGRATION_STATUS_CANCELLED);
+ cpr_state_close();
+ migrate_hup_delete(s);
}
}
@@ -1644,42 +1743,7 @@ bool migration_incoming_postcopy_advised(void)
bool migration_in_bg_snapshot(void)
{
- return migrate_background_snapshot() &&
- migration_is_setup_or_active();
-}
-
-bool migration_is_idle(void)
-{
- MigrationState *s = current_migration;
-
- if (!s) {
- return true;
- }
-
- switch (s->state) {
- case MIGRATION_STATUS_NONE:
- case MIGRATION_STATUS_CANCELLED:
- case MIGRATION_STATUS_COMPLETED:
- case MIGRATION_STATUS_FAILED:
- return true;
- default:
- return false;
- }
-}
-
-bool migration_is_active(void)
-{
- MigrationState *s = current_migration;
-
- return (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
-}
-
-bool migration_is_device(void)
-{
- MigrationState *s = current_migration;
-
- return s->state == MIGRATION_STATUS_DEVICE;
+ return migrate_background_snapshot() && migration_is_running();
}
bool migration_thread_is_self(void)
@@ -1691,7 +1755,9 @@ bool migration_thread_is_self(void)
bool migrate_mode_is_cpr(MigrationState *s)
{
- return s->parameters.mode == MIG_MODE_CPR_REBOOT;
+ MigMode mode = s->parameters.mode;
+ return mode == MIG_MODE_CPR_REBOOT ||
+ mode == MIG_MODE_CPR_TRANSFER;
}
int migrate_init(MigrationState *s, Error **errp)
@@ -1720,7 +1786,10 @@ int migrate_init(MigrationState *s, Error **errp)
s->migration_thread_running = false;
error_free(s->error);
s->error = NULL;
- s->vmdesc = NULL;
+
+ if (should_send_vmdesc()) {
+ s->vmdesc = json_writer_new(false);
+ }
migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
@@ -1745,7 +1814,7 @@ static bool is_busy(Error **reasonp, Error **errp)
ERRP_GUARD();
/* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
- if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
+ if (runstate_check(RUN_STATE_SAVE_VM) || migration_is_running()) {
error_propagate_prepend(errp, *reasonp,
"disallowing migration blocker "
"(migration/snapshot in progress) for: ");
@@ -1877,6 +1946,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
return;
}
+ /*
+ * Make sure MigrationState is available until incoming migration
+ * completes.
+ *
+ * NOTE: QEMU _might_ leak this refcount in some failure paths, but
+ * that's OK. This is the minimum change we need to at least make
+ * sure the success case is clean on the refcount. We could try
+ * harder to make it accurate for all kinds of failures, but that
+ * might be overkill and wouldn't bring us much benefit.
+ */
+ migrate_incoming_ref_outgoing_state();
once = false;
}
@@ -2066,6 +2146,40 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
return true;
}
+static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
+ Error **errp);
+
+static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb,
+ void *opaque)
+{
+ s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP);
+ g_source_set_callback(s->hup_source, cb, opaque, NULL);
+ g_source_attach(s->hup_source, NULL);
+}
+
+static void migrate_hup_delete(MigrationState *s)
+{
+ if (s->hup_source) {
+ g_source_destroy(s->hup_source);
+ g_source_unref(s->hup_source);
+ s->hup_source = NULL;
+ }
+}
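How the watch helpers are used for cpr-transfer (a sketch; the real call site is in qmp_migrate() below):
/* Arm a one-shot G_IO_HUP watch on the cpr-state channel; the callback
 * finishes the migration and removes itself:
 *
 *   migrate_hup_add(s, cpr_state_ioc(),
 *                   (GSourceFunc)qmp_migrate_finish_cb,
 *                   QAPI_CLONE(MigrationAddress, addr));
 *
 * qmp_migrate_finish_cb() then calls migrate_hup_delete() and returns
 * G_SOURCE_REMOVE, so the watch fires at most once.
 */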
+
+static gboolean qmp_migrate_finish_cb(QIOChannel *channel,
+ GIOCondition cond,
+ void *opaque)
+{
+ MigrationAddress *addr = opaque;
+
+ qmp_migrate_finish(addr, false, NULL);
+
+ cpr_state_close();
+ migrate_hup_delete(migrate_get_current());
+ qapi_free_MigrationAddress(addr);
+ return G_SOURCE_REMOVE;
+}
+
void qmp_migrate(const char *uri, bool has_channels,
MigrationChannelList *channels, bool has_detach, bool detach,
bool has_resume, bool resume, Error **errp)
@@ -2075,6 +2189,8 @@ void qmp_migrate(const char *uri, bool has_channels,
MigrationState *s = migrate_get_current();
g_autoptr(MigrationChannel) channel = NULL;
MigrationAddress *addr = NULL;
+ MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL };
+ MigrationChannel *cpr_channel = NULL;
/*
* Having preliminary checks for uri and channel
@@ -2085,12 +2201,22 @@ void qmp_migrate(const char *uri, bool has_channels,
}
if (channels) {
- /* To verify that Migrate channel list has only item */
- if (channels->next) {
- error_setg(errp, "Channel list has more than one entries");
+ for ( ; channels; channels = channels->next) {
+ MigrationChannelType type = channels->value->channel_type;
+
+ if (channelv[type]) {
+ error_setg(errp, "Channel list has more than one %s entry",
+ MigrationChannelType_str(type));
+ return;
+ }
+ channelv[type] = channels->value;
+ }
+ cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR];
+ addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr;
+ if (!addr) {
+ error_setg(errp, "Channel list has no main entry");
return;
}
- addr = channels->value->addr;
}
if (uri) {
@@ -2102,7 +2228,12 @@ void qmp_migrate(const char *uri, bool has_channels,
}
/* transport mechanism not suitable for migration? */
- if (!migration_channels_and_transport_compatible(addr, errp)) {
+ if (!migration_transport_compatible(addr, errp)) {
+ return;
+ }
+
+ if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
+ error_setg(errp, "missing 'cpr' migration channel");
return;
}
@@ -2112,6 +2243,41 @@ void qmp_migrate(const char *uri, bool has_channels,
return;
}
+ if (cpr_state_save(cpr_channel, &local_err)) {
+ goto out;
+ }
+
+ /*
+ * For cpr-transfer, the target may not be listening yet on the migration
+ * channel, because first it must finish cpr_load_state. The target tells
+ * us it is listening by closing the cpr-state socket. Wait for that HUP
+ * event before connecting in qmp_migrate_finish.
+ *
+ * The HUP could occur because the target fails while reading CPR state,
+ * in which case the target will not listen for the incoming migration
+ * connection, so qmp_migrate_finish will fail to connect, and then recover.
+ */
+ if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
+ migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb,
+ QAPI_CLONE(MigrationAddress, addr));
+
+ } else {
+ qmp_migrate_finish(addr, resume_requested, errp);
+ }
+
+out:
+ if (local_err) {
+ migration_connect_set_error(s, local_err);
+ error_propagate(errp, local_err);
+ }
+}
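The cpr-transfer handshake described in the comment above, as a timeline sketch:
/* cpr-transfer connection sequence:
 *
 *   source QEMU                            target QEMU
 *   -----------                            -----------
 *   cpr_state_save(cpr_channel)  ------>   loads CPR state
 *   arm G_IO_HUP watch on cpr ioc          starts migration listener
 *                                <------   cpr_state_close() => HUP
 *   qmp_migrate_finish_cb():
 *     connect migration channel  ------>   accept; incoming migration
 */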
+
+static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested,
+ Error **errp)
+{
+ MigrationState *s = migrate_get_current();
+ Error *local_err = NULL;
+
if (!resume_requested) {
if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
return;
@@ -2146,7 +2312,7 @@ void qmp_migrate(const char *uri, bool has_channels,
if (!resume_requested) {
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
- migrate_fd_error(s, local_err);
+ migration_connect_set_error(s, local_err);
error_propagate(errp, local_err);
return;
}
@@ -2154,7 +2320,18 @@ void qmp_migrate(const char *uri, bool has_channels,
void qmp_migrate_cancel(Error **errp)
{
- migration_cancel(NULL);
+ /*
+ * After postcopy migration has started, the source machine is not
+ * recoverable in case of a migration error. This also means the
+ * cancel command cannot be used, since cancel should allow the
+ * machine to continue operating.
+ */
+ if (migration_in_postcopy()) {
+ error_setg(errp, "Postcopy migration in progress, cannot cancel.");
+ return;
+ }
+
+ migration_cancel();
}
void qmp_migrate_continue(MigrationStatus state, Error **errp)
@@ -2165,7 +2342,7 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp)
MigrationStatus_str(s->state));
return;
}
- qemu_sem_post(&s->pause_sem);
+ qemu_event_set(&s->pause_event);
}
int migration_rp_wait(MigrationState *s)
@@ -2273,7 +2450,7 @@ static bool migrate_handle_rp_resume_ack(MigrationState *s,
*/
static void migration_release_dst_files(MigrationState *ms)
{
- QEMUFile *file;
+ QEMUFile *file = NULL;
WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
/*
@@ -2318,7 +2495,7 @@ static void *source_return_path_thread(void *opaque)
trace_source_return_path_thread_entry();
rcu_register_thread();
- while (migration_is_setup_or_active()) {
+ while (migration_is_running()) {
trace_source_return_path_thread_loop_top();
header_type = qemu_get_be16(rp);
@@ -2473,7 +2650,7 @@ static int open_return_path_on_source(MigrationState *ms)
trace_open_return_path_on_source();
- qemu_thread_create(&ms->rp_state.rp_thread, "mig/src/rp-thr",
+ qemu_thread_create(&ms->rp_state.rp_thread, MIGRATION_THREAD_SRC_RETURN,
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
ms->rp_state.rp_thread_created = true;
@@ -2528,23 +2705,30 @@ static int postcopy_start(MigrationState *ms, Error **errp)
int ret;
QIOChannelBuffer *bioc;
QEMUFile *fb;
- uint64_t bandwidth = migrate_max_postcopy_bandwidth();
- bool restart_block = false;
- int cur_state = MIGRATION_STATUS_ACTIVE;
+
+ /*
+ * Now we're 100% sure to switch to postcopy, so the JSON writer won't
+ * be useful anymore. Free it early if present. Clearing the vmdesc
+ * also means any follow-up vmstate_save()s will skip all JSON
+ * operations, which can shrink postcopy downtime.
+ */
+ migration_cleanup_json_writer(ms);
if (migrate_postcopy_preempt()) {
migration_wait_main_channel(ms);
if (postcopy_preempt_establish_channel(ms)) {
- migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+ if (ms->state != MIGRATION_STATUS_CANCELLING) {
+ migrate_set_state(&ms->state, ms->state,
+ MIGRATION_STATUS_FAILED);
+ }
error_setg(errp, "%s: Failed to establish preempt channel",
__func__);
return -1;
}
}
- if (!migrate_pause_before_switchover()) {
- migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_POSTCOPY_ACTIVE);
+ if (!qemu_savevm_state_postcopy_prepare(ms->to_dst_file, errp)) {
+ return -1;
}
trace_postcopy_start();
@@ -2557,27 +2741,19 @@ static int postcopy_start(MigrationState *ms, Error **errp)
goto fail;
}
- ret = migration_maybe_pause(ms, &cur_state,
- MIGRATION_STATUS_POSTCOPY_ACTIVE);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()",
- __func__);
+ if (!migration_switchover_start(ms, errp)) {
goto fail;
}
- ret = bdrv_inactivate_all();
- if (ret < 0) {
- error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
- __func__);
- goto fail;
- }
- restart_block = true;
-
/*
* Cause any non-postcopiable, but iterative devices to
* send out their final data.
*/
- qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
+ ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true);
+ if (ret) {
+ error_setg(errp, "Postcopy save non-postcopiable iterables failed");
+ goto fail;
+ }
/*
* in Finish migrate and with the io-lock held everything should
@@ -2589,12 +2765,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
ram_postcopy_send_discard_bitmap(ms);
}
- /*
- * send rest of state - note things that are doing postcopy
- * will notice we're in POSTCOPY_ACTIVE and not actually
- * wrap their state up here
- */
- migration_rate_set(bandwidth);
if (migrate_postcopy_ram()) {
/* Ping just for debugging, helps line traces up */
qemu_savevm_send_ping(ms->to_dst_file, 2);
@@ -2622,7 +2792,12 @@ static int postcopy_start(MigrationState *ms, Error **errp)
*/
qemu_savevm_send_postcopy_listen(fb);
- qemu_savevm_state_complete_precopy(fb, false, false);
+ ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true);
+ if (ret) {
+ error_setg(errp, "Postcopy save non-iterable device states failed");
+ goto fail_closefb;
+ }
+
if (migrate_postcopy_ram()) {
qemu_savevm_send_ping(fb, 3);
}
@@ -2641,8 +2816,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
goto fail_closefb;
}
- restart_block = false;
-
/* Now send that blob */
if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
error_setg(errp, "%s: Failed to send packaged data", __func__);
@@ -2658,8 +2831,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
migration_downtime_end(ms);
- bql_unlock();
-
if (migrate_postcopy_ram()) {
/*
* Although this ping is just for debug, it could potentially be
@@ -2675,11 +2846,22 @@ static int postcopy_start(MigrationState *ms, Error **errp)
ret = qemu_file_get_error(ms->to_dst_file);
if (ret) {
error_setg_errno(errp, -ret, "postcopy_start: Migration stream error");
- bql_lock();
goto fail;
}
trace_postcopy_preempt_enabled(migrate_postcopy_preempt());
+ /*
+ * Now that postcopy has officially started, switch to the postcopy
+ * bandwidth that the user specified.
+ */
+ migration_rate_set(migrate_max_postcopy_bandwidth());
+
+ /* Now, switchover looks all fine, switching to postcopy-active */
+ migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE,
+ MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+ bql_unlock();
+
return ret;
fail_closefb:
@@ -2687,67 +2869,104 @@ fail_closefb:
fail:
migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
MIGRATION_STATUS_FAILED);
- if (restart_block) {
- /* A failure happened early enough that we know the destination hasn't
- * accessed block devices, so we're safe to recover.
- */
- Error *local_err = NULL;
-
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- }
- }
+ migration_block_activate(NULL);
migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
bql_unlock();
return -1;
}
/**
- * migration_maybe_pause: Pause if required to by
- * migrate_pause_before_switchover called with the BQL locked
- * Returns: 0 on success
+ * @migration_switchover_prepare: Start VM switchover procedure
+ *
+ * @s: The migration state object pointer
+ *
+ * Prepares for the switchover, depending on "pause-before-switchover"
+ * capability.
+ *
+ * If cap set, state machine goes like:
+ * [postcopy-]active -> pre-switchover -> device
+ *
+ * If cap not set:
+ * [postcopy-]active -> device
+ *
+ * Returns: true on success, false if interrupted.
*/
-static int migration_maybe_pause(MigrationState *s,
- int *current_active_state,
- int new_state)
+static bool migration_switchover_prepare(MigrationState *s)
{
+ /* Concurrent cancellation? Quit */
+ if (s->state == MIGRATION_STATUS_CANCELLING) {
+ return false;
+ }
+
+ /*
+ * Whether precopy or postcopy, since we still hold the BQL it must not
+ * change concurrently to CANCELLING, so it must be either ACTIVE or
+ * POSTCOPY_ACTIVE.
+ */
+ assert(migration_is_active());
+
+ /* If the pre-switchover stage is not requested, switch directly to DEVICE */
if (!migrate_pause_before_switchover()) {
- return 0;
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE);
+ return true;
}
- /* Since leaving this state is not atomic with posting the semaphore
+ /*
+ * Since leaving this state is not atomic with setting the event
* it's possible that someone could have issued multiple migrate_continue
- * and the semaphore is incorrectly positive at this point;
- * the docs say it's undefined to reinit a semaphore that's already
- * init'd, so use timedwait to eat up any existing posts.
+ * and the event is incorrectly set at this point, so reset it.
*/
- while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
- /* This block intentionally left blank */
- }
+ qemu_event_reset(&s->pause_event);
+
+ /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER);
+ bql_unlock();
+
+ qemu_event_wait(&s->pause_event);
+ bql_lock();
/*
- * If the migration is cancelled when it is in the completion phase,
- * the migration state is set to MIGRATION_STATUS_CANCELLING.
- * So we don't need to wait a semaphore, otherwise we would always
- * wait for the 'pause_sem' semaphore.
+ * After the BQL is released and retaken, the state can be CANCELLING
+ * if that happened during qemu_event_wait(). Only change the state if
+ * it's still pre-switchover.
*/
- if (s->state != MIGRATION_STATUS_CANCELLING) {
- bql_unlock();
- migrate_set_state(&s->state, *current_active_state,
- MIGRATION_STATUS_PRE_SWITCHOVER);
- qemu_sem_wait(&s->pause_sem);
- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
- new_state);
- *current_active_state = new_state;
- bql_lock();
+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
+ MIGRATION_STATUS_DEVICE);
+
+ return s->state == MIGRATION_STATUS_DEVICE;
+}
+
+static bool migration_switchover_start(MigrationState *s, Error **errp)
+{
+ ERRP_GUARD();
+
+ if (!migration_switchover_prepare(s)) {
+ error_setg(errp, "Switchover is interrupted");
+ return false;
}
- return s->state == new_state ? 0 : -EINVAL;
+ /* Inactivate disks except in COLO */
+ if (!migrate_colo()) {
+ /*
+ * Inactivate before sending QEMU_VM_EOF so that the
+ * bdrv_activate_all() on the other end won't fail.
+ */
+ if (!migration_block_inactivate()) {
+ error_setg(errp, "Block inactivate failed during switchover");
+ return false;
+ }
+ }
+
+ migration_rate_set(RATE_LIMIT_DISABLED);
+
+ precopy_notify_complete();
+
+ qemu_savevm_maybe_send_switchover_start(s->to_dst_file);
+
+ return true;
}
-static int migration_completion_precopy(MigrationState *s,
- int *current_active_state)
+static int migration_completion_precopy(MigrationState *s)
{
int ret;
@@ -2760,20 +2979,12 @@ static int migration_completion_precopy(MigrationState *s,
}
}
- ret = migration_maybe_pause(s, current_active_state,
- MIGRATION_STATUS_DEVICE);
- if (ret < 0) {
+ if (!migration_switchover_start(s, NULL)) {
+ ret = -EFAULT;
goto out_unlock;
}
- /*
- * Inactivate disks except in COLO, and track that we have done so in order
- * to remember to reactivate them if migration fails or is cancelled.
- */
- s->block_inactive = !migrate_colo();
- migration_rate_set(RATE_LIMIT_DISABLED);
- ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
- s->block_inactive);
+ ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false);
out_unlock:
bql_unlock();
return ret;
@@ -2798,31 +3009,6 @@ static void migration_completion_postcopy(MigrationState *s)
trace_migration_completion_postcopy_end_after_complete();
}
-static void migration_completion_failed(MigrationState *s,
- int current_active_state)
-{
- if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
- s->state == MIGRATION_STATUS_DEVICE)) {
- /*
- * If not doing postcopy, vm_start() will be called: let's
- * regain control on images.
- */
- Error *local_err = NULL;
-
- bql_lock();
- bdrv_activate_all(&local_err);
- if (local_err) {
- error_report_err(local_err);
- } else {
- s->block_inactive = false;
- }
- bql_unlock();
- }
-
- migrate_set_state(&s->state, current_active_state,
- MIGRATION_STATUS_FAILED);
-}
-
/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
@@ -2832,11 +3018,10 @@ static void migration_completion_failed(MigrationState *s,
static void migration_completion(MigrationState *s)
{
int ret = 0;
- int current_active_state = s->state;
Error *local_err = NULL;
if (s->state == MIGRATION_STATUS_ACTIVE) {
- ret = migration_completion_precopy(s, &current_active_state);
+ ret = migration_completion_precopy(s);
} else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
migration_completion_postcopy(s);
} else {
@@ -2876,7 +3061,9 @@ fail:
error_free(local_err);
}
- migration_completion_failed(s, current_active_state);
+ if (s->state != MIGRATION_STATUS_CANCELLING) {
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ }
}
/**
@@ -2899,7 +3086,7 @@ static void bg_migration_completion(MigrationState *s)
qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
qemu_fflush(s->to_dst_file);
} else if (s->state == MIGRATION_STATUS_CANCELLING) {
- goto fail;
+ return;
}
if (qemu_file_get_error(s->to_dst_file)) {
@@ -3283,10 +3470,17 @@ static MigIterateState migration_iteration_run(MigrationState *s)
static void migration_iteration_finish(MigrationState *s)
{
- /* If we enabled cpu throttling for auto-converge, turn it off. */
- cpu_throttle_stop();
-
bql_lock();
+
+ /*
+ * If we enabled cpu throttling for auto-converge, turn it off.
+ * Stopping CPU throttle should be serialized by BQL to avoid
+ * racing for the throttle_dirty_sync_timer.
+ */
+ if (migrate_auto_converge()) {
+ cpu_throttle_stop();
+ }
+
switch (s->state) {
case MIGRATION_STATUS_COMPLETED:
runstate_set(RUN_STATE_POSTMIGRATE);
@@ -3299,6 +3493,11 @@ static void migration_iteration_finish(MigrationState *s)
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_CANCELLING:
+ /*
+ * Re-activate the block drives if they're inactivated. Note, COLO
+ * shouldn't use block_active at all, so this should be a no-op there.
+ */
+ migration_block_activate(NULL);
if (runstate_is_live(s->vm_old_state)) {
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
vm_start();
@@ -3316,7 +3515,7 @@ static void migration_iteration_finish(MigrationState *s)
break;
}
- migration_bh_schedule(migrate_fd_cleanup_bh, s);
+ migration_bh_schedule(migration_cleanup_bh, s);
bql_unlock();
}
@@ -3344,7 +3543,7 @@ static void bg_migration_iteration_finish(MigrationState *s)
break;
}
- migration_bh_schedule(migrate_fd_cleanup_bh, s);
+ migration_bh_schedule(migration_cleanup_bh, s);
bql_unlock();
}
@@ -3462,11 +3661,11 @@ static void *migration_thread(void *opaque)
Error *local_err = NULL;
int ret;
- thread = migration_threads_add("live_migration", qemu_get_thread_id());
+ thread = migration_threads_add(MIGRATION_THREAD_SRC_MAIN,
+ qemu_get_thread_id());
rcu_register_thread();
- object_ref(OBJECT(s));
update_iteration_initial_status(s);
if (!multifd_send_setup()) {
@@ -3503,6 +3702,11 @@ static void *migration_thread(void *opaque)
qemu_savevm_send_colo_enable(s->to_dst_file);
}
+ if (migrate_auto_converge()) {
+ /* Start RAMBlock dirty bitmap sync timer */
+ cpu_throttle_dirty_sync_timer(true);
+ }
+
bql_lock();
ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
bql_unlock();
@@ -3599,7 +3803,6 @@ static void *bg_migration_thread(void *opaque)
int ret;
rcu_register_thread();
- object_ref(OBJECT(s));
migration_rate_set(RATE_LIMIT_DISABLED);
@@ -3657,12 +3860,8 @@ static void *bg_migration_thread(void *opaque)
if (migration_stop_vm(s, RUN_STATE_PAUSED)) {
goto fail;
}
- /*
- * Put vCPUs in sync with shadow context structures, then
- * save their state to channel-buffer along with devices.
- */
- cpu_synchronize_all_states();
- if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
+
+ if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) {
goto fail;
}
/*
@@ -3726,7 +3925,7 @@ fail_setup:
return NULL;
}
-void migrate_fd_connect(MigrationState *s, Error *error_in)
+void migration_connect(MigrationState *s, Error *error_in)
{
Error *local_err = NULL;
uint64_t rate_limit;
@@ -3736,24 +3935,24 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
/*
* If there's a previous error, free it and prepare for another one.
* Meanwhile if migration completes successfully, there won't be an error
- * dumped when calling migrate_fd_cleanup().
+ * dumped when calling migration_cleanup().
*/
migrate_error_free(s);
s->expected_downtime = migrate_downtime_limit();
if (error_in) {
- migrate_fd_error(s, error_in);
+ migration_connect_set_error(s, error_in);
if (resume) {
/*
* Don't do cleanup for resume if channel is invalid, but only dump
* the error. We wait for another channel connect from the user.
* The error_report still gives HMP user a hint on what failed.
- * It's normally done in migrate_fd_cleanup(), but call it here
+ * It's normally done in migration_cleanup(), but call it here
* explicitly.
*/
error_report_err(error_copy(s->error));
} else {
- migrate_fd_cleanup(s);
+ migration_cleanup(s);
}
return;
}
@@ -3811,11 +4010,19 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
}
}
+ /*
+ * Take a refcount to make sure the migration object won't get freed
+ * prematurely by the main thread in migration_shutdown().
+ *
+ * The refcount will be released at the end of the thread function.
+ */
+ object_ref(OBJECT(s));
+
if (migrate_background_snapshot()) {
- qemu_thread_create(&s->thread, "mig/snapshot",
+ qemu_thread_create(&s->thread, MIGRATION_THREAD_SNAPSHOT,
bg_migration_thread, s, QEMU_THREAD_JOINABLE);
} else {
- qemu_thread_create(&s->thread, "mig/src/main",
+ qemu_thread_create(&s->thread, MIGRATION_THREAD_SRC_MAIN,
migration_thread, s, QEMU_THREAD_JOINABLE);
}
s->migration_thread_running = true;
@@ -3823,17 +4030,20 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
fail:
migrate_set_error(s, local_err);
- migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ if (s->state != MIGRATION_STATUS_CANCELLING) {
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ }
error_report_err(local_err);
- migrate_fd_cleanup(s);
+ migration_cleanup(s);
}
-static void migration_class_init(ObjectClass *klass, void *data)
+static void migration_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->user_creatable = false;
- device_class_set_props(dc, migration_properties);
+ device_class_set_props_n(dc, migration_properties,
+ migration_properties_count);
}
static void migration_instance_finalize(Object *obj)
@@ -3844,7 +4054,7 @@ static void migration_instance_finalize(Object *obj)
qemu_mutex_destroy(&ms->qemu_file_lock);
qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
- qemu_sem_destroy(&ms->pause_sem);
+ qemu_event_destroy(&ms->pause_event);
qemu_sem_destroy(&ms->postcopy_pause_sem);
qemu_sem_destroy(&ms->rp_state.rp_sem);
qemu_sem_destroy(&ms->rp_state.rp_pong_acks);
@@ -3859,7 +4069,7 @@ static void migration_instance_init(Object *obj)
ms->state = MIGRATION_STATUS_NONE;
ms->mbps = -1;
ms->pages_per_second = -1;
- qemu_sem_init(&ms->pause_sem, 0);
+ qemu_event_init(&ms->pause_event, false);
qemu_mutex_init(&ms->error_mutex);
migrate_params_init(&ms->parameters);