aboutsummaryrefslogtreecommitdiff
path: root/migration
diff options
context:
space:
mode:
authorDr. David Alan Gilbert <dgilbert@redhat.com>2017-02-02 15:59:09 +0000
committerJuan Quintela <quintela@redhat.com>2017-02-06 13:36:49 +0100
commitef8d6488d2767fe81bb4bb9bcdc52af5ff718b56 (patch)
treefe58764b604fbbc6d44157c424f9f393e9330378 /migration
parent328d4d85282e7d62f89f5b0547a493d9cd07cea0 (diff)
downloadqemu-ef8d6488d2767fe81bb4bb9bcdc52af5ff718b56.zip
qemu-ef8d6488d2767fe81bb4bb9bcdc52af5ff718b56.tar.gz
qemu-ef8d6488d2767fe81bb4bb9bcdc52af5ff718b56.tar.bz2
postcopy: Recover block devices on early failure
An early postcopy failure can be recovered from as long as we know we haven't sent the command to run the destination. We have to undo the bdrv_inactivate_all by calling bdrv_invalidate_cache_all. Note that I'm not using ms->block_inactive because once we've sent the postcopy package we don't want anything else to try and recover the block storage on the source; the destination might have started writing to it. Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Message-Id: <20170202155909.31784-3-dgilbert@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'migration')
-rw-r--r--migration/migration.c25
1 files changed, 25 insertions, 0 deletions
diff --git a/migration/migration.c b/migration/migration.c
index 619ccc4..2b179c6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1601,6 +1601,7 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
QIOChannelBuffer *bioc;
QEMUFile *fb;
int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ bool restart_block = false;
migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -1620,6 +1621,7 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
if (ret < 0) {
goto fail;
}
+ restart_block = true;
/*
* Cause any non-postcopiable, but iterative devices to
@@ -1676,6 +1678,18 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
/* <><> end of stuff going into the package */
+ /* Last point of recovery; as soon as we send the package the destination
+ * can open devices and potentially start running.
+ * Let's just check again that we've not got any errors.
+ */
+ ret = qemu_file_get_error(ms->to_dst_file);
+ if (ret) {
+ error_report("postcopy_start: Migration stream errored (pre package)");
+ goto fail_closefb;
+ }
+
+ restart_block = false;
+
/* Now send that blob */
if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
goto fail_closefb;
@@ -1713,6 +1727,17 @@ fail_closefb:
fail:
migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
MIGRATION_STATUS_FAILED);
+ if (restart_block) {
+ /* A failure happened early enough that we know the destination hasn't
+ * accessed block devices, so we're safe to recover.
+ */
+ Error *local_err = NULL;
+
+ bdrv_invalidate_cache_all(&local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ }
qemu_mutex_unlock_iothread();
return -1;
}