aboutsummaryrefslogtreecommitdiff
path: root/migration/migration.h
diff options
context:
space:
mode:
authorPeter Xu <peterx@redhat.com>2022-07-07 14:55:06 -0400
committerDr. David Alan Gilbert <dgilbert@redhat.com>2022-07-20 12:15:08 +0100
commit60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42 (patch)
treece2a2d6915b9ddb44bd1c2a976fa6e57f1a091bb /migration/migration.h
parentc01b16edf6a22f28c2a943652c82d18fccc527b7 (diff)
downloadqemu-60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42.zip
qemu-60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42.tar.gz
qemu-60bb3c5871a7f7b7cfff5d0a30a035e30cce8e42.tar.bz2
migration: Postcopy recover with preempt enabled
To allow postcopy recovery, the ram fast load (preempt-only) dest QEMU thread needs similar handling on fault tolerance. When ram_load_postcopy() fails, instead of stopping the thread it halts with a semaphore, preparing to be kicked again when recovery is detected. A mutex is introduced to make sure there's no concurrent operation upon the socket. To keep it simple, the fast ram load thread will take the mutex during its whole procedure, and only release it if it's paused. The fast-path socket will be properly released by the main loading thread safely when there are network failures during postcopy with that mutex held. Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <20220707185506.27257-1-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Diffstat (limited to 'migration/migration.h')
-rw-r--r--migration/migration.h19
1 files changed, 19 insertions, 0 deletions
diff --git a/migration/migration.h b/migration/migration.h
index ff714c2..9220cec 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -119,6 +119,18 @@ struct MigrationIncomingState {
QemuThread postcopy_prio_thread;
bool postcopy_prio_thread_created;
/*
+ * Used to sync between the ram load main thread and the fast ram load
+ * thread. It protects postcopy_qemufile_dst, which is the postcopy
+ * fast channel.
+ *
+ * The ram fast load thread holds it for almost its whole lifecycle
+ * because it needs to continuously read data from the channel, and
+ * it only releases this mutex if postcopy is interrupted, so that
+ * the ram load main thread can take this mutex over and properly
+ * release the broken channel.
+ */
+ QemuMutex postcopy_prio_thread_mutex;
+ /*
* An array of temp host huge pages to be used, one for each postcopy
* channel.
*/
@@ -147,6 +159,13 @@ struct MigrationIncomingState {
/* notify PAUSED postcopy incoming migrations to try to continue */
QemuSemaphore postcopy_pause_sem_dst;
QemuSemaphore postcopy_pause_sem_fault;
+ /*
+ * This semaphore is used to allow the ram fast load thread (only when
+ * postcopy preempt is enabled) to fall asleep when a network
+ * interruption is detected. When the recovery is done, the main load
+ * thread will kick the fast ram load thread using this semaphore.
+ */
+ QemuSemaphore postcopy_pause_sem_fast_load;
/* List of listening socket addresses */
SocketAddressList *socket_address_list;