aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLi Zhang <lizhang@suse.de>2021-12-03 12:55:33 +0100
committerJuan Quintela <quintela@redhat.com>2021-12-15 10:31:42 +0100
commit077fbb5942b56826f731de16caae3122a30aab22 (patch)
tree29d9c511bb43ae497322d2089817afcf7dc4c933
parent01102a2ef6c97acc5cc8a2c3bb62b7665a20f51f (diff)
downloadqemu-077fbb5942b56826f731de16caae3122a30aab22.zip
qemu-077fbb5942b56826f731de16caae3122a30aab22.tar.gz
qemu-077fbb5942b56826f731de16caae3122a30aab22.tar.bz2
multifd: Shut down the QIO channels to avoid blocking the send threads when they are terminated.
When doing live migration with multifd channels 8, 16 or larger number, the guest hangs in the presence of the network errors such as missing TCP ACKs. At sender's side: The main thread is blocked on qemu_thread_join, migration_fd_cleanup is called because one thread fails on qio_channel_write_all when the network problem happens and other send threads are blocked on sendmsg. They could not be terminated. So the main thread is blocked on qemu_thread_join to wait for the threads terminated. (gdb) bt 0 0x00007f30c8dcffc0 in __pthread_clockjoin_ex () at /lib64/libpthread.so.0 1 0x000055cbb716084b in qemu_thread_join (thread=0x55cbb881f418) at ../util/qemu-thread-posix.c:627 2 0x000055cbb6b54e40 in multifd_save_cleanup () at ../migration/multifd.c:542 3 0x000055cbb6b4de06 in migrate_fd_cleanup (s=0x55cbb8024000) at ../migration/migration.c:1808 4 0x000055cbb6b4dfb4 in migrate_fd_cleanup_bh (opaque=0x55cbb8024000) at ../migration/migration.c:1850 5 0x000055cbb7173ac1 in aio_bh_call (bh=0x55cbb7eb98e0) at ../util/async.c:141 6 0x000055cbb7173bcb in aio_bh_poll (ctx=0x55cbb7ebba80) at ../util/async.c:169 7 0x000055cbb715ba4b in aio_dispatch (ctx=0x55cbb7ebba80) at ../util/aio-posix.c:381 8 0x000055cbb7173ffe in aio_ctx_dispatch (source=0x55cbb7ebba80, callback=0x0, user_data=0x0) at ../util/async.c:311 9 0x00007f30c9c8cdf4 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 10 0x000055cbb71851a2 in glib_pollfds_poll () at ../util/main-loop.c:232 11 0x000055cbb718521c in os_host_main_loop_wait (timeout=42251070366) at ../util/main-loop.c:255 12 0x000055cbb7185321 in main_loop_wait (nonblocking=0) at ../util/main-loop.c:531 13 0x000055cbb6e6ba27 in qemu_main_loop () at ../softmmu/runstate.c:726 14 0x000055cbb6ad6fd7 in main (argc=68, argv=0x7ffc0c578888, envp=0x7ffc0c578ab0) at ../softmmu/main.c:50 To make sure that the send threads could be terminated, IO channels should be shut down to avoid waiting IO. Signed-off-by: Li Zhang <lizhang@suse.de> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Reviewed-by: Juan Quintela <quintela@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
-rw-r--r--migration/multifd.c3
1 files changed, 3 insertions, 0 deletions
diff --git a/migration/multifd.c b/migration/multifd.c
index 0533da1..3242f68 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -524,6 +524,9 @@ static void multifd_send_terminate_threads(Error *err)
qemu_mutex_lock(&p->mutex);
p->quit = true;
qemu_sem_post(&p->sem);
+ if (p->c) {
+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+ }
qemu_mutex_unlock(&p->mutex);
}
}