aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-07-25 12:09:12 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-07-25 12:09:12 +0100
commitb43bea01b853dfdb6c0418615b57d0e1b98e9e98 (patch)
tree509ec323d2b8069e8a71d2d20e05720378145025
parent7ea53245335b4f60b56a3323232baa39d9bb1ebb (diff)
parentf193bc0c5342496ce07355c0c30394560a7f4738 (diff)
downloadqemu-b43bea01b853dfdb6c0418615b57d0e1b98e9e98.zip
qemu-b43bea01b853dfdb6c0418615b57d0e1b98e9e98.tar.gz
qemu-b43bea01b853dfdb6c0418615b57d0e1b98e9e98.tar.bz2
Merge remote-tracking branch 'remotes/juanquintela/tags/migration-pull-request' into staging
Migration pull request This series fixes problems with migration-cancel while using multifd. In some cases it can hang waiting in a semaphore. Please apply. # gpg: Signature made Thu 25 Jul 2019 11:56:57 BST # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] # Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723 * remotes/juanquintela/tags/migration-pull-request: migration: fix migrate_cancel multifd migration leads destination hung forever migration: Make explicit that we are quitting multifd migration: fix migrate_cancel leads live_migration thread hung forever migration: fix migrate_cancel leads live_migration thread endless loop Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--migration/ram.c66
1 files changed, 57 insertions, 9 deletions
diff --git a/migration/ram.c b/migration/ram.c
index 2b0774c..889148d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -677,6 +677,8 @@ typedef struct {
QemuMutex mutex;
/* is this channel thread running */
bool running;
+ /* should this thread finish */
+ bool quit;
/* array of pages to receive */
MultiFDPages_t *pages;
/* packet allocated len */
@@ -920,7 +922,7 @@ struct {
* false.
*/
-static void multifd_send_pages(void)
+static int multifd_send_pages(void)
{
int i;
static int next_channel;
@@ -933,6 +935,11 @@ static void multifd_send_pages(void)
p = &multifd_send_state->params[i];
qemu_mutex_lock(&p->mutex);
+ if (p->quit) {
+ error_report("%s: channel %d has already quit!", __func__, i);
+ qemu_mutex_unlock(&p->mutex);
+ return -1;
+ }
if (!p->pending_job) {
p->pending_job++;
next_channel = (i + 1) % migrate_multifd_channels();
@@ -951,9 +958,11 @@ static void multifd_send_pages(void)
ram_counters.transferred += transferred;;
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
+
+ return 1;
}
-static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
+static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
MultiFDPages_t *pages = multifd_send_state->pages;
@@ -968,15 +977,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
pages->used++;
if (pages->used < pages->allocated) {
- return;
+ return 1;
}
}
- multifd_send_pages();
+ if (multifd_send_pages() < 0) {
+ return -1;
+ }
if (pages->block != block) {
- multifd_queue_page(block, offset);
+ return multifd_queue_page(block, offset);
}
+
+ return 1;
}
static void multifd_send_terminate_threads(Error *err)
@@ -1049,7 +1062,10 @@ static void multifd_send_sync_main(void)
return;
}
if (multifd_send_state->pages->used) {
- multifd_send_pages();
+ if (multifd_send_pages() < 0) {
+ error_report("%s: multifd_send_pages fail", __func__);
+ return;
+ }
}
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1058,6 +1074,12 @@ static void multifd_send_sync_main(void)
qemu_mutex_lock(&p->mutex);
+ if (p->quit) {
+ error_report("%s: channel %d has already quit", __func__, i);
+ qemu_mutex_unlock(&p->mutex);
+ return;
+ }
+
p->packet_num = multifd_send_state->packet_num++;
p->flags |= MULTIFD_FLAG_SYNC;
p->pending_job++;
@@ -1077,7 +1099,8 @@ static void *multifd_send_thread(void *opaque)
{
MultiFDSendParams *p = opaque;
Error *local_err = NULL;
- int ret;
+ int ret = 0;
+ uint32_t flags = 0;
trace_multifd_send_thread_start(p->id);
rcu_register_thread();
@@ -1095,7 +1118,7 @@ static void *multifd_send_thread(void *opaque)
if (p->pending_job) {
uint32_t used = p->pages->used;
uint64_t packet_num = p->packet_num;
- uint32_t flags = p->flags;
+ flags = p->flags;
p->next_packet_size = used * qemu_target_page_size();
multifd_send_fill_packet(p);
@@ -1144,6 +1167,17 @@ out:
multifd_send_terminate_threads(local_err);
}
+ /*
+ * Error happen, I will exit, but I can't just leave, tell
+ * who pay attention to me.
+ */
+ if (ret != 0) {
+ if (flags & MULTIFD_FLAG_SYNC) {
+ qemu_sem_post(&multifd_send_state->sem_sync);
+ }
+ qemu_sem_post(&multifd_send_state->channels_ready);
+ }
+
qemu_mutex_lock(&p->mutex);
p->running = false;
qemu_mutex_unlock(&p->mutex);
@@ -1234,6 +1268,7 @@ static void multifd_recv_terminate_threads(Error *err)
MultiFDRecvParams *p = &multifd_recv_state->params[i];
qemu_mutex_lock(&p->mutex);
+ p->quit = true;
/* We could arrive here for two reasons:
- normal quit, i.e. everything went fine, just finished
- error quit: We close the channels so the channel threads
@@ -1256,6 +1291,12 @@ int multifd_load_cleanup(Error **errp)
MultiFDRecvParams *p = &multifd_recv_state->params[i];
if (p->running) {
+ p->quit = true;
+ /*
+ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
+ * however try to wakeup it without harm in cleanup phase.
+ */
+ qemu_sem_post(&p->sem_sync);
qemu_thread_join(&p->thread);
}
object_unref(OBJECT(p->c));
@@ -1319,6 +1360,10 @@ static void *multifd_recv_thread(void *opaque)
uint32_t used;
uint32_t flags;
+ if (p->quit) {
+ break;
+ }
+
ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
p->packet_len, &local_err);
if (ret == 0) { /* EOF */
@@ -1390,6 +1435,7 @@ int multifd_load_setup(void)
qemu_mutex_init(&p->mutex);
qemu_sem_init(&p->sem_sync, 0);
+ p->quit = false;
p->id = i;
p->pages = multifd_pages_init(page_count);
p->packet_len = sizeof(MultiFDPacket_t)
@@ -2033,7 +2079,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
ram_addr_t offset)
{
- multifd_queue_page(block, offset);
+ if (multifd_queue_page(block, offset) < 0) {
+ return -1;
+ }
ram_counters.normal++;
return 1;