aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Wolf <kwolf@redhat.com>2018-09-07 15:31:22 +0200
committerKevin Wolf <kwolf@redhat.com>2018-09-25 15:50:15 +0200
commitb5a7a0573530698ee448b063ac01d485e30446bd (patch)
tree0a47d7a6d582452092b64938b1fb81f874a97bb6
parent46aaf2a566e364a62315219255099cbf1c9b990d (diff)
downloadqemu-b5a7a0573530698ee448b063ac01d485e30446bd.zip
qemu-b5a7a0573530698ee448b063ac01d485e30446bd.tar.gz
qemu-b5a7a0573530698ee448b063ac01d485e30446bd.tar.bz2
blockjob: Lie better in child_job_drained_poll()
Block jobs claim in .drained_poll() that they are in a quiescent state as soon as job->deferred_to_main_loop is true. This is obviously wrong, they still have a completion BH to run. We only get away with this because commit 91af091f923 added an unconditional aio_poll(false) to the drain functions, but this is bypassing the regular drain mechanisms. However, just removing this and telling that the job is still active doesn't work either: The completion callbacks themselves call drain functions (directly, or indirectly with bdrv_reopen), so they would deadlock then. As a better lie, tell that the job is active as long as the BH is pending, but falsely call it quiescent from the point in the BH when the completion callback is called. At this point, nested drain calls won't deadlock because they ignore the job, and outer drains will wait for the job to really reach a quiescent state because the callback is already running. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
-rw-r--r--blockjob.c2
-rw-r--r--include/qemu/job.h3
-rw-r--r--job.c11
3 files changed, 14 insertions, 2 deletions
diff --git a/blockjob.c b/blockjob.c
index 58dbd87..4d53422 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c)
/* An inactive or completed job doesn't have any pending requests. Jobs
* with !job->busy are either already paused or have a pause point after
* being reentered, so no job driver code will run before they pause. */
- if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+ if (!job->busy || job_is_completed(job)) {
return false;
}
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 63c60ef..9e7cd1e 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -76,6 +76,9 @@ typedef struct Job {
* Set to false by the job while the coroutine has yielded and may be
* re-entered by job_enter(). There may still be I/O or event loop activity
* pending. Accessed under block_job_mutex (in blockjob.c).
+ *
+ * When the job is deferred to the main loop, busy is true as long as the
+ * bottom half is still pending.
*/
bool busy;
diff --git a/job.c b/job.c
index 7ec8c3b..518f603 100644
--- a/job.c
+++ b/job.c
@@ -857,7 +857,16 @@ static void job_exit(void *opaque)
AioContext *ctx = job->aio_context;
aio_context_acquire(ctx);
+
+ /* This is a lie, we're not quiescent, but still doing the completion
+ * callbacks. However, completion callbacks tend to involve operations that
+ * drain block nodes, and if .drained_poll still returned true, we would
+ * deadlock. */
+ job->busy = false;
+ job_event_idle(job);
+
job_completed(job);
+
aio_context_release(ctx);
}
@@ -872,8 +881,8 @@ static void coroutine_fn job_co_entry(void *opaque)
assert(job && job->driver && job->driver->run);
job_pause_point(job);
job->ret = job->driver->run(job, &job->err);
- job_event_idle(job);
job->deferred_to_main_loop = true;
+ job->busy = true;
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
}