From 5360782d0827854383097d560715d8d8027ee590 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:10 -0400
Subject: block/commit: add block job creation flags

Add support for taking and passing forward job creation flags.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20180906130225.5118-2-jsnow@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/commit.c            | 5 +++--
 blockdev.c                | 7 ++++---
 include/block/block_int.h | 5 ++++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index da69165..b6e8969 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -249,7 +249,8 @@ static BlockDriver bdrv_commit_top = {
 };
 
 void commit_start(const char *job_id, BlockDriverState *bs,
-                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
+                  BlockDriverState *base, BlockDriverState *top,
+                  int creation_flags, int64_t speed,
                   BlockdevOnError on_error, const char *backing_file_str,
                   const char *filter_node_name, Error **errp)
 {
@@ -267,7 +268,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     }
 
     s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
-                         speed, JOB_DEFAULT, NULL, NULL, errp);
+                         speed, creation_flags, NULL, NULL, errp);
     if (!s) {
         return;
     }
diff --git a/blockdev.c b/blockdev.c
index 72f5347..c15a1e6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3214,6 +3214,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
      * BlockdevOnError change for blkmirror makes it in
      */
     BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
+    int job_flags = JOB_DEFAULT;
 
     if (!has_speed) {
         speed = 0;
@@ -3295,15 +3296,15 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
             goto out;
         }
         commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-                            JOB_DEFAULT, speed, on_error,
+                            job_flags, speed, on_error,
                             filter_node_name, NULL, NULL, false, &local_err);
     } else {
         BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
         if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
             goto out;
         }
-        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
-                     on_error, has_backing_file ? backing_file : NULL,
+        commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags,
+                     speed, on_error, has_backing_file ? backing_file : NULL,
                      filter_node_name, &local_err);
     }
     if (local_err != NULL) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 903b9c1..ffab0b4 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -980,6 +980,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
  * @bs: Active block device.
  * @top: Top block device to be committed.
  * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
  * @backing_file_str: String to use as the backing file in @top's overlay
@@ -990,7 +992,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
  *
  */
 void commit_start(const char *job_id, BlockDriverState *bs,
-                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
+                  BlockDriverState *base, BlockDriverState *top,
+                  int creation_flags, int64_t speed,
                   BlockdevOnError on_error, const char *backing_file_str,
                   const char *filter_node_name, Error **errp);
 /**
-- 
cgit v1.1


From a1999b33488daba68a1bcd7c6fdf314ddeacc6a2 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:11 -0400
Subject: block/mirror: add block job creation flags

Add support for taking and passing forward job creation flags.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20180906130225.5118-3-jsnow@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/mirror.c            | 5 +++--
 blockdev.c                | 3 ++-
 include/block/block_int.h | 5 ++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b8941db..cba555b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1639,7 +1639,8 @@ fail:
 
 void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
-                  int64_t speed, uint32_t granularity, int64_t buf_size,
+                  int creation_flags, int64_t speed,
+                  uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
@@ -1655,7 +1656,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
     }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
-    mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces,
+    mirror_start_job(job_id, bs, creation_flags, target, replaces,
                      speed, granularity, buf_size, backing_mode,
                      on_source_error, on_target_error, unmap, NULL, NULL,
                      &mirror_job_driver, is_none_mode, base, false,
diff --git a/blockdev.c b/blockdev.c
index c15a1e6..6574356 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3590,6 +3590,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    bool has_copy_mode, MirrorCopyMode copy_mode,
                                    Error **errp)
 {
+    int job_flags = JOB_DEFAULT;
 
     if (!has_speed) {
         speed = 0;
@@ -3642,7 +3643,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
      * and will allow to check whether the node still exist at mirror completion
      */
     mirror_start(job_id, bs, target,
-                 has_replaces ? replaces : NULL,
+                 has_replaces ? replaces : NULL, job_flags,
                  speed, granularity, buf_size, sync, backing_mode,
                  on_source_error, on_target_error, unmap, filter_node_name,
                  copy_mode, errp);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index ffab0b4..b40f0bf 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1029,6 +1029,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
  * @target: Block device to write to.
  * @replaces: Block graph node name to replace once the mirror is done. Can
  *            only be used when full mirroring is selected.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @granularity: The chosen granularity for the dirty bitmap.
  * @buf_size: The amount of data that can be in flight at one time.
@@ -1050,7 +1052,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
  */
 void mirror_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *target, const char *replaces,
-                  int64_t speed, uint32_t granularity, int64_t buf_size,
+                  int creation_flags, int64_t speed,
+                  uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
-- 
cgit v1.1


From cf6320df581e6cbde6a95075266859a8f9ba9d55 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:12 -0400
Subject: block/stream: add block job creation flags

Add support for taking and passing forward job creation flags.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20180906130225.5118-4-jsnow@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/stream.c            | 5 +++--
 blockdev.c                | 3 ++-
 include/block/block_int.h | 5 ++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 67e1e72..700eb23 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -214,7 +214,8 @@ static const BlockJobDriver stream_job_driver = {
 
 void stream_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, const char *backing_file_str,
-                  int64_t speed, BlockdevOnError on_error, Error **errp)
+                  int creation_flags, int64_t speed,
+                  BlockdevOnError on_error, Error **errp)
 {
     StreamBlockJob *s;
     BlockDriverState *iter;
@@ -236,7 +237,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
                          BLK_PERM_GRAPH_MOD,
                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                          BLK_PERM_WRITE,
-                         speed, JOB_DEFAULT, NULL, NULL, errp);
+                         speed, creation_flags, NULL, NULL, errp);
     if (!s) {
         goto fail;
     }
diff --git a/blockdev.c b/blockdev.c
index 6574356..ec90eb1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3123,6 +3123,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
     AioContext *aio_context;
     Error *local_err = NULL;
     const char *base_name = NULL;
+    int job_flags = JOB_DEFAULT;
 
     if (!has_on_error) {
         on_error = BLOCKDEV_ON_ERROR_REPORT;
@@ -3185,7 +3186,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
     base_name = has_backing_file ? backing_file : base_name;
 
     stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
-                 has_speed ? speed : 0, on_error, &local_err);
+                 job_flags, has_speed ? speed : 0, on_error, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b40f0bf..4000d2a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -958,6 +958,8 @@ int is_windows_drive(const char *filename);
  * flatten the whole backing file chain onto @bs.
  * @backing_file_str: The file name that will be written to @bs as the
  * the new backing file if the job completes. Ignored if @base is %NULL.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
  * @errp: Error object.
@@ -971,7 +973,8 @@ int is_windows_drive(const char *filename);
  */
 void stream_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, const char *backing_file_str,
-                  int64_t speed, BlockdevOnError on_error, Error **errp);
+                  int creation_flags, int64_t speed,
+                  BlockdevOnError on_error, Error **errp);
 
 /**
  * commit_start:
-- 
cgit v1.1


From 22dffcbec62ba918db690ed44beba4bd4e970bb9 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:13 -0400
Subject: block/commit: refactor commit to use job callbacks

Use the component callbacks; prepare, abort, and clean.

NB: prepare is only called when the job has not yet failed;
and abort can be called after prepare.

complete -> prepare -> abort -> clean
complete -> abort -> clean

During refactor, a potential problem with bdrv_drop_intermediate
was identified, the patched behavior is no worse than the pre-patch
behavior, so leave a FIXME for now to be fixed in a future patch.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-5-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/commit.c | 92 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 51 insertions(+), 41 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index b6e8969..a2da574 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
     BlockDriverState *commit_top_bs;
     BlockBackend *top;
     BlockBackend *base;
+    BlockDriverState *base_bs;
     BlockdevOnError on_error;
     int base_flags;
     char *backing_file_str;
@@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
     return 0;
 }
 
-static void commit_exit(Job *job)
+static int commit_prepare(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
-    BlockJob *bjob = &s->common;
-    BlockDriverState *top = blk_bs(s->top);
-    BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *commit_top_bs = s->commit_top_bs;
-    bool remove_commit_top_bs = false;
-
-    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
-    bdrv_ref(top);
-    bdrv_ref(commit_top_bs);
 
     /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
      * the normal backing chain can be restored. */
     blk_unref(s->base);
+    s->base = NULL;
+
+    /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
+     * identically. Further work is needed to disambiguate these cases. */
+    return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
+                                  s->backing_file_str);
+}
 
-    if (!job_is_cancelled(job) && job->ret == 0) {
-        /* success */
-        job->ret = bdrv_drop_intermediate(s->commit_top_bs, base,
-                                          s->backing_file_str);
-    } else {
-        /* XXX Can (or should) we somehow keep 'consistent read' blocked even
-         * after the failed/cancelled commit job is gone? If we already wrote
-         * something to base, the intermediate images aren't valid any more. */
-        remove_commit_top_bs = true;
+static void commit_abort(Job *job)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+    BlockDriverState *top_bs = blk_bs(s->top);
+
+    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
+    bdrv_ref(top_bs);
+    bdrv_ref(s->commit_top_bs);
+
+    if (s->base) {
+        blk_unref(s->base);
     }
 
+    /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
+     * can succeed */
+    block_job_remove_all_bdrv(&s->common);
+
+    /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
+     * commit filter driver from the backing chain now. Do this as the final
+     * step so that the 'consistent read' permission can be granted.
+     *
+     * XXX Can (or should) we somehow keep 'consistent read' blocked even
+     * after the failed/cancelled commit job is gone? If we already wrote
+     * something to base, the intermediate images aren't valid any more. */
+    bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
+    bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
+                      &error_abort);
+
+    bdrv_unref(s->commit_top_bs);
+    bdrv_unref(top_bs);
+}
+
+static void commit_clean(Job *job)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+
     /* restore base open flags here if appropriate (e.g., change the base back
      * to r/o). These reopens do not need to be atomic, since we won't abort
      * even on failure here */
-    if (s->base_flags != bdrv_get_flags(base)) {
-        bdrv_reopen(base, s->base_flags, NULL);
+    if (s->base_flags != bdrv_get_flags(s->base_bs)) {
+        bdrv_reopen(s->base_bs, s->base_flags, NULL);
     }
+
     g_free(s->backing_file_str);
     blk_unref(s->top);
-
-    /* If there is more than one reference to the job (e.g. if called from
-     * job_finish_sync()), job_completed() won't free it and therefore the
-     * blockers on the intermediate nodes remain. This would cause
-     * bdrv_set_backing_hd() to fail. */
-    block_job_remove_all_bdrv(bjob);
-
-    /* If bdrv_drop_intermediate() didn't already do that, remove the commit
-     * filter driver from the backing chain. Do this as the final step so that
-     * the 'consistent read' permission can be granted.  */
-    if (remove_commit_top_bs) {
-        bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
-                                &error_abort);
-        bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
-                          &error_abort);
-    }
-
-    bdrv_unref(commit_top_bs);
-    bdrv_unref(top);
 }
 
 static int coroutine_fn commit_run(Job *job, Error **errp)
@@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = {
         .user_resume   = block_job_user_resume,
         .drain         = block_job_drain,
         .run           = commit_run,
-        .exit          = commit_exit,
+        .prepare       = commit_prepare,
+        .abort         = commit_abort,
+        .clean         = commit_clean
     },
 };
 
@@ -345,6 +354,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     if (ret < 0) {
         goto fail;
     }
+    s->base_bs = base;
 
     /* Required permissions are already taken with block_job_add_bdrv() */
     s->top = blk_new(0, BLK_PERM_ALL);
-- 
cgit v1.1


From c2924ceaa7f1866148e2847c969fc1902a2524fa Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:14 -0400
Subject: block/mirror: don't install backing chain on abort

In cases where we abort the block/mirror job, there's no point in
installing the new backing chain before we finish aborting.

Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 20180906130225.5118-6-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/mirror.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index cba555b..bd3e908 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -642,7 +642,7 @@ static void mirror_exit(Job *job)
      * required before it could become a backing file of target_bs. */
     bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                             &error_abort);
-    if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+    if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
         if (backing_bs(target_bs) != backing) {
             bdrv_set_backing_hd(target_bs, backing, &local_err);
-- 
cgit v1.1


From 737efc1eda23b904fbe0e66b37715fb0e5c3e58b Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:15 -0400
Subject: block/mirror: conservative mirror_exit refactor

For purposes of minimum code movement, refactor the mirror_exit
callback to use the post-finalization callbacks in a trivial way.

Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 20180906130225.5118-7-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
[mreitz: Added comment for the mirror_exit() function]
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/mirror.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index bd3e908..56d9ef7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -79,6 +79,7 @@ typedef struct MirrorBlockJob {
     int max_iov;
     bool initial_zeroing_ongoing;
     int in_active_write_counter;
+    bool prepared;
 } MirrorBlockJob;
 
 typedef struct MirrorBDSOpaque {
@@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s)
     }
 }
 
-static void mirror_exit(Job *job)
+/**
+ * mirror_exit_common: handle both abort() and prepare() cases.
+ * for .prepare, returns 0 on success and -errno on failure.
+ * for .abort cases, denoted by abort = true, MUST return 0.
+ */
+static int mirror_exit_common(Job *job)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
     BlockJob *bjob = &s->common;
@@ -617,7 +623,13 @@ static void mirror_exit(Job *job)
     BlockDriverState *target_bs = blk_bs(s->target);
     BlockDriverState *mirror_top_bs = s->mirror_top_bs;
     Error *local_err = NULL;
-    int ret = job->ret;
+    bool abort = job->ret < 0;
+    int ret = 0;
+
+    if (s->prepared) {
+        return 0;
+    }
+    s->prepared = true;
 
     bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
 
@@ -642,7 +654,7 @@ static void mirror_exit(Job *job)
      * required before it could become a backing file of target_bs. */
     bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                             &error_abort);
-    if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+    if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
         BlockDriverState *backing = s->is_none_mode ? src : s->base;
         if (backing_bs(target_bs) != backing) {
             bdrv_set_backing_hd(target_bs, backing, &local_err);
@@ -658,11 +670,8 @@ static void mirror_exit(Job *job)
         aio_context_acquire(replace_aio_context);
     }
 
-    if (s->should_complete && ret == 0) {
-        BlockDriverState *to_replace = src;
-        if (s->to_replace) {
-            to_replace = s->to_replace;
-        }
+    if (s->should_complete && !abort) {
+        BlockDriverState *to_replace = s->to_replace ?: src;
 
         if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
             bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
@@ -711,7 +720,18 @@ static void mirror_exit(Job *job)
     bdrv_unref(mirror_top_bs);
     bdrv_unref(src);
 
-    job->ret = ret;
+    return ret;
+}
+
+static int mirror_prepare(Job *job)
+{
+    return mirror_exit_common(job);
+}
+
+static void mirror_abort(Job *job)
+{
+    int ret = mirror_exit_common(job);
+    assert(ret == 0);
 }
 
 static void mirror_throttle(MirrorBlockJob *s)
@@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = {
         .user_resume            = block_job_user_resume,
         .drain                  = block_job_drain,
         .run                    = mirror_run,
-        .exit                   = mirror_exit,
+        .prepare                = mirror_prepare,
+        .abort                  = mirror_abort,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
@@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = {
         .user_resume            = block_job_user_resume,
         .drain                  = block_job_drain,
         .run                    = mirror_run,
-        .exit                   = mirror_exit,
+        .prepare                = mirror_prepare,
+        .abort                  = mirror_abort,
         .pause                  = mirror_pause,
         .complete               = mirror_complete,
     },
-- 
cgit v1.1


From 1b57488acf1beba157bcd8c926e596342bcb5c60 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:16 -0400
Subject: block/stream: refactor stream to use job callbacks

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-8-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/stream.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 700eb23..81a7ec8 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
     return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
 }
 
-static void stream_exit(Job *job)
+static int stream_prepare(Job *job)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
     BlockJob *bjob = &s->common;
     BlockDriverState *bs = blk_bs(bjob->blk);
     BlockDriverState *base = s->base;
     Error *local_err = NULL;
-    int ret = job->ret;
+    int ret = 0;
 
-    if (!job_is_cancelled(job) && bs->backing && ret == 0) {
+    if (bs->backing) {
         const char *base_id = NULL, *base_fmt = NULL;
         if (base) {
             base_id = s->backing_file_str;
@@ -75,12 +75,19 @@ static void stream_exit(Job *job)
         bdrv_set_backing_hd(bs, base, &local_err);
         if (local_err) {
             error_report_err(local_err);
-            ret = -EPERM;
-            goto out;
+            return -EPERM;
         }
     }
 
-out:
+    return ret;
+}
+
+static void stream_clean(Job *job)
+{
+    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
+    BlockJob *bjob = &s->common;
+    BlockDriverState *bs = blk_bs(bjob->blk);
+
     /* Reopen the image back in read-only mode if necessary */
     if (s->bs_flags != bdrv_get_flags(bs)) {
         /* Give up write permissions before making it read-only */
@@ -89,7 +96,6 @@ out:
     }
 
     g_free(s->backing_file_str);
-    job->ret = ret;
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = {
         .job_type      = JOB_TYPE_STREAM,
         .free          = block_job_free,
         .run           = stream_run,
-        .exit          = stream_exit,
+        .prepare       = stream_prepare,
+        .clean         = stream_clean,
         .user_resume   = block_job_user_resume,
         .drain         = block_job_drain,
     },
-- 
cgit v1.1


From 0cc4643b01a0138543e886db8e3bf8a3f74ff8f9 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:17 -0400
Subject: tests/blockjob: replace Blockjob with Job

These tests don't actually test blockjobs anymore, they test
generic Job lifetimes. Change the types accordingly.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-9-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/test-blockjob.c | 98 ++++++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index ad4a65b..8e8b680 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -206,18 +206,20 @@ static const BlockJobDriver test_cancel_driver = {
     },
 };
 
-static CancelJob *create_common(BlockJob **pjob)
+static CancelJob *create_common(Job **pjob)
 {
     BlockBackend *blk;
-    BlockJob *job;
+    Job *job;
+    BlockJob *bjob;
     CancelJob *s;
 
     blk = create_blk(NULL);
-    job = mk_job(blk, "Steve", &test_cancel_driver, true,
-                 JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
-    job_ref(&job->job);
-    assert(job->job.status == JOB_STATUS_CREATED);
-    s = container_of(job, CancelJob, common);
+    bjob = mk_job(blk, "Steve", &test_cancel_driver, true,
+                  JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
+    job = &bjob->job;
+    job_ref(job);
+    assert(job->status == JOB_STATUS_CREATED);
+    s = container_of(bjob, CancelJob, common);
     s->blk = blk;
 
     *pjob = job;
@@ -242,7 +244,7 @@ static void cancel_common(CancelJob *s)
 
 static void test_cancel_created(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
@@ -251,119 +253,119 @@ static void test_cancel_created(void)
 
 static void test_cancel_running(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
     cancel_common(s);
 }
 
 static void test_cancel_paused(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
-    job_user_pause(&job->job, &error_abort);
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_PAUSED);
+    job_user_pause(job, &error_abort);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_PAUSED);
 
     cancel_common(s);
 }
 
 static void test_cancel_ready(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
     s->should_converge = true;
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_READY);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_READY);
 
     cancel_common(s);
 }
 
 static void test_cancel_standby(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
     s->should_converge = true;
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_READY);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_READY);
 
-    job_user_pause(&job->job, &error_abort);
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_STANDBY);
+    job_user_pause(job, &error_abort);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_STANDBY);
 
     cancel_common(s);
 }
 
 static void test_cancel_pending(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
     s->should_converge = true;
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_READY);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_READY);
 
-    job_complete(&job->job, &error_abort);
-    job_enter(&job->job);
+    job_complete(job, &error_abort);
+    job_enter(job);
     while (!s->completed) {
         aio_poll(qemu_get_aio_context(), true);
     }
-    assert(job->job.status == JOB_STATUS_PENDING);
+    assert(job->status == JOB_STATUS_PENDING);
 
     cancel_common(s);
 }
 
 static void test_cancel_concluded(void)
 {
-    BlockJob *job;
+    Job *job;
     CancelJob *s;
 
     s = create_common(&job);
 
-    job_start(&job->job);
-    assert(job->job.status == JOB_STATUS_RUNNING);
+    job_start(job);
+    assert(job->status == JOB_STATUS_RUNNING);
 
     s->should_converge = true;
-    job_enter(&job->job);
-    assert(job->job.status == JOB_STATUS_READY);
+    job_enter(job);
+    assert(job->status == JOB_STATUS_READY);
 
-    job_complete(&job->job, &error_abort);
-    job_enter(&job->job);
+    job_complete(job, &error_abort);
+    job_enter(job);
     while (!s->completed) {
         aio_poll(qemu_get_aio_context(), true);
     }
-    assert(job->job.status == JOB_STATUS_PENDING);
+    assert(job->status == JOB_STATUS_PENDING);
 
-    job_finalize(&job->job, &error_abort);
-    assert(job->job.status == JOB_STATUS_CONCLUDED);
+    job_finalize(job, &error_abort);
+    assert(job->status == JOB_STATUS_CONCLUDED);
 
     cancel_common(s);
 }
-- 
cgit v1.1


From 977d26fdbeb35d8d2d0f203f9556d44a353e0dfd Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:18 -0400
Subject: tests/test-blockjob: remove exit callback

We remove the exit callback and the completed boolean along with it.
We can simulate it just fine by waiting for the job to defer to the
main loop, and then giving it one final kick to get the main loop
portion to run.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-10-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/test-blockjob.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index 8e8b680..de4c1c2 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -160,15 +160,8 @@ typedef struct CancelJob {
     BlockBackend *blk;
     bool should_converge;
     bool should_complete;
-    bool completed;
 } CancelJob;
 
-static void cancel_job_exit(Job *job)
-{
-    CancelJob *s = container_of(job, CancelJob, common.job);
-    s->completed = true;
-}
-
 static void cancel_job_complete(Job *job, Error **errp)
 {
     CancelJob *s = container_of(job, CancelJob, common.job);
@@ -201,7 +194,6 @@ static const BlockJobDriver test_cancel_driver = {
         .user_resume   = block_job_user_resume,
         .drain         = block_job_drain,
         .run           = cancel_job_run,
-        .exit          = cancel_job_exit,
         .complete      = cancel_job_complete,
     },
 };
@@ -335,9 +327,11 @@ static void test_cancel_pending(void)
 
     job_complete(job, &error_abort);
     job_enter(job);
-    while (!s->completed) {
+    while (!job->deferred_to_main_loop) {
         aio_poll(qemu_get_aio_context(), true);
     }
+    assert(job->status == JOB_STATUS_READY);
+    aio_poll(qemu_get_aio_context(), true);
     assert(job->status == JOB_STATUS_PENDING);
 
     cancel_common(s);
@@ -359,9 +353,11 @@ static void test_cancel_concluded(void)
 
     job_complete(job, &error_abort);
     job_enter(job);
-    while (!s->completed) {
+    while (!job->deferred_to_main_loop) {
         aio_poll(qemu_get_aio_context(), true);
     }
+    assert(job->status == JOB_STATUS_READY);
+    aio_poll(qemu_get_aio_context(), true);
     assert(job->status == JOB_STATUS_PENDING);
 
     job_finalize(job, &error_abort);
-- 
cgit v1.1


From e4dad4275d51b594c8abbe726a4927f6f388e427 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:19 -0400
Subject: tests/test-blockjob-txn: move .exit to .clean

The exit callback in this test actually only performs cleanup.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-11-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/test-blockjob-txn.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index ef29f35..86606f9 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -24,7 +24,7 @@ typedef struct {
     int *result;
 } TestBlockJob;
 
-static void test_block_job_exit(Job *job)
+static void test_block_job_clean(Job *job)
 {
     BlockJob *bjob = container_of(job, BlockJob, job);
     BlockDriverState *bs = blk_bs(bjob->blk);
@@ -73,7 +73,7 @@ static const BlockJobDriver test_block_job_driver = {
         .user_resume   = block_job_user_resume,
         .drain         = block_job_drain,
         .run           = test_block_job_run,
-        .exit          = test_block_job_exit,
+        .clean         = test_block_job_clean,
     },
 };
 
-- 
cgit v1.1


From ccbfb3319aa265e71c16dac976ff857d0a5bcb4b Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:20 -0400
Subject: jobs: remove .exit callback

Now that all of the jobs use the component finalization callbacks,
there's no use for the heavy-hammer .exit callback anymore.

job_exit becomes a glorified type shim so that we can call
job_completed from aio_bh_schedule_oneshot.

Move these three functions down into job.c to eliminate a
forward reference.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-12-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 include/qemu/job.h | 11 --------
 job.c              | 77 ++++++++++++++++++++++++------------------------------
 2 files changed, 34 insertions(+), 54 deletions(-)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index e0cff70..5cb0681 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -222,17 +222,6 @@ struct JobDriver {
     void (*drain)(Job *job);
 
     /**
-     * If the callback is not NULL, exit will be invoked from the main thread
-     * when the job's coroutine has finished, but before transactional
-     * convergence; before @prepare or @abort.
-     *
-     * FIXME TODO: This callback is only temporary to transition remaining jobs
-     * to prepare/commit/abort/clean callbacks and will be removed before 3.1.
-     * is released.
-     */
-    void (*exit)(Job *job);
-
-    /**
      * If the callback is not NULL, prepare will be invoked when all the jobs
      * belonging to the same transaction complete; or upon this job's completion
      * if it is not in a transaction.
diff --git a/job.c b/job.c
index 2327d79..192f168 100644
--- a/job.c
+++ b/job.c
@@ -535,49 +535,6 @@ void job_drain(Job *job)
     }
 }
 
-static void job_completed(Job *job);
-
-static void job_exit(void *opaque)
-{
-    Job *job = (Job *)opaque;
-    AioContext *aio_context = job->aio_context;
-
-    if (job->driver->exit) {
-        aio_context_acquire(aio_context);
-        job->driver->exit(job);
-        aio_context_release(aio_context);
-    }
-    job_completed(job);
-}
-
-/**
- * All jobs must allow a pause point before entering their job proper. This
- * ensures that jobs can be paused prior to being started, then resumed later.
- */
-static void coroutine_fn job_co_entry(void *opaque)
-{
-    Job *job = opaque;
-
-    assert(job && job->driver && job->driver->run);
-    job_pause_point(job);
-    job->ret = job->driver->run(job, &job->err);
-    job->deferred_to_main_loop = true;
-    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
-}
-
-
-void job_start(Job *job)
-{
-    assert(job && !job_started(job) && job->paused &&
-           job->driver && job->driver->run);
-    job->co = qemu_coroutine_create(job_co_entry, job);
-    job->pause_count--;
-    job->busy = true;
-    job->paused = false;
-    job_state_transition(job, JOB_STATUS_RUNNING);
-    aio_co_enter(job->aio_context, job->co);
-}
-
 /* Assumes the block_job_mutex is held */
 static bool job_timer_not_pending(Job *job)
 {
@@ -894,6 +851,40 @@ static void job_completed(Job *job)
     }
 }
 
+/** Useful only as a type shim for aio_bh_schedule_oneshot. */
+static void job_exit(void *opaque)
+{
+    Job *job = (Job *)opaque;
+    job_completed(job);
+}
+
+/**
+ * All jobs must allow a pause point before entering their job proper. This
+ * ensures that jobs can be paused prior to being started, then resumed later.
+ */
+static void coroutine_fn job_co_entry(void *opaque)
+{
+    Job *job = opaque;
+
+    assert(job && job->driver && job->driver->run);
+    job_pause_point(job);
+    job->ret = job->driver->run(job, &job->err);
+    job->deferred_to_main_loop = true;
+    aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
+}
+
+void job_start(Job *job)
+{
+    assert(job && !job_started(job) && job->paused &&
+           job->driver && job->driver->run);
+    job->co = qemu_coroutine_create(job_co_entry, job);
+    job->pause_count--;
+    job->busy = true;
+    job->paused = false;
+    job_state_transition(job, JOB_STATUS_RUNNING);
+    aio_co_enter(job->aio_context, job->co);
+}
+
 void job_cancel(Job *job, bool force)
 {
     if (job->status == JOB_STATUS_CONCLUDED) {
-- 
cgit v1.1


From 96fbf5345f60a87fab8e7ea79a2406f381027db9 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:21 -0400
Subject: qapi/block-commit: expose new job properties

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-13-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockdev.c           |  8 ++++++++
 qapi/block-core.json | 16 +++++++++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index ec90eb1..98b91e7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3204,6 +3204,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
                       bool has_filter_node_name, const char *filter_node_name,
+                      bool has_auto_finalize, bool auto_finalize,
+                      bool has_auto_dismiss, bool auto_dismiss,
                       Error **errp)
 {
     BlockDriverState *bs;
@@ -3223,6 +3225,12 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     if (!has_filter_node_name) {
         filter_node_name = NULL;
     }
+    if (has_auto_finalize && !auto_finalize) {
+        job_flags |= JOB_MANUAL_FINALIZE;
+    }
+    if (has_auto_dismiss && !auto_dismiss) {
+        job_flags |= JOB_MANUAL_DISMISS;
+    }
 
     /* Important Note:
      *  libvirt relies on the DeviceNotFound error class in order to probe for
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4c7a37a..d5b62e5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1498,6 +1498,19 @@
 #                    above @top. If this option is not given, a node name is
 #                    autogenerated. (Since: 2.9)
 #
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+#
 # Returns: Nothing on success
 #          If @device does not exist, DeviceNotFound
 #          Any other error returns a GenericError.
@@ -1515,7 +1528,8 @@
 { 'command': 'block-commit',
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
             '*backing-file': 'str', '*speed': 'int',
-            '*filter-node-name': 'str' } }
+            '*filter-node-name': 'str',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
 
 ##
 # @drive-backup:
-- 
cgit v1.1


From a6b58adec28ff43c0f29ff7c95cdd5d11e87cf61 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:22 -0400
Subject: qapi/block-mirror: expose new job properties

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-14-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockdev.c           | 14 ++++++++++++++
 qapi/block-core.json | 30 ++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 98b91e7..429cdf9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3597,6 +3597,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
                                    bool has_filter_node_name,
                                    const char *filter_node_name,
                                    bool has_copy_mode, MirrorCopyMode copy_mode,
+                                   bool has_auto_finalize, bool auto_finalize,
+                                   bool has_auto_dismiss, bool auto_dismiss,
                                    Error **errp)
 {
     int job_flags = JOB_DEFAULT;
@@ -3625,6 +3627,12 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
     if (!has_copy_mode) {
         copy_mode = MIRROR_COPY_MODE_BACKGROUND;
     }
+    if (has_auto_finalize && !auto_finalize) {
+        job_flags |= JOB_MANUAL_FINALIZE;
+    }
+    if (has_auto_dismiss && !auto_dismiss) {
+        job_flags |= JOB_MANUAL_DISMISS;
+    }
 
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -3802,6 +3810,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
                            arg->has_unmap, arg->unmap,
                            false, NULL,
                            arg->has_copy_mode, arg->copy_mode,
+                           arg->has_auto_finalize, arg->auto_finalize,
+                           arg->has_auto_dismiss, arg->auto_dismiss,
                            &local_err);
     bdrv_unref(target_bs);
     error_propagate(errp, local_err);
@@ -3823,6 +3833,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                          bool has_filter_node_name,
                          const char *filter_node_name,
                          bool has_copy_mode, MirrorCopyMode copy_mode,
+                         bool has_auto_finalize, bool auto_finalize,
+                         bool has_auto_dismiss, bool auto_dismiss,
                          Error **errp)
 {
     BlockDriverState *bs;
@@ -3856,6 +3868,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
                            true, true,
                            has_filter_node_name, filter_node_name,
                            has_copy_mode, copy_mode,
+                           has_auto_finalize, auto_finalize,
+                           has_auto_dismiss, auto_dismiss,
                            &local_err);
     error_propagate(errp, local_err);
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d5b62e5..e785c2e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1729,6 +1729,18 @@
 # @copy-mode: when to copy data to the destination; defaults to 'background'
 #             (Since: 3.0)
 #
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
 # Since: 1.3
 ##
 { 'struct': 'DriveMirror',
@@ -1738,7 +1750,8 @@
             '*speed': 'int', '*granularity': 'uint32',
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
-            '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }
+            '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
 
 ##
 # @BlockDirtyBitmap:
@@ -2004,6 +2017,18 @@
 # @copy-mode: when to copy data to the destination; defaults to 'background'
 #             (Since: 3.0)
 #
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
 # Returns: nothing on success.
 #
 # Since: 2.6
@@ -2025,7 +2050,8 @@
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError',
             '*filter-node-name': 'str',
-            '*copy-mode': 'MirrorCopyMode' } }
+            '*copy-mode': 'MirrorCopyMode',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
 
 ##
 # @block_set_io_throttle:
-- 
cgit v1.1


From 241ca1ab78542f02e666636e0323bcfe3cb1d5e8 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:23 -0400
Subject: qapi/block-stream: expose new job properties

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-15-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockdev.c           |  9 +++++++++
 hmp.c                |  5 +++--
 qapi/block-core.json | 16 +++++++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 429cdf9..0cf8feb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3116,6 +3116,8 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
                       bool has_on_error, BlockdevOnError on_error,
+                      bool has_auto_finalize, bool auto_finalize,
+                      bool has_auto_dismiss, bool auto_dismiss,
                       Error **errp)
 {
     BlockDriverState *bs, *iter;
@@ -3185,6 +3187,13 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
     /* backing_file string overrides base bs filename */
     base_name = has_backing_file ? backing_file : base_name;
 
+    if (has_auto_finalize && !auto_finalize) {
+        job_flags |= JOB_MANUAL_FINALIZE;
+    }
+    if (has_auto_dismiss && !auto_dismiss) {
+        job_flags |= JOB_MANUAL_DISMISS;
+    }
+
     stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
                  job_flags, has_speed ? speed : 0, on_error, &local_err);
     if (local_err) {
diff --git a/hmp.c b/hmp.c
index 4975fa5..868c1a0 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1905,8 +1905,9 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
     qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
-                     false, NULL, qdict_haskey(qdict, "speed"), speed,
-                     true, BLOCKDEV_ON_ERROR_REPORT, &error);
+                     false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+                     BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
+                     &error);
 
     hmp_handle_error(mon, &error);
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e785c2e..f877e9e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2317,6 +2317,19 @@
 #            'stop' and 'enospc' can only be used if the block device
 #            supports io-status (see BlockInfo).  Since 1.3.
 #
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true. (Since 3.1)
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true. (Since 3.1)
+#
 # Returns: Nothing on success. If @device does not exist, DeviceNotFound.
 #
 # Since: 1.1
@@ -2332,7 +2345,8 @@
 { 'command': 'block-stream',
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
             '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
-            '*on-error': 'BlockdevOnError' } }
+            '*on-error': 'BlockdevOnError',
+            '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
 
 ##
 # @block-job-set-speed:
-- 
cgit v1.1


From dfaff2c37dfa52ab045cf87503e60ea56317230a Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:24 -0400
Subject: block/backup: qapi documentation fixup

Fix documentation to match the other jobs amended for 3.1.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180906130225.5118-16-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qapi/block-core.json | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index f877e9e..c0b3d33 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1272,13 +1272,14 @@
 #                   a different block device than @device).
 #
 # @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize.
-#                 When true, this job will automatically perform its abort or
-#                 commit actions.
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
 #                 Defaults to true. (Since 2.12)
 #
 # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completed ceased all work, and wait for @block-job-dismiss.
+#                has completely ceased all work, and awaits @block-job-dismiss.
 #                When true, this job will automatically disappear from the query
 #                list without user intervention.
 #                Defaults to true. (Since 2.12)
@@ -1327,13 +1328,14 @@
 #                   a different block device than @device).
 #
 # @auto-finalize: When false, this job will wait in a PENDING state after it has
-#                 finished its work, waiting for @block-job-finalize.
-#                 When true, this job will automatically perform its abort or
-#                 commit actions.
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
 #                 Defaults to true. (Since 2.12)
 #
 # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
-#                has completed ceased all work, and wait for @block-job-dismiss.
+#                has completely ceased all work, and awaits @block-job-dismiss.
 #                When true, this job will automatically disappear from the query
 #                list without user intervention.
 #                Defaults to true. (Since 2.12)
-- 
cgit v1.1


From 66da04ddd3dcb8c61ee664b6faced132da002006 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 6 Sep 2018 09:02:25 -0400
Subject: blockdev: document transactional shortcomings

Presently only the backup job really guarantees what one would consider
transactional semantics. To guard against someone helpfully adding them
in the future, document that there are shortcomings in the model that
would need to be audited at that time.

Signed-off-by: John Snow <jsnow@redhat.com>
Message-id: 20180906130225.5118-17-jsnow@redhat.com
Reviewed-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockdev.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 0cf8feb..d4b4240 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2182,7 +2182,13 @@ static const BlkActionOps actions[] = {
         .instance_size = sizeof(BlockDirtyBitmapState),
         .prepare = block_dirty_bitmap_disable_prepare,
         .abort = block_dirty_bitmap_disable_abort,
-     }
+    },
+    /* Where are transactions for MIRROR, COMMIT and STREAM?
+     * Although these blockjobs use transaction callbacks like the backup job,
+     * these jobs do not necessarily adhere to transaction semantics.
+     * These jobs may not fully undo all of their actions on abort, nor do they
+     * necessarily work in transactions with more than one job in them.
+     */
 };
 
 /**
-- 
cgit v1.1


From 3c605f4074ebeb97970eb660fb56a9cb06525923 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 27 Jun 2017 17:18:20 +0200
Subject: commit: Add top-node/base-node options

The block-commit QMP command required specifying the top and base nodes
of the commit jobs using the file name of that node. While this works
in simple cases (local files with absolute paths), the file names
generated for more complicated setups can be hard to predict.

The block-commit command has more problems than just this, so we want to
replace it altogether in the long run, but libvirt needs a reliable way
to address nodes now. So we don't want to wait for a new, cleaner
command, but just add the minimal thing needed right now.

This adds two new options top-node and base-node to the command, which
allow specifying node names instead. They are mutually exclusive with
the old options.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c           | 32 ++++++++++++++++++++++++++++++--
 qapi/block-core.json | 24 ++++++++++++++++++------
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index d4b4240..a8755bd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3214,7 +3214,9 @@ out:
 }
 
 void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
+                      bool has_base_node, const char *base_node,
                       bool has_base, const char *base,
+                      bool has_top_node, const char *top_node,
                       bool has_top, const char *top,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
@@ -3275,7 +3277,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     /* default top_bs is the active layer */
     top_bs = bs;
 
-    if (has_top && top) {
+    if (has_top_node && has_top) {
+        error_setg(errp, "'top-node' and 'top' are mutually exclusive");
+        goto out;
+    } else if (has_top_node) {
+        top_bs = bdrv_lookup_bs(NULL, top_node, errp);
+        if (top_bs == NULL) {
+            goto out;
+        }
+        if (!bdrv_chain_contains(bs, top_bs)) {
+            error_setg(errp, "'%s' is not in this backing file chain",
+                       top_node);
+            goto out;
+        }
+    } else if (has_top && top) {
         if (strcmp(bs->filename, top) != 0) {
             top_bs = bdrv_find_backing_image(bs, top);
         }
@@ -3288,7 +3303,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
 
     assert(bdrv_get_aio_context(top_bs) == aio_context);
 
-    if (has_base && base) {
+    if (has_base_node && has_base) {
+        error_setg(errp, "'base-node' and 'base' are mutually exclusive");
+        goto out;
+    } else if (has_base_node) {
+        base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+        if (base_bs == NULL) {
+            goto out;
+        }
+        if (!bdrv_chain_contains(top_bs, base_bs)) {
+            error_setg(errp, "'%s' is not in this backing file chain",
+                       base_node);
+            goto out;
+        }
+    } else if (has_base && base) {
         base_bs = bdrv_find_backing_image(top_bs, base);
     } else {
         base_bs = bdrv_find_base(top_bs);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c0b3d33..ac3b48e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1457,12 +1457,23 @@
 #
 # @device:  the device name or node-name of a root node
 #
-# @base:   The file name of the backing image to write data into.
-#                    If not specified, this is the deepest backing image.
+# @base-node: The node name of the backing image to write data into.
+#             If not specified, this is the deepest backing image.
+#             (since: 3.1)
 #
-# @top:    The file name of the backing image within the image chain,
-#                    which contains the topmost data to be committed down. If
-#                    not specified, this is the active layer.
+# @base: Same as @base-node, except that it is a file name rather than a node
+#        name. This must be the exact filename string that was used to open the
+#        node; other strings, even if addressing the same file, are not
+#        accepted (deprecated, use @base-node instead)
+#
+# @top-node: The node name of the backing image within the image chain
+#            which contains the topmost data to be committed down. If
+#            not specified, this is the active layer. (since: 3.1)
+#
+# @top: Same as @top-node, except that it is a file name rather than a node
+#       name. This must be the exact filename string that was used to open the
+#       node; other strings, even if addressing the same file, are not
+#       accepted (deprecated, use @base-node instead)
 #
 # @backing-file:  The backing file string to write into the overlay
 #                           image of 'top'.  If 'top' is the active layer,
@@ -1528,7 +1539,8 @@
 #
 ##
 { 'command': 'block-commit',
-  'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
+  'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str',
+            '*base': 'str', '*top-node': 'str', '*top': 'str',
             '*backing-file': 'str', '*speed': 'int',
             '*filter-node-name': 'str',
             '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
-- 
cgit v1.1


From d57177a48fc604e5427921bf20b22ee0e6d578b3 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 10 Aug 2018 18:20:45 +0200
Subject: qemu-iotests: Test commit with top-node/base-node

This adds some tests for block-commit with the new options top-node and
base-node (taking node names) instead of top and base (taking file
names).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/040     | 52 ++++++++++++++++++++++++++++++++++++++++++++--
 tests/qemu-iotests/040.out |  4 ++--
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 1beb5e6..1cb1cee 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -57,9 +57,12 @@ class ImageCommitTestCase(iotests.QMPTestCase):
         self.assert_no_active_block_jobs()
         self.vm.shutdown()
 
-    def run_commit_test(self, top, base, need_ready=False):
+    def run_commit_test(self, top, base, need_ready=False, node_names=False):
         self.assert_no_active_block_jobs()
-        result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
+        if node_names:
+            result = self.vm.qmp('block-commit', device='drive0', top_node=top, base_node=base)
+        else:
+            result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
         self.assert_qmp(result, 'return', {})
         self.wait_for_complete(need_ready)
 
@@ -101,6 +104,11 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
         self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
 
+    def test_commit_node(self):
+        self.run_commit_test("mid", "base", node_names=True)
+        self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
+
     def test_device_not_found(self):
         result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
@@ -123,6 +131,30 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_qmp(result, 'error/class', 'GenericError')
         self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
 
+    def test_top_node_invalid(self):
+        self.assert_no_active_block_jobs()
+        result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+
+    def test_base_node_invalid(self):
+        self.assert_no_active_block_jobs()
+        result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
+
+    def test_top_path_and_node(self):
+        self.assert_no_active_block_jobs()
+        result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', top='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "'top-node' and 'top' are mutually exclusive")
+
+    def test_base_path_and_node(self):
+        self.assert_no_active_block_jobs()
+        result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "'base-node' and 'base' are mutually exclusive")
+
     def test_top_is_active(self):
         self.run_commit_test(test_img, backing_img, need_ready=True)
         self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
@@ -139,6 +171,22 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_qmp(result, 'error/class', 'GenericError')
         self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img)
 
+    def test_top_and_base_node_reversed(self):
+        self.assert_no_active_block_jobs()
+        result = self.vm.qmp('block-commit', device='drive0', top_node='base', base_node='top')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "'top' is not in this backing file chain")
+
+    def test_top_node_in_wrong_chain(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-add', driver='null-co', node_name='null')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-commit', device='drive0', top_node='null', base_node='base')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "'null' is not in this backing file chain")
+
     # When the job is running on a BB that is automatically deleted on hot
     # unplug, the job is cancelled when the device disappears
     def test_hot_unplug(self):
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index e20a75c..802ffaa 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-.............................
+...........................................
 ----------------------------------------------------------------------
-Ran 29 tests
+Ran 43 tests
 
 OK
-- 
cgit v1.1


From e091f0e905a4481f347913420f327d427f18d9d4 Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Wed, 5 Sep 2018 13:23:34 +0200
Subject: block/linux-aio: acquire AioContext before
 qemu_laio_process_completions

In qemu_laio_process_completions_and_submit, the AioContext is acquired
before the ioq_submit iteration and after qemu_laio_process_completions,
but the latter is not thread safe either.

This change avoids a number of random crashes when the Main Thread and
an IO Thread collide processing completions for the same AioContext.
This is an example of such crash:

 - The IO Thread is trying to acquire the AioContext at aio_co_enter,
   which evidences that it didn't lock it before:

Thread 3 (Thread 0x7fdfd8bd8700 (LWP 36743)):
 #0  0x00007fdfe0dd542d in __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
 #1  0x00007fdfe0dd0de6 in _L_lock_870 () at /lib64/libpthread.so.0
 #2  0x00007fdfe0dd0cdf in __GI___pthread_mutex_lock (mutex=mutex@entry=0x5631fde0e6c0)
    at ../nptl/pthread_mutex_lock.c:114
 #3  0x00005631fc0603a7 in qemu_mutex_lock_impl (mutex=0x5631fde0e6c0, file=0x5631fc23520f "util/async.c", line=511) at util/qemu-thread-posix.c:66
 #4  0x00005631fc05b558 in aio_co_enter (ctx=0x5631fde0e660, co=0x7fdfcc0c2b40) at util/async.c:493
 #5  0x00005631fc05b5ac in aio_co_wake (co=<optimized out>) at util/async.c:478
 #6  0x00005631fbfc51ad in qemu_laio_process_completion (laiocb=<optimized out>) at block/linux-aio.c:104
 #7  0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
    at block/linux-aio.c:222
 #8  0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
    at block/linux-aio.c:237
 #9  0x00005631fc05d978 in aio_dispatch_handlers (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:406
 #10 0x00005631fc05e3ea in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=true)
    at util/aio-posix.c:693
 #11 0x00005631fbd7ad96 in iothread_run (opaque=0x5631fde0e1c0) at iothread.c:64
 #12 0x00007fdfe0dcee25 in start_thread (arg=0x7fdfd8bd8700) at pthread_create.c:308
 #13 0x00007fdfe0afc34d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

 - The Main Thread is also processing completions from the same
   AioContext, and crashes due to failed assertion at util/iov.c:78:

Thread 1 (Thread 0x7fdfeb5eac80 (LWP 36740)):
 #0  0x00007fdfe0a391f7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
 #1  0x00007fdfe0a3a8e8 in __GI_abort () at abort.c:90
 #2  0x00007fdfe0a32266 in __assert_fail_base (fmt=0x7fdfe0b84e68 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset")
    at assert.c:92
 #3  0x00007fdfe0a32312 in __GI___assert_fail (assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") at assert.c:101
 #4  0x00005631fc065287 in iov_memset (iov=<optimized out>, iov_cnt=<optimized out>, offset=<optimized out>, offset@entry=65536, fillc=fillc@entry=0, bytes=15515191315812405248) at util/iov.c:78
 #5  0x00005631fc065a63 in qemu_iovec_memset (qiov=<optimized out>, offset=offset@entry=65536, fillc=fillc@entry=0, bytes=<optimized out>) at util/iov.c:410
 #6  0x00005631fbfc5178 in qemu_laio_process_completion (laiocb=0x7fdd920df630) at block/linux-aio.c:88
 #7  0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
    at block/linux-aio.c:222
 #8  0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
    at block/linux-aio.c:237
 #9  0x00005631fbfc54ed in qemu_laio_poll_cb (opaque=<optimized out>) at block/linux-aio.c:272
 #10 0x00005631fc05d85e in run_poll_handlers_once (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:497
 #11 0x00005631fc05e2ca in aio_poll (blocking=false, ctx=0x5631fde0e660) at util/aio-posix.c:574
 #12 0x00005631fc05e2ca in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=false)
    at util/aio-posix.c:604
 #13 0x00005631fbfcb8a3 in bdrv_do_drained_begin (ignore_parent=<optimized out>, recursive=<optimized out>, bs=<optimized out>) at block/io.c:273
 #14 0x00005631fbfcb8a3 in bdrv_do_drained_begin (bs=0x5631fe8b6200, recursive=<optimized out>, parent=0x0, ignore_bds_parents=<optimized out>, poll=<optimized out>) at block/io.c:390
 #15 0x00005631fbfbcd2e in blk_drain (blk=0x5631fe83ac80) at block/block-backend.c:1590
 #16 0x00005631fbfbe138 in blk_remove_bs (blk=blk@entry=0x5631fe83ac80) at block/block-backend.c:774
 #17 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:401
 #18 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:449
 #19 0x00005631fbfc9a69 in commit_complete (job=0x5631fe8b94b0, opaque=0x7fdfcc1bb080)
    at block/commit.c:92
 #20 0x00005631fbf7d662 in job_defer_to_main_loop_bh (opaque=0x7fdfcc1b4560) at job.c:973
 #21 0x00005631fc05ad41 in aio_bh_poll (bh=0x7fdfcc01ad90) at util/async.c:90
 #22 0x00005631fc05ad41 in aio_bh_poll (ctx=ctx@entry=0x5631fddffdb0) at util/async.c:118
 #23 0x00005631fc05e210 in aio_dispatch (ctx=0x5631fddffdb0) at util/aio-posix.c:436
 #24 0x00005631fc05ac1e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:261
 #25 0x00007fdfeaae44c9 in g_main_context_dispatch (context=0x5631fde00140) at gmain.c:3201
 #26 0x00007fdfeaae44c9 in g_main_context_dispatch (context=context@entry=0x5631fde00140) at gmain.c:3854
 #27 0x00005631fc05d503 in main_loop_wait () at util/main-loop.c:215
 #28 0x00005631fc05d503 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:238
 #29 0x00005631fc05d503 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
 #30 0x00005631fbd81412 in main_loop () at vl.c:1866
 #31 0x00005631fbc18ff3 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
    at vl.c:4647

 - A closer examination shows that s->io_q.in_flight appears to have
   gone backwards:

(gdb) frame 7
 #7  0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
    at block/linux-aio.c:222
222	            qemu_laio_process_completion(laiocb);
(gdb) p s
$2 = (LinuxAioState *) 0x7fdfc0297670
(gdb) p *s
$3 = {aio_context = 0x5631fde0e660, ctx = 0x7fdfeb43b000, e = {rfd = 33, wfd = 33}, io_q = {plugged = 0,
    in_queue = 0, in_flight = 4294967280, blocked = false, pending = {sqh_first = 0x0,
      sqh_last = 0x7fdfc0297698}}, completion_bh = 0x7fdfc0280ef0, event_idx = 21, event_max = 241}
(gdb) p/x s->io_q.in_flight
$4 = 0xfffffff0

Signed-off-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/linux-aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 19eb922..217ce60 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 
 static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
 {
+    aio_context_acquire(s->aio_context);
     qemu_laio_process_completions(s);
 
-    aio_context_acquire(s->aio_context);
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
     }
-- 
cgit v1.1


From 8961be33e8ca7e809c603223803ea66ef7ea5be7 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Thu, 6 Sep 2018 17:25:41 +0300
Subject: block: Fix use after free error in bdrv_open_inherit()

When a block device is opened with BDRV_O_SNAPSHOT and the
bdrv_append_temp_snapshot() call fails then the error code path tries
to unref the already destroyed 'options' QDict.

This can be reproduced easily by setting TMPDIR to a location where
the QEMU process can't write:

   $ TMPDIR=/nonexistent $QEMU -drive driver=null-co,snapshot=on

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block.c b/block.c
index 0dbb1fc..a381c8e 100644
--- a/block.c
+++ b/block.c
@@ -2792,6 +2792,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
     bdrv_parent_cb_change_media(bs, true);
 
     qobject_unref(options);
+    options = NULL;
 
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
-- 
cgit v1.1


From 6a7014ef22ad3cf9110ca0e178f73a67a6483e00 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 10 Sep 2018 12:29:23 +0300
Subject: qemu-iotests: Test snapshot=on with nonexistent TMPDIR

We just fixed a bug that was causing a use-after-free when QEMU was
unable to create a temporary snapshot. This is a test case for this
scenario.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/051        | 3 +++
 tests/qemu-iotests/051.out    | 3 +++
 tests/qemu-iotests/051.pc.out | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index ee9c820..25d3b2d 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -354,6 +354,9 @@ printf %b "qemu-io $device_id \"write -P 0x33 0 4k\"\ncommit $device_id\n" |
 
 $QEMU_IO -c "read -P 0x33 0 4k" "$TEST_IMG" | _filter_qemu_io
 
+# Using snapshot=on with a non-existent TMPDIR
+TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index b727350..793af2a 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -455,4 +455,7 @@ wrote 4096/4096 bytes at offset 0
 
 read 4096/4096 bytes at offset 0
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Testing: -drive driver=null-co,snapshot=on
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+
 *** done
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index e9257fe..ca64eda 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -527,4 +527,7 @@ wrote 4096/4096 bytes at offset 0
 
 read 4096/4096 bytes at offset 0
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Testing: -drive driver=null-co,snapshot=on
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+
 *** done
-- 
cgit v1.1


From 6808ae0417131f8dbe7b051256dff7a16634dc1d Mon Sep 17 00:00:00 2001
From: Sergio Lopez <slp@redhat.com>
Date: Wed, 5 Sep 2018 11:33:51 +0200
Subject: util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb

AIO Coroutines shouldn't by managed by an AioContext different than the
one assigned when they are created. aio_co_enter avoids entering a
coroutine from a different AioContext, calling aio_co_schedule instead.

Scheduled coroutines are then entered by co_schedule_bh_cb using
qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the
current AioContext obtained with qemu_get_current_aio_context.
Eventually, co->ctx will be set to the AioContext passed as an argument
to qemu_aio_coroutine_enter.

This means that, if an IO Thread's AioConext is being processed by the
Main Thread (due to aio_poll being called with a BDS AioContext, as it
happens in AIO_WAIT_WHILE among other places), the AioContext from some
coroutines may be wrongly replaced with the one from the Main Thread.

This is the root cause behind some crashes, mainly triggered by the
drain code at block/io.c. The most common are these abort and failed
assertion:

util/async.c:aio_co_schedule
456     if (scheduled) {
457         fprintf(stderr,
458                 "%s: Co-routine was already scheduled in '%s'\n",
459                 __func__, scheduled);
460         abort();
461     }

util/qemu-coroutine-lock.c:
286     assert(mutex->holder == self);

But it's also known to cause random errors at different locations, and
even SIGSEGV with broken coroutine backtraces.

By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can
pass the correct AioContext as an argument, making sure co->ctx is not
wrongly altered.

Signed-off-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/async.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/async.c b/util/async.c
index 05979f8..c10642a 100644
--- a/util/async.c
+++ b/util/async.c
@@ -400,7 +400,7 @@ static void co_schedule_bh_cb(void *opaque)
 
         /* Protected by write barrier in qemu_aio_coroutine_enter */
         atomic_set(&co->scheduled, NULL);
-        qemu_coroutine_enter(co);
+        qemu_aio_coroutine_enter(ctx, co);
         aio_context_release(ctx);
     }
 }
-- 
cgit v1.1


From 49880165a44f26dc84651858750facdee31f2513 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 24 Aug 2018 10:43:42 +0800
Subject: job: Fix nested aio_poll() hanging in job_txn_apply

All callers have acquired ctx already. Doing that again results in
aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the
callback cannot make progress because ctx is recursively locked, for
example, when drive-backup finishes.

There are two callers of job_finalize():

    fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize'
    blockdev.c:    job_finalize(&job->job, errp);
    blockdev.c-    aio_context_release(aio_context);
    --
    job-qmp.c:    job_finalize(job, errp);
    job-qmp.c-    aio_context_release(aio_context);
    --
    tests/test-blockjob.c:    job_finalize(&job->job, &error_abort);
    tests/test-blockjob.c-    assert(job->job.status == JOB_STATUS_CONCLUDED);

Ignoring the test, it's easy to see both callers to job_finalize (and
job_do_finalize) have acquired the context.

Cc: qemu-stable@nongnu.org
Reported-by: Gu Nini <ngu@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 job.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/job.c b/job.c
index 192f168..3dd31a5 100644
--- a/job.c
+++ b/job.c
@@ -136,21 +136,13 @@ static void job_txn_del_job(Job *job)
     }
 }
 
-static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
+static int job_txn_apply(JobTxn *txn, int fn(Job *))
 {
-    AioContext *ctx;
     Job *job, *next;
     int rc = 0;
 
     QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
-        if (lock) {
-            ctx = job->aio_context;
-            aio_context_acquire(ctx);
-        }
         rc = fn(job);
-        if (lock) {
-            aio_context_release(ctx);
-        }
         if (rc) {
             break;
         }
@@ -780,11 +772,11 @@ static void job_do_finalize(Job *job)
     assert(job && job->txn);
 
     /* prepare the transaction to complete */
-    rc = job_txn_apply(job->txn, job_prepare, true);
+    rc = job_txn_apply(job->txn, job_prepare);
     if (rc) {
         job_completed_txn_abort(job);
     } else {
-        job_txn_apply(job->txn, job_finalize_single, true);
+        job_txn_apply(job->txn, job_finalize_single);
     }
 }
 
@@ -830,10 +822,10 @@ static void job_completed_txn_success(Job *job)
         assert(other_job->ret == 0);
     }
 
-    job_txn_apply(txn, job_transition_to_pending, false);
+    job_txn_apply(txn, job_transition_to_pending);
 
     /* If no jobs need manual finalization, automatically do so */
-    if (job_txn_apply(txn, job_needs_finalize, false) == 0) {
+    if (job_txn_apply(txn, job_needs_finalize) == 0) {
         job_do_finalize(job);
     }
 }
-- 
cgit v1.1


From d1756c780b7879fb64e41135feac781d84a1f995 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 12 Sep 2018 13:50:38 +0200
Subject: job: Fix missing locking due to mismerge

job_completed() had a problem with double locking that was recently
fixed independently by two different commits:

"job: Fix nested aio_poll() hanging in job_txn_apply"
"jobs: add exit shim"

One fix removed the first aio_context_acquire(), the other fix removed
the other one. Now we have a bug again and the code is run without any
locking.

Add it back in one of the places.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
---
 job.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/job.c b/job.c
index 3dd31a5..d17b1c8 100644
--- a/job.c
+++ b/job.c
@@ -847,7 +847,11 @@ static void job_completed(Job *job)
 static void job_exit(void *opaque)
 {
     Job *job = (Job *)opaque;
+    AioContext *ctx = job->aio_context;
+
+    aio_context_acquire(ctx);
     job_completed(job);
+    aio_context_release(ctx);
 }
 
 /**
-- 
cgit v1.1


From 34dc97b9a0e592bc466bdb0bbfe45d77304a72b6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 17 Aug 2018 14:53:05 +0200
Subject: blockjob: Wake up BDS when job becomes idle

In the context of draining a BDS, the .drained_poll callback of block
jobs is called. If this returns true (i.e. there is still some activity
pending), the drain operation may call aio_poll() with blocking=true to
wait for completion.

As soon as the pending activity is completed and the job finally arrives
in a quiescent state (i.e. its coroutine either yields with busy=false
or terminates), the block job must notify the aio_poll() loop to wake
up, otherwise we get a deadlock if both are running in different
threads.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 blockjob.c               | 18 ++++++++++++++++++
 include/block/blockjob.h | 13 +++++++++++++
 include/qemu/job.h       |  3 +++
 job.c                    |  7 +++++++
 4 files changed, 41 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index bf7ef48..58dbd87 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -221,6 +221,22 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
     return 0;
 }
 
+void block_job_wakeup_all_bdrv(BlockJob *job)
+{
+    GSList *l;
+
+    for (l = job->nodes; l; l = l->next) {
+        BdrvChild *c = l->data;
+        bdrv_wakeup(c->bs);
+    }
+}
+
+static void block_job_on_idle(Notifier *n, void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_wakeup_all_bdrv(job);
+}
+
 bool block_job_is_internal(BlockJob *job)
 {
     return (job->job.id == NULL);
@@ -416,6 +432,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     job->finalize_completed_notifier.notify = block_job_event_completed;
     job->pending_notifier.notify = block_job_event_pending;
     job->ready_notifier.notify = block_job_event_ready;
+    job->idle_notifier.notify = block_job_on_idle;
 
     notifier_list_add(&job->job.on_finalize_cancelled,
                       &job->finalize_cancelled_notifier);
@@ -423,6 +440,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                       &job->finalize_completed_notifier);
     notifier_list_add(&job->job.on_pending, &job->pending_notifier);
     notifier_list_add(&job->job.on_ready, &job->ready_notifier);
+    notifier_list_add(&job->job.on_idle, &job->idle_notifier);
 
     error_setg(&job->blocker, "block device is in use by block job: %s",
                job_type_str(&job->job));
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 32c00b7..2290bbb 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -70,6 +70,9 @@ typedef struct BlockJob {
     /** Called when the job transitions to READY */
     Notifier ready_notifier;
 
+    /** Called when the job coroutine yields or terminates */
+    Notifier idle_notifier;
+
     /** BlockDriverStates that are involved in this block job */
     GSList *nodes;
 } BlockJob;
@@ -119,6 +122,16 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
 void block_job_remove_all_bdrv(BlockJob *job);
 
 /**
+ * block_job_wakeup_all_bdrv:
+ * @job: The block job
+ *
+ * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the
+ * job. This function is to be called whenever child_job_drained_poll() would
+ * go from true to false to notify waiting drain requests.
+ */
+void block_job_wakeup_all_bdrv(BlockJob *job);
+
+/**
  * block_job_set_speed:
  * @job: The job to set the speed for.
  * @speed: The new value
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 5cb0681..b4a784d 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -156,6 +156,9 @@ typedef struct Job {
     /** Notifiers called when the job transitions to READY */
     NotifierList on_ready;
 
+    /** Notifiers called when the job coroutine yields or terminates */
+    NotifierList on_idle;
+
     /** Element of the list of jobs */
     QLIST_ENTRY(Job) job_list;
 
diff --git a/job.c b/job.c
index d17b1c8..4812962 100644
--- a/job.c
+++ b/job.c
@@ -402,6 +402,11 @@ static void job_event_ready(Job *job)
     notifier_list_notify(&job->on_ready, job);
 }
 
+static void job_event_idle(Job *job)
+{
+    notifier_list_notify(&job->on_idle, job);
+}
+
 void job_enter_cond(Job *job, bool(*fn)(Job *job))
 {
     if (!job_started(job)) {
@@ -447,6 +452,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
         timer_mod(&job->sleep_timer, ns);
     }
     job->busy = false;
+    job_event_idle(job);
     job_unlock();
     qemu_coroutine_yield();
 
@@ -865,6 +871,7 @@ static void coroutine_fn job_co_entry(void *opaque)
     assert(job && job->driver && job->driver->run);
     job_pause_point(job);
     job->ret = job->driver->run(job, &job->err);
+    job_event_idle(job);
     job->deferred_to_main_loop = true;
     aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
-- 
cgit v1.1


From 486574483aba988c83b20e7d3f1ccd50c4c333d8 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 5 Sep 2018 18:14:17 +0200
Subject: aio-wait: Increase num_waiters even in home thread

Even if AIO_WAIT_WHILE() is called in the home context of the
AioContext, we still want to allow the condition to change depending on
other threads as long as they kick the AioWait. Specfically block jobs
can be running in an I/O thread and should then be able to kick a drain
in the main loop context.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 include/block/aio-wait.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index c85a62f..600fad1 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -76,6 +76,8 @@ typedef struct {
     bool waited_ = false;                                          \
     AioWait *wait_ = (wait);                                       \
     AioContext *ctx_ = (ctx);                                      \
+    /* Increment wait_->num_waiters before evaluating cond. */     \
+    atomic_inc(&wait_->num_waiters);                               \
     if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
         while ((cond)) {                                           \
             aio_poll(ctx_, true);                                  \
@@ -84,8 +86,6 @@ typedef struct {
     } else {                                                       \
         assert(qemu_get_current_aio_context() ==                   \
                qemu_get_aio_context());                            \
-        /* Increment wait_->num_waiters before evaluating cond. */ \
-        atomic_inc(&wait_->num_waiters);                           \
         while ((cond)) {                                           \
             if (ctx_) {                                            \
                 aio_context_release(ctx_);                         \
@@ -96,8 +96,8 @@ typedef struct {
             }                                                      \
             waited_ = true;                                        \
         }                                                          \
-        atomic_dec(&wait_->num_waiters);                           \
     }                                                              \
+    atomic_dec(&wait_->num_waiters);                               \
     waited_; })
 
 /**
-- 
cgit v1.1


From f62c172959cd2b6de4dd8ba782e855d64d94764b Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 4 Sep 2018 18:23:06 +0200
Subject: test-bdrv-drain: Drain with block jobs in an I/O thread

This extends the existing drain test with a block job to include
variants where the block job runs in a different AioContext.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 tests/test-bdrv-drain.c | 92 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 86 insertions(+), 6 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 89ac15e..57da22a 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -174,6 +174,28 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
     }
 }
 
+static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs)
+{
+    if (drain_type != BDRV_DRAIN_ALL) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
+    do_drain_begin(drain_type, bs);
+    if (drain_type != BDRV_DRAIN_ALL) {
+        aio_context_release(bdrv_get_aio_context(bs));
+    }
+}
+
+static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs)
+{
+    if (drain_type != BDRV_DRAIN_ALL) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
+    do_drain_end(drain_type, bs);
+    if (drain_type != BDRV_DRAIN_ALL) {
+        aio_context_release(bdrv_get_aio_context(bs));
+    }
+}
+
 static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
 {
     BlockBackend *blk;
@@ -785,11 +807,13 @@ BlockJobDriver test_job_driver = {
     },
 };
 
-static void test_blockjob_common(enum drain_type drain_type)
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
 {
     BlockBackend *blk_src, *blk_target;
     BlockDriverState *src, *target;
     BlockJob *job;
+    IOThread *iothread = NULL;
+    AioContext *ctx;
     int ret;
 
     src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
@@ -797,21 +821,31 @@ static void test_blockjob_common(enum drain_type drain_type)
     blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_src, src, &error_abort);
 
+    if (use_iothread) {
+        iothread = iothread_new();
+        ctx = iothread_get_aio_context(iothread);
+        blk_set_aio_context(blk_src, ctx);
+    } else {
+        ctx = qemu_get_aio_context();
+    }
+
     target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
                                   &error_abort);
     blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
     blk_insert_bs(blk_target, target, &error_abort);
 
+    aio_context_acquire(ctx);
     job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL,
                            0, 0, NULL, NULL, &error_abort);
     block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
     job_start(&job->job);
+    aio_context_release(ctx);
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
     g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
-    do_drain_begin(drain_type, src);
+    do_drain_begin_unlocked(drain_type, src);
 
     if (drain_type == BDRV_DRAIN_ALL) {
         /* bdrv_drain_all() drains both src and target */
@@ -822,7 +856,14 @@ static void test_blockjob_common(enum drain_type drain_type)
     g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
-    do_drain_end(drain_type, src);
+    do_drain_end_unlocked(drain_type, src);
+
+    if (use_iothread) {
+        /* paused is reset in the I/O thread, wait for it */
+        while (job->job.paused) {
+            aio_poll(qemu_get_aio_context(), false);
+        }
+    }
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
@@ -841,32 +882,64 @@ static void test_blockjob_common(enum drain_type drain_type)
 
     do_drain_end(drain_type, target);
 
+    if (use_iothread) {
+        /* paused is reset in the I/O thread, wait for it */
+        while (job->job.paused) {
+            aio_poll(qemu_get_aio_context(), false);
+        }
+    }
+
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
     g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
 
+    aio_context_acquire(ctx);
     ret = job_complete_sync(&job->job, &error_abort);
     g_assert_cmpint(ret, ==, 0);
 
+    if (use_iothread) {
+        blk_set_aio_context(blk_src, qemu_get_aio_context());
+    }
+    aio_context_release(ctx);
+
     blk_unref(blk_src);
     blk_unref(blk_target);
     bdrv_unref(src);
     bdrv_unref(target);
+
+    if (iothread) {
+        iothread_join(iothread);
+    }
 }
 
 static void test_blockjob_drain_all(void)
 {
-    test_blockjob_common(BDRV_DRAIN_ALL);
+    test_blockjob_common(BDRV_DRAIN_ALL, false);
 }
 
 static void test_blockjob_drain(void)
 {
-    test_blockjob_common(BDRV_DRAIN);
+    test_blockjob_common(BDRV_DRAIN, false);
 }
 
 static void test_blockjob_drain_subtree(void)
 {
-    test_blockjob_common(BDRV_SUBTREE_DRAIN);
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, false);
+}
+
+static void test_blockjob_iothread_drain_all(void)
+{
+    test_blockjob_common(BDRV_DRAIN_ALL, true);
+}
+
+static void test_blockjob_iothread_drain(void)
+{
+    test_blockjob_common(BDRV_DRAIN, true);
+}
+
+static void test_blockjob_iothread_drain_subtree(void)
+{
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, true);
 }
 
 
@@ -1338,6 +1411,13 @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
                     test_blockjob_drain_subtree);
 
+    g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
+                    test_blockjob_iothread_drain_all);
+    g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
+                    test_blockjob_iothread_drain);
+    g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
+                    test_blockjob_iothread_drain_subtree);
+
     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
-- 
cgit v1.1


From 30c070a547322a5e41ce129d540bca3653b1a9c8 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 17 Aug 2018 17:29:08 +0200
Subject: test-blockjob: Acquire AioContext around job_cancel_sync()

All callers in QEMU proper hold the AioContext lock when calling
job_finish_sync(). test-blockjob should do the same when it calls the
function indirectly through job_cancel_sync().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 include/qemu/job.h    | 6 ++++++
 tests/test-blockjob.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index b4a784d..63c60ef 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -524,6 +524,8 @@ void job_user_cancel(Job *job, bool force, Error **errp);
  *
  * Returns the return value from the job if the job actually completed
  * during the call, or -ECANCELED if it was canceled.
+ *
+ * Callers must hold the AioContext lock of job->aio_context.
  */
 int job_cancel_sync(Job *job);
 
@@ -541,6 +543,8 @@ void job_cancel_sync_all(void);
  * function).
  *
  * Returns the return value from the job.
+ *
+ * Callers must hold the AioContext lock of job->aio_context.
  */
 int job_complete_sync(Job *job, Error **errp);
 
@@ -566,6 +570,8 @@ void job_dismiss(Job **job, Error **errp);
  *
  * Returns 0 if the job is successfully completed, -ECANCELED if the job was
  * cancelled before completing, and -errno in other error cases.
+ *
+ * Callers must hold the AioContext lock of job->aio_context.
  */
 int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);
 
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index de4c1c2..652d1e8 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -223,6 +223,10 @@ static void cancel_common(CancelJob *s)
     BlockJob *job = &s->common;
     BlockBackend *blk = s->blk;
     JobStatus sts = job->job.status;
+    AioContext *ctx;
+
+    ctx = job->job.aio_context;
+    aio_context_acquire(ctx);
 
     job_cancel_sync(&job->job);
     if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) {
@@ -232,6 +236,8 @@ static void cancel_common(CancelJob *s)
     assert(job->job.status == JOB_STATUS_NULL);
     job_unref(&job->job);
     destroy_blk(blk);
+
+    aio_context_release(ctx);
 }
 
 static void test_cancel_created(void)
-- 
cgit v1.1


From de0fbe64806321fc3e6399bfab360553db87a41d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 17 Aug 2018 14:58:49 +0200
Subject: job: Use AIO_WAIT_WHILE() in job_finish_sync()

job_finish_sync() needs to release the AioContext lock of the job before
calling aio_poll(). Otherwise, callbacks called by aio_poll() would
possibly take the lock a second time and run into a deadlock with a
nested AIO_WAIT_WHILE() call.

Also, job_drain() without aio_poll() isn't necessarily enough to make
progress on a job, it could depend on bottom halves to be executed.

Combine both open-coded while loops into a single AIO_WAIT_WHILE() call
that solves both of these problems.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 job.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/job.c b/job.c
index 4812962..7ec8c3b 100644
--- a/job.c
+++ b/job.c
@@ -29,6 +29,7 @@
 #include "qemu/job.h"
 #include "qemu/id.h"
 #include "qemu/main-loop.h"
+#include "block/aio-wait.h"
 #include "trace-root.h"
 #include "qapi/qapi-events-job.h"
 
@@ -962,6 +963,7 @@ void job_complete(Job *job, Error **errp)
 int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
 {
     Error *local_err = NULL;
+    AioWait dummy_wait = {};
     int ret;
 
     job_ref(job);
@@ -974,14 +976,10 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
         job_unref(job);
         return -EBUSY;
     }
-    /* job_drain calls job_enter, and it should be enough to induce progress
-     * until the job completes or moves to the main thread. */
-    while (!job->deferred_to_main_loop && !job_is_completed(job)) {
-        job_drain(job);
-    }
-    while (!job_is_completed(job)) {
-        aio_poll(qemu_get_aio_context(), true);
-    }
+
+    AIO_WAIT_WHILE(&dummy_wait, job->aio_context,
+                   (job_drain(job), !job_is_completed(job)));
+
     ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
     job_unref(job);
     return ret;
-- 
cgit v1.1


From ae23dde9dd486e57e152a0ebc9802caddedc45fc Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 6 Sep 2018 11:58:01 +0200
Subject: test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback

This is a regression test for a deadlock that occurred in block job
completion callbacks (via job_defer_to_main_loop) because the AioContext
lock was taken twice: once in job_finish_sync() and then again in
job_defer_to_main_loop_bh(). This would cause AIO_WAIT_WHILE() to hang.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 tests/test-bdrv-drain.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 57da22a..c3c17b9 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -774,6 +774,15 @@ typedef struct TestBlockJob {
     bool should_complete;
 } TestBlockJob;
 
+static int test_job_prepare(Job *job)
+{
+    TestBlockJob *s = container_of(job, TestBlockJob, common.job);
+
+    /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
+    blk_flush(s->common.blk);
+    return 0;
+}
+
 static int coroutine_fn test_job_run(Job *job, Error **errp)
 {
     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
@@ -804,6 +813,7 @@ BlockJobDriver test_job_driver = {
         .drain          = block_job_drain,
         .run            = test_job_run,
         .complete       = test_job_complete,
+        .prepare        = test_job_prepare,
     },
 };
 
-- 
cgit v1.1


From aa1361d54aac43094b98024b8b6c804eb6e41661 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 17 Aug 2018 18:54:18 +0200
Subject: block: Add missing locking in bdrv_co_drain_bh_cb()

bdrv_do_drained_begin/end() assume that they are called with the
AioContext lock of bs held. If we call drain functions from a coroutine
with the AioContext lock held, we yield and schedule a BH to move out of
coroutine context. This means that the lock for the home context of the
coroutine is released and must be re-acquired in the bottom half.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/io.c               | 15 +++++++++++++++
 include/qemu/coroutine.h |  5 +++++
 util/qemu-coroutine.c    |  5 +++++
 3 files changed, 25 insertions(+)

diff --git a/block/io.c b/block/io.c
index 7100344..914ba78 100644
--- a/block/io.c
+++ b/block/io.c
@@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
     BlockDriverState *bs = data->bs;
 
     if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
+
+        /*
+         * When the coroutine yielded, the lock for its home context was
+         * released, so we need to re-acquire it here. If it explicitly
+         * acquired a different context, the lock is still held and we don't
+         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
+         */
+        if (ctx == co_ctx) {
+            aio_context_acquire(ctx);
+        }
         bdrv_dec_in_flight(bs);
         if (data->begin) {
             bdrv_do_drained_begin(bs, data->recursive, data->parent,
@@ -296,6 +308,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
             bdrv_do_drained_end(bs, data->recursive, data->parent,
                                 data->ignore_bds_parents);
         }
+        if (ctx == co_ctx) {
+            aio_context_release(ctx);
+        }
     } else {
         assert(data->begin);
         bdrv_drain_all_begin();
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 6f8a487..9801e7f 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
 void coroutine_fn qemu_coroutine_yield(void);
 
 /**
+ * Get the AioContext of the given coroutine
+ */
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
+
+/**
  * Get the currently executing coroutine
  */
 Coroutine *coroutine_fn qemu_coroutine_self(void);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 1ba4191..2295928 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -198,3 +198,8 @@ bool qemu_coroutine_entered(Coroutine *co)
 {
     return co->caller;
 }
+
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
+{
+    return co->ctx;
+}
-- 
cgit v1.1


From fe5258a503a87e69be37c9ac48799e293809386e Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 6 Sep 2018 17:43:49 +0200
Subject: block-backend: Add .drained_poll callback

A bdrv_drain operation must ensure that all parents are quiesced, this
includes BlockBackends. Otherwise, callbacks called by requests that are
completed on the BDS layer, but not quite yet on the BlockBackend layer
could still create new requests.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/block-backend.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 14a1b7a..4e7d08a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -121,6 +121,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
     abort();
 }
 static void blk_root_drained_begin(BdrvChild *child);
+static bool blk_root_drained_poll(BdrvChild *child);
 static void blk_root_drained_end(BdrvChild *child);
 
 static void blk_root_change_media(BdrvChild *child, bool load);
@@ -294,6 +295,7 @@ static const BdrvChildRole child_root = {
     .get_parent_desc    = blk_root_get_parent_desc,
 
     .drained_begin      = blk_root_drained_begin,
+    .drained_poll       = blk_root_drained_poll,
     .drained_end        = blk_root_drained_end,
 
     .activate           = blk_root_activate,
@@ -2189,6 +2191,13 @@ static void blk_root_drained_begin(BdrvChild *child)
     }
 }
 
+static bool blk_root_drained_poll(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    assert(blk->quiesce_counter);
+    return !!blk->in_flight;
+}
+
 static void blk_root_drained_end(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
-- 
cgit v1.1


From 5ca9d21bd1c8eeb578d0964e31bd03d47c25773d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 7 Sep 2018 13:45:54 +0200
Subject: block-backend: Fix potential double blk_delete()

blk_unref() first decreases the refcount of the BlockBackend and calls
blk_delete() if the refcount reaches zero. Requests can still be in
flight at this point, they are only drained during blk_delete():

At this point, arbitrary callbacks can run. If any callback takes a
temporary BlockBackend reference, it will first increase the refcount to
1 and then decrease it to 0 again, triggering another blk_delete(). This
will cause a use-after-free crash in the outer blk_delete().

Fix it by draining the BlockBackend before decreasing to refcount to 0.
Assert in blk_ref() that it never takes the first refcount (which would
mean that the BlockBackend is already being deleted).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/block-backend.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 4e7d08a..f71fdb4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -435,6 +435,7 @@ int blk_get_refcnt(BlockBackend *blk)
  */
 void blk_ref(BlockBackend *blk)
 {
+    assert(blk->refcnt > 0);
     blk->refcnt++;
 }
 
@@ -447,7 +448,13 @@ void blk_unref(BlockBackend *blk)
 {
     if (blk) {
         assert(blk->refcnt > 0);
-        if (!--blk->refcnt) {
+        if (blk->refcnt > 1) {
+            blk->refcnt--;
+        } else {
+            blk_drain(blk);
+            /* blk_drain() cannot resurrect blk, nobody held a reference */
+            assert(blk->refcnt == 1);
+            blk->refcnt = 0;
             blk_delete(blk);
         }
     }
-- 
cgit v1.1


From 46aaf2a566e364a62315219255099cbf1c9b990d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 6 Sep 2018 17:47:22 +0200
Subject: block-backend: Decrease in_flight only after callback

Request callbacks can do pretty much anything, including operations that
will yield from the coroutine (such as draining the backend). In that
case, a decreased in_flight would be visible to other code and could
lead to a drain completing while the callback hasn't actually completed
yet.

Note that reordering these operations forbids calling drain directly
inside an AIO callback. As Paolo explains, indirectly calling it is
okay:

- Calling it through a coroutine is okay, because then
  bdrv_drained_begin() goes through bdrv_co_yield_to_drain() and you
  have in_flight=2 when bdrv_co_yield_to_drain() yields, then soon
  in_flight=1 when the aio_co_wake() in the AIO callback completes, then
  in_flight=0 after the bottom half starts.

- Calling it through a bottom half would be okay too, as long as the AIO
  callback remembers to do inc_in_flight/dec_in_flight just like
  bdrv_co_yield_to_drain() and bdrv_co_drain_bh_cb() do

A few more important cases that come to mind:

- A coroutine that yields because of I/O is okay, with a sequence
  similar to bdrv_co_yield_to_drain().

- A coroutine that yields with no I/O pending will correctly decrease
  in_flight to zero before yielding.

- Calling more AIO from the callback won't overflow the counter just
  because of mutual recursion, because AIO functions always yield at
  least once before invoking the callback.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/block-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index f71fdb4..551e4e1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1339,8 +1339,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
 {
     if (acb->has_returned) {
-        blk_dec_in_flight(acb->rwco.blk);
         acb->common.cb(acb->common.opaque, acb->rwco.ret);
+        blk_dec_in_flight(acb->rwco.blk);
         qemu_aio_unref(acb);
     }
 }
-- 
cgit v1.1


From b5a7a0573530698ee448b063ac01d485e30446bd Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 7 Sep 2018 15:31:22 +0200
Subject: blockjob: Lie better in child_job_drained_poll()

Block jobs claim in .drained_poll() that they are in a quiescent state
as soon as job->deferred_to_main_loop is true. This is obviously wrong,
they still have a completion BH to run. We only get away with this
because commit 91af091f923 added an unconditional aio_poll(false) to the
drain functions, but this is bypassing the regular drain mechanisms.

However, just removing this and telling that the job is still active
doesn't work either: The completion callbacks themselves call drain
functions (directly, or indirectly with bdrv_reopen), so they would
deadlock then.

As a better lie, tell that the job is active as long as the BH is
pending, but falsely call it quiescent from the point in the BH when the
completion callback is called. At this point, nested drain calls won't
deadlock because they ignore the job, and outer drains will wait for the
job to really reach a quiescent state because the callback is already
running.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 blockjob.c         |  2 +-
 include/qemu/job.h |  3 +++
 job.c              | 11 ++++++++++-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 58dbd87..4d53422 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c)
     /* An inactive or completed job doesn't have any pending requests. Jobs
      * with !job->busy are either already paused or have a pause point after
      * being reentered, so no job driver code will run before they pause. */
-    if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
+    if (!job->busy || job_is_completed(job)) {
         return false;
     }
 
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 63c60ef..9e7cd1e 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -76,6 +76,9 @@ typedef struct Job {
      * Set to false by the job while the coroutine has yielded and may be
      * re-entered by job_enter(). There may still be I/O or event loop activity
      * pending. Accessed under block_job_mutex (in blockjob.c).
+     *
+     * When the job is deferred to the main loop, busy is true as long as the
+     * bottom half is still pending.
      */
     bool busy;
 
diff --git a/job.c b/job.c
index 7ec8c3b..518f603 100644
--- a/job.c
+++ b/job.c
@@ -857,7 +857,16 @@ static void job_exit(void *opaque)
     AioContext *ctx = job->aio_context;
 
     aio_context_acquire(ctx);
+
+    /* This is a lie, we're not quiescent, but still doing the completion
+     * callbacks. However, completion callbacks tend to involve operations that
+     * drain block nodes, and if .drained_poll still returned true, we would
+     * deadlock. */
+    job->busy = false;
+    job_event_idle(job);
+
     job_completed(job);
+
     aio_context_release(ctx);
 }
 
@@ -872,8 +881,8 @@ static void coroutine_fn job_co_entry(void *opaque)
     assert(job && job->driver && job->driver->run);
     job_pause_point(job);
     job->ret = job->driver->run(job, &job->err);
-    job_event_idle(job);
     job->deferred_to_main_loop = true;
+    job->busy = true;
     aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
 }
 
-- 
cgit v1.1


From 4cf077b59fc73eec29f8b7d082919dbb278bdc86 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 17 Aug 2018 14:58:49 +0200
Subject: block: Remove aio_poll() in bdrv_drain_poll variants

bdrv_drain_poll_top_level() was buggy because it didn't release the
AioContext lock of the node to be drained before calling aio_poll().
This way, callbacks called by aio_poll() would possibly take the lock a
second time and run into a deadlock with a nested AIO_WAIT_WHILE() call.

However, it turns out that the aio_poll() call isn't actually needed any
more. It was introduced in commit 91af091f923, which is effectively
reverted by this patch. The cases it was supposed to fix are now covered
by bdrv_drain_poll(), which waits for block jobs to reach a quiescent
state.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/io.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/block/io.c b/block/io.c
index 914ba78..8b81ff3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -268,10 +268,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
 static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
                                       BdrvChild *ignore_parent)
 {
-    /* Execute pending BHs first and check everything else only after the BHs
-     * have executed. */
-    while (aio_poll(bs->aio_context, false));
-
     return bdrv_drain_poll(bs, recursive, ignore_parent, false);
 }
 
@@ -511,10 +507,6 @@ static bool bdrv_drain_all_poll(void)
     BlockDriverState *bs = NULL;
     bool result = false;
 
-    /* Execute pending BHs first (may modify the graph) and check everything
-     * else only after the BHs have executed. */
-    while (aio_poll(qemu_get_aio_context(), false));
-
     /* bdrv_drain_poll() can't make changes to the graph and we are holding the
      * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
     while ((bs = bdrv_next_all_states(bs))) {
-- 
cgit v1.1


From ecc1a5c790cf2c7732cb9755ca388c2fe108d1a1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 6 Sep 2018 12:28:10 +0200
Subject: test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()

This is a regression test for a deadlock that could occur in callbacks
called from the aio_poll() in bdrv_drain_poll_top_level(). The
AioContext lock wasn't released and therefore would be taken a second
time in the callback. This would cause a possible AIO_WAIT_WHILE() in
the callback to hang.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
---
 tests/test-bdrv-drain.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index c3c17b9..e105c0a 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -636,6 +636,17 @@ static void test_iothread_aio_cb(void *opaque, int ret)
     qemu_event_set(&done_event);
 }
 
+static void test_iothread_main_thread_bh(void *opaque)
+{
+    struct test_iothread_data *data = opaque;
+
+    /* Test that the AioContext is not yet locked in a random BH that is
+     * executed during drain, otherwise this would deadlock. */
+    aio_context_acquire(bdrv_get_aio_context(data->bs));
+    bdrv_flush(data->bs);
+    aio_context_release(bdrv_get_aio_context(data->bs));
+}
+
 /*
  * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
  * The request involves a BH on iothread 2 before it can complete.
@@ -705,6 +716,8 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
             aio_context_acquire(ctx_a);
         }
 
+        aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
+
         /* The request is running on the IOThread a. Draining its block device
          * will make sure that it has completed as far as the BDS is concerned,
          * but the drain in this thread can continue immediately after
-- 
cgit v1.1


From 644f3a29bd4974aefd46d2adb5062d86063c8a50 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 13 Sep 2018 14:35:27 +0200
Subject: job: Avoid deadlocks in job_completed_txn_abort()

Amongst others, job_finalize_single() calls the .prepare/.commit/.abort
callbacks of the individual job driver. Recently, their use was adapted
for all block jobs so that they involve code calling AIO_WAIT_WHILE()
now. Such code must be called under the AioContext lock for the
respective job, but without holding any other AioContext lock.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 job.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/job.c b/job.c
index 518f603..93aea79 100644
--- a/job.c
+++ b/job.c
@@ -718,6 +718,7 @@ static void job_cancel_async(Job *job, bool force)
 
 static void job_completed_txn_abort(Job *job)
 {
+    AioContext *outer_ctx = job->aio_context;
     AioContext *ctx;
     JobTxn *txn = job->txn;
     Job *other_job;
@@ -731,23 +732,26 @@ static void job_completed_txn_abort(Job *job)
     txn->aborting = true;
     job_txn_ref(txn);
 
-    /* We are the first failed job. Cancel other jobs. */
-    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = other_job->aio_context;
-        aio_context_acquire(ctx);
-    }
+    /* We can only hold the single job's AioContext lock while calling
+     * job_finalize_single() because the finalization callbacks can involve
+     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
+    aio_context_release(outer_ctx);
 
     /* Other jobs are effectively cancelled by us, set the status for
      * them; this job, however, may or may not be cancelled, depending
      * on the caller, so leave it. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
         if (other_job != job) {
+            ctx = other_job->aio_context;
+            aio_context_acquire(ctx);
             job_cancel_async(other_job, false);
+            aio_context_release(ctx);
         }
     }
     while (!QLIST_EMPTY(&txn->jobs)) {
         other_job = QLIST_FIRST(&txn->jobs);
         ctx = other_job->aio_context;
+        aio_context_acquire(ctx);
         if (!job_is_completed(other_job)) {
             assert(job_is_cancelled(other_job));
             job_finish_sync(other_job, NULL, NULL);
@@ -756,6 +760,8 @@ static void job_completed_txn_abort(Job *job)
         aio_context_release(ctx);
     }
 
+    aio_context_acquire(outer_ctx);
+
     job_txn_unref(txn);
 }
 
-- 
cgit v1.1


From d49725af46a7710cde02cc120b7f1e485154b483 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 13 Sep 2018 14:39:14 +0200
Subject: test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort

This adds tests for calling AIO_WAIT_WHILE() in the .commit and .abort
callbacks. Both reasons why .abort could be called for a single job are
tested: Either .run or .prepare could return an error.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 tests/test-bdrv-drain.c | 116 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 104 insertions(+), 12 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index e105c0a..bbc56a0 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -784,6 +784,8 @@ static void test_iothread_drain_subtree(void)
 
 typedef struct TestBlockJob {
     BlockJob common;
+    int run_ret;
+    int prepare_ret;
     bool should_complete;
 } TestBlockJob;
 
@@ -793,7 +795,23 @@ static int test_job_prepare(Job *job)
 
     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
     blk_flush(s->common.blk);
-    return 0;
+    return s->prepare_ret;
+}
+
+static void test_job_commit(Job *job)
+{
+    TestBlockJob *s = container_of(job, TestBlockJob, common.job);
+
+    /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
+    blk_flush(s->common.blk);
+}
+
+static void test_job_abort(Job *job)
+{
+    TestBlockJob *s = container_of(job, TestBlockJob, common.job);
+
+    /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
+    blk_flush(s->common.blk);
 }
 
 static int coroutine_fn test_job_run(Job *job, Error **errp)
@@ -809,7 +827,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
         job_pause_point(&s->common.job);
     }
 
-    return 0;
+    return s->run_ret;
 }
 
 static void test_job_complete(Job *job, Error **errp)
@@ -827,14 +845,24 @@ BlockJobDriver test_job_driver = {
         .run            = test_job_run,
         .complete       = test_job_complete,
         .prepare        = test_job_prepare,
+        .commit         = test_job_commit,
+        .abort          = test_job_abort,
     },
 };
 
-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
+enum test_job_result {
+    TEST_JOB_SUCCESS,
+    TEST_JOB_FAIL_RUN,
+    TEST_JOB_FAIL_PREPARE,
+};
+
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
+                                 enum test_job_result result)
 {
     BlockBackend *blk_src, *blk_target;
     BlockDriverState *src, *target;
     BlockJob *job;
+    TestBlockJob *tjob;
     IOThread *iothread = NULL;
     AioContext *ctx;
     int ret;
@@ -858,9 +886,23 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
     blk_insert_bs(blk_target, target, &error_abort);
 
     aio_context_acquire(ctx);
-    job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL,
-                           0, 0, NULL, NULL, &error_abort);
+    tjob = block_job_create("job0", &test_job_driver, NULL, src,
+                            0, BLK_PERM_ALL,
+                            0, 0, NULL, NULL, &error_abort);
+    job = &tjob->common;
     block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
+
+    switch (result) {
+    case TEST_JOB_SUCCESS:
+        break;
+    case TEST_JOB_FAIL_RUN:
+        tjob->run_ret = -EIO;
+        break;
+    case TEST_JOB_FAIL_PREPARE:
+        tjob->prepare_ret = -EIO;
+        break;
+    }
+
     job_start(&job->job);
     aio_context_release(ctx);
 
@@ -918,7 +960,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
 
     aio_context_acquire(ctx);
     ret = job_complete_sync(&job->job, &error_abort);
-    g_assert_cmpint(ret, ==, 0);
+    g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
 
     if (use_iothread) {
         blk_set_aio_context(blk_src, qemu_get_aio_context());
@@ -937,32 +979,68 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
 
 static void test_blockjob_drain_all(void)
 {
-    test_blockjob_common(BDRV_DRAIN_ALL, false);
+    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
 }
 
 static void test_blockjob_drain(void)
 {
-    test_blockjob_common(BDRV_DRAIN, false);
+    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
 }
 
 static void test_blockjob_drain_subtree(void)
 {
-    test_blockjob_common(BDRV_SUBTREE_DRAIN, false);
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
+}
+
+static void test_blockjob_error_drain_all(void)
+{
+    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE);
+}
+
+static void test_blockjob_error_drain(void)
+{
+    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
+}
+
+static void test_blockjob_error_drain_subtree(void)
+{
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
 }
 
 static void test_blockjob_iothread_drain_all(void)
 {
-    test_blockjob_common(BDRV_DRAIN_ALL, true);
+    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
 }
 
 static void test_blockjob_iothread_drain(void)
 {
-    test_blockjob_common(BDRV_DRAIN, true);
+    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
 }
 
 static void test_blockjob_iothread_drain_subtree(void)
 {
-    test_blockjob_common(BDRV_SUBTREE_DRAIN, true);
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
+}
+
+static void test_blockjob_iothread_error_drain_all(void)
+{
+    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE);
+}
+
+static void test_blockjob_iothread_error_drain(void)
+{
+    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
+}
+
+static void test_blockjob_iothread_error_drain_subtree(void)
+{
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
+    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
 }
 
 
@@ -1434,6 +1512,13 @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
                     test_blockjob_drain_subtree);
 
+    g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
+                    test_blockjob_error_drain_all);
+    g_test_add_func("/bdrv-drain/blockjob/error/drain",
+                    test_blockjob_error_drain);
+    g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
+                    test_blockjob_error_drain_subtree);
+
     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
                     test_blockjob_iothread_drain_all);
     g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
@@ -1441,6 +1526,13 @@ int main(int argc, char **argv)
     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
                     test_blockjob_iothread_drain_subtree);
 
+    g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
+                    test_blockjob_iothread_error_drain_all);
+    g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
+                    test_blockjob_iothread_error_drain);
+    g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
+                    test_blockjob_iothread_error_drain_subtree);
+
     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
-- 
cgit v1.1


From 5599c162c3bec2bc8f0123e4d5802a70d9984b3b Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 20 Sep 2018 15:51:21 +0200
Subject: test-bdrv-drain: Fix outdated comments

Commit 89bd030533e changed the test case from using job_sleep_ns() to
using qemu_co_sleep_ns() instead. Also, block_job_sleep_ns() became
job_sleep_ns() in commit 5d43e86e11f.

In both cases, some comments in the test case were not updated. Do that
now.

Reported-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 tests/test-bdrv-drain.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index bbc56a0..f367e6c 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -820,9 +820,9 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
 
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
-        /* Avoid block_job_sleep_ns() because it marks the job as !busy. We
-         * want to emulate some actual activity (probably some I/O) here so
-         * that drain has to wait for this acitivity to stop. */
+        /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
+         * emulate some actual activity (probably some I/O) here so that drain
+         * has to wait for this activity to stop. */
         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
         job_pause_point(&s->common.job);
     }
@@ -908,7 +908,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     do_drain_begin_unlocked(drain_type, src);
 
@@ -956,7 +956,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
-    g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
+    g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
     aio_context_acquire(ctx);
     ret = job_complete_sync(&job->job, &error_abort);
-- 
cgit v1.1


From cfe29d8294e06420e15d4938421ae006c8ac49e7 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 18 Sep 2018 17:09:16 +0200
Subject: block: Use a single global AioWait

When draining a block node, we recurse to its parent and for subtree
drains also to its children. A single AIO_WAIT_WHILE() is then used to
wait for bdrv_drain_poll() to become true, which depends on all of the
nodes we recursed to. However, if the respective child or parent becomes
quiescent and calls bdrv_wakeup(), only the AioWait of the child/parent
is checked, while AIO_WAIT_WHILE() depends on the AioWait of the
original node.

Fix this by using a single AioWait for all callers of AIO_WAIT_WHILE().

This may mean that the draining thread gets a few more unnecessary
wakeups because an unrelated operation got completed, but we already
wake it up when something _could_ have changed rather than only if it
has certainly changed.

Apart from that, drain is a slow path anyway. In theory it would be
possible to use wakeups more selectively and still correctly, but the
gains are likely not worth the additional complexity. In fact, this
patch is a nice simplification for some places in the code.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block.c                   |  5 -----
 block/block-backend.c     | 11 ++++-------
 block/io.c                |  7 ++-----
 blockjob.c                | 13 +------------
 include/block/aio-wait.h  | 22 +++++++++++-----------
 include/block/block.h     |  6 +-----
 include/block/block_int.h |  3 ---
 include/block/blockjob.h  | 10 ----------
 job.c                     |  3 +--
 util/aio-wait.c           | 11 ++++++-----
 10 files changed, 26 insertions(+), 65 deletions(-)

diff --git a/block.c b/block.c
index a381c8e..c298ca6 100644
--- a/block.c
+++ b/block.c
@@ -4886,11 +4886,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
     return bs ? bs->aio_context : qemu_get_aio_context();
 }
 
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
-{
-    return bs ? &bs->wait : NULL;
-}
-
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
 {
     aio_co_enter(bdrv_get_aio_context(bs), co);
diff --git a/block/block-backend.c b/block/block-backend.c
index 551e4e1..7b1ec50 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -88,7 +88,6 @@ struct BlockBackend {
      * Accessed with atomic ops.
      */
     unsigned int in_flight;
-    AioWait wait;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -1298,7 +1297,7 @@ static void blk_inc_in_flight(BlockBackend *blk)
 static void blk_dec_in_flight(BlockBackend *blk)
 {
     atomic_dec(&blk->in_flight);
-    aio_wait_kick(&blk->wait);
+    aio_wait_kick();
 }
 
 static void error_callback_bh(void *opaque)
@@ -1599,9 +1598,8 @@ void blk_drain(BlockBackend *blk)
     }
 
     /* We may have -ENOMEDIUM completions in flight */
-    AIO_WAIT_WHILE(&blk->wait,
-            blk_get_aio_context(blk),
-            atomic_mb_read(&blk->in_flight) > 0);
+    AIO_WAIT_WHILE(blk_get_aio_context(blk),
+                   atomic_mb_read(&blk->in_flight) > 0);
 
     if (bs) {
         bdrv_drained_end(bs);
@@ -1620,8 +1618,7 @@ void blk_drain_all(void)
         aio_context_acquire(ctx);
 
         /* We may have -ENOMEDIUM completions in flight */
-        AIO_WAIT_WHILE(&blk->wait, ctx,
-                atomic_mb_read(&blk->in_flight) > 0);
+        AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
 
         aio_context_release(ctx);
     }
diff --git a/block/io.c b/block/io.c
index 8b81ff3..bd9d688 100644
--- a/block/io.c
+++ b/block/io.c
@@ -38,8 +38,6 @@
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
 
-static AioWait drain_all_aio_wait;
-
 static void bdrv_parent_cb_resize(BlockDriverState *bs);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -557,7 +555,7 @@ void bdrv_drain_all_begin(void)
     }
 
     /* Now poll the in-flight requests */
-    AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+    AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
 
     while ((bs = bdrv_next_all_states(bs))) {
         bdrv_drain_assert_idle(bs);
@@ -713,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
 
 void bdrv_wakeup(BlockDriverState *bs)
 {
-    aio_wait_kick(bdrv_get_aio_wait(bs));
-    aio_wait_kick(&drain_all_aio_wait);
+    aio_wait_kick();
 }
 
 void bdrv_dec_in_flight(BlockDriverState *bs)
diff --git a/blockjob.c b/blockjob.c
index 4d53422..58de8cb 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -221,20 +221,9 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
     return 0;
 }
 
-void block_job_wakeup_all_bdrv(BlockJob *job)
-{
-    GSList *l;
-
-    for (l = job->nodes; l; l = l->next) {
-        BdrvChild *c = l->data;
-        bdrv_wakeup(c->bs);
-    }
-}
-
 static void block_job_on_idle(Notifier *n, void *opaque)
 {
-    BlockJob *job = opaque;
-    block_job_wakeup_all_bdrv(job);
+    aio_wait_kick();
 }
 
 bool block_job_is_internal(BlockJob *job)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index 600fad1..afd0ff7 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -30,14 +30,15 @@
 /**
  * AioWait:
  *
- * An object that facilitates synchronous waiting on a condition.  The main
- * loop can wait on an operation running in an IOThread as follows:
+ * An object that facilitates synchronous waiting on a condition. A single
+ * global AioWait object (global_aio_wait) is used internally.
+ *
+ * The main loop can wait on an operation running in an IOThread as follows:
  *
- *   AioWait *wait = ...;
  *   AioContext *ctx = ...;
  *   MyWork work = { .done = false };
  *   schedule_my_work_in_iothread(ctx, &work);
- *   AIO_WAIT_WHILE(wait, ctx, !work.done);
+ *   AIO_WAIT_WHILE(ctx, !work.done);
  *
  * The IOThread must call aio_wait_kick() to notify the main loop when
  * work.done changes:
@@ -46,7 +47,7 @@
  *   {
  *       ...
  *       work.done = true;
- *       aio_wait_kick(wait);
+ *       aio_wait_kick();
  *   }
  */
 typedef struct {
@@ -54,9 +55,10 @@ typedef struct {
     unsigned num_waiters;
 } AioWait;
 
+extern AioWait global_aio_wait;
+
 /**
  * AIO_WAIT_WHILE:
- * @wait: the aio wait object
  * @ctx: the aio context, or NULL if multiple aio contexts (for which the
  *       caller does not hold a lock) are involved in the polling condition.
  * @cond: wait while this conditional expression is true
@@ -72,9 +74,9 @@ typedef struct {
  * wait on conditions between two IOThreads since that could lead to deadlock,
  * go via the main loop instead.
  */
-#define AIO_WAIT_WHILE(wait, ctx, cond) ({                         \
+#define AIO_WAIT_WHILE(ctx, cond) ({                               \
     bool waited_ = false;                                          \
-    AioWait *wait_ = (wait);                                       \
+    AioWait *wait_ = &global_aio_wait;                             \
     AioContext *ctx_ = (ctx);                                      \
     /* Increment wait_->num_waiters before evaluating cond. */     \
     atomic_inc(&wait_->num_waiters);                               \
@@ -102,14 +104,12 @@ typedef struct {
 
 /**
  * aio_wait_kick:
- * @wait: the aio wait object that should re-evaluate its condition
- *
  * Wake up the main thread if it is waiting on AIO_WAIT_WHILE().  During
  * synchronous operations performed in an IOThread, the main thread lets the
  * IOThread's event loop run, waiting for the operation to complete.  A
  * aio_wait_kick() call will wake up the main thread.
  */
-void aio_wait_kick(AioWait *wait);
+void aio_wait_kick(void);
 
 /**
  * aio_wait_bh_oneshot:
diff --git a/include/block/block.h b/include/block/block.h
index 4e0871a..4edc1e8 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -410,13 +410,9 @@ void bdrv_drain_all_begin(void);
 void bdrv_drain_all_end(void);
 void bdrv_drain_all(void);
 
-/* Returns NULL when bs == NULL */
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
-
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
     BlockDriverState *bs_ = (bs);                          \
-    AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_),                 \
-                   bdrv_get_aio_context(bs_),              \
+    AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),              \
                    cond); })
 
 int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4000d2a..92ecbd8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -794,9 +794,6 @@ struct BlockDriverState {
     unsigned int in_flight;
     unsigned int serialising_in_flight;
 
-    /* Kicked to signal main loop when a request completes. */
-    AioWait wait;
-
     /* counter for nested bdrv_io_plug.
      * Accessed with atomic ops.
     */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 2290bbb..ede0bd8 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -122,16 +122,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
 void block_job_remove_all_bdrv(BlockJob *job);
 
 /**
- * block_job_wakeup_all_bdrv:
- * @job: The block job
- *
- * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the
- * job. This function is to be called whenever child_job_drained_poll() would
- * go from true to false to notify waiting drain requests.
- */
-void block_job_wakeup_all_bdrv(BlockJob *job);
-
-/**
  * block_job_set_speed:
  * @job: The job to set the speed for.
  * @speed: The new value
diff --git a/job.c b/job.c
index 93aea79..c65e01b 100644
--- a/job.c
+++ b/job.c
@@ -978,7 +978,6 @@ void job_complete(Job *job, Error **errp)
 int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
 {
     Error *local_err = NULL;
-    AioWait dummy_wait = {};
     int ret;
 
     job_ref(job);
@@ -992,7 +991,7 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
         return -EBUSY;
     }
 
-    AIO_WAIT_WHILE(&dummy_wait, job->aio_context,
+    AIO_WAIT_WHILE(job->aio_context,
                    (job_drain(job), !job_is_completed(job)));
 
     ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
diff --git a/util/aio-wait.c b/util/aio-wait.c
index b8a8f86..b487749 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -26,21 +26,22 @@
 #include "qemu/main-loop.h"
 #include "block/aio-wait.h"
 
+AioWait global_aio_wait;
+
 static void dummy_bh_cb(void *opaque)
 {
     /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
 }
 
-void aio_wait_kick(AioWait *wait)
+void aio_wait_kick(void)
 {
     /* The barrier (or an atomic op) is in the caller.  */
-    if (atomic_read(&wait->num_waiters)) {
+    if (atomic_read(&global_aio_wait.num_waiters)) {
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }
 
 typedef struct {
-    AioWait wait;
     bool done;
     QEMUBHFunc *cb;
     void *opaque;
@@ -54,7 +55,7 @@ static void aio_wait_bh(void *opaque)
     data->cb(data->opaque);
 
     data->done = true;
-    aio_wait_kick(&data->wait);
+    aio_wait_kick();
 }
 
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
@@ -67,5 +68,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
     aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
-    AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
+    AIO_WAIT_WHILE(ctx, !data.done);
 }
-- 
cgit v1.1


From d8b3afd597d54e496809b05ac39ac29a5799664f Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 20 Sep 2018 17:39:13 +0200
Subject: test-bdrv-drain: Test draining job source child and parent

For the block job drain test, don't only test draining the source and
the target node, but create a backing chain for the source
(source_backing <- source <- source_overlay) and test draining each of
the nodes in it.

When using iothreads, the source node (and therefore the job) is in a
different AioContext than the drain, which happens from the main
thread. This way, the main thread waits in AIO_WAIT_WHILE() for the
iothread to make process and aio_wait_kick() is required to notify it.
The test validates that calling bdrv_wakeup() for a child or a parent
node will actually notify AIO_WAIT_WHILE() instead of letting it hang.

Increase the sleep time a bit (to 1 ms) because the test case is racy
and with the shorter sleep, it didn't reproduce the bug it is supposed
to test for me under 'rr record -n'.

This was because bdrv_drain_invoke_entry() (in the main thread) was only
called after the job had already reached the pause point, so we got a
bdrv_dec_in_flight() from the main thread and the additional
aio_wait_kick() when the job becomes idle (that we really wanted to test
here) wasn't even necessary any more to make progress.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 tests/test-bdrv-drain.c | 77 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 8 deletions(-)

diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index f367e6c..c9f29c8 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -786,6 +786,7 @@ typedef struct TestBlockJob {
     BlockJob common;
     int run_ret;
     int prepare_ret;
+    bool running;
     bool should_complete;
 } TestBlockJob;
 
@@ -818,12 +819,17 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
 {
     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
 
+    /* We are running the actual job code past the pause point in
+     * job_co_entry(). */
+    s->running = true;
+
     job_transition_to_ready(&s->common.job);
     while (!s->should_complete) {
         /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
          * emulate some actual activity (probably some I/O) here so that drain
          * has to wait for this activity to stop. */
-        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+
         job_pause_point(&s->common.job);
     }
 
@@ -856,11 +862,19 @@ enum test_job_result {
     TEST_JOB_FAIL_PREPARE,
 };
 
-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
-                                 enum test_job_result result)
+enum test_job_drain_node {
+    TEST_JOB_DRAIN_SRC,
+    TEST_JOB_DRAIN_SRC_CHILD,
+    TEST_JOB_DRAIN_SRC_PARENT,
+};
+
+static void test_blockjob_common_drain_node(enum drain_type drain_type,
+                                            bool use_iothread,
+                                            enum test_job_result result,
+                                            enum test_job_drain_node drain_node)
 {
     BlockBackend *blk_src, *blk_target;
-    BlockDriverState *src, *target;
+    BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
     BlockJob *job;
     TestBlockJob *tjob;
     IOThread *iothread = NULL;
@@ -869,8 +883,32 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
                                &error_abort);
+    src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
+                                       BDRV_O_RDWR, &error_abort);
+    src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
+                                       BDRV_O_RDWR, &error_abort);
+
+    bdrv_set_backing_hd(src_overlay, src, &error_abort);
+    bdrv_unref(src);
+    bdrv_set_backing_hd(src, src_backing, &error_abort);
+    bdrv_unref(src_backing);
+
     blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
-    blk_insert_bs(blk_src, src, &error_abort);
+    blk_insert_bs(blk_src, src_overlay, &error_abort);
+
+    switch (drain_node) {
+    case TEST_JOB_DRAIN_SRC:
+        drain_bs = src;
+        break;
+    case TEST_JOB_DRAIN_SRC_CHILD:
+        drain_bs = src_backing;
+        break;
+    case TEST_JOB_DRAIN_SRC_PARENT:
+        drain_bs = src_overlay;
+        break;
+    default:
+        g_assert_not_reached();
+    }
 
     if (use_iothread) {
         iothread = iothread_new();
@@ -906,11 +944,21 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     job_start(&job->job);
     aio_context_release(ctx);
 
+    if (use_iothread) {
+        /* job_co_entry() is run in the I/O thread, wait for the actual job
+         * code to start (we don't want to catch the job in the pause point in
+         * job_co_entry(). */
+        while (!tjob->running) {
+            aio_poll(qemu_get_aio_context(), false);
+        }
+    }
+
     g_assert_cmpint(job->job.pause_count, ==, 0);
     g_assert_false(job->job.paused);
+    g_assert_true(tjob->running);
     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
 
-    do_drain_begin_unlocked(drain_type, src);
+    do_drain_begin_unlocked(drain_type, drain_bs);
 
     if (drain_type == BDRV_DRAIN_ALL) {
         /* bdrv_drain_all() drains both src and target */
@@ -921,7 +969,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     g_assert_true(job->job.paused);
     g_assert_false(job->job.busy); /* The job is paused */
 
-    do_drain_end_unlocked(drain_type, src);
+    do_drain_end_unlocked(drain_type, drain_bs);
 
     if (use_iothread) {
         /* paused is reset in the I/O thread, wait for it */
@@ -969,7 +1017,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
 
     blk_unref(blk_src);
     blk_unref(blk_target);
-    bdrv_unref(src);
+    bdrv_unref(src_overlay);
     bdrv_unref(target);
 
     if (iothread) {
@@ -977,6 +1025,19 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
     }
 }
 
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
+                                 enum test_job_result result)
+{
+    test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                    TEST_JOB_DRAIN_SRC);
+    test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                    TEST_JOB_DRAIN_SRC_CHILD);
+    if (drain_type == BDRV_SUBTREE_DRAIN) {
+        test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                        TEST_JOB_DRAIN_SRC_PARENT);
+    }
+}
+
 static void test_blockjob_drain_all(void)
 {
     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
-- 
cgit v1.1