From 8d3dd037d947ce96c42c376ef0bb5e5c6ef96cbe Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:57 +0100 Subject: stream: Traverse graph after modification bdrv_cor_filter_drop() modifies the block graph. That means that other parties can also modify the block graph before it returns. Therefore, we cannot assume that the result of a graph traversal we did before remains valid afterwards. We should thus fetch `base` and `unfiltered_base` afterwards instead of before. Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-2-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-2-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block/stream.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/stream.c b/block/stream.c index 97bee48..e45113a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { -- cgit v1.1 From a225369bce684c01095be52e1014e3fa91dec210 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:58 +0100 Subject: block: Manipulate children list in .attach/.detach The children list is specific to BDS parents. We should not modify it in the general children modification code, but let BDS parents deal with it in their .attach() and .detach() methods. This also has the advantage that a BdrvChild is removed from the children list before its .bs pointer can become NULL. BDS parents generally assume that their children's .bs pointer is never NULL, so this is actually a bug fix. Signed-off-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-3-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-3-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 580cb77..ca024ff 100644 --- a/block.c +++ b/block.c @@ -1387,6 +1387,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1403,6 +1405,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -2747,7 +2751,7 @@ static void bdrv_child_free(void *opaque) static void bdrv_remove_empty_child(BdrvChild *child) { assert(!child->bs); - QLIST_SAFE_REMOVE(child, next); + assert(!child->next.le_prev); /* not in children list */ bdrv_child_free(child); } @@ -2913,12 +2917,6 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return ret; } - QLIST_INSERT_HEAD(&parent_bs->children, *child, next); - /* - * child is removed in bdrv_attach_child_common_abort(), so don't care to - * abort this change separately. - */ - return 0; } @@ -4851,7 +4849,6 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) BdrvRemoveFilterOrCowChild *s = opaque; BlockDriverState *parent_bs = s->child->opaque; - QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); if (s->is_backing) { parent_bs->backing = s->child; } else { @@ -4906,7 +4903,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - QLIST_SAFE_REMOVE(child, next); if (s->is_backing) { bs->backing = NULL; } else { -- cgit v1.1 From 04c9c3a52c21088e7039956e5a635250fe3eaee6 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:59 +0100 Subject: block: Unite remove_empty_child and child_free Now that bdrv_remove_empty_child() no longer removes the child from the parent's children list but only checks that it is not in such a list, it is only a wrapper around bdrv_child_free() that checks that the child is empty and unused. That should apply to all children that we free, so put those checks into bdrv_child_free() and drop bdrv_remove_empty_child(). Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-4-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-4-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/block.c b/block.c index ca024ff..19bff4f 100644 --- a/block.c +++ b/block.c @@ -2740,19 +2740,19 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } } -static void bdrv_child_free(void *opaque) -{ - BdrvChild *c = opaque; - - g_free(c->name); - g_free(c); -} - -static void bdrv_remove_empty_child(BdrvChild *child) +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); assert(!child->next.le_prev); /* not in children list */ - bdrv_child_free(child); + + g_free(child->name); + g_free(child); } typedef struct BdrvAttachChildCommonState { @@ -2786,7 +2786,7 @@ static void bdrv_attach_child_common_abort(void *opaque) } bdrv_unref(bs); - bdrv_remove_empty_child(child); + bdrv_child_free(child); *s->child = NULL; } @@ -2859,7 +2859,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, if (ret < 0) { error_propagate(errp, local_err); - bdrv_remove_empty_child(new_child); + bdrv_child_free(new_child); return ret; } } @@ -2925,7 +2925,7 @@ static void bdrv_detach_child(BdrvChild *child) BlockDriverState *old_bs = child->bs; bdrv_replace_child_noperm(child, NULL); - bdrv_remove_empty_child(child); + bdrv_child_free(child); if (old_bs) { /* -- cgit v1.1 From 265180614189b969058f5e507c8964fc7dfd422f Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:00 +0100 Subject: block: Drop detached child from ignore list bdrv_attach_child_common_abort() restores the parent's AioContext. To do so, the child (which was supposed to be attached, but is now detached again by this abort handler) is added to the ignore list for the AioContext changing functions. However, since we modify a BDS's children list in the BdrvChildClass's .attach and .detach handlers, the child is already effectively detached from the parent by this point. We do not need to put it into the ignore list. Use this opportunity to clean up the empty line structure: Keep setting the ignore list, invoking the AioContext function, and freeing the ignore list in blocks separated by empty lines. Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-5-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-5-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 19bff4f..c7d5aa5 100644 --- a/block.c +++ b/block.c @@ -2774,14 +2774,16 @@ static void bdrv_attach_child_common_abort(void *opaque) } if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); + GSList *ignore; + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, &error_abort); g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); g_slist_free(ignore); } -- cgit v1.1 From be64bbb0149748f3999c49b13976aafb8330ea86 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:01 +0100 Subject: block: Pass BdrvChild ** to replace_child_noperm bdrv_replace_child_noperm() modifies BdrvChild.bs, and can potentially set it to NULL. That is dangerous, because BDS parents generally assume that their children's .bs pointer is never NULL. We therefore want to let bdrv_replace_child_noperm() set the corresponding BdrvChild pointer to NULL, too. This patch lays the foundation for it by passing a BdrvChild ** pointer to bdrv_replace_child_noperm() so that it can later use it to NULL the BdrvChild pointer immediately after setting BdrvChild.bs to NULL. (We will still need to undertake some intermediate steps, though.) Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-6-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-6-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index c7d5aa5..d668156 100644 --- a/block.c +++ b/block.c @@ -87,7 +87,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild *child, +static void bdrv_replace_child_noperm(BdrvChild **child, BlockDriverState *new_bs); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, @@ -2270,7 +2270,7 @@ static void bdrv_replace_child_abort(void *opaque) BlockDriverState *new_bs = s->child->bs; /* old_bs reference is transparently moved from @s to @s->child */ - bdrv_replace_child_noperm(s->child, s->old_bs); + bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2300,7 +2300,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(child, new_bs); + bdrv_replace_child_noperm(&child, new_bs); /* old_bs reference is transparently moved from @child to @s */ } @@ -2672,9 +2672,10 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, +static void bdrv_replace_child_noperm(BdrvChild **childp, BlockDriverState *new_bs) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2767,7 +2768,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(child, NULL); + bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); @@ -2867,7 +2868,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); - bdrv_replace_child_noperm(new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs); *child = new_child; @@ -2922,12 +2923,12 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return 0; } -static void bdrv_detach_child(BdrvChild *child) +static void bdrv_detach_child(BdrvChild **childp) { - BlockDriverState *old_bs = child->bs; + BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(child, NULL); - bdrv_child_free(child); + bdrv_replace_child_noperm(childp, NULL); + bdrv_child_free(*childp); if (old_bs) { /* @@ -3033,7 +3034,7 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } -- cgit v1.1 From 562bda8bb41879eeda0bd484dd3d55134579b28e Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:02 +0100 Subject: block: Restructure remove_file_or_backing_child() As of a future patch, bdrv_replace_child_tran() will take a BdrvChild ** pointer. Prepare for that by getting such a pointer and using it where applicable, and (dereferenced) as a parameter for bdrv_replace_child_tran(). Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-7-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-7-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index d668156..8da057f 100644 --- a/block.c +++ b/block.c @@ -4887,30 +4887,33 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran) { + BdrvChild **childp; BdrvRemoveFilterOrCowChild *s; - assert(child == bs->backing || child == bs->file); - if (!child) { return; } + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + if (child->bs) { - bdrv_replace_child_tran(child, NULL, tran); + bdrv_replace_child_tran(*childp, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, - .is_backing = (child == bs->backing), + .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - if (s->is_backing) { - bs->backing = NULL; - } else { - bs->file = NULL; - } + *childp = NULL; } /* -- cgit v1.1 From 079bff693bc47bf69fb131f87a03c1689e48ed55 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:03 +0100 Subject: transactions: Invoke clean() after everything else Invoke the transaction drivers' .clean() methods only after all .commit() or .abort() handlers are done. This makes it easier to have nested transactions where the top-level transactions pass objects to lower transactions that the latter can still use throughout their commit/abort phases, while the top-level transaction keeps a reference that is released in its .clean() method. (Before this commit, that is also possible, but the top-level transaction would need to take care to invoke tran_add() before the lower-level transaction does. This commit makes the ordering irrelevant, which is just a bit nicer.) Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-8-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-8-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- include/qemu/transactions.h | 3 +++ util/transactions.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h index 92c5965..2f2060a 100644 --- a/include/qemu/transactions.h +++ b/include/qemu/transactions.h @@ -31,6 +31,9 @@ * tran_create(), call your "prepare" functions on it, and finally call * tran_abort() or tran_commit() to finalize the transaction by corresponding * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. */ #ifndef QEMU_TRANSACTIONS_H diff --git a/util/transactions.c b/util/transactions.c index d0bc9a3..2dbdedc 100644 --- a/util/transactions.c +++ b/util/transactions.c @@ -61,11 +61,13 @@ void tran_abort(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->abort) { act->drv->abort(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } @@ -80,11 +82,13 @@ void tran_commit(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->commit) { act->drv->commit(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } -- cgit v1.1 From 82b54cf51656bf3cd5ed1ac549e8a1085a0e3290 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:04 +0100 Subject: block: Let replace_child_tran keep indirect pointer As of a future commit, bdrv_replace_child_noperm() will clear the indirect BdrvChild pointer passed to it if the new child BDS is NULL. bdrv_replace_child_tran() will want to let it do that, but revert this change in its abort handler. For that, we need to have it receive a BdrvChild ** pointer, too, and keep it stored in the BdrvReplaceChildState object that we attach to the transaction. Note that we do not need to store it in the BdrvReplaceChildState when new_bs is not NULL, because then there is nothing to revert. This is important so that bdrv_replace_node_noperm() can pass a pointer to a loop-local variable to bdrv_replace_child_tran() without worrying that this pointer will outlive one loop iteration. (Of course, for that to work, bdrv_replace_node_noperm() and in turn bdrv_replace_node() and its relatives may not be called with a NULL @to node. Luckily, they already are not, but now we should assert this.) bdrv_remove_file_or_backing_child() on the other hand needs to ensure that the indirect pointer it passes will stay valid for the duration of the transaction. Ensure this by keeping a strong reference to the BDS whose &bs->backing or &bs->file it passes to bdrv_replace_child_tran(), and giving up that reference only in the transaction .clean() handler. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-9-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-9-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 8da057f..a400271 100644 --- a/block.c +++ b/block.c @@ -2254,6 +2254,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; + BdrvChild **childp; BlockDriverState *old_bs; } BdrvReplaceChildState; @@ -2269,7 +2270,29 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; - /* old_bs reference is transparently moved from @s to @s->child */ + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (TODO: Right now, bdrv_replace_child_noperm() never modifies that + * pointer anyway (though it will in the future), so at this point it + * absolutely does not matter whether we pass s->childp or &s->child.) + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. + * (TODO: In its current state, bdrv_replace_child_noperm() will not + * have NULLed *s->childp, so this does not apply yet. It will in the + * future.) + * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2286,22 +2309,32 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. + * + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * (TODO: The reinstating does not happen yet, but it will once + * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) */ -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, }; tran_add(tran, &bdrv_replace_child_drv, s); if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(&child, new_bs); - /* old_bs reference is transparently moved from @child to @s */ + bdrv_replace_child_noperm(childp, new_bs); + /* old_bs reference is transparently moved from *childp to @s */ } /* @@ -4844,6 +4877,7 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) typedef struct BdrvRemoveFilterOrCowChild { BdrvChild *child; + BlockDriverState *bs; bool is_backing; } BdrvRemoveFilterOrCowChild; @@ -4873,10 +4907,19 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) bdrv_child_free(s->child); } +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { .abort = bdrv_remove_filter_or_cow_child_abort, .commit = bdrv_remove_filter_or_cow_child_commit, - .clean = g_free, + .clean = bdrv_remove_filter_or_cow_child_clean, }; /* @@ -4894,6 +4937,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, return; } + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); if (child == bs->backing) { childp = &bs->backing; } else if (child == bs->file) { @@ -4903,12 +4951,13 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, } if (child->bs) { - bdrv_replace_child_tran(*childp, NULL, tran); + bdrv_replace_child_tran(childp, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, + .bs = bs, .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); @@ -4934,6 +4983,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, { BdrvChild *c, *next; + assert(to != NULL); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { @@ -4949,7 +5000,12 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - bdrv_replace_child_tran(c, to, tran); + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran); } return 0; @@ -4964,6 +5020,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * * With @detach_subchain=true @to must be in a backing chain of @from. In this * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. */ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to, @@ -4976,6 +5034,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + assert(to != NULL); + if (detach_subchain) { assert(bdrv_chain_contains(from, to)); assert(from != to); @@ -5031,6 +5091,9 @@ out: return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { @@ -5098,7 +5161,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); -- cgit v1.1 From b0a9f6fed3d80de610dcd04a7e66f9f30a04174f Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:05 +0100 Subject: block: Let replace_child_noperm free children In most of the block layer, especially when traversing down from other BlockDriverStates, we assume that BdrvChild.bs can never be NULL. When it becomes NULL, it is expected that the corresponding BdrvChild pointer also becomes NULL and the BdrvChild object is freed. Therefore, once bdrv_replace_child_noperm() sets the BdrvChild.bs pointer to NULL, it should also immediately set the corresponding BdrvChild pointer (like bs->file or bs->backing) to NULL. In that context, it also makes sense for this function to free the child. Sometimes we cannot do so, though, because it is called in a transactional context where the caller might still want to reinstate the child in the abort branch (and free it only on commit), so this behavior has to remain optional. In bdrv_replace_child_tran()'s abort handler, we now rely on the fact that the BdrvChild passed to bdrv_replace_child_tran() must have had a non-NULL .bs pointer initially. Make a note of that and assert it. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-10-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-10-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 23 deletions(-) diff --git a/block.c b/block.c index a400271..0ac5b16 100644 --- a/block.c +++ b/block.c @@ -87,8 +87,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); +static void bdrv_child_free(BdrvChild *child); static void bdrv_replace_child_noperm(BdrvChild **child, - BlockDriverState *new_bs); + BlockDriverState *new_bs, + bool free_empty_child); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran); @@ -2256,12 +2258,16 @@ typedef struct BdrvReplaceChildState { BdrvChild *child; BdrvChild **childp; BlockDriverState *old_bs; + bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } bdrv_unref(s->old_bs); } @@ -2278,22 +2284,26 @@ static void bdrv_replace_child_abort(void *opaque) * modify the BdrvChild * pointer we indirectly pass to it, i.e. it * will not modify s->child. From that perspective, it does not matter * whether we pass s->childp or &s->child. - * (TODO: Right now, bdrv_replace_child_noperm() never modifies that - * pointer anyway (though it will in the future), so at this point it - * absolutely does not matter whether we pass s->childp or &s->child.) * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use * it here. * (3) If new_bs is NULL, *s->childp will have been NULLed by * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we * must not pass a NULL *s->childp here. - * (TODO: In its current state, bdrv_replace_child_noperm() will not - * have NULLed *s->childp, so this does not apply yet. It will in the - * future.) * * So whether new_bs was NULL or not, we cannot pass s->childp here; and in * any case, there is no reason to pass it anyway. */ - bdrv_replace_child_noperm(&s->child, s->old_bs); + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } bdrv_unref(new_bs); } @@ -2310,30 +2320,44 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * The function doesn't update permissions, caller is responsible for this. * + * (*childp)->bs must not be NULL. + * * Note that if new_bs == NULL, @childp is stored in a state object attached * to @tran, so that the old child can be reinstated in the abort handler. * Therefore, if @new_bs can be NULL, @childp must stay valid until the * transaction is committed or aborted. * - * (TODO: The reinstating does not happen yet, but it will once - * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). */ static void bdrv_replace_child_tran(BdrvChild **childp, BlockDriverState *new_bs, - Transaction *tran) + Transaction *tran, + bool free_empty_child) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { .child = *childp, .childp = new_bs == NULL ? childp : NULL, .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, }; tran_add(tran, &bdrv_replace_child_drv, s); + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(childp, new_bs); + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); /* old_bs reference is transparently moved from *childp to @s */ } @@ -2705,8 +2729,22 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). + */ static void bdrv_replace_child_noperm(BdrvChild **childp, - BlockDriverState *new_bs) + BlockDriverState *new_bs, + bool free_empty_child) { BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; @@ -2743,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2772,6 +2813,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, bdrv_parent_drained_end_single(child); drain_saldo++; } + + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } /** @@ -2801,7 +2846,14 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(s->child, NULL); + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); @@ -2823,7 +2875,6 @@ static void bdrv_attach_child_common_abort(void *opaque) bdrv_unref(bs); bdrv_child_free(child); - *s->child = NULL; } static TransactionActionDrv bdrv_attach_child_common_drv = { @@ -2901,7 +2952,9 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); - bdrv_replace_child_noperm(&new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); *child = new_child; @@ -2960,8 +3013,7 @@ static void bdrv_detach_child(BdrvChild **childp) { BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(childp, NULL); - bdrv_child_free(*childp); + bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { /* @@ -4951,7 +5003,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, } if (child->bs) { - bdrv_replace_child_tran(childp, NULL, tran); + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); } s = g_new(BdrvRemoveFilterOrCowChild, 1); @@ -4961,8 +5017,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - - *childp = NULL; } /* @@ -5005,7 +5059,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * Passing a pointer to the local variable @c is fine here, because * @to is not NULL, and so &c will not be attached to the transaction. */ - bdrv_replace_child_tran(&c, to, tran); + bdrv_replace_child_tran(&c, to, tran, true); } return 0; @@ -5161,7 +5215,9 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(&child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); -- cgit v1.1 From 16e29cc050516ac0915f0db9226079b17dcbcc51 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:06 +0100 Subject: iotests/030: Unthrottle parallel jobs in reverse See the comment for why this is necessary. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-11-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-11-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- tests/qemu-iotests/030 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 5fb65b4..567bf1d 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -251,7 +251,16 @@ class TestParallelOps(iotests.QMPTestCase): speed=1024) self.assert_qmp(result, 'return', {}) - for job in pending_jobs: + # Do this in reverse: After unthrottling them, some jobs may finish + # before we have unthrottled all of them. This will drain their + # subgraph, and this will make jobs above them advance (despite those + # jobs on top being throttled). In the worst case, all jobs below the + # top one are finished before we can unthrottle it, and this makes it + # advance so far that it completes before we can unthrottle it - which + # results in an error. + # Starting from the top (i.e. in reverse) does not have this problem: + # When a job finishes, the ones below it are not advanced. + for job in reversed(pending_jobs): result = self.vm.qmp('block-job-set-speed', device=job, speed=0) self.assert_qmp(result, 'return', {}) -- cgit v1.1 From 4d8b0f0a95361b1a3888f3eb3f76b27420383753 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Nov 2021 15:54:07 +0100 Subject: docs: Deprecate incorrectly typed device_add arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While introducing a non-QemuOpts code path for device creation for JSON -device, we noticed that QMP device_add doesn't check its input correctly (accepting arguments that should have been rejected), and that users may be relying on this behaviour (libvirt did until it was fixed recently). Let's use a deprecation period before we fix this bug in QEMU to avoid nasty surprises for users. Signed-off-by: Kevin Wolf Message-Id: <20211111143530.18985-1-kwolf@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Daniel P. Berrangé Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-12-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- docs/about/deprecated.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 6000312..c03fcf9 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -250,6 +250,20 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + System accelerators ------------------- -- cgit v1.1 From c9d4e42a8febe4db22eed8463087b38c3c74fd4c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 15 Nov 2021 15:54:09 +0100 Subject: softmmu/qdev-monitor: fix use-after-free in qdev_set_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Coverity (CID 1465222). Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id") Cc: Damien Hedde Cc: Kevin Wolf Cc: Michael S. Tsirkin Signed-off-by: Stefan Hajnoczi Message-Id: <20211102163342.31162-1-stefanha@redhat.com> Reviewed-by: Kevin Wolf Reviewed-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Marc-André Lureau Reviewed-by: Damien Hedde Reviewed-by: Markus Armbruster Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-14-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- softmmu/qdev-monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index b5aaae4..01ec420 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) if (prop) { dev->id = id; } else { - g_free(id); error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); return NULL; } } else { -- cgit v1.1 From 5dbd0ce115c7720268e653fe928bb6a0c1314a80 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 16 Nov 2021 11:14:31 +0100 Subject: file-posix: Fix alignment after reopen changing O_DIRECT At the end of a reopen, we already call bdrv_refresh_limits(), which should update bs->request_alignment according to the new file descriptor. However, raw_probe_alignment() relies on s->needs_alignment and just uses 1 if it isn't set. We neglected to update this field, so starting with cache=writeback and then reopening with cache=none means that we get an incorrect bs->request_alignment == 1 and unaligned requests fail instead of being automatically aligned. Fix this by recalculating s->needs_alignment in raw_refresh_limits() before calling raw_probe_alignment(). Signed-off-by: Kevin Wolf Message-Id: <20211104113109.56336-1-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Stefano Garzarella Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-13-kwolf@redhat.com> [hreitz: Fix iotest 142 for block sizes greater than 512 by operating on a file with a size of 1 MB] Signed-off-by: Hanna Reitz Message-Id: <20211116101431.105252-1-hreitz@redhat.com> --- block/file-posix.c | 20 ++++++++++++++++---- tests/qemu-iotests/142 | 29 +++++++++++++++++++++++++++++ tests/qemu-iotests/142.out | 18 ++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 7a27c83..b283093 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -167,6 +167,7 @@ typedef struct BDRVRawState { int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -351,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. @@ -728,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -784,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1251,7 +1261,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; struct stat st; + s->needs_alignment = raw_needs_alignment(bs); raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142 index 69fd10e..86d65a2 100755 --- a/tests/qemu-iotests/142 +++ b/tests/qemu-iotests/142 @@ -350,6 +350,35 @@ info block backing-file" echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache" +echo +echo "--- Alignment after changing O_DIRECT ---" +echo + +# Directly test the protocol level: Can unaligned requests succeed even if +# O_DIRECT was only enabled through a reopen and vice versa? + +# Ensure image size is a multiple of the sector size (required for O_DIRECT) +$QEMU_IMG create -f file "$TEST_IMG" 1M | _filter_img_create + +# And write some data (not strictly necessary, but it feels better to actually +# have something to be read) +$QEMU_IO -f file -c 'write 0 4096' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO --cache=writeback -f file $TEST_IMG <