From 5529b02da2dcd1ef6bc6cd42d4fbfb537fe2276f Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Tue, 18 May 2021 13:42:14 +0200 Subject: block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk The quorum block driver uses a custom flush callback to handle the case when some children return io errors. In that case it still returns success if enough children are healthy. However, it provides it as the .bdrv_co_flush_to_disk callback, not as .bdrv_co_flush. This causes the block layer to do it's own generic flushing for the children instead, which doesn't handle errors properly. Fix this by providing .bdrv_co_flush instead of .bdrv_co_flush_to_disk so the block layer uses the custom flush callback. Signed-off-by: Lukas Straub Reported-by: Minghao Yuan Message-Id: <20210518134214.11ccf05f@gecko.fritz.box> Tested-by: Zhang Chen Signed-off-by: Kevin Wolf --- block/quorum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/quorum.c b/block/quorum.c index cfc1436..f2c0805 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1279,7 +1279,7 @@ static BlockDriver bdrv_quorum = { .bdrv_dirname = quorum_dirname, .bdrv_co_block_status = quorum_co_block_status, - .bdrv_co_flush_to_disk = quorum_co_flush, + .bdrv_co_flush = quorum_co_flush, .bdrv_getlength = quorum_getlength, -- cgit v1.1 From 8eaf10187a2fd25aa27cb81b602815b07f9a7f89 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 19 May 2021 12:05:32 +0300 Subject: qemu-io-cmds: assert that we don't have .perm requested in no-blk case Coverity thinks blk may be NULL. It's a false-positive, as described in a new comment. Fixes: Coverity CID 1453194 Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210519090532.3753-1-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- qemu-io-cmds.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 998b671..e8d862a 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -92,9 +92,19 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, return -EINVAL; } - /* Request additional permissions if necessary for this command. The caller + /* + * Request additional permissions if necessary for this command. The caller * is responsible for restoring the original permissions afterwards if this - * is what it wants. */ + * is what it wants. + * + * Coverity thinks that blk may be NULL in the following if condition. It's + * not so: in init_check_command() we fail if blk is NULL for command with + * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in + * qemuio_add_command() we assert that command with non-zero .perm field + * doesn't set this flags. So, the following assertion is to silence + * Coverity: + */ + assert(blk || !ct->perm); if (ct->perm && blk_is_available(blk)) { uint64_t orig_perm, orig_shared_perm; blk_get_perm(blk, &orig_perm, &orig_shared_perm); -- cgit v1.1 From fb62b5889695825ea22f29d4eadb9ac1b8935a71 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 24 May 2021 13:12:56 +0300 Subject: block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash Commit 3ca1f3225727419ba573673b744edac10904276f "block: BdrvChildClass: add .get_parent_aio_context handler" introduced new handler and commit 228ca37e12f97788e05bd0c92f89b3e5e4019607 "block: drop ctx argument from bdrv_root_attach_child" made a generic use of it. But 3ca1f3225727419ba573673b744edac10904276f didn't update child_vvfat_qcow. Fix that. Before that fix the command ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ -drive file=fat:rw:fat-type=16:"",id=fat16,format=raw,if=none crashes: 1 bdrv_child_get_parent_aio_context (c=0x559d62426d20) at ../block.c:1440 2 bdrv_attach_child_common (child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", child_class=0x559d60c58d20 , child_role=3, perm=3, shared_perm=4, opaque=0x559d62445690, child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at ../block.c:2795 3 bdrv_attach_child_noperm (parent_bs=0x559d62445690, child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", child_class=0x559d60c58d20 , child_role=3, child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at ../block.c:2855 4 bdrv_attach_child (parent_bs=0x559d62445690, child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target", child_class=0x559d60c58d20 , child_role=3, errp=0x7ffc74c2ae60) at ../block.c:2953 5 bdrv_open_child (filename=0x559d62464b80 "/var/tmp/vl.h3TIS4", options=0x559d6246ec20, bdref_key=0x559d606f9e3d "write-target", parent=0x559d62445690, child_class=0x559d60c58d20 , child_role=3, allow_none=false, errp=0x7ffc74c2ae60) at ../block.c:3351 6 enable_write_target (bs=0x559d62445690, errp=0x7ffc74c2ae60) at ../block/vvfat.c:3176 7 vvfat_open (bs=0x559d62445690, options=0x559d6244adb0, flags=155650, errp=0x7ffc74c2ae60) at ../block/vvfat.c:1236 8 bdrv_open_driver (bs=0x559d62445690, drv=0x559d60d4f7e0 , node_name=0x0, options=0x559d6244adb0, open_flags=155650, errp=0x7ffc74c2af70) at ../block.c:1557 9 bdrv_open_common (bs=0x559d62445690, file=0x0, options=0x559d6244adb0, errp=0x7ffc74c2af70) at ... (gdb) fr 1 #1 0x0000559d603ea3bf in bdrv_child_get_parent_aio_context (c=0x559d62426d20) at ../block.c:1440 1440 return c->klass->get_parent_aio_context(c); (gdb) p c->klass $1 = (const BdrvChildClass *) 0x559d60c58d20 (gdb) p c->klass->get_parent_aio_context $2 = (AioContext *(*)(BdrvChild *)) 0x0 Fixes: 3ca1f3225727419ba573673b744edac10904276f Fixes: 228ca37e12f97788e05bd0c92f89b3e5e4019607 Reported-by: John Arbuckle Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210524101257.119377-2-vsementsov@virtuozzo.com> Tested-by: John Arbuckle Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/vvfat.c | 1 + include/block/block.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 0dc9728..ef13076 100644 --- a/block.c +++ b/block.c @@ -1412,7 +1412,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, return 0; } -static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c) +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) { BlockDriverState *bs = c->opaque; @@ -1432,7 +1432,7 @@ const BdrvChildClass child_of_bds = { .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, .set_aio_ctx = bdrv_child_cb_set_aio_ctx, .update_filename = bdrv_child_cb_update_filename, - .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context, + .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) diff --git a/block/vvfat.c b/block/vvfat.c index 54807f8..07232a7 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3130,6 +3130,7 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, static const BdrvChildClass child_vvfat_qcow = { .parent_is_bds = true, .inherit_options = vvfat_qcow_options, + .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; static int enable_write_target(BlockDriverState *bs, Error **errp) diff --git a/include/block/block.h b/include/block/block.h index 8218596..8e707a8 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -701,6 +701,7 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, GSList **ignore, Error **errp); AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); -- cgit v1.1 From 39df2c6d57b9eaa30d37a34b5a20cbc0474725c0 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 24 May 2021 13:12:57 +0300 Subject: block/vvfat: fix vvfat_child_perm crash It's wrong to rely on s->qcow in vvfat_child_perm, as on permission update during bdrv_open_child() call this field is not set yet. Still prior to aa5a04c7db27eea6b36de32f241b155f0d9ce34d, it didn't crash, as bdrv_open_child passed NULL as child to bdrv_child_perm(), and NULL was equal to NULL in assertion (still, it was bad guarantee for child being s->qcow, not backing :). Since aa5a04c7db27eea6b36de32f241b155f0d9ce34d "add bdrv_attach_child_noperm" bdrv_refresh_perms called on parent node when attaching child, and new correct child pointer is passed to .bdrv_child_perm. Still, s->qcow is NULL at the moment. Let's rely only on role instead. Without that fix, ./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \ -drive \ file=fat:rw:fat-type=16:"",id=fat16,format=raw,if=none crashes: (gdb) bt 0 raise () at /lib64/libc.so.6 1 abort () at /lib64/libc.so.6 2 _nl_load_domain.cold () at /lib64/libc.so.6 3 annobin_assert.c_end () at /lib64/libc.so.6 4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, reopen_queue=0x0, perm=0, shared=31, nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at ../block/vvfat.c:3214 5 bdrv_child_perm (bs=0x559186f3d690, child_bs=0x559186f60190, c=0x559186f1ed20, role=3, reopen_queue=0x0, parent_perm=0, parent_shared=31, nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at ../block.c:2094 6 bdrv_node_refresh_perm (bs=0x559186f3d690, q=0x0, tran=0x559186f65850, errp=0x7ffe56f28530) at ../block.c:2336 7 bdrv_list_refresh_perms (list=0x559186db5b90 = {...}, q=0x0, tran=0x559186f65850, errp=0x7ffe56f28530) at ../block.c:2358 8 bdrv_refresh_perms (bs=0x559186f3d690, errp=0x7ffe56f28530) at ../block.c:2419 9 bdrv_attach_child (parent_bs=0x559186f3d690, child_bs=0x559186f60190, child_name=0x559184d83e3d "write-target", child_class=0x5591852f3b00 , child_role=3, errp=0x7ffe56f28530) at ../block.c:2959 10 bdrv_open_child (filename=0x559186f5cb80 "/var/tmp/vl.7WYmFU", options=0x559186f66c20, bdref_key=0x559184d83e3d "write-target", parent=0x559186f3d690, child_class=0x5591852f3b00 , child_role=3, allow_none=false, errp=0x7ffe56f28530) at ../block.c:3351 11 enable_write_target (bs=0x559186f3d690, errp=0x7ffe56f28530) at ../block/vvfat.c:3177 12 vvfat_open (bs=0x559186f3d690, options=0x559186f42db0, flags=155650, errp=0x7ffe56f28530) at ../block/vvfat.c:1236 13 bdrv_open_driver (bs=0x559186f3d690, drv=0x5591853d97e0 , node_name=0x0, options=0x559186f42db0, open_flags=155650, errp=0x7ffe56f28640) at ../block.c:1557 14 bdrv_open_common (bs=0x559186f3d690, file=0x0, options=0x559186f42db0, errp=0x7ffe56f28640) at ../block.c:1833 ... (gdb) fr 4 #4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3, reopen_queue=0x0, perm=0, shared=31, nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at ../block/vvfat.c:3214 3214 assert(c == s->qcow || (role & BDRV_CHILD_COW)); (gdb) p role $1 = 3 # BDRV_CHILD_DATA | BDRV_CHILD_METADATA (gdb) p *c $2 = {bs = 0x559186f60190, name = 0x559186f669d0 "write-target", klass = 0x5591852f3b00 , role = 3, opaque = 0x559186f3d690, perm = 3, shared_perm = 4, frozen = false, parent_quiesce_counter = 0, next = {le_next = 0x0, le_prev = 0x559186f41818}, next_parent = {le_next = 0x0, le_prev = 0x559186f64320}} (gdb) p s->qcow $3 = (BdrvChild *) 0x0 Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210524101257.119377-3-vsementsov@virtuozzo.com> Tested-by: John Arbuckle Signed-off-by: Kevin Wolf --- block/vvfat.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 07232a7..86d99c8 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3209,15 +3209,12 @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - BDRVVVFATState *s = bs->opaque; - - assert(c == s->qcow || (role & BDRV_CHILD_COW)); - - if (c == s->qcow) { + if (role & BDRV_CHILD_DATA) { /* This is a private node, nobody should try to attach to it */ *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; *nshared = BLK_PERM_WRITE_UNCHANGED; } else { + assert(role & BDRV_CHILD_COW); /* The backing file is there so 'commit' can use it. vvfat doesn't * access it in any way. */ *nperm = 0; -- cgit v1.1 From 307261b243df2edde538f3ed5c9d80e168529355 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 27 May 2021 18:40:54 +0300 Subject: block: consistently use bdrv_is_read_only() It's better to use accessor function instead of bs->read_only directly. In some places use bdrv_is_writable() instead of checking both BDRV_O_RDWR set and BDRV_O_INACTIVE not set. In bdrv_open_common() it's a bit strange to add one more variable, but we are going to drop bs->read_only in the next patch, so new ro local variable substitutes it here. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210527154056.70294-2-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 11 +++++++---- block/block-backend.c | 2 +- block/commit.c | 2 +- block/io.c | 4 ++-- block/qapi.c | 2 +- block/qcow2-snapshot.c | 2 +- block/qcow2.c | 5 ++--- block/snapshot.c | 2 +- block/vhdx-log.c | 2 +- 9 files changed, 17 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index ef13076..33e99d0 100644 --- a/block.c +++ b/block.c @@ -1720,6 +1720,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; + bool ro; assert(bs->file == NULL); assert(options != NULL && bs->options != options); @@ -1772,15 +1773,17 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, bs->read_only = !(bs->open_flags & BDRV_O_RDWR); - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { + ro = bdrv_is_read_only(bs); + + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { + if (!ro && bdrv_is_whitelisted(drv, true)) { ret = bdrv_apply_auto_read_only(bs, NULL, NULL); } else { ret = -ENOTSUP; } if (ret < 0) { error_setg(errp, - !bs->read_only && bdrv_is_whitelisted(drv, true) + !ro && bdrv_is_whitelisted(drv, true) ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); @@ -1792,7 +1795,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(qatomic_read(&bs->copy_on_read) == 0); if (bs->open_flags & BDRV_O_COPY_ON_READ) { - if (!bs->read_only) { + if (!ro) { bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); diff --git a/block/block-backend.c b/block/block-backend.c index de5496a..21b834e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2269,7 +2269,7 @@ void blk_update_root_state(BlockBackend *blk) assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; - blk->root_state.read_only = blk->root->bs->read_only; + blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; } diff --git a/block/commit.c b/block/commit.c index b89bb20..b7f0c7c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -453,7 +453,7 @@ int bdrv_commit(BlockDriverState *bs) return -EBUSY; } - ro = backing_file_bs->read_only; + ro = bdrv_is_read_only(backing_file_bs); if (ro) { if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { diff --git a/block/io.c b/block/io.c index 1e826ba..323854d 100644 --- a/block/io.c +++ b/block/io.c @@ -1973,7 +1973,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, bdrv_check_request(offset, bytes, &error_abort); - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { return -EPERM; } @@ -3406,7 +3406,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, if (new_bytes) { bdrv_make_request_serialising(&req, 1); } - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { error_setg(errp, "Image is read-only"); ret = -EACCES; goto out; diff --git a/block/qapi.c b/block/qapi.c index 943e7b1..dc69341 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -59,7 +59,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info = g_malloc0(sizeof(*info)); info->file = g_strdup(bs->filename); - info->ro = bs->read_only; + info->ro = bdrv_is_read_only(bs); info->drv = g_strdup(bs->drv->format_name); info->encrypted = bs->encrypted; diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 2e98c7f..71ddb08 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -1026,7 +1026,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, int new_l1_bytes; int ret; - assert(bs->read_only); + assert(bdrv_is_read_only(bs)); /* Search the snapshot */ snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); diff --git a/block/qcow2.c b/block/qcow2.c index 39b91ef..ee4530c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1723,8 +1723,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, /* Clear unknown autoclear feature bits */ update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; - update_header = - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); + update_header = update_header && bdrv_is_writable(bs); if (update_header) { s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; } @@ -1811,7 +1810,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; diff --git a/block/snapshot.c b/block/snapshot.c index e8ae9a2..6702c75 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -415,7 +415,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, error_setg(errp, "snapshot_id and name are both NULL"); return -EINVAL; } - if (!bs->read_only) { + if (!bdrv_is_read_only(bs)) { error_setg(errp, "Device is not readonly"); return -EINVAL; } diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 404fb5f..7672161 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -801,7 +801,7 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, } if (logs.valid) { - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { bdrv_refresh_filename(bs); ret = -EPERM; error_setg(errp, -- cgit v1.1 From 975da073748ecb271d8ba2a7711ff46a8c6d8366 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 27 May 2021 18:40:55 +0300 Subject: block: drop BlockDriverState::read_only This variable is just a cache for !(bs->open_flags & BDRV_O_RDWR), which we have to synchronize everywhere. Let's just drop it and consistently use bdrv_is_read_only(). Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210527154056.70294-3-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 7 +------ include/block/block_int.h | 1 - tests/unit/test-block-iothread.c | 6 ------ 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/block.c b/block.c index 33e99d0..84cb721 100644 --- a/block.c +++ b/block.c @@ -265,7 +265,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { - return bs->read_only; + return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, @@ -317,7 +317,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, goto fail; } - bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; return 0; @@ -1549,7 +1548,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, } bs->drv = drv; - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); bs->opaque = g_malloc0(drv->instance_size); if (drv->bdrv_file_open) { @@ -1771,8 +1769,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, drv->format_name); - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); - ro = bdrv_is_read_only(bs); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { @@ -4548,7 +4544,6 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs->explicit_options = reopen_state->explicit_options; bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); bs->detect_zeroes = reopen_state->detect_zeroes; if (reopen_state->replace_backing_bs) { diff --git a/include/block/block_int.h b/include/block/block_int.h index b2c8b09..09661a1 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -843,7 +843,6 @@ struct BlockDriverState { * locking needed during I/O... */ int open_flags; /* flags used to open the file, re-used for re-open */ - bool read_only; /* if true, the media is read only */ bool encrypted; /* if true, the media is encrypted */ bool sg; /* if true, the device is a /dev/sg* */ bool probed; /* if true, format was probed rather than specified */ diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index 8cf172c..c39e70b 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -194,13 +194,11 @@ static void test_sync_op_truncate(BdrvChild *c) g_assert_cmpint(ret, ==, -EINVAL); /* Error: Read-only image */ - c->bs->read_only = true; c->bs->open_flags &= ~BDRV_O_RDWR; ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); g_assert_cmpint(ret, ==, -EACCES); - c->bs->read_only = false; c->bs->open_flags |= BDRV_O_RDWR; } @@ -236,13 +234,11 @@ static void test_sync_op_flush(BdrvChild *c) g_assert_cmpint(ret, ==, 0); /* Early success: Read-only image */ - c->bs->read_only = true; c->bs->open_flags &= ~BDRV_O_RDWR; ret = bdrv_flush(c->bs); g_assert_cmpint(ret, ==, 0); - c->bs->read_only = false; c->bs->open_flags |= BDRV_O_RDWR; } @@ -256,13 +252,11 @@ static void test_sync_op_blk_flush(BlockBackend *blk) g_assert_cmpint(ret, ==, 0); /* Early success: Read-only image */ - bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; ret = blk_flush(blk); g_assert_cmpint(ret, ==, 0); - bs->read_only = false; bs->open_flags |= BDRV_O_RDWR; } -- cgit v1.1 From 260242a833d0cdfba5d9988169f2dc89946809a2 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 27 May 2021 18:40:56 +0300 Subject: block: drop BlockBackendRootState::read_only Instead of keeping additional boolean field, let's store the information in BDRV_O_RDWR bit of BlockBackendRootState::open_flags. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210527154056.70294-4-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 10 ++-------- blockdev.c | 3 +-- include/block/block_int.h | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 21b834e..d1a33a2 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1852,7 +1852,7 @@ bool blk_supports_write_perm(BlockBackend *blk) if (bs) { return !bdrv_is_read_only(bs); } else { - return !blk->root_state.read_only; + return blk->root_state.open_flags & BDRV_O_RDWR; } } @@ -2269,7 +2269,6 @@ void blk_update_root_state(BlockBackend *blk) assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; - blk->root_state.read_only = bdrv_is_read_only(blk->root->bs); blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; } @@ -2288,12 +2287,7 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { - int bs_flags; - - bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; - - return bs_flags; + return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) diff --git a/blockdev.c b/blockdev.c index 834c230..f08192d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -583,8 +583,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); - blk_rs->open_flags = bdrv_flags; - blk_rs->read_only = read_only; + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); blk_rs->detect_zeroes = detect_zeroes; qobject_unref(bs_opts); diff --git a/include/block/block_int.h b/include/block/block_int.h index 09661a1..057d88b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1007,7 +1007,6 @@ struct BlockDriverState { struct BlockBackendRootState { int open_flags; - bool read_only; BlockdevDetectZeroesOptions detect_zeroes; }; -- cgit v1.1 From 73ebf29729d1a40feaa9f8ab8951b6ee6dbfbede Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 27 May 2021 19:20:19 +0200 Subject: block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS A customer reported that running qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2 fails for them with the following error message when the images are stored on a GPFS file system : qemu-img: error while writing sector 0: Invalid argument After analyzing the strace output, it seems like the problem is in handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE) returns EINVAL, which can apparently happen if the file system has a different idea of the granularity of the operation. It's arguably a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL according to the man-page of fallocate(), but the file system is out there in production and so we have to deal with it. In commit 294682cc3a ("block: workaround for unaligned byte range in fallocate()") we also already applied the a work-around for the same problem to the earlier fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the PUNCH_HOLE call. But instead of silently catching and returning -ENOTSUP (which causes the caller to fall back to writing zeroes), let's rather inform the user once about the buggy file system and try the other fallback instead. Signed-off-by: Thomas Huth Message-Id: <20210527172020.847617-2-thuth@redhat.com> Signed-off-by: Kevin Wolf --- block/file-posix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 10b71d9..6e24083 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1650,6 +1650,17 @@ static int handle_aiocb_write_zeroes(void *opaque) return ret; } s->has_fallocate = false; + } else if (ret == -EINVAL) { + /* + * Some file systems like older versions of GPFS do not like un- + * aligned byte ranges, and return EINVAL in such a case, though + * they should not do it according to the man-page of fallocate(). + * Warn about the bad filesystem and try the final fallback instead. + */ + warn_report_once("Your file system is misbehaving: " + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " + "Please report this bug to your file sytem " + "vendor."); } else if (ret != -ENOTSUP) { return ret; } else { -- cgit v1.1 From fa95e9fbab2c19fc07ba82988b1690f8a6ff171b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 27 May 2021 19:20:20 +0200 Subject: block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE If fallocate(... FALLOC_FL_ZERO_RANGE ...) returns EINVAL, it's likely an indication that the file system is buggy and does not implement unaligned accesses right. We still might be lucky with the other fallback fallocate() calls later in this function, though, so we should not return immediately and try the others first. Since FALLOC_FL_ZERO_RANGE could also return EINVAL if the file descriptor is not a regular file, we ignore this filesystem bug silently, without printing an error message for the user. Signed-off-by: Thomas Huth Message-Id: <20210527172020.847617-3-thuth@redhat.com> Signed-off-by: Kevin Wolf --- block/file-posix.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 6e24083..f37dfc1 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1625,17 +1625,17 @@ static int handle_aiocb_write_zeroes(void *opaque) if (s->has_write_zeroes) { int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, aiocb->aio_offset, aiocb->aio_nbytes); - if (ret == -EINVAL) { - /* - * Allow falling back to pwrite for file systems that - * do not support fallocate() for an unaligned byte range. - */ - return -ENOTSUP; - } - if (ret == 0 || ret != -ENOTSUP) { + if (ret == -ENOTSUP) { + s->has_write_zeroes = false; + } else if (ret == 0 || ret != -EINVAL) { return ret; } - s->has_write_zeroes = false; + /* + * Note: Some file systems do not like unaligned byte ranges, and + * return EINVAL in such a case, though they should not do it according + * to the man-page of fallocate(). Thus we simply ignore this return + * value and try the other fallbacks instead. + */ } #endif -- cgit v1.1 From f8d2ad7881cde73508f9adeb28c7e033b0903ca8 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:13 +0300 Subject: block: document child argument of bdrv_attach_child_common() The logic around **child is not obvious: this reference is used not only to return resulting child, but also to rollback NULL value on transaction abort. So, let's add documentation and some assertions. While being here, drop extra declaration of bdrv_attach_child_noperm(). Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210601075218.79249-2-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 84cb721..c0fd363 100644 --- a/block.c +++ b/block.c @@ -84,14 +84,6 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); -static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - BdrvChild **child, - Transaction *tran, - Error **errp); static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran); @@ -2759,6 +2751,12 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { /* * Common part of attaching bdrv child to bs or to blk or to job + * + * Resulting new child is returned through @child. + * At start *@child must be NULL. + * @child is saved to a new entry of @tran, so that *@child could be reverted to + * NULL on abort(). So referenced variable must live at least until transaction + * end. */ static int bdrv_attach_child_common(BlockDriverState *child_bs, const char *child_name, @@ -2833,6 +2831,10 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, return 0; } +/* + * Variable referenced by @child must live at least until transaction end. + * (see bdrv_attach_child_common() doc for details) + */ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, @@ -2915,7 +2917,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, child_role, perm, shared_perm, opaque, &child, tran, errp); if (ret < 0) { - assert(child == NULL); goto out; } @@ -2923,6 +2924,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, out: tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); + bdrv_unref(child_bs); return child; } @@ -2962,6 +2966,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, out: tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); bdrv_unref(child_bs); -- cgit v1.1 From fd240a184b0e8a9889097216d182def6aece30cb Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:14 +0300 Subject: block-backend: improve blk_root_get_parent_desc() We have different types of parents: block nodes, block backends and jobs. So, it makes sense to specify type together with name. While being here also use g_autofree. iotest 307 output is updated. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Message-Id: <20210601075218.79249-3-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 9 ++++----- tests/qemu-iotests/307.out | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index d1a33a2..5be32c0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -141,19 +141,18 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, static char *blk_root_get_parent_desc(BdrvChild *child) { BlockBackend *blk = child->opaque; - char *dev_id; + g_autofree char *dev_id = NULL; if (blk->name) { - return g_strdup(blk->name); + return g_strdup_printf("block device '%s'", blk->name); } dev_id = blk_get_attached_dev_id(blk); if (*dev_id) { - return dev_id; + return g_strdup_printf("block device '%s'", dev_id); } else { /* TODO Callback into the BB owner for something more detailed */ - g_free(dev_id); - return g_strdup("a block device"); + return g_strdup("an unnamed block device"); } } diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out index daa8ad2..66bf2dd 100644 --- a/tests/qemu-iotests/307.out +++ b/tests/qemu-iotests/307.out @@ -53,7 +53,7 @@ exports available: 1 === Add a writable export === {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} -{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}} +{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} {"execute": "device_del", "arguments": {"id": "sda"}} {"return": {}} {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -- cgit v1.1 From 2c0a3acb9570a9e1ffae3c73ef94bc826dc9dd1d Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:15 +0300 Subject: block: improve bdrv_child_get_parent_desc() We have different types of parents: block nodes, block backends and jobs. So, it makes sense to specify type together with name. Next, this handler us used to compose an error message about permission conflict. And permission conflict occurs in a specific place of block graph. We shouldn't report name of parent device (as it refers another place in block graph), but exactly and only the name of the node. So, use bdrv_get_node_name() directly. iotest 283 output is updated. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Alberto Garcia Message-Id: <20210601075218.79249-4-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 2 +- tests/qemu-iotests/283.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index c0fd363..94cb7b6 100644 --- a/block.c +++ b/block.c @@ -1149,7 +1149,7 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) static char *bdrv_child_get_parent_desc(BdrvChild *c) { BlockDriverState *parent = c->opaque; - return g_strdup(bdrv_get_device_or_node_name(parent)); + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); } static void bdrv_child_cb_drained_begin(BdrvChild *child) diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out index 97e62a4..c9397bf 100644 --- a/tests/qemu-iotests/283.out +++ b/tests/qemu-iotests/283.out @@ -5,7 +5,7 @@ {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} {"return": {}} {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}} +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} === backup-top should be gone after job-finalize === -- cgit v1.1 From 8081f064e404dd524b3c43248b2084dee9d32d7c Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:16 +0300 Subject: block/vvfat: inherit child_vvfat_qcow from child_of_bds Recently we've fixed a crash by adding .get_parent_aio_context handler to child_vvfat_qcow. Now we want it to support .get_parent_desc as well. child_vvfat_qcow wants to implement own .inherit_options, it's not bad. But omitting all other handlers is a bad idea. Let's inherit the class from child_of_bds instead, similar to chain_child_class and detach_by_driver_cb_class in test-bdrv-drain.c. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210601075218.79249-5-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block/vvfat.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 86d99c8..ae9d387 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3127,11 +3127,7 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); } -static const BdrvChildClass child_vvfat_qcow = { - .parent_is_bds = true, - .inherit_options = vvfat_qcow_options, - .get_parent_aio_context = child_of_bds_get_parent_aio_context, -}; +static BdrvChildClass child_vvfat_qcow; static int enable_write_target(BlockDriverState *bs, Error **errp) { @@ -3268,6 +3264,8 @@ static BlockDriver bdrv_vvfat = { static void bdrv_vvfat_init(void) { + child_vvfat_qcow = child_of_bds; + child_vvfat_qcow.inherit_options = vvfat_qcow_options; bdrv_register(&bdrv_vvfat); } -- cgit v1.1 From da261b69aee9acb46ac1b0ebfe0ccb7b74450a88 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:17 +0300 Subject: block: simplify bdrv_child_user_desc() All child classes have this callback. So, drop unreachable code. Still add an assertion to bdrv_attach_child_common(), to early detect bad classes. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210601075218.79249-6-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index 94cb7b6..3c0c396 100644 --- a/block.c +++ b/block.c @@ -2026,11 +2026,7 @@ bool bdrv_is_writable(BlockDriverState *bs) static char *bdrv_child_user_desc(BdrvChild *c) { - if (c->klass->get_parent_desc) { - return c->klass->get_parent_desc(c); - } - - return g_strdup("another user"); + return c->klass->get_parent_desc(c); } static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) @@ -2772,6 +2768,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, assert(child); assert(*child == NULL); + assert(child_class->get_parent_desc); new_child = g_new(BdrvChild, 1); *new_child = (BdrvChild) { -- cgit v1.1 From 30ebb9aa9203b5051c5c4f4e2421803b94e5f2cc Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 1 Jun 2021 10:52:18 +0300 Subject: block: improve permission conflict error message Now permissions are updated as follows: 1. do graph modifications ignoring permissions 2. do permission update (of course, we rollback [1] if [2] fails) So, on stage [2] we can't say which users are "old" and which are "new" and exist only since [1]. And current error message is a bit outdated. Let's improve it, to make everything clean. While being here, add also a comment and some good assertions. iotests 283, 307, qsd-jobs outputs are updated. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210601075218.79249-7-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block.c | 29 ++++++++++++++++++++++------- tests/qemu-iotests/283.out | 2 +- tests/qemu-iotests/307.out | 2 +- tests/qemu-iotests/tests/qsd-jobs.out | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 3c0c396..3f45689 100644 --- a/block.c +++ b/block.c @@ -2029,20 +2029,35 @@ static char *bdrv_child_user_desc(BdrvChild *c) return c->klass->get_parent_desc(c); } +/* + * Check that @a allows everything that @b needs. @a and @b must reference same + * child node. + */ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) { - g_autofree char *user = NULL; - g_autofree char *perm_names = NULL; + const char *child_bs_name; + g_autofree char *a_user = NULL; + g_autofree char *b_user = NULL; + g_autofree char *perms = NULL; + + assert(a->bs); + assert(a->bs == b->bs); if ((b->perm & a->shared_perm) == b->perm) { return true; } - perm_names = bdrv_perm_names(b->perm & ~a->shared_perm); - user = bdrv_child_user_desc(a); - error_setg(errp, "Conflicts with use by %s as '%s', which does not " - "allow '%s' on %s", - user, a->name, perm_names, bdrv_get_node_name(b->bs)); + child_bs_name = bdrv_get_node_name(b->bs); + a_user = bdrv_child_user_desc(a); + b_user = bdrv_child_user_desc(b); + perms = bdrv_perm_names(b->perm & ~a->shared_perm); + + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " + "both required by %s (uses node '%s' as '%s' child) and " + "unshared by %s (uses node '%s' as '%s' child).", + child_bs_name, perms, + b_user, child_bs_name, b->name, + a_user, child_bs_name, a->name); return false; } diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out index c9397bf..c6e12b1 100644 --- a/tests/qemu-iotests/283.out +++ b/tests/qemu-iotests/283.out @@ -5,7 +5,7 @@ {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}} {"return": {}} {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}} -{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}} +{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}} === backup-top should be gone after job-finalize === diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out index 66bf2dd..4b0c7e1 100644 --- a/tests/qemu-iotests/307.out +++ b/tests/qemu-iotests/307.out @@ -53,7 +53,7 @@ exports available: 1 === Add a writable export === {"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}} -{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}} +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}} {"execute": "device_del", "arguments": {"id": "sda"}} {"return": {}} {"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out index 9f52255..1894233 100644 --- a/tests/qemu-iotests/tests/qsd-jobs.out +++ b/tests/qemu-iotests/tests/qsd-jobs.out @@ -16,7 +16,7 @@ QMP_VERSION {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} -{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}} +{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} *** done -- cgit v1.1 From 095cc4d0f62513d75e9bc1da37f08d9e97f267c4 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Wed, 2 Jun 2021 08:05:51 +0200 Subject: block-backend: add drained_poll Allow block backends to poll their devices/users to check if they have been quiesced when entering a drained section. This will be used in the next patch to wait for the NBD server to be completely quiesced. Suggested-by: Kevin Wolf Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Signed-off-by: Sergio Lopez Message-Id: <20210602060552.17433-2-slp@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/block-backend.c | 7 ++++++- include/sysemu/block-backend.h | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index 5be32c0..15f1ea4 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2386,8 +2386,13 @@ static void blk_root_drained_begin(BdrvChild *child) static bool blk_root_drained_poll(BdrvChild *child) { BlockBackend *blk = child->opaque; + bool busy = false; assert(blk->quiesce_counter); - return !!blk->in_flight; + + if (blk->dev_ops && blk->dev_ops->drained_poll) { + busy = blk->dev_ops->drained_poll(blk->dev_opaque); + } + return busy || !!blk->in_flight; } static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 880e903..5423e3d 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -66,6 +66,10 @@ typedef struct BlockDevOps { * Runs when the backend's last drain request ends. */ void (*drained_end)(void *opaque); + /* + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); } BlockDevOps; /* This struct is embedded in (the private) BlockBackend struct and contains -- cgit v1.1 From fd6afc501a019682d1b8468b562355a2887087bd Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Wed, 2 Jun 2021 08:05:52 +0200 Subject: nbd/server: Use drained block ops to quiesce the server Before switching between AioContexts we need to make sure that we're fully quiesced ("nb_requests == 0" for every client) when entering the drained section. To do this, we set "quiescing = true" for every client on ".drained_begin" to prevent new coroutines from being created, and check if "nb_requests == 0" on ".drained_poll". Finally, once we're exiting the drained section, on ".drained_end" we set "quiescing = false" and call "nbd_client_receive_next_request()" to resume the processing of new requests. With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be reverted to be as simple as they were before f148ae7d36. RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 Suggested-by: Kevin Wolf Signed-off-by: Sergio Lopez Message-Id: <20210602060552.17433-3-slp@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 86a44a9..b60ebc3 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req) g_free(req); client->nb_requests--; + + if (client->quiescing && client->nb_requests == 0) { + aio_wait_kick(); + } + nbd_client_receive_next_request(client); nbd_client_put(client); @@ -1530,49 +1535,68 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) QTAILQ_FOREACH(client, &exp->clients, next) { qio_channel_attach_aio_context(client->ioc, ctx); + assert(client->nb_requests == 0); assert(client->recv_coroutine == NULL); assert(client->send_coroutine == NULL); - - if (client->quiescing) { - client->quiescing = false; - nbd_client_receive_next_request(client); - } } } -static void nbd_aio_detach_bh(void *opaque) +static void blk_aio_detach(void *opaque) { NBDExport *exp = opaque; NBDClient *client; + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); + QTAILQ_FOREACH(client, &exp->clients, next) { qio_channel_detach_aio_context(client->ioc); + } + + exp->common.ctx = NULL; +} + +static void nbd_drained_begin(void *opaque) +{ + NBDExport *exp = opaque; + NBDClient *client; + + QTAILQ_FOREACH(client, &exp->clients, next) { client->quiescing = true; + } +} - if (client->recv_coroutine) { - if (client->read_yielding) { - qemu_aio_coroutine_enter(exp->common.ctx, - client->recv_coroutine); - } else { - AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); - } - } +static void nbd_drained_end(void *opaque) +{ + NBDExport *exp = opaque; + NBDClient *client; - if (client->send_coroutine) { - AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); - } + QTAILQ_FOREACH(client, &exp->clients, next) { + client->quiescing = false; + nbd_client_receive_next_request(client); } } -static void blk_aio_detach(void *opaque) +static bool nbd_drained_poll(void *opaque) { NBDExport *exp = opaque; + NBDClient *client; - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); + QTAILQ_FOREACH(client, &exp->clients, next) { + if (client->nb_requests != 0) { + /* + * If there's a coroutine waiting for a request on nbd_read_eof() + * enter it here so we don't depend on the client to wake it up. + */ + if (client->recv_coroutine != NULL && client->read_yielding) { + qemu_aio_coroutine_enter(exp->common.ctx, + client->recv_coroutine); + } - aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); + return true; + } + } - exp->common.ctx = NULL; + return false; } static void nbd_eject_notifier(Notifier *n, void *data) @@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); } +static const BlockDevOps nbd_block_ops = { + .drained_begin = nbd_drained_begin, + .drained_end = nbd_drained_end, + .drained_poll = nbd_drained_poll, +}; + static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, Error **errp) { @@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, exp->allocation_depth = arg->allocation_depth; + /* + * We need to inhibit request queuing in the block layer to ensure we can + * be properly quiesced when entering a drained section, as our coroutines + * servicing pending requests might enter blk_pread(). + */ + blk_set_disable_request_queuing(blk, true); + blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); + blk_set_dev_ops(blk, &nbd_block_ops, exp); + QTAILQ_INSERT_TAIL(&exports, exp, next); return 0; @@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp) } blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, blk_aio_detach, exp); + blk_set_disable_request_queuing(exp->common.blk, false); } for (i = 0; i < exp->nr_export_bitmaps; i++) { -- cgit v1.1 From 8146b357d0cb3a3f5d500a1536f9f0e1ff3302cc Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 May 2021 17:16:27 +0300 Subject: block-copy: fix block_copy_task_entry() progress update Don't report successful progress on failure, when call_state->ret is set. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210528141628.44287-2-vsementsov@virtuozzo.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/block-copy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index c2e5090..f9e871b 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -439,9 +439,11 @@ static coroutine_fn int block_copy_task_entry(AioTask *task) ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, &error_is_read); - if (ret < 0 && !t->call_state->ret) { - t->call_state->ret = ret; - t->call_state->error_is_read = error_is_read; + if (ret < 0) { + if (!t->call_state->ret) { + t->call_state->ret = ret; + t->call_state->error_is_read = error_is_read; + } } else { progress_work_done(t->s->progress, t->bytes); } -- cgit v1.1 From bed9523471c13a44cdc15ed9ba0fb78cadf8c142 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 May 2021 17:16:28 +0300 Subject: block-copy: refactor copy_range handling Currently we update s->use_copy_range and s->copy_size in block_copy_do_copy(). It's not very good: 1. block_copy_do_copy() is intended to be a simple function, that wraps bdrv_co_ functions for need of block copy. That's why we don't pass BlockCopyTask into it. So, block_copy_do_copy() is bad place for manipulation with generic state of block-copy process 2. We are going to make block-copy thread-safe. So, it's good to move manipulation with state of block-copy to the places where we'll need critical sections anyway, to not introduce extra synchronization primitives in block_copy_do_copy(). Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210528141628.44287-3-vsementsov@virtuozzo.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/block-copy.c | 72 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index f9e871b..5808cfe 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -65,6 +65,7 @@ typedef struct BlockCopyTask { int64_t offset; int64_t bytes; bool zeroes; + bool copy_range; QLIST_ENTRY(BlockCopyTask) list; CoQueue wait_queue; /* coroutines blocked on this task */ } BlockCopyTask; @@ -183,6 +184,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, .call_state = call_state, .offset = offset, .bytes = bytes, + .copy_range = s->use_copy_range, }; qemu_co_queue_init(&task->wait_queue); QLIST_INSERT_HEAD(&s->tasks, task, list); @@ -342,11 +344,18 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, * * No sync here: nor bitmap neighter intersecting requests handling, only copy. * + * @copy_range is an in-out argument: if *copy_range is false, copy_range is not + * done. If *copy_range is true, copy_range is attempted. If the copy_range + * attempt fails, the function falls back to the usual read+write and + * *copy_range is set to false. *copy_range and zeroes must not be true + * simultaneously. + * * Returns 0 on success. */ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes, - bool zeroes, bool *error_is_read) + bool zeroes, bool *copy_range, + bool *error_is_read) { int ret; int64_t nbytes = MIN(offset + bytes, s->len) - offset; @@ -359,6 +368,7 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, assert(offset + bytes <= s->len || offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); assert(nbytes < INT_MAX); + assert(!(*copy_range && zeroes)); if (zeroes) { ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & @@ -370,32 +380,15 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, return ret; } - if (s->use_copy_range) { + if (*copy_range) { ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, 0, s->write_flags); if (ret < 0) { trace_block_copy_copy_range_fail(s, offset, ret); - s->use_copy_range = false; - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + *copy_range = false; /* Fallback to read+write with allocated buffer */ } else { - if (s->use_copy_range) { - /* - * Successful copy-range. Now increase copy_size. copy_range - * does not respect max_transfer (it's a TODO), so we factor - * that in here. - * - * Note: we double-check s->use_copy_range for the case when - * parallel block-copy request unsets it during previous - * bdrv_co_copy_range call. - */ - s->copy_size = - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, - s->target), - s->cluster_size)); - } - goto out; + return 0; } } @@ -431,14 +424,44 @@ out: return ret; } +static void block_copy_handle_copy_range_result(BlockCopyState *s, + bool is_success) +{ + if (!s->use_copy_range) { + /* already disabled */ + return; + } + + if (is_success) { + /* + * Successful copy-range. Now increase copy_size. copy_range + * does not respect max_transfer (it's a TODO), so we factor + * that in here. + */ + s->copy_size = + MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), + QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, + s->target), + s->cluster_size)); + } else { + /* Copy-range failed, disable it. */ + s->use_copy_range = false; + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + } +} + static coroutine_fn int block_copy_task_entry(AioTask *task) { BlockCopyTask *t = container_of(task, BlockCopyTask, task); bool error_is_read = false; + bool copy_range = t->copy_range; int ret; ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, - &error_is_read); + ©_range, &error_is_read); + if (t->copy_range) { + block_copy_handle_copy_range_result(t->s, copy_range); + } if (ret < 0) { if (!t->call_state->ret) { t->call_state->ret = ret; @@ -619,7 +642,10 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) g_free(task); continue; } - task->zeroes = ret & BDRV_BLOCK_ZERO; + if (ret & BDRV_BLOCK_ZERO) { + task->zeroes = true; + task->copy_range = false; + } if (s->speed) { if (!call_state->ignore_ratelimit) { -- cgit v1.1 From b317006a3f1f04191a7981cef83417cb2477213b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Jun 2021 18:25:48 +0200 Subject: docs/secure-coding-practices: Describe how to use 'null-co' block driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document that security reports must use 'null-co,read-zeroes=on' because otherwise the memory is left uninitialized (which is an on-purpose performance feature). Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20210601162548.2076631-1-philmd@redhat.com> Signed-off-by: Kevin Wolf --- docs/devel/secure-coding-practices.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst index cbfc8af..0454cc5 100644 --- a/docs/devel/secure-coding-practices.rst +++ b/docs/devel/secure-coding-practices.rst @@ -104,3 +104,12 @@ structures and only process the local copy. This prevents time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to crash when a vCPU thread modifies guest RAM while device emulation is processing it. + +Use of null-co block drivers +---------------------------- + +The ``null-co`` block driver is designed for performance: its read accesses are +not initialized by default. In case this driver has to be used for security +research, it must be used with the ``read-zeroes=on`` option which fills read +buffers with zeroes. Security issues reported with the default +(``read-zeroes=off``) will be discarded. -- cgit v1.1