From 520d8b40e898158bc9a2b416d1cbdb44d2260bc7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 25 Jan 2022 16:14:35 +0100 Subject: block/export: Fix vhost-user-blk shutdown with requests in flight The vhost-user-blk export runs requests asynchronously in their own coroutine. When the vhost connection goes away and we want to stop the vhost-user server, we need to wait for these coroutines to stop before we can unmap the shared memory. Otherwise, they would still access the unmapped memory and crash. This introduces a refcount to VuServer which is increased when spawning a new request coroutine and decreased before the coroutine exits. The memory is only unmapped when the refcount reaches zero. Signed-off-by: Kevin Wolf Message-Id: <20220125151435.48792-1-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 1862563..a129204 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, return VIRTIO_BLK_S_IOERR; } +/* Called with server refcount increased, must decrease before returning */ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) { VuBlkReq *req = opaque; @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) } vu_blk_req_complete(req); + vhost_user_server_unref(server); return; err: free(req); + vhost_user_server_unref(server); } static void vu_blk_process_vq(VuDev *vu_dev, int idx) @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) Coroutine *co = qemu_coroutine_create(vu_blk_virtio_process_req, req); + + vhost_user_server_ref(server); qemu_coroutine_enter(co); } } -- cgit v1.1 From ac50419460cc45a66214e6d1c3e9d0d670522f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Feb 2022 12:26:54 +0100 Subject: block/export/fuse: Rearrange if-else-if ladder in fuse_fallocate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to safely maintain a mixture of #ifdef'ry with if-else-if ladder, rearrange the last statement (!mode) first. Since it is mutually exclusive with the other conditions, checking it first doesn't make any logical difference, but allows to add #ifdef'ry around in a more cleanly way. Suggested-by: Kevin Wolf Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20220201112655.344373-2-f4bug@amsat.org> Signed-off-by: Kevin Wolf --- block/export/fuse.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'block') diff --git a/block/export/fuse.c b/block/export/fuse.c index 6710d8a..d25e478 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -629,7 +629,26 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, length = MIN(length, blk_len - offset); } - if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!mode) { + /* We can only fallocate at the EOF with a truncate */ + if (offset < blk_len) { + fuse_reply_err(req, EOPNOTSUPP); + return; + } + + if (offset > blk_len) { + /* No preallocation needed here */ + ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + } + + ret = fuse_do_truncate(exp, offset + length, true, + PREALLOC_MODE_FALLOC); + } + else if (mode & FALLOC_FL_PUNCH_HOLE) { if (!(mode & FALLOC_FL_KEEP_SIZE)) { fuse_reply_err(req, EINVAL); return; @@ -665,25 +684,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, } while (ret == 0 && length > 0); } #endif /* CONFIG_FALLOCATE_ZERO_RANGE */ - else if (!mode) { - /* We can only fallocate at the EOF with a truncate */ - if (offset < blk_len) { - fuse_reply_err(req, EOPNOTSUPP); - return; - } - - if (offset > blk_len) { - /* No preallocation needed here */ - ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); - if (ret < 0) { - fuse_reply_err(req, -ret); - return; - } - } - - ret = fuse_do_truncate(exp, offset + length, true, - PREALLOC_MODE_FALLOC); - } else { + else { ret = -EOPNOTSUPP; } -- cgit v1.1 From 3c9c70347b8e636c08035f39288f8cdd2e68bbda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Feb 2022 12:26:55 +0100 Subject: block/export/fuse: Fix build failure on FreeBSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building on FreeBSD we get: [816/6851] Compiling C object libblockdev.fa.p/block_export_fuse.c.o ../block/export/fuse.c:628:16: error: use of undeclared identifier 'FALLOC_FL_KEEP_SIZE' if (mode & FALLOC_FL_KEEP_SIZE) { ^ ../block/export/fuse.c:651:16: error: use of undeclared identifier 'FALLOC_FL_PUNCH_HOLE' if (mode & FALLOC_FL_PUNCH_HOLE) { ^ ../block/export/fuse.c:652:22: error: use of undeclared identifier 'FALLOC_FL_KEEP_SIZE' if (!(mode & FALLOC_FL_KEEP_SIZE)) { ^ 3 errors generated. FAILED: libblockdev.fa.p/block_export_fuse.c.o Meson indeed reported FALLOC_FL_PUNCH_HOLE is not available: C compiler for the host machine: cc (clang 10.0.1 "FreeBSD clang version 10.0.1") Checking for function "fallocate" : NO Checking for function "posix_fallocate" : YES Header has symbol "FALLOC_FL_PUNCH_HOLE" : NO Header has symbol "FALLOC_FL_ZERO_RANGE" : NO ... Similarly to commit 304332039 ("block/export/fuse.c: fix musl build"), guard the code requiring FALLOC_FL_KEEP_SIZE / FALLOC_FL_PUNCH_HOLE definitions under CONFIG_FALLOCATE_PUNCH_HOLE #ifdef'ry. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20220201112655.344373-3-f4bug@amsat.org> Signed-off-by: Kevin Wolf --- block/export/fuse.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block') diff --git a/block/export/fuse.c b/block/export/fuse.c index d25e478..fdda8e3 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -625,9 +625,11 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, return; } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE if (mode & FALLOC_FL_KEEP_SIZE) { length = MIN(length, blk_len - offset); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ if (!mode) { /* We can only fallocate at the EOF with a truncate */ @@ -648,6 +650,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, ret = fuse_do_truncate(exp, offset + length, true, PREALLOC_MODE_FALLOC); } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE else if (mode & FALLOC_FL_PUNCH_HOLE) { if (!(mode & FALLOC_FL_KEEP_SIZE)) { fuse_reply_err(req, EINVAL); @@ -662,6 +665,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, length -= size; } while (ret == 0 && length > 0); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ #ifdef CONFIG_FALLOCATE_ZERO_RANGE else if (mode & FALLOC_FL_ZERO_RANGE) { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { -- cgit v1.1 From 9e302f64bb407a9bb097b626da97228c2654cfee Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 13 Jan 2022 15:44:25 +0100 Subject: block/rbd: fix handling of holes in .bdrv_co_block_status the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. We can see a hole in an image if we diff against base if there exists an older snapshot of the image and we have discarded blocks in the image where the snapshot has data. Fix this by simply handling a hole like an unallocated area. There are no callbacks for unallocated areas so just bail out if we hit a hole. Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b Suggested-by: Ilya Dryomov Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Message-Id: <20220113144426.4036493-2-pl@kamp.de> Reviewed-by: Ilya Dryomov Reviewed-by: Stefano Garzarella Signed-off-by: Kevin Wolf --- block/rbd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/rbd.c b/block/rbd.c index def9629..20bb896 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, RBDDiffIterateReq *req = opaque; assert(req->offs + req->bytes <= offs); - /* - * we do not diff against a snapshot so we should never receive a callback - * for a hole. - */ - assert(exists); + + /* treat a hole like an unallocated area and bail out */ + if (!exists) { + return 0; + } if (!req->exists && offs > req->offs) { /* -- cgit v1.1 From fc176116cdea816ceb8dd969080b2b95f58edbc0 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 13 Jan 2022 15:44:26 +0100 Subject: block/rbd: workaround for ceph issue #53784 librbd had a bug until early 2022 that affected all versions of ceph that supported fast-diff. This bug results in reporting of incorrect offsets if the offset parameter to rbd_diff_iterate2 is not object aligned. This patch works around this bug for pre Quincy versions of librbd. Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Message-Id: <20220113144426.4036493-3-pl@kamp.de> Reviewed-by: Ilya Dryomov Reviewed-by: Stefano Garzarella Tested-by: Stefano Garzarella Signed-off-by: Kevin Wolf --- block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/rbd.c b/block/rbd.c index 20bb896..8f183eb 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, int status, r; RBDDiffIterateReq req = { .offs = offset }; uint64_t features, flags; + uint64_t head = 0; assert(offset + bytes <= s->image_size); @@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, return status; } - r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, +#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) + /* + * librbd had a bug until early 2022 that affected all versions of ceph that + * supported fast-diff. This bug results in reporting of incorrect offsets + * if the offset parameter to rbd_diff_iterate2 is not object aligned. + * Work around this bug by rounding down the offset to object boundaries. + * This is OK because we call rbd_diff_iterate2 with whole_object = true. + * However, this workaround only works for non cloned images with default + * striping. + * + * See: https://tracker.ceph.com/issues/53784 + */ + + /* check if RBD image has non-default striping enabled */ + if (features & RBD_FEATURE_STRIPINGV2) { + return status; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + /* + * check if RBD image is a clone (= has a parent). + * + * rbd_get_parent_info is deprecated from Nautilus onwards, but the + * replacement rbd_get_parent is not present in Luminous and Mimic. + */ + if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { + return status; + } +#pragma GCC diagnostic pop + + head = req.offs & (s->object_size - 1); + req.offs -= head; + bytes += head; +#endif + + r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, qemu_rbd_diff_iterate_cb, &req); if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { return status; @@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; } - *pnum = req.bytes; + assert(req.bytes > head); + *pnum = req.bytes - head; return status; } -- cgit v1.1