From bdc4c4c5e372756a5ba3fb3a61e585b02f0dd7f4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 19 Feb 2021 16:33:46 +0100 Subject: backup: Remove nodes from job in .clean() The block job holds a reference to the backup-top node (because it is passed as the main job BDS to block_job_create()). Therefore, bdrv_backup_top_drop() cannot delete the backup-top node (replacing it by its child does not affect the job parent, because that has .stay_at_node set). That is a problem, because all of its I/O functions assume the BlockCopyState (s->bcs) to be valid and that it has a filtered child; but after bdrv_backup_top_drop(), neither of those things are true. It does not make sense to add new parents to backup-top after backup_clean(), so we should detach it from the job before bdrv_backup_top_drop(). Because there is no function to do that for a single node, just detach all of the job's nodes -- the job does not do anything past backup_clean() anyway. Signed-off-by: Max Reitz Message-Id: <20210219153348.41861-2-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- block/backup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/backup.c b/block/backup.c index 94e6dcd..6cf2f97 100644 --- a/block/backup.c +++ b/block/backup.c @@ -103,6 +103,7 @@ static void backup_abort(Job *job) static void backup_clean(Job *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); + block_job_remove_all_bdrv(&s->common); bdrv_backup_top_drop(s->backup_top); } -- cgit v1.1 From 705dde27c6c53b73d2aa139b5b2a0ea490153e5b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 19 Feb 2021 16:33:47 +0100 Subject: backup-top: Refuse I/O in inactive state When the backup-top node transitions from active to inactive in bdrv_backup_top_drop(), the BlockCopyState is freed and the filtered child is removed, so the node effectively becomes unusable. However, noone told its I/O functions this, so they will happily continue accessing bs->backing and s->bcs. Prevent that by aborting early when s->active is false. (After the preceding patch, the node should be gone after bdrv_backup_top_drop(), so this should largely be a theoretical problem. But still, better to be safe than sorry, and also I think it just makes sense to check s->active in the I/O functions.) Signed-off-by: Max Reitz Message-Id: <20210219153348.41861-3-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- block/backup-top.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block') diff --git a/block/backup-top.c b/block/backup-top.c index d1253e1..589e8b6 100644 --- a/block/backup-top.c +++ b/block/backup-top.c @@ -45,6 +45,12 @@ static coroutine_fn int backup_top_co_preadv( BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVBackupTopState *s = bs->opaque; + + if (!s->active) { + return -EIO; + } + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } @@ -54,6 +60,10 @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset, BDRVBackupTopState *s = bs->opaque; uint64_t off, end; + if (!s->active) { + return -EIO; + } + if (flags & BDRV_REQ_WRITE_UNCHANGED) { return 0; } -- cgit v1.1 From a4f1542af58fd6ab061e594d4e161f1c8b4a4372 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Feb 2021 14:46:48 +0000 Subject: block/export: fix blk_size double byteswap The config->blk_size field is little-endian. Use the native-endian blk_size variable to avoid double byteswapping. Fixes: 11f60f7eaee2630dd6fa0c3a8c49f792e46c4cf1 ("block/export: make vhost-user-blk config space little-endian") Signed-off-by: Stefan Hajnoczi Message-Id: <20210223144653.811468-8-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index ab2c4d4..7aea132 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -356,7 +356,7 @@ vu_blk_initialize_config(BlockDriverState *bs, config->num_queues = cpu_to_le16(num_queues); config->max_discard_sectors = cpu_to_le32(32768); config->max_discard_seg = cpu_to_le32(1); - config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9); + config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); config->max_write_zeroes_sectors = cpu_to_le32(32768); config->max_write_zeroes_seg = cpu_to_le32(1); } -- cgit v1.1 From 524bac0744e5abf95856fb9e31c01fd2ef102188 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Feb 2021 14:46:49 +0000 Subject: block/export: use VIRTIO_BLK_SECTOR_BITS Use VIRTIO_BLK_SECTOR_BITS and VIRTIO_BLK_SECTOR_SIZE when dealing with virtio-blk sector numbers. Although the values happen to be the same as BDRV_SECTOR_BITS and BDRV_SECTOR_SIZE, they are conceptually different. This makes it clearer when we are dealing with virtio-blk sector units. Use VIRTIO_BLK_SECTOR_BITS in vu_blk_initialize_config(). Later patches will use it the new constants the virtqueue request processing code path. Suggested-by: Max Reitz Signed-off-by: Stefan Hajnoczi Message-Id: <20210223144653.811468-9-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 7aea132..2614a63 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -20,6 +20,13 @@ #include "sysemu/block-backend.h" #include "util/block-helpers.h" +/* + * Sector units are 512 bytes regardless of the + * virtio_blk_config->blk_size value. + */ +#define VIRTIO_BLK_SECTOR_BITS 9 +#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS) + enum { VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, }; @@ -347,7 +354,8 @@ vu_blk_initialize_config(BlockDriverState *bs, uint32_t blk_size, uint16_t num_queues) { - config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + config->capacity = + cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS); config->blk_size = cpu_to_le32(blk_size); config->size_max = cpu_to_le32(0); config->seg_max = cpu_to_le32(128 - 2); @@ -356,7 +364,8 @@ vu_blk_initialize_config(BlockDriverState *bs, config->num_queues = cpu_to_le16(num_queues); config->max_discard_sectors = cpu_to_le32(32768); config->max_discard_seg = cpu_to_le32(1); - config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); + config->discard_sector_alignment = + cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); config->max_write_zeroes_sectors = cpu_to_le32(32768); config->max_write_zeroes_seg = cpu_to_le32(1); } @@ -383,7 +392,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, if (vu_opts->has_logical_block_size) { logical_block_size = vu_opts->logical_block_size; } else { - logical_block_size = BDRV_SECTOR_SIZE; + logical_block_size = VIRTIO_BLK_SECTOR_SIZE; } check_block_size(exp->id, "logical-block-size", logical_block_size, &local_err); -- cgit v1.1 From e44362ce317bcc46d409ed6c4a5ed2b46804bcbf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Feb 2021 14:46:50 +0000 Subject: block/export: fix vhost-user-blk export sector number calculation The driver is supposed to honor the blk_size field but the protocol still uses 512-byte sector numbers. It is incorrect to multiply req->sector_num by blk_size. VIRTIO 1.1 5.2.5 Device Initialization says: blk_size can be read to determine the optimal sector size for the driver to use. This does not affect the units used in the protocol (always 512 bytes), but awareness of the correct value can affect performance. Fixes: 3578389bcf76c824a5d82e6586a6f0c71e56f2aa ("block/export: vhost-user block device backend server") Signed-off-by: Stefan Hajnoczi Message-Id: <20210223144653.811468-10-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 2614a63..f747962 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -144,7 +144,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) break; } - int64_t offset = req->sector_num * vexp->blk_size; + int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; QEMUIOVector qiov; if (is_write) { qemu_iovec_init_external(&qiov, out_iov, out_num); -- cgit v1.1 From db4eadf9f10e19f864d70d1df3a90fbda31b8c06 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Feb 2021 14:46:51 +0000 Subject: block/export: port virtio-blk discard/write zeroes input validation Validate discard/write zeroes the same way we do for virtio-blk. Some of these checks are mandated by the VIRTIO specification, others are internal to QEMU. Signed-off-by: Stefan Hajnoczi Message-Id: <20210223144653.811468-11-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 116 ++++++++++++++++++++++++++++------- 1 file changed, 93 insertions(+), 23 deletions(-) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index f747962..0404422 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -29,6 +29,8 @@ enum { VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, + VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768, + VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768, }; struct virtio_blk_inhdr { unsigned char status; @@ -65,30 +67,102 @@ static void vu_blk_req_complete(VuBlkReq *req) free(req); } +static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, + size_t size) +{ + uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; + uint64_t total_sectors; + + if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return false; + } + if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) { + return false; + } + blk_get_geometry(vexp->export.blk, &total_sectors); + if (sector > total_sectors || nb_sectors > total_sectors - sector) { + return false; + } + return true; +} + static int coroutine_fn -vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov, +vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, uint32_t iovcnt, uint32_t type) { + BlockBackend *blk = vexp->export.blk; struct virtio_blk_discard_write_zeroes desc; - ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); + ssize_t size; + uint64_t sector; + uint32_t num_sectors; + uint32_t max_sectors; + uint32_t flags; + int bytes; + + /* Only one desc is currently supported */ + if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) { + return VIRTIO_BLK_S_UNSUPP; + } + + size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); if (unlikely(size != sizeof(desc))) { - error_report("Invalid size %zd, expect %zu", size, sizeof(desc)); - return -EINVAL; + error_report("Invalid size %zd, expected %zu", size, sizeof(desc)); + return VIRTIO_BLK_S_IOERR; } - uint64_t range[2] = { le64_to_cpu(desc.sector) << 9, - le32_to_cpu(desc.num_sectors) << 9 }; - if (type == VIRTIO_BLK_T_DISCARD) { - if (blk_co_pdiscard(blk, range[0], range[1]) == 0) { - return 0; + sector = le64_to_cpu(desc.sector); + num_sectors = le32_to_cpu(desc.num_sectors); + flags = le32_to_cpu(desc.flags); + max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ? + VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS : + VHOST_USER_BLK_MAX_DISCARD_SECTORS; + + /* This check ensures that 'bytes' fits in an int */ + if (unlikely(num_sectors > max_sectors)) { + return VIRTIO_BLK_S_IOERR; + } + + bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS; + + if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) { + return VIRTIO_BLK_S_IOERR; + } + + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard + * and write zeroes commands if any unknown flag is set. + */ + if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + return VIRTIO_BLK_S_UNSUPP; + } + + if (type == VIRTIO_BLK_T_WRITE_ZEROES) { + int blk_flags = 0; + + if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + blk_flags |= BDRV_REQ_MAY_UNMAP; + } + + if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS, + bytes, blk_flags) == 0) { + return VIRTIO_BLK_S_OK; } - } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { - if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) { - return 0; + } else if (type == VIRTIO_BLK_T_DISCARD) { + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for + * discard commands if the unmap flag is set. + */ + if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + return VIRTIO_BLK_S_UNSUPP; + } + + if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS, + bytes) == 0) { + return VIRTIO_BLK_S_OK; } } - return -EINVAL; + return VIRTIO_BLK_S_IOERR; } static void coroutine_fn vu_blk_virtio_process_req(void *opaque) @@ -177,19 +251,13 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) } case VIRTIO_BLK_T_DISCARD: case VIRTIO_BLK_T_WRITE_ZEROES: { - int rc; - if (!vexp->writable) { req->in->status = VIRTIO_BLK_S_IOERR; break; } - rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type); - if (rc == 0) { - req->in->status = VIRTIO_BLK_S_OK; - } else { - req->in->status = VIRTIO_BLK_S_IOERR; - } + req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num, + type); break; } default: @@ -362,11 +430,13 @@ vu_blk_initialize_config(BlockDriverState *bs, config->min_io_size = cpu_to_le16(1); config->opt_io_size = cpu_to_le32(1); config->num_queues = cpu_to_le16(num_queues); - config->max_discard_sectors = cpu_to_le32(32768); + config->max_discard_sectors = + cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS); config->max_discard_seg = cpu_to_le32(1); config->discard_sector_alignment = cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); - config->max_write_zeroes_sectors = cpu_to_le32(32768); + config->max_write_zeroes_sectors + = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS); config->max_write_zeroes_seg = cpu_to_le32(1); } -- cgit v1.1 From 05ae4e674e3d47342a7660ae7bc55b393e09f4c7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Feb 2021 14:46:53 +0000 Subject: block/export: port virtio-blk read/write range check Check that the sector number and byte count are valid. Signed-off-by: Stefan Hajnoczi Message-Id: <20210223144653.811468-13-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 0404422..cb5d896 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -209,6 +209,8 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) switch (type & ~VIRTIO_BLK_T_BARRIER) { case VIRTIO_BLK_T_IN: case VIRTIO_BLK_T_OUT: { + QEMUIOVector qiov; + int64_t offset; ssize_t ret = 0; bool is_write = type & VIRTIO_BLK_T_OUT; req->sector_num = le64_to_cpu(req->out.sector); @@ -218,13 +220,24 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) break; } - int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; - QEMUIOVector qiov; if (is_write) { qemu_iovec_init_external(&qiov, out_iov, out_num); - ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); } else { qemu_iovec_init_external(&qiov, in_iov, in_num); + } + + if (unlikely(!vu_blk_sect_range_ok(vexp, + req->sector_num, + qiov.size))) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + + offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; + + if (is_write) { + ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); + } else { ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0); } if (ret >= 0) { -- cgit v1.1 From 35f428ba39718711177036ddf112e9299e7f20b2 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 24 Feb 2021 13:47:02 +0300 Subject: qcow2-bitmap: make bytes_covered_by_bitmap_cluster() public Rename bytes_covered_by_bitmap_cluster() to bdrv_dirty_bitmap_serialization_coverage() and make it public. It is needed as we are going to share it with bitmap loading in parallels format. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Reviewed-by: Denis V. Lunev Message-Id: <20210224104707.88430-2-vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf --- block/dirty-bitmap.c | 13 +++++++++++++ block/qcow2-bitmap.c | 16 ++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 9b9cd71..a0eaa28 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -726,6 +726,19 @@ uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) return hbitmap_serialization_align(bitmap->bitmap); } +/* Return the disk size covered by a chunk of serialized bitmap data. */ +uint64_t bdrv_dirty_bitmap_serialization_coverage(int serialized_chunk_size, + const BdrvDirtyBitmap *bitmap) +{ + uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + uint64_t limit = granularity * (serialized_chunk_size << 3); + + assert(QEMU_IS_ALIGNED(limit, + bdrv_dirty_bitmap_serialization_align(bitmap))); + return limit; +} + + void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, uint8_t *buf, uint64_t offset, uint64_t bytes) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 5eef82f..42d81c4 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -278,18 +278,6 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) return 0; } -/* Return the disk size covered by a single qcow2 cluster of bitmap data. */ -static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s, - const BdrvDirtyBitmap *bitmap) -{ - uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap); - uint64_t limit = granularity * (s->cluster_size << 3); - - assert(QEMU_IS_ALIGNED(limit, - bdrv_dirty_bitmap_serialization_align(bitmap))); - return limit; -} - /* load_bitmap_data * @bitmap_table entries must satisfy specification constraints. * @bitmap must be cleared */ @@ -312,7 +300,7 @@ static int load_bitmap_data(BlockDriverState *bs, } buf = g_malloc(s->cluster_size); - limit = bytes_covered_by_bitmap_cluster(s, bitmap); + limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap); for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) { uint64_t count = MIN(bm_size - offset, limit); uint64_t entry = bitmap_table[i]; @@ -1303,7 +1291,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs, } buf = g_malloc(s->cluster_size); - limit = bytes_covered_by_bitmap_cluster(s, bitmap); + limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap); assert(DIV_ROUND_UP(bm_size, limit) == tb_size); offset = 0; -- cgit v1.1 From e0b5207f54e45ccb7c733e736add47f7b06c5867 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 24 Feb 2021 13:47:04 +0300 Subject: block/parallels: BDRVParallelsState: add cluster_size field We are going to use it in more places, calculating "s->tracks << BDRV_SECTOR_BITS" doesn't look good. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210224104707.88430-4-vsementsov@virtuozzo.com> Reviewed-by: Denis V. Lunev Signed-off-by: Kevin Wolf --- block/parallels.c | 8 ++++---- block/parallels.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/parallels.c b/block/parallels.c index 3c22dfd..9594d84 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -421,7 +421,6 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, int ret; uint32_t i; bool flush_bat = false; - int cluster_size = s->tracks << BDRV_SECTOR_BITS; size = bdrv_getlength(bs->file->bs); if (size < 0) { @@ -472,7 +471,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, high_off = off; } - if (prev_off != 0 && (prev_off + cluster_size) != off) { + if (prev_off != 0 && (prev_off + s->cluster_size) != off) { res->bfi.fragmented_clusters++; } prev_off = off; @@ -487,10 +486,10 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, } } - res->image_end_offset = high_off + cluster_size; + res->image_end_offset = high_off + s->cluster_size; if (size > res->image_end_offset) { int64_t count; - count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size); + count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", size - res->image_end_offset); @@ -771,6 +770,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } + s->cluster_size = s->tracks << BDRV_SECTOR_BITS; s->bat_size = le32_to_cpu(ph.bat_entries); if (s->bat_size > INT_MAX / sizeof(uint32_t)) { diff --git a/block/parallels.h b/block/parallels.h index 5aa101c..9a9209e 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -79,6 +79,7 @@ typedef struct BDRVParallelsState { ParallelsPreallocMode prealloc_mode; unsigned int tracks; + unsigned int cluster_size; unsigned int off_multiplier; Error *migration_blocker; -- cgit v1.1 From baefd977002e72402f2cc42b11f2cb11b96aae9e Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 24 Feb 2021 13:47:05 +0300 Subject: parallels: support bitmap extension for read-only mode Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20210224104707.88430-5-vsementsov@virtuozzo.com> Reviewed-by: Denis V. Lunev Signed-off-by: Kevin Wolf --- block/meson.build | 3 +- block/parallels-ext.c | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++ block/parallels.c | 18 +++ block/parallels.h | 6 +- 4 files changed, 325 insertions(+), 2 deletions(-) create mode 100644 block/parallels-ext.c (limited to 'block') diff --git a/block/meson.build b/block/meson.build index eeaefe5..d21990e 100644 --- a/block/meson.build +++ b/block/meson.build @@ -57,7 +57,8 @@ block_ss.add(when: 'CONFIG_QED', if_true: files( 'qed-table.c', 'qed.c', )) -block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], if_true: files('parallels.c')) +block_ss.add(when: [libxml2, 'CONFIG_PARALLELS'], + if_true: files('parallels.c', 'parallels-ext.c')) block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')) block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit]) block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c')) diff --git a/block/parallels-ext.c b/block/parallels-ext.c new file mode 100644 index 0000000..e0dd097 --- /dev/null +++ b/block/parallels-ext.c @@ -0,0 +1,300 @@ +/* + * Support of Parallels Format Extension. It's a part of Parallels format + * driver. + * + * Copyright (c) 2021 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "parallels.h" +#include "crypto/hash.h" +#include "qemu/uuid.h" + +#define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL + +#define PARALLELS_END_OF_FEATURES_MAGIC 0x0ULL +#define PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC 0x20385FAE252CB34AULL + +typedef struct ParallelsFormatExtensionHeader { + uint64_t magic; /* PARALLELS_FORMAT_EXTENSION_MAGIC */ + uint8_t check_sum[16]; +} QEMU_PACKED ParallelsFormatExtensionHeader; + +typedef struct ParallelsFeatureHeader { + uint64_t magic; + uint64_t flags; + uint32_t data_size; + uint32_t _unused; +} QEMU_PACKED ParallelsFeatureHeader; + +typedef struct ParallelsDirtyBitmapFeature { + uint64_t size; + uint8_t id[16]; + uint32_t granularity; + uint32_t l1_size; + /* L1 table follows */ +} QEMU_PACKED ParallelsDirtyBitmapFeature; + +/* Given L1 table read bitmap data from the image and populate @bitmap */ +static int parallels_load_bitmap_data(BlockDriverState *bs, + const uint64_t *l1_table, + uint32_t l1_size, + BdrvDirtyBitmap *bitmap, + Error **errp) +{ + BDRVParallelsState *s = bs->opaque; + int ret = 0; + uint64_t offset, limit; + uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint8_t *buf = NULL; + uint64_t i, tab_size = + DIV_ROUND_UP(bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size), + s->cluster_size); + + if (tab_size != l1_size) { + error_setg(errp, "Bitmap table size %" PRIu32 " does not correspond " + "to bitmap size and cluster size. Expected %" PRIu64, + l1_size, tab_size); + return -EINVAL; + } + + buf = qemu_blockalign(bs, s->cluster_size); + limit = bdrv_dirty_bitmap_serialization_coverage(s->cluster_size, bitmap); + for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) { + uint64_t count = MIN(bm_size - offset, limit); + uint64_t entry = l1_table[i]; + + if (entry == 0) { + /* No need to deserialize zeros because @bitmap is cleared. */ + continue; + } + + if (entry == 1) { + bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count, false); + } else { + ret = bdrv_pread(bs->file, entry << BDRV_SECTOR_BITS, buf, + s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to read bitmap data cluster"); + goto finish; + } + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count, + false); + } + } + ret = 0; + + bdrv_dirty_bitmap_deserialize_finish(bitmap); + +finish: + qemu_vfree(buf); + + return ret; +} + +/* + * @data buffer (of @data_size size) is the Dirty bitmaps feature which + * consists of ParallelsDirtyBitmapFeature followed by L1 table. + */ +static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs, + uint8_t *data, + size_t data_size, + Error **errp) +{ + int ret; + ParallelsDirtyBitmapFeature bf; + g_autofree uint64_t *l1_table = NULL; + BdrvDirtyBitmap *bitmap; + QemuUUID uuid; + char uuidstr[UUID_FMT_LEN + 1]; + int i; + + if (data_size < sizeof(bf)) { + error_setg(errp, "Too small Bitmap Feature area in Parallels Format " + "Extension: %zu bytes, expected at least %zu bytes", + data_size, sizeof(bf)); + return NULL; + } + memcpy(&bf, data, sizeof(bf)); + bf.size = le64_to_cpu(bf.size); + bf.granularity = le32_to_cpu(bf.granularity) << BDRV_SECTOR_BITS; + bf.l1_size = le32_to_cpu(bf.l1_size); + data += sizeof(bf); + data_size -= sizeof(bf); + + if (bf.size != bs->total_sectors) { + error_setg(errp, "Bitmap size (in sectors) %" PRId64 " differs from " + "disk size in sectors %" PRId64, bf.size, bs->total_sectors); + return NULL; + } + + if (bf.l1_size * sizeof(uint64_t) > data_size) { + error_setg(errp, "Bitmaps feature corrupted: l1 table exceeds " + "extension data_size"); + return NULL; + } + + memcpy(&uuid, bf.id, sizeof(uuid)); + qemu_uuid_unparse(&uuid, uuidstr); + bitmap = bdrv_create_dirty_bitmap(bs, bf.granularity, uuidstr, errp); + if (!bitmap) { + return NULL; + } + + l1_table = g_new(uint64_t, bf.l1_size); + for (i = 0; i < bf.l1_size; i++, data += sizeof(uint64_t)) { + l1_table[i] = ldq_le_p(data); + } + + ret = parallels_load_bitmap_data(bs, l1_table, bf.l1_size, bitmap, errp); + if (ret < 0) { + bdrv_release_dirty_bitmap(bitmap); + return NULL; + } + + /* We support format extension only for RO parallels images. */ + assert(!(bs->open_flags & BDRV_O_RDWR)); + bdrv_dirty_bitmap_set_readonly(bitmap, true); + + return bitmap; +} + +static int parallels_parse_format_extension(BlockDriverState *bs, + uint8_t *ext_cluster, Error **errp) +{ + BDRVParallelsState *s = bs->opaque; + int ret; + int remaining = s->cluster_size; + uint8_t *pos = ext_cluster; + ParallelsFormatExtensionHeader eh; + g_autofree uint8_t *hash = NULL; + size_t hash_len = 0; + GSList *bitmaps = NULL, *el; + + memcpy(&eh, pos, sizeof(eh)); + eh.magic = le64_to_cpu(eh.magic); + pos += sizeof(eh); + remaining -= sizeof(eh); + + if (eh.magic != PARALLELS_FORMAT_EXTENSION_MAGIC) { + error_setg(errp, "Wrong parallels Format Extension magic: 0x%" PRIx64 + ", expected: 0x%llx", eh.magic, + PARALLELS_FORMAT_EXTENSION_MAGIC); + goto fail; + } + + ret = qcrypto_hash_bytes(QCRYPTO_HASH_ALG_MD5, (char *)pos, remaining, + &hash, &hash_len, errp); + if (ret < 0) { + goto fail; + } + + if (hash_len != sizeof(eh.check_sum) || + memcmp(hash, eh.check_sum, sizeof(eh.check_sum)) != 0) { + error_setg(errp, "Wrong checksum in Format Extension header. Format " + "extension is corrupted."); + goto fail; + } + + while (true) { + ParallelsFeatureHeader fh; + BdrvDirtyBitmap *bitmap; + + if (remaining < sizeof(fh)) { + error_setg(errp, "Can not read feature header, as remaining bytes " + "(%d) in Format Extension is less than Feature header " + "size (%zu)", remaining, sizeof(fh)); + goto fail; + } + + memcpy(&fh, pos, sizeof(fh)); + pos += sizeof(fh); + remaining -= sizeof(fh); + + fh.magic = le64_to_cpu(fh.magic); + fh.flags = le64_to_cpu(fh.flags); + fh.data_size = le32_to_cpu(fh.data_size); + + if (fh.flags) { + error_setg(errp, "Flags for extension feature are unsupported"); + goto fail; + } + + if (fh.data_size > remaining) { + error_setg(errp, "Feature data_size exceedes Format Extension " + "cluster"); + goto fail; + } + + switch (fh.magic) { + case PARALLELS_END_OF_FEATURES_MAGIC: + return 0; + + case PARALLELS_DIRTY_BITMAP_FEATURE_MAGIC: + bitmap = parallels_load_bitmap(bs, pos, fh.data_size, errp); + if (!bitmap) { + goto fail; + } + bitmaps = g_slist_append(bitmaps, bitmap); + break; + + default: + error_setg(errp, "Unknown feature: 0x%" PRIu64, fh.magic); + goto fail; + } + + pos = ext_cluster + QEMU_ALIGN_UP(pos + fh.data_size - ext_cluster, 8); + } + +fail: + for (el = bitmaps; el; el = el->next) { + bdrv_release_dirty_bitmap(el->data); + } + g_slist_free(bitmaps); + + return -EINVAL; +} + +int parallels_read_format_extension(BlockDriverState *bs, + int64_t ext_off, Error **errp) +{ + BDRVParallelsState *s = bs->opaque; + int ret; + uint8_t *ext_cluster = qemu_blockalign(bs, s->cluster_size); + + assert(ext_off > 0); + + ret = bdrv_pread(bs->file, ext_off, ext_cluster, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read Format Extension cluster"); + goto out; + } + + ret = parallels_parse_format_extension(bs, ext_cluster, errp); + +out: + qemu_vfree(ext_cluster); + + return ret; +} diff --git a/block/parallels.c b/block/parallels.c index 9594d84..6ebad2a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -29,6 +29,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "block/block_int.h" #include "block/qdict.h" @@ -843,6 +844,23 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail_options; } + if (ph.ext_off) { + if (flags & BDRV_O_RDWR) { + /* + * It's unsafe to open image RW if there is an extension (as we + * don't support it). But parallels driver in QEMU historically + * ignores the extension, so print warning and don't care. + */ + warn_report("Format Extension ignored in RW mode"); + } else { + ret = parallels_read_format_extension( + bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp); + if (ret < 0) { + goto fail; + } + } + } + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) { s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); ret = parallels_update_header(bs); diff --git a/block/parallels.h b/block/parallels.h index 9a9209e..f22f43f 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -48,7 +48,8 @@ typedef struct ParallelsHeader { uint64_t nb_sectors; uint32_t inuse; uint32_t data_off; - char padding[12]; + uint32_t flags; + uint64_t ext_off; } QEMU_PACKED ParallelsHeader; typedef enum ParallelsPreallocMode { @@ -85,4 +86,7 @@ typedef struct BDRVParallelsState { Error *migration_blocker; } BDRVParallelsState; +int parallels_read_format_extension(BlockDriverState *bs, + int64_t ext_off, Error **errp); + #endif -- cgit v1.1