Diffstat (limited to 'block')
-rw-r--r--  block/block-copy.c                    |   4
-rw-r--r--  block/copy-before-write.c             |   2
-rw-r--r--  block/export/vduse-blk.c              |   2
-rw-r--r--  block/export/vhost-user-blk-server.c  |   2
-rw-r--r--  block/export/vhost-user-blk-server.h  |   2
-rw-r--r--  block/file-posix.c                    |   8
-rw-r--r--  block/graph-lock.c                    |   2
-rw-r--r--  block/io.c                            |  82
-rw-r--r--  block/iscsi.c                         |   1
-rw-r--r--  block/linux-aio.c                     |   2
-rw-r--r--  block/meson.build                     |  12
-rw-r--r--  block/mirror.c                        |  10
-rw-r--r--  block/nbd.c                           |  11
-rw-r--r--  block/parallels.c                     | 346
-rw-r--r--  block/parallels.h                     |   1
-rw-r--r--  block/preallocate.c                   |   2
-rw-r--r--  block/qapi.c                          |  32
-rw-r--r--  block/qcow2-refcount.c                |   2
-rw-r--r--  block/qcow2.c                         |   1
-rw-r--r--  block/snapshot-access.c               |   2
-rw-r--r--  block/vhdx.c                          |   2
-rw-r--r--  block/vhdx.h                          |   4
-rw-r--r--  block/vmdk.c                          |   2
-rw-r--r--  block/vpc.c                           |   4
24 files changed, 380 insertions, 158 deletions
diff --git a/block/block-copy.c b/block/block-copy.c
index e13d7bc..1c60368 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -67,7 +67,7 @@ typedef struct BlockCopyCallState {
     QLIST_ENTRY(BlockCopyCallState) list;
 
     /*
-     * Fields that report information about return values and erros.
+     * Fields that report information about return values and errors.
      * Protected by lock in BlockCopyState.
      */
     bool error_is_read;
@@ -462,7 +462,7 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
  * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
  * s->len only to cover last cluster when s->len is not aligned to clusters.
  *
- * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ * No sync here: neither bitmap nor intersecting requests handling, only copy.
  *
  * @method is an in-out argument, so that copy_range can be either extended to
  * a full-size buffer or disabled if the copy_range attempt fails. The output
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index b866e42..9a0e2b6 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs)
     s->bcs = NULL;
 }
 
-BlockDriver bdrv_cbw_filter = {
+static BlockDriver bdrv_cbw_filter = {
     .format_name = "copy-before-write",
     .instance_size = sizeof(BDRVCopyBeforeWriteState),
 
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 83b0554..172f73c 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -138,7 +138,7 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
                        on_vduse_vq_kick, NULL, NULL, NULL, vq);
 
-    /* Make sure we don't miss any kick afer reconnecting */
+    /* Make sure we don't miss any kick after reconnecting */
     eventfd_write(vduse_queue_get_fd(vq), 1);
 }
 
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index f7b5073..fe2cee3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Parts of the code based on nbd/server.c.
  *
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
index fcf46fc..77fb5c0 100644
--- a/block/export/vhost-user-blk-server.h
+++ b/block/export/vhost-user-blk-server.h
@@ -1,5 +1,5 @@
 /*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
  *
  * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
  * Copyright (c) 2020 Red Hat, Inc.
diff --git a/block/file-posix.c b/block/file-posix.c
index aa89789..50e2b20 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1159,9 +1159,9 @@ static int raw_reopen_prepare(BDRVReopenState *state,
      * As part of reopen prepare we also want to create new fd by
      * raw_reconfigure_getfd(). But it wants updated "perm", when in
      * bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to
-     * permission update. Happily, permission update is always a part (a seprate
-     * stage) of bdrv_reopen_multiple() so we can rely on this fact and
-     * reconfigure fd in raw_check_perm().
+     * permission update. Happily, permission update is always a part
+     * (a separate stage) of bdrv_reopen_multiple() so we can rely on this
+     * fact and reconfigure fd in raw_check_perm().
      */
     s->reopen_state = state;
 
@@ -3372,7 +3372,7 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
  * of an array of zone descriptors.
  * zones is an array of zone descriptors to hold zone information on reply;
  * offset can be any byte within the entire size of the device;
- * nr_zones is the maxium number of sectors the command should operate on.
+ * nr_zones is the maximum number of sectors the command should operate on.
  */
 #if defined(CONFIG_BLKZONED)
 static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset,
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01..f357a2c 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -95,7 +95,7 @@ static uint32_t reader_count(void)
 
     QEMU_LOCK_GUARD(&aio_context_list_lock);
 
-    /* rd can temporarly be negative, but the total will *always* be >= 0 */
+    /* rd can temporarily be negative, but the total will *always* be >= 0 */
    rd = orphaned_reader_count;
     QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
         rd += qatomic_read(&brdv_graph->reader_count);
diff --git a/block/io.c b/block/io.c
--- a/block/io.c
+++ b/block/io.c
@@ -342,7 +342,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
      * timer callback), it is a bug in the caller that should be fixed.
      */
     assert(data.done);
-    /* Reaquire the AioContext of bs if we dropped it */
+    /* Reacquire the AioContext of bs if we dropped it */
     if (ctx != co_ctx) {
         aio_context_acquire(ctx);
     }
@@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
         qatomic_dec(&req->bs->serialising_in_flight);
     }
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
+
+    /*
+     * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+     * anymore because the request has been removed from the list, so it's safe
+     * to restart the queue outside reqs_lock to minimize the critical section.
+     */
     qemu_co_queue_restart_all(&req->wait_queue);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
@@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
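Editor's note on the two tracked-request hunks above: reqs_lock changes from a coroutine CoMutex to a plain QemuMutex, and the wakeup moves outside the critical section. The new comment carries the safety argument: once the request is unlinked from the list, no new waiter can attach to it. A minimal pthread-based sketch of the same narrow-the-critical-section pattern (hypothetical names; QEMU itself uses QemuMutex and CoQueue, not a condition variable):

    #include <pthread.h>
    #include <sys/queue.h>

    typedef struct Request {
        LIST_ENTRY(Request) list;
        pthread_cond_t wait_queue;   /* waiters blocked on this request */
    } Request;

    static LIST_HEAD(ReqList, Request) tracked = LIST_HEAD_INITIALIZER(tracked);
    static pthread_mutex_t reqs_lock = PTHREAD_MUTEX_INITIALIZER;

    static void request_begin(Request *req)
    {
        pthread_mutex_lock(&reqs_lock);
        LIST_INSERT_HEAD(&tracked, req, list);
        pthread_mutex_unlock(&reqs_lock);
    }

    static void request_end(Request *req)
    {
        /* The critical section covers only the shared-list update... */
        pthread_mutex_lock(&reqs_lock);
        LIST_REMOVE(req, list);
        pthread_mutex_unlock(&reqs_lock);

        /*
         * ...while the broadcast runs unlocked: waiters discover requests
         * by walking the list under reqs_lock, and req is no longer on it,
         * so no new waiter can arrive between the unlock and the wakeup.
         */
        pthread_cond_broadcast(&req->wait_queue);
    }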
@@ -728,21 +734,21 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
 }
 
 /**
- * Round a region to cluster boundaries
+ * Round a region to subcluster (if supported) or cluster boundaries
  */
 void coroutine_fn GRAPH_RDLOCK
-bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                       int64_t *cluster_offset, int64_t *cluster_bytes)
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
+                          int64_t *align_offset, int64_t *align_bytes)
 {
     BlockDriverInfo bdi;
     IO_CODE();
-    if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_offset = offset;
-        *cluster_bytes = bytes;
+    if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
+        *align_offset = offset;
+        *align_bytes = bytes;
     } else {
-        int64_t c = bdi.cluster_size;
-        *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
-        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+        int64_t c = bdi.subcluster_size;
+        *align_offset = QEMU_ALIGN_DOWN(offset, c);
+        *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
     }
 }
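The renamed bdrv_round_to_subclusters() keeps the exact rounding arithmetic and merely switches the granularity source from bdi.cluster_size to bdi.subcluster_size. A self-contained demonstration of that arithmetic (the macros mirror the semantics of QEMU's QEMU_ALIGN_DOWN/QEMU_ALIGN_UP for positive values; the sample numbers are invented):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same semantics as QEMU's QEMU_ALIGN_DOWN/QEMU_ALIGN_UP for n, m > 0 */
    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
    #define ALIGN_UP(n, m)   ALIGN_DOWN((n) + (m) - 1, (m))

    int main(void)
    {
        int64_t offset = 70000, bytes = 4096;
        int64_t granularity = 32768;      /* e.g. a 32k subcluster */

        int64_t align_offset = ALIGN_DOWN(offset, granularity);
        int64_t align_bytes = ALIGN_UP(offset - align_offset + bytes,
                                       granularity);

        /* Prints: aligned region [65536, +32768) for request [70000, +4096) */
        printf("aligned region [%" PRId64 ", +%" PRId64 ")\n",
               align_offset, align_bytes);
        return 0;
    }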
@@ -787,9 +793,9 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
         return;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bdrv_wait_serialising_requests_locked(self);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
@@ -797,12 +803,12 @@
 {
     IO_CODE();
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
 
     tracked_request_set_serialising(req, align);
     bdrv_wait_serialising_requests_locked(req);
 
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
 }
 
 int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
@@ -1168,8 +1174,8 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
     void *bounce_buffer = NULL;
     BlockDriver *drv = bs->drv;
-    int64_t cluster_offset;
-    int64_t cluster_bytes;
+    int64_t align_offset;
+    int64_t align_bytes;
     int64_t skip_bytes;
     int ret;
     int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
@@ -1203,28 +1209,28 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
      * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
      * is one reason we loop rather than doing it all at once.
      */
-    bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
-    skip_bytes = offset - cluster_offset;
+    bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
+    skip_bytes = offset - align_offset;
 
     trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
-                                   cluster_offset, cluster_bytes);
+                                   align_offset, align_bytes);
 
-    while (cluster_bytes) {
+    while (align_bytes) {
         int64_t pnum;
 
         if (skip_write) {
             ret = 1; /* "already allocated", so nothing will be copied */
-            pnum = MIN(cluster_bytes, max_transfer);
+            pnum = MIN(align_bytes, max_transfer);
         } else {
-            ret = bdrv_is_allocated(bs, cluster_offset,
-                                    MIN(cluster_bytes, max_transfer), &pnum);
+            ret = bdrv_is_allocated(bs, align_offset,
+                                    MIN(align_bytes, max_transfer), &pnum);
             if (ret < 0) {
                 /*
                  * Safe to treat errors in querying allocation as if
                  * unallocated; we'll probably fail again soon on the
                  * read, but at least that will set a decent errno.
                  */
-                pnum = MIN(cluster_bytes, max_transfer);
+                pnum = MIN(align_bytes, max_transfer);
             }
 
             /* Stop at EOF if the image ends in the middle of the cluster */
@@ -1242,7 +1248,7 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
         /* Must copy-on-read; use the bounce buffer */
         pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
         if (!bounce_buffer) {
-            int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+            int64_t max_we_need = MAX(pnum, align_bytes - pnum);
             int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
             int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
@@ -1254,7 +1260,7 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
         }
         qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
 
-        ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+        ret = bdrv_driver_preadv(bs, align_offset, pnum,
                                  &local_qiov, 0, 0);
         if (ret < 0) {
             goto err;
@@ -1266,13 +1272,13 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
             /* FIXME: Should we (perhaps conditionally) be setting
              * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
              * that still correctly reads as zero? */
-            ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+            ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
                                            BDRV_REQ_WRITE_UNCHANGED);
         } else {
             /* This does not change the data on the disk, it is not
              * necessary to flush even in cache=writethrough mode.
              */
-            ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+            ret = bdrv_driver_pwritev(bs, align_offset, pnum,
                                       &local_qiov, 0,
                                       BDRV_REQ_WRITE_UNCHANGED);
         }
@@ -1301,8 +1307,8 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
             }
         }
 
-        cluster_offset += pnum;
-        cluster_bytes -= pnum;
+        align_offset += pnum;
+        align_bytes -= pnum;
         progress += pnum - skip_bytes;
         skip_bytes = 0;
     }
@@ -2996,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto early_exit;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     current_gen = qatomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
@@ -3006,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     /* Flushes reach this point in nondecreasing current_gen order.  */
     bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -3094,11 +3100,11 @@ out:
         bs->flushed_gen = current_gen;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
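Context for the bdrv_co_flush() hunks above: besides serializing flushes, reqs_lock guards generation bookkeeping, where write_gen counts completed writes and flushed_gen records the generation covered by the last successful flush, so a flush with nothing new to write back can be skipped. A simplified, single-threaded sketch of just that generation check (hypothetical Disk struct; locking and the flush_queue are omitted):

    #include <stdint.h>

    typedef struct Disk {
        uint64_t write_gen;    /* bumped on every completed write */
        uint64_t flushed_gen;  /* generation covered by the last flush */
    } Disk;

    static int do_hardware_flush(Disk *d) { (void)d; return 0; }

    /* Returns 0 on success, without issuing redundant hardware flushes. */
    static int disk_flush(Disk *d)
    {
        uint64_t current_gen = d->write_gen;

        if (d->flushed_gen == current_gen) {
            return 0;          /* nothing written since the last flush */
        }
        int ret = do_hardware_flush(d);
        if (ret == 0) {
            d->flushed_gen = current_gen;
        }
        return ret;
    }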
diff --git a/block/iscsi.c b/block/iscsi.c
index 34f97ab..5640c8b 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
         return NULL;
     }
 
+    /* Must use malloc(): this is freed via scsi_free_scsi_task() */
     acb->task = malloc(sizeof(struct scsi_task));
     if (acb->task == NULL) {
         error_report("iSCSI: Failed to allocate task for scsi command. %s",
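The new comment in iscsi.c documents an allocator-pairing rule: memory released by a library must come from the allocator that library expects. Since scsi_free_scsi_task() belongs to libiscsi, the task presumably gets released with plain free(), so it must be allocated with malloc() rather than g_malloc(). A contrived illustration of the rule itself (not QEMU code):

    #include <glib.h>
    #include <stdlib.h>

    int main(void)
    {
        /* Pair malloc() with free() ... */
        char *a = malloc(64);
        free(a);

        /* ... and g_malloc() with g_free(). Mixing the two pairs is not
         * guaranteed to work on platforms where GLib does not simply wrap
         * the system allocator. */
        char *b = g_malloc(64);
        g_free(b);
        return 0;
    }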
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 561c71a..1a51503 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -227,7 +227,7 @@ static void qemu_laio_process_completions(LinuxAioState *s)
 
             /* If we are nested we have to notify the level above that we are
              * done by setting event_max to zero, upper level will then jump
-             * out of it's own `for` loop. If we are the last all counters droped to zero. */
+             * out of it's own `for` loop. If we are the last all counters dropped to zero. */
             s->event_max = 0;
             s->event_idx = 0;
 }
diff --git a/block/meson.build b/block/meson.build
index 529fc17..f351b9d 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,41 +4,41 @@ block_ss.add(files(
   'aio_task.c',
   'amend.c',
   'backup.c',
-  'copy-before-write.c',
   'blkdebug.c',
   'blklogwrites.c',
   'blkverify.c',
   'block-backend.c',
   'block-copy.c',
-  'graph-lock.c',
   'commit.c',
+  'copy-before-write.c',
   'copy-on-read.c',
-  'preallocate.c',
-  'progress_meter.c',
   'create.c',
   'crypto.c',
   'dirty-bitmap.c',
   'filter-compress.c',
+  'graph-lock.c',
   'io.c',
   'mirror.c',
   'nbd.c',
   'null.c',
   'plug.c',
+  'preallocate.c',
+  'progress_meter.c',
   'qapi.c',
+  'qcow2.c',
   'qcow2-bitmap.c',
   'qcow2-cache.c',
   'qcow2-cluster.c',
   'qcow2-refcount.c',
   'qcow2-snapshot.c',
   'qcow2-threads.c',
-  'qcow2.c',
   'quorum.c',
   'raw-format.c',
   'reqlist.c',
   'snapshot.c',
   'snapshot-access.c',
-  'throttle-groups.c',
   'throttle.c',
+  'throttle-groups.c',
   'write-threshold.c',
 ), zstd, zlib, gnutls)
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1..aae4beb 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -283,8 +283,8 @@ static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
     need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                           s->cow_bitmap);
     if (need_cow) {
-        bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
-                               &align_offset, &align_bytes);
+        bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
+                                  &align_offset, &align_bytes);
     }
 
     if (align_bytes > max_bytes) {
@@ -502,7 +502,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
     job_pause_point(&s->common.job);
 
-    /* Find the number of consective dirty chunks following the first dirty
+    /* Find the number of consecutive dirty chunks following the first dirty
      * one, and wait for in flight requests in them. */
     bdrv_dirty_bitmap_lock(s->dirty_bitmap);
     while (nb_chunks * s->granularity < s->buf_size) {
@@ -576,8 +576,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
         int64_t target_offset;
         int64_t target_bytes;
         WITH_GRAPH_RDLOCK_GUARD() {
-            bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
-                                   &target_offset, &target_bytes);
+            bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
+                                      &target_offset, &target_bytes);
         }
         if (target_offset == offset &&
             target_bytes == io_bytes) {
diff --git a/block/nbd.c b/block/nbd.c
index 5322e66..cc48580 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -352,7 +352,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
     }
 
     qio_channel_set_blocking(s->ioc, false, NULL);
-    qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs));
+    qio_channel_set_follow_coroutine_ctx(s->ioc, true);
 
     /* successfully connected */
     WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
@@ -397,7 +397,6 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s)
 
     /* Finalize previous connection if any */
     if (s->ioc) {
-        qio_channel_detach_aio_context(s->ioc);
         yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
                                  nbd_yank, s->bs);
         object_unref(OBJECT(s->ioc));
@@ -2089,10 +2088,6 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
      * the reconnect_delay_timer cannot be active here.
      */
     assert(!s->reconnect_delay_timer);
-
-    if (s->ioc) {
-        qio_channel_attach_aio_context(s->ioc, new_context);
-    }
 }
 
 static void nbd_detach_aio_context(BlockDriverState *bs)
@@ -2101,10 +2096,6 @@ static void nbd_detach_aio_context(BlockDriverState *bs)
 
     assert(!s->open_timer);
     assert(!s->reconnect_delay_timer);
-
-    if (s->ioc) {
-        qio_channel_detach_aio_context(s->ioc);
-    }
 }
 
 static BlockDriver bdrv_nbd = {
diff --git a/block/parallels.c b/block/parallels.c
index 18e34ae..48c32d6 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
     return MIN(nb_sectors, ret);
 }
 
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+    off -= s->data_start << BDRV_SECTOR_BITS;
+    return off / s->cluster_size;
+}
+
 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors, int *pnum)
 {
@@ -188,7 +194,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 
     idx = sector_num / s->tracks;
     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
 
-    /* This function is called only by parallels_co_writev(), which will never
+    /*
+     * This function is called only by parallels_co_writev(), which will never
      * pass a sector_num at or beyond the end of the image (because the block
      * layer never passes such a sector_num to that function). Therefore, idx
      * is always below s->bat_size.
@@ -196,7 +203,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
      * exceed the image end. Therefore, idx + to_allocate cannot exceed
      * s->bat_size.
      * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
-     * will always fit into a uint32_t. */
+     * will always fit into a uint32_t.
+     */
     assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
 
     space = to_allocate * s->tracks;
@@ -230,13 +238,15 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
         }
     }
 
-    /* Try to read from backing to fill empty clusters
+    /*
+     * Try to read from backing to fill empty clusters
      * FIXME: 1. previous write_zeroes may be redundant
      *        2. most of data we read from backing will be rewritten by
      *           parallels_co_writev. On aligned-to-cluster write we do not need
      *           this read at all.
      *        3. it would be good to combine write of data from backing and new
-     *           data into one write call */
+     *           data into one write call.
+     */
     if (bs->backing) {
         int64_t nb_cow_sectors = to_allocate * s->tracks;
         int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
@@ -440,6 +450,81 @@ static void parallels_check_unclean(BlockDriverState *bs,
     }
 }
 
+/*
+ * Returns true if data_off is correct, otherwise false. In both cases
+ * correct_offset is set to the proper value.
+ */
+static bool parallels_test_data_off(BDRVParallelsState *s,
+                                    int64_t file_nb_sectors,
+                                    uint32_t *correct_offset)
+{
+    uint32_t data_off, min_off;
+    bool old_magic;
+
+    /*
+     * There are two slightly different image formats: with "WithoutFreeSpace"
+     * or "WithouFreSpacExt" magic words. Call the first one as "old magic".
+     * In such images data_off field can be zero. In this case the offset is
+     * calculated as the end of BAT table plus some padding to ensure sector
+     * size alignment.
+     */
+    old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
+
+    min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+    if (!old_magic) {
+        min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
+    }
+
+    if (correct_offset) {
+        *correct_offset = min_off;
+    }
+
+    data_off = le32_to_cpu(s->header->data_off);
+    if (data_off == 0 && old_magic) {
+        return true;
+    }
+
+    if (data_off < min_off || data_off > file_nb_sectors) {
+        return false;
+    }
+
+    if (correct_offset) {
+        *correct_offset = data_off;
+    }
+
+    return true;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
+                         BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    int64_t file_size;
+    uint32_t data_off;
+
+    file_size = bdrv_co_nb_sectors(bs->file->bs);
+    if (file_size < 0) {
+        res->check_errors++;
+        return file_size;
+    }
+
+    if (parallels_test_data_off(s, file_size, &data_off)) {
+        return 0;
+    }
+
+    res->corruptions++;
+    if (fix & BDRV_FIX_ERRORS) {
+        s->header->data_off = cpu_to_le32(data_off);
+        res->corruptions_fixed++;
+    }
+
+    fprintf(stderr, "%s data_off field has incorrect value\n",
+            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+
+    return 0;
+}
+
 static int coroutine_fn GRAPH_RDLOCK
 parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
                               BdrvCheckMode fix)
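parallels_test_data_off() accepts data_off == 0 only for old-magic images and otherwise requires it to lie between the end of the BAT and the end of the file, with an extra cluster-alignment constraint for new-magic images. A standalone sketch of the minimum-offset computation (all constants invented for the example; the real header and BAT entry sizes come from bat_entry_off()):

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SIZE 512

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
    #define ROUND_UP(n, m)     (DIV_ROUND_UP(n, m) * (m))

    int main(void)
    {
        uint32_t bat_size = 2048;                        /* BAT entries */
        uint64_t bat_end = 64 + 4 * (uint64_t)bat_size;  /* header + entries, bytes */
        uint64_t cluster_sectors = 2048;                 /* 1 MiB clusters */

        /* Old-magic images only need sector alignment for the data area... */
        uint64_t min_off = DIV_ROUND_UP(bat_end, SECTOR_SIZE);

        /* ...new-magic images additionally require cluster alignment. */
        uint64_t min_off_new = ROUND_UP(min_off, cluster_sectors);

        printf("min data_off: old=%llu new=%llu (sectors)\n",
               (unsigned long long)min_off, (unsigned long long)min_off_new);
        return 0;
    }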
"Repairing" : "ERROR", + size - res->image_end_offset); + res->leaks += count; + } if (fix & BDRV_FIX_LEAKS) { Error *local_err = NULL; @@ -517,13 +605,148 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, res->check_errors++; return ret; } - res->leaks_fixed += count; + if (explicit) { + res->leaks_fixed += count; + } } } return 0; } +static int coroutine_fn GRAPH_RDLOCK +parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t host_off, host_sector, guest_sector; + unsigned long *bitmap; + uint32_t i, bitmap_size, cluster_index, bat_entry; + int n, ret = 0; + uint64_t *buf = NULL; + bool fixed = false; + + /* + * Create a bitmap of used clusters. + * If a bit is set, there is a BAT entry pointing to this cluster. + * Loop through the BAT entries, check bits relevant to an entry offset. + * If bit is set, this entry is duplicated. Otherwise set the bit. + * + * We shouldn't worry about newly allocated clusters outside the image + * because they are created higher then any existing cluster pointed by + * a BAT entry. + */ + bitmap_size = host_cluster_index(s, res->image_end_offset); + if (bitmap_size == 0) { + return 0; + } + if (res->image_end_offset % s->cluster_size) { + /* A not aligned image end leads to a bitmap shorter by 1 */ + bitmap_size++; + } + + bitmap = bitmap_new(bitmap_size); + + buf = qemu_blockalign(bs, s->cluster_size); + + for (i = 0; i < s->bat_size; i++) { + host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + cluster_index = host_cluster_index(s, host_off); + assert(cluster_index < bitmap_size); + if (!test_bit(cluster_index, bitmap)) { + bitmap_set(bitmap, cluster_index, 1); + continue; + } + + /* this cluster duplicates another one */ + fprintf(stderr, "%s duplicate offset in BAT entry %u\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + + res->corruptions++; + + if (!(fix & BDRV_FIX_ERRORS)) { + continue; + } + + /* + * Reset the entry and allocate a new cluster + * for the relevant guest offset. In this way we let + * the lower layer to place the new cluster properly. + * Copy the original cluster to the allocated one. + * But before save the old offset value for repairing + * if we have an error. + */ + bat_entry = s->bat_bitmap[i]; + parallels_set_bat_entry(s, i, 0); + + ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out_repair_bat; + } + + guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS; + host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n); + if (host_sector < 0) { + res->check_errors++; + goto out_repair_bat; + } + host_off = host_sector << BDRV_SECTOR_BITS; + + ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0); + if (ret < 0) { + res->check_errors++; + goto out_repair_bat; + } + + if (host_off + s->cluster_size > res->image_end_offset) { + res->image_end_offset = host_off + s->cluster_size; + } + + /* + * In the future allocate_cluster() will reuse holed offsets + * inside the image. Keep the used clusters bitmap content + * consistent for the new allocated clusters too. + * + * Note, clusters allocated outside the current image are not + * considered, and the bitmap size doesn't change. 
@@ -565,12 +788,22 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
     WITH_QEMU_LOCK_GUARD(&s->lock) {
         parallels_check_unclean(bs, res, fix);
 
+        ret = parallels_check_data_off(bs, res, fix);
+        if (ret < 0) {
+            return ret;
+        }
+
         ret = parallels_check_outside_image(bs, res, fix);
         if (ret < 0) {
             return ret;
         }
 
-        ret = parallels_check_leak(bs, res, fix);
+        ret = parallels_check_leak(bs, res, fix, true);
+        if (ret < 0) {
+            return ret;
+        }
+
+        ret = parallels_check_duplicate(bs, res, fix);
         if (ret < 0) {
             return ret;
         }
@@ -798,10 +1031,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVParallelsState *s = bs->opaque;
     ParallelsHeader ph;
     int ret, size, i;
-    int64_t file_nb_sectors;
+    int64_t file_nb_sectors, sector;
+    uint32_t data_start;
     QemuOpts *opts = NULL;
     Error *local_err = NULL;
     char *buf;
+    bool data_off_is_correct;
 
     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
     if (ret < 0) {
@@ -859,15 +1094,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -ENOMEM;
         goto fail;
     }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
-    }
 
     ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
     if (ret < 0) {
@@ -875,33 +1101,20 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     }
     s->bat_bitmap = (uint32_t *)(s->header + 1);
 
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= file_nb_sectors) {
-            if (flags & BDRV_O_CHECK) {
-                continue;
-            }
-            error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
-                       "is larger than file size (%" PRIi64 ")",
-                       off << BDRV_SECTOR_BITS, i,
-                       file_nb_sectors << BDRV_SECTOR_BITS);
-            ret = -EINVAL;
-            goto fail;
-        }
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
     if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
         s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
+    }
+
+    data_off_is_correct = parallels_test_data_off(s, file_nb_sectors,
+                                                  &data_start);
+    s->data_start = data_start;
+    s->data_end = s->data_start;
+    if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+        /*
+         * There is not enough unused space to fit to block align between BAT
+         * and actual data. We can't avoid read-modify-write...
+         */
+        s->header_size = size;
     }
 
     opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
@@ -962,10 +1175,41 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
                    bdrv_get_device_or_node_name(bs));
     ret = migrate_add_blocker(s->migration_blocker, errp);
     if (ret < 0) {
-        error_free(s->migration_blocker);
+        error_setg(errp, "Migration blocker error");
         goto fail;
     }
     qemu_co_mutex_init(&s->lock);
+
+    for (i = 0; i < s->bat_size; i++) {
+        sector = bat2sect(s, i);
+        if (sector + s->tracks > s->data_end) {
+            s->data_end = sector + s->tracks;
+        }
+    }
+
+    /*
+     * We don't repair the image here if it's opened for checks. Also we don't
+     * want to change inactive images and can't change readonly images.
+     */
+    if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
+        return 0;
+    }
+
+    /*
+     * Repair the image if it's dirty or
+     * out-of-image corruption was detected.
+     */
+    if (s->data_end > file_nb_sectors || s->header_unclean
+        || !data_off_is_correct) {
+        BdrvCheckResult res;
+        ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Could not repair corrupted image");
+            migrate_del_blocker(s->migration_blocker);
+            goto fail;
+        }
+    }
+
     return 0;
 
 fail_format:
@@ -973,6 +1217,12 @@ fail_format:
 fail_options:
     ret = -EINVAL;
 fail:
+    /*
+     * "s" object was allocated by g_malloc0 so we can safely
+     * try to free its fields even they were not allocated.
+     */
+    error_free(s->migration_blocker);
+    g_free(s->bat_dirty_bmap);
     qemu_vfree(s->header);
     return ret;
 }
diff --git a/block/parallels.h b/block/parallels.h
index f22f43f..4e53e95 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -75,6 +75,7 @@ typedef struct BDRVParallelsState {
     uint32_t *bat_bitmap;
     unsigned int bat_size;
 
+    int64_t data_start;
     int64_t data_end;
     uint64_t prealloc_size;
     ParallelsPreallocMode prealloc_mode;
diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125..3d0f621 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -535,7 +535,7 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
     }
 }
 
-BlockDriver bdrv_preallocate_filter = {
+static BlockDriver bdrv_preallocate_filter = {
     .format_name = "preallocate",
     .instance_size = sizeof(BDRVPreallocateState),
 
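The reworked fail: path in parallels_open() relies on a common C cleanup idiom, which the new comment spells out: because the state struct comes from g_malloc0(), every pointer field starts out NULL, and g_free()/error_free() accept NULL, so a single unwind path can free fields regardless of how far initialization got. A sketch of the idiom with a hypothetical struct:

    #include <glib.h>

    typedef struct State {
        char *name;
        unsigned long *bitmap;
    } State;

    static State *state_new(gboolean fail_early)
    {
        State *s = g_malloc0(sizeof(*s));   /* all fields start as NULL */

        s->name = g_strdup("demo");
        if (fail_early) {
            goto fail;
        }
        s->bitmap = g_new0(unsigned long, 64);
        return s;

    fail:
        /* Safe even for fields never assigned: g_free(NULL) is a no-op. */
        g_free(s->bitmap);
        g_free(s->name);
        g_free(s);
        return NULL;
    }

    int main(void)
    {
        State *s = state_new(FALSE);
        g_free(s->bitmap);
        g_free(s->name);
        g_free(s);
        state_new(TRUE);    /* error path frees everything internally */
        return 0;
    }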
diff --git a/block/qapi.c b/block/qapi.c
index f34f95e..1cbb093 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 {
     ImageInfo **p_image_info;
     ImageInfo *backing_info;
-    BlockDriverState *bs0, *backing;
+    BlockDriverState *backing;
     BlockDeviceInfo *info;
     ERRP_GUARD();
 
@@ -145,7 +145,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 
     info->write_threshold = bdrv_write_threshold_get(bs);
 
-    bs0 = bs;
     p_image_info = &info->image;
     info->backing_file_depth = 0;
 
@@ -153,7 +152,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
      * Skip automatically inserted nodes that the user isn't aware of for
      * query-block (blk != NULL), but not for query-named-block-nodes
      */
-    bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+    bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
     if (*errp) {
         qapi_free_BlockDeviceInfo(info);
         return NULL;
@@ -310,33 +309,6 @@ out:
 }
 
 /**
- * bdrv_query_block_node_info:
- * @bs: block node to examine
- * @p_info: location to store node information
- * @errp: location to store error information
- *
- * Store image information about @bs in @p_info.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_block_node_info(BlockDriverState *bs,
-                                BlockNodeInfo **p_info,
-                                Error **errp)
-{
-    BlockNodeInfo *info;
-    ERRP_GUARD();
-
-    info = g_new0(BlockNodeInfo, 1);
-    bdrv_do_query_node_info(bs, info, errp);
-    if (*errp) {
-        qapi_free_BlockNodeInfo(info);
-        return;
-    }
-
-    *p_info = info;
-}
-
-/**
  * bdrv_query_image_info:
  * @bs: block node to examine
  * @p_info: location to store image information
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 5095e99..996d121 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2645,7 +2645,7 @@ rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
      * repeat all this until the reftable stops growing.
      *
      * (This loop will terminate, because with every cluster the
-     * reftable grows, it can accomodate a multitude of more refcounts,
+     * reftable grows, it can accommodate a multitude of more refcounts,
      * so that at some point this must be able to cover the reftable
      * and all refblocks describing it.)
      *
diff --git a/block/qcow2.c b/block/qcow2.c
index c51388e..b48cd9c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5197,6 +5197,7 @@ qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVQcow2State *s = bs->opaque;
     bdi->cluster_size = s->cluster_size;
+    bdi->subcluster_size = s->subcluster_size;
     bdi->vm_state_offset = qcow2_vm_state_offset(s);
     bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
     return 0;
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 67ea339..8d4e893 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
     *nshared = BLK_PERM_ALL;
 }
 
-BlockDriver bdrv_snapshot_access_drv = {
+static BlockDriver bdrv_snapshot_access_drv = {
     .format_name = "snapshot-access",
 
     .bdrv_open = snapshot_access_open,
diff --git a/block/vhdx.c b/block/vhdx.c
index f2c3a80..a67edcc 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1077,7 +1077,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    /* endian convert populated BAT field entires */
+    /* endian convert populated BAT field entries */
     for (i = 0; i < s->bat_entries; i++) {
         s->bat[i] = le64_to_cpu(s->bat[i]);
     }
diff --git a/block/vhdx.h b/block/vhdx.h
index 7db746c..455a627 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -212,7 +212,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
     uint32_t    sequence_high;  /* 4 MSB of 8 byte sequence_number */
     uint8_t     data[4084];     /* raw data, bytes 8-4091 (inclusive).
                                    see the data descriptor field for the
-                                   other mising bytes */
+                                   other missing bytes */
     uint32_t    sequence_low;   /* 4 LSB of 8 byte sequence_number */
 } VHDXLogDataSector;
 
@@ -257,7 +257,7 @@ typedef struct QEMU_PACKED VHDXMetadataTableHeader {
 #define VHDX_META_FLAGS_IS_USER          0x01   /* max 1024 entries */
 #define VHDX_META_FLAGS_IS_VIRTUAL_DISK  0x02   /* virtual disk metadata if set,
-                                                   otherwise file metdata */
+                                                   otherwise file metadata */
 #define VHDX_META_FLAGS_IS_REQUIRED      0x04   /* parse must understand this
                                                    entry to open the file */
 
 typedef struct QEMU_PACKED VHDXMetadataTableEntry {
diff --git a/block/vmdk.c b/block/vmdk.c
index 70066c2..58ce290 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1207,7 +1207,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                                       bs, &child_of_bds, extent_role, false,
                                       &local_err);
         g_free(extent_path);
-        if (local_err) {
+        if (!extent_file) {
             error_propagate(errp, local_err);
             ret = -EINVAL;
             goto out;
diff --git a/block/vpc.c b/block/vpc.c
index 3810a60..ceb87dd 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
        miss sparse read optimization, but it's not a problem in terms
        of correctness. */
     if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
+        g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
         int r;
 
         s->last_bitmap_offset = bitmap_offset;
@@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset)
     int64_t bat_offset;
     uint32_t index, bat_value;
     int ret;
-    uint8_t bitmap[s->bitmap_size];
+    g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
 
     /* Check if sector_num is valid */
     if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
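The vpc.c hunks replace variable-length arrays with g_autofree heap buffers; since s->bitmap_size comes from the image header, a VLA would let a corrupted image control stack growth, while g_malloc() moves the buffer to the heap and g_autofree releases it automatically when the variable goes out of scope. A small standalone demonstration of the pattern:

    #include <glib.h>
    #include <string.h>

    static int checksum(size_t bitmap_size)
    {
        /* Heap allocation with automatic cleanup instead of a VLA: the
         * buffer is g_free()d when 'bitmap' goes out of scope, on every
         * return path. */
        g_autofree guint8 *bitmap = g_malloc(bitmap_size);

        memset(bitmap, 0xff, bitmap_size);
        if (bitmap[0] != 0xff) {
            return -1;          /* early return still frees the buffer */
        }
        return bitmap[bitmap_size - 1];
    }

    int main(void)
    {
        return checksum(4096) == 0xff ? 0 : 1;
    }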