Diffstat (limited to 'block')
-rw-r--r--  block/block-copy.c                   |   4
-rw-r--r--  block/copy-before-write.c            |   2
-rw-r--r--  block/export/vduse-blk.c             |   2
-rw-r--r--  block/export/vhost-user-blk-server.c |   2
-rw-r--r--  block/export/vhost-user-blk-server.h |   2
-rw-r--r--  block/file-posix.c                   |   8
-rw-r--r--  block/graph-lock.c                   |   2
-rw-r--r--  block/io.c                           |  82
-rw-r--r--  block/iscsi.c                        |   1
-rw-r--r--  block/linux-aio.c                    |   2
-rw-r--r--  block/meson.build                    |  12
-rw-r--r--  block/mirror.c                       |  10
-rw-r--r--  block/nbd.c                          |  11
-rw-r--r--  block/parallels.c                    | 346
-rw-r--r--  block/parallels.h                    |   1
-rw-r--r--  block/preallocate.c                  |   2
-rw-r--r--  block/qapi.c                         |  32
-rw-r--r--  block/qcow2-refcount.c               |   2
-rw-r--r--  block/qcow2.c                        |   1
-rw-r--r--  block/snapshot-access.c              |   2
-rw-r--r--  block/vhdx.c                         |   2
-rw-r--r--  block/vhdx.h                         |   4
-rw-r--r--  block/vmdk.c                         |   2
-rw-r--r--  block/vpc.c                          |   4
24 files changed, 380 insertions, 158 deletions
diff --git a/block/block-copy.c b/block/block-copy.c
index e13d7bc..1c60368 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -67,7 +67,7 @@ typedef struct BlockCopyCallState {
QLIST_ENTRY(BlockCopyCallState) list;
/*
- * Fields that report information about return values and erros.
+ * Fields that report information about return values and errors.
* Protected by lock in BlockCopyState.
*/
bool error_is_read;
@@ -462,7 +462,7 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
* Do copy of cluster-aligned chunk. Requested region is allowed to exceed
* s->len only to cover last cluster when s->len is not aligned to clusters.
*
- * No sync here: nor bitmap neighter intersecting requests handling, only copy.
+ * No sync here: neither bitmap nor intersecting requests handling, only copy.
*
* @method is an in-out argument, so that copy_range can be either extended to
* a full-size buffer or disabled if the copy_range attempt fails. The output
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index b866e42..9a0e2b6 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs)
s->bcs = NULL;
}
-BlockDriver bdrv_cbw_filter = {
+static BlockDriver bdrv_cbw_filter = {
.format_name = "copy-before-write",
.instance_size = sizeof(BDRVCopyBeforeWriteState),
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 83b0554..172f73c 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -138,7 +138,7 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
on_vduse_vq_kick, NULL, NULL, NULL, vq);
- /* Make sure we don't miss any kick afer reconnecting */
+ /* Make sure we don't miss any kick after reconnecting */
eventfd_write(vduse_queue_get_fd(vq), 1);
}
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
index f7b5073..fe2cee3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -1,5 +1,5 @@
/*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
*
* Parts of the code based on nbd/server.c.
*
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
index fcf46fc..77fb5c0 100644
--- a/block/export/vhost-user-blk-server.h
+++ b/block/export/vhost-user-blk-server.h
@@ -1,5 +1,5 @@
/*
- * Sharing QEMU block devices via vhost-user protocal
+ * Sharing QEMU block devices via vhost-user protocol
*
* Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
* Copyright (c) 2020 Red Hat, Inc.
diff --git a/block/file-posix.c b/block/file-posix.c
index aa89789..50e2b20 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1159,9 +1159,9 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* As part of reopen prepare we also want to create new fd by
* raw_reconfigure_getfd(). But it wants updated "perm", when in
* bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to
- * permission update. Happily, permission update is always a part (a seprate
- * stage) of bdrv_reopen_multiple() so we can rely on this fact and
- * reconfigure fd in raw_check_perm().
+ * permission update. Happily, permission update is always a part
+ * (a separate stage) of bdrv_reopen_multiple() so we can rely on this
+ * fact and reconfigure fd in raw_check_perm().
*/
s->reopen_state = state;
@@ -3372,7 +3372,7 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret)
* of an array of zone descriptors.
* zones is an array of zone descriptors to hold zone information on reply;
* offset can be any byte within the entire size of the device;
- * nr_zones is the maxium number of sectors the command should operate on.
+ * nr_zones is the maximum number of sectors the command should operate on.
*/
#if defined(CONFIG_BLKZONED)
static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t offset,
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 5e66f01..f357a2c 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -95,7 +95,7 @@ static uint32_t reader_count(void)
QEMU_LOCK_GUARD(&aio_context_list_lock);
- /* rd can temporarly be negative, but the total will *always* be >= 0 */
+ /* rd can temporarily be negative, but the total will *always* be >= 0 */
rd = orphaned_reader_count;
QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) {
rd += qatomic_read(&brdv_graph->reader_count);
diff --git a/block/io.c b/block/io.c
index 055fcf7..ba23a9b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -342,7 +342,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
* timer callback), it is a bug in the caller that should be fixed. */
assert(data.done);
- /* Reaquire the AioContext of bs if we dropped it */
+ /* Reacquire the AioContext of bs if we dropped it */
if (ctx != co_ctx) {
aio_context_acquire(ctx);
}
@@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
qatomic_dec(&req->bs->serialising_in_flight);
}
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
+
+ /*
+ * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+ * anymore because the request has been removed from the list, so it's safe
+ * to restart the queue outside reqs_lock to minimize the critical section.
+ */
qemu_co_queue_restart_all(&req->wait_queue);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
/**
@@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
qemu_co_queue_init(&req->wait_queue);
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
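
These reqs_lock hunks (and the matching ones further down) convert the lock from a coroutine mutex to a plain QemuMutex: every critical section now only manipulates the request list and never yields, and coroutine wakeups are moved outside the lock. A minimal sketch of the pattern, with a hypothetical request type and assuming QEMU's qemu/thread.h and coroutine APIs:

    /* Sketch only: a short, non-yielding critical section can use a plain
     * mutex; the wait queue is restarted after the lock is dropped, which
     * is safe once the request can no longer be found in the list. */
    static void request_end(ExampleRequest *req)    /* hypothetical type */
    {
        qemu_mutex_lock(&req->bs->reqs_lock);
        QLIST_REMOVE(req, list);            /* no new waiters after this */
        qemu_mutex_unlock(&req->bs->reqs_lock);

        qemu_co_queue_restart_all(&req->wait_queue);  /* wake them unlocked */
    }
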
@@ -728,21 +734,21 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to subcluster (if supported) or cluster boundaries
*/
void coroutine_fn GRAPH_RDLOCK
-bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *cluster_offset, int64_t *cluster_bytes)
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *align_offset, int64_t *align_bytes)
{
BlockDriverInfo bdi;
IO_CODE();
- if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
- *cluster_offset = offset;
- *cluster_bytes = bytes;
+ if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
+ *align_offset = offset;
+ *align_bytes = bytes;
} else {
- int64_t c = bdi.cluster_size;
- *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
- *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ int64_t c = bdi.subcluster_size;
+ *align_offset = QEMU_ALIGN_DOWN(offset, c);
+ *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
}
}
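
To sanity-check the new rounding with concrete (hypothetical) numbers: with bdi.subcluster_size = 4096, a request at offset 5000 for 3000 bytes yields align_offset = 4096 and align_bytes = 4096, so the aligned region [4096, 8192) covers the requested [5000, 8000). A standalone sketch, with the two QEMU alignment macros re-derived locally so the arithmetic can be checked outside the tree:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-ins for QEMU_ALIGN_DOWN/QEMU_ALIGN_UP. */
    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
    #define ALIGN_UP(n, m)   (((n) + (m) - 1) / (m) * (m))

    int main(void)
    {
        int64_t c = 4096, offset = 5000, bytes = 3000;
        int64_t align_offset = ALIGN_DOWN(offset, c);
        int64_t align_bytes = ALIGN_UP(offset - align_offset + bytes, c);
        /* Prints [4096, 8192) */
        printf("[%" PRId64 ", %" PRId64 ")\n",
               align_offset, align_offset + align_bytes);
        return 0;
    }
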
@@ -787,9 +793,9 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
return;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bdrv_wait_serialising_requests_locked(self);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
@@ -797,12 +803,12 @@ void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
{
IO_CODE();
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
tracked_request_set_serialising(req, align);
bdrv_wait_serialising_requests_locked(req);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
}
int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
@@ -1168,8 +1174,8 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
void *bounce_buffer = NULL;
BlockDriver *drv = bs->drv;
- int64_t cluster_offset;
- int64_t cluster_bytes;
+ int64_t align_offset;
+ int64_t align_bytes;
int64_t skip_bytes;
int ret;
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
@@ -1203,28 +1209,28 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
* BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
* is one reason we loop rather than doing it all at once.
*/
- bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- skip_bytes = offset - cluster_offset;
+ bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
+ skip_bytes = offset - align_offset;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
- cluster_offset, cluster_bytes);
+ align_offset, align_bytes);
- while (cluster_bytes) {
+ while (align_bytes) {
int64_t pnum;
if (skip_write) {
ret = 1; /* "already allocated", so nothing will be copied */
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
} else {
- ret = bdrv_is_allocated(bs, cluster_offset,
- MIN(cluster_bytes, max_transfer), &pnum);
+ ret = bdrv_is_allocated(bs, align_offset,
+ MIN(align_bytes, max_transfer), &pnum);
if (ret < 0) {
/*
* Safe to treat errors in querying allocation as if
* unallocated; we'll probably fail again soon on the
* read, but at least that will set a decent errno.
*/
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
}
/* Stop at EOF if the image ends in the middle of the cluster */
@@ -1242,7 +1248,7 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
/* Must copy-on-read; use the bounce buffer */
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
if (!bounce_buffer) {
- int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+ int64_t max_we_need = MAX(pnum, align_bytes - pnum);
int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
@@ -1254,7 +1260,7 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
}
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
- ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+ ret = bdrv_driver_preadv(bs, align_offset, pnum,
&local_qiov, 0, 0);
if (ret < 0) {
goto err;
@@ -1266,13 +1272,13 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+ ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+ ret = bdrv_driver_pwritev(bs, align_offset, pnum,
&local_qiov, 0,
BDRV_REQ_WRITE_UNCHANGED);
}
@@ -1301,8 +1307,8 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
}
}
- cluster_offset += pnum;
- cluster_bytes -= pnum;
+ align_offset += pnum;
+ align_bytes -= pnum;
progress += pnum - skip_bytes;
skip_bytes = 0;
}
@@ -2996,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
current_gen = qatomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
@@ -3006,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
@@ -3094,11 +3100,11 @@ out:
bs->flushed_gen = current_gen;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
diff --git a/block/iscsi.c b/block/iscsi.c
index 34f97ab..5640c8b 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return NULL;
}
+ /* Must use malloc(): this is freed via scsi_free_scsi_task() */
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 561c71a..1a51503 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -227,7 +227,7 @@ static void qemu_laio_process_completions(LinuxAioState *s)
/* If we are nested we have to notify the level above that we are done
* by setting event_max to zero, upper level will then jump out of it's
- * own `for` loop. If we are the last all counters droped to zero. */
+ * own `for` loop. If we are the last, all counters have dropped to zero. */
s->event_max = 0;
s->event_idx = 0;
}
diff --git a/block/meson.build b/block/meson.build
index 529fc17..f351b9d 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -4,41 +4,41 @@ block_ss.add(files(
'aio_task.c',
'amend.c',
'backup.c',
- 'copy-before-write.c',
'blkdebug.c',
'blklogwrites.c',
'blkverify.c',
'block-backend.c',
'block-copy.c',
- 'graph-lock.c',
'commit.c',
+ 'copy-before-write.c',
'copy-on-read.c',
- 'preallocate.c',
- 'progress_meter.c',
'create.c',
'crypto.c',
'dirty-bitmap.c',
'filter-compress.c',
+ 'graph-lock.c',
'io.c',
'mirror.c',
'nbd.c',
'null.c',
'plug.c',
+ 'preallocate.c',
+ 'progress_meter.c',
'qapi.c',
+ 'qcow2.c',
'qcow2-bitmap.c',
'qcow2-cache.c',
'qcow2-cluster.c',
'qcow2-refcount.c',
'qcow2-snapshot.c',
'qcow2-threads.c',
- 'qcow2.c',
'quorum.c',
'raw-format.c',
'reqlist.c',
'snapshot.c',
'snapshot-access.c',
- 'throttle-groups.c',
'throttle.c',
+ 'throttle-groups.c',
'write-threshold.c',
), zstd, zlib, gnutls)
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1..aae4beb 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -283,8 +283,8 @@ static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
s->cow_bitmap);
if (need_cow) {
- bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
- &align_offset, &align_bytes);
+ bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
+ &align_offset, &align_bytes);
}
if (align_bytes > max_bytes) {
@@ -502,7 +502,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
job_pause_point(&s->common.job);
- /* Find the number of consective dirty chunks following the first dirty
+ /* Find the number of consecutive dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
while (nb_chunks * s->granularity < s->buf_size) {
@@ -576,8 +576,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
int64_t target_offset;
int64_t target_bytes;
WITH_GRAPH_RDLOCK_GUARD() {
- bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
- &target_offset, &target_bytes);
+ bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
+ &target_offset, &target_bytes);
}
if (target_offset == offset &&
target_bytes == io_bytes) {
diff --git a/block/nbd.c b/block/nbd.c
index 5322e66..cc48580 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -352,7 +352,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
}
qio_channel_set_blocking(s->ioc, false, NULL);
- qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs));
+ qio_channel_set_follow_coroutine_ctx(s->ioc, true);
/* successfully connected */
WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
@@ -397,7 +397,6 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s)
/* Finalize previous connection if any */
if (s->ioc) {
- qio_channel_detach_aio_context(s->ioc);
yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name),
nbd_yank, s->bs);
object_unref(OBJECT(s->ioc));
@@ -2089,10 +2088,6 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
* the reconnect_delay_timer cannot be active here.
*/
assert(!s->reconnect_delay_timer);
-
- if (s->ioc) {
- qio_channel_attach_aio_context(s->ioc, new_context);
- }
}
static void nbd_detach_aio_context(BlockDriverState *bs)
@@ -2101,10 +2096,6 @@ static void nbd_detach_aio_context(BlockDriverState *bs)
assert(!s->open_timer);
assert(!s->reconnect_delay_timer);
-
- if (s->ioc) {
- qio_channel_detach_aio_context(s->ioc);
- }
}
static BlockDriver bdrv_nbd = {
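
The NBD hunks above replace the manual qio_channel_attach_aio_context()/qio_channel_detach_aio_context() bookkeeping with a single property: once qio_channel_set_follow_coroutine_ctx() is enabled, the channel registers its handlers in whatever AioContext the current coroutine runs in. A hedged usage sketch (assuming QEMU's io/channel.h):

    /* Sketch: after enabling the property, the channel follows the
     * coroutine's AioContext, so no manual re-attachment is needed when
     * the node changes context. */
    static void example_setup(QIOChannel *ioc)
    {
        qio_channel_set_blocking(ioc, false, NULL);
        qio_channel_set_follow_coroutine_ctx(ioc, true);
    }
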
diff --git a/block/parallels.c b/block/parallels.c
index 18e34ae..48c32d6 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,12 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
return MIN(nb_sectors, ret);
}
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+ off -= s->data_start << BDRV_SECTOR_BITS;
+ return off / s->cluster_size;
+}
+
static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
int nb_sectors, int *pnum)
{
@@ -188,7 +194,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
idx = sector_num / s->tracks;
to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
- /* This function is called only by parallels_co_writev(), which will never
+ /*
+ * This function is called only by parallels_co_writev(), which will never
* pass a sector_num at or beyond the end of the image (because the block
* layer never passes such a sector_num to that function). Therefore, idx
* is always below s->bat_size.
@@ -196,7 +203,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
* exceed the image end. Therefore, idx + to_allocate cannot exceed
* s->bat_size.
* Note that s->bat_size is an unsigned int, therefore idx + to_allocate
- * will always fit into a uint32_t. */
+ * will always fit into a uint32_t.
+ */
assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
space = to_allocate * s->tracks;
@@ -230,13 +238,15 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num,
}
}
- /* Try to read from backing to fill empty clusters
+ /*
+ * Try to read from backing to fill empty clusters
* FIXME: 1. previous write_zeroes may be redundant
* 2. most of data we read from backing will be rewritten by
* parallels_co_writev. On aligned-to-cluster write we do not need
* this read at all.
* 3. it would be good to combine write of data from backing and new
- * data into one write call */
+ * data into one write call.
+ */
if (bs->backing) {
int64_t nb_cow_sectors = to_allocate * s->tracks;
int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
@@ -440,6 +450,81 @@ static void parallels_check_unclean(BlockDriverState *bs,
}
}
+/*
+ * Returns true if data_off is correct, otherwise false. In both cases,
+ * if correct_offset is non-NULL, it is set to the proper value.
+ */
+static bool parallels_test_data_off(BDRVParallelsState *s,
+ int64_t file_nb_sectors,
+ uint32_t *correct_offset)
+{
+ uint32_t data_off, min_off;
+ bool old_magic;
+
+ /*
+ * There are two slightly different image formats: with "WithoutFreeSpace"
+ * or "WithouFreSpacExt" magic words. Call the first one as "old magic".
+ * In such images data_off field can be zero. In this case the offset is
+ * calculated as the end of BAT table plus some padding to ensure sector
+ * size alignment.
+ */
+ old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
+
+ min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+ if (!old_magic) {
+ min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
+ }
+
+ if (correct_offset) {
+ *correct_offset = min_off;
+ }
+
+ data_off = le32_to_cpu(s->header->data_off);
+ if (data_off == 0 && old_magic) {
+ return true;
+ }
+
+ if (data_off < min_off || data_off > file_nb_sectors) {
+ return false;
+ }
+
+ if (correct_offset) {
+ *correct_offset = data_off;
+ }
+
+ return true;
+}
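
As a concrete (hypothetical) walk-through of the min_off computation above: if the BAT ends at byte 66560, DIV_ROUND_UP(66560, 512) gives 130 sectors; with the newer magic and 1 MiB clusters (2048 sectors each), that is rounded up to 2048, so any data_off below 2048 sectors would be rejected. A standalone check:

    #include <stdio.h>

    /* Local stand-ins for QEMU's DIV_ROUND_UP/ROUND_UP macros. */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
    #define ROUND_UP(n, m)     (DIV_ROUND_UP((n), (m)) * (m))

    int main(void)
    {
        unsigned bat_end_bytes = 66560;
        unsigned min_off = DIV_ROUND_UP(bat_end_bytes, 512); /* 130 sectors */
        min_off = ROUND_UP(min_off, 2048);  /* cluster-align: 2048 sectors */
        printf("%u\n", min_off);            /* prints 2048 */
        return 0;
    }
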
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t file_size;
+ uint32_t data_off;
+
+ file_size = bdrv_co_nb_sectors(bs->file->bs);
+ if (file_size < 0) {
+ res->check_errors++;
+ return file_size;
+ }
+
+ if (parallels_test_data_off(s, file_size, &data_off)) {
+ return 0;
+ }
+
+ res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
+ s->header->data_off = cpu_to_le32(data_off);
+ res->corruptions_fixed++;
+ }
+
+ fprintf(stderr, "%s data_off field has incorrect value\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+
+ return 0;
+}
+
static int coroutine_fn GRAPH_RDLOCK
parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
@@ -484,13 +569,13 @@ parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
static int coroutine_fn GRAPH_RDLOCK
parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
+ BdrvCheckMode fix, bool explicit)
{
BDRVParallelsState *s = bs->opaque;
int64_t size;
int ret;
- size = bdrv_getlength(bs->file->bs);
+ size = bdrv_co_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -499,10 +584,13 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
if (size > res->image_end_offset) {
int64_t count;
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
- fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
- fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
- size - res->image_end_offset);
- res->leaks += count;
+ if (explicit) {
+ fprintf(stderr,
+ "%s space leaked at the end of the image %" PRId64 "\n",
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+ size - res->image_end_offset);
+ res->leaks += count;
+ }
if (fix & BDRV_FIX_LEAKS) {
Error *local_err = NULL;
@@ -517,13 +605,148 @@ parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
res->check_errors++;
return ret;
}
- res->leaks_fixed += count;
+ if (explicit) {
+ res->leaks_fixed += count;
+ }
}
}
return 0;
}
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t host_off, host_sector, guest_sector;
+ unsigned long *bitmap;
+ uint32_t i, bitmap_size, cluster_index, bat_entry;
+ int n, ret = 0;
+ uint64_t *buf = NULL;
+ bool fixed = false;
+
+ /*
+ * Create a bitmap of used clusters.
+ * If a bit is set, there is a BAT entry pointing to this cluster.
+ * Loop through the BAT entries, checking the bit that corresponds to
+ * each entry's offset. If the bit is already set, the entry is a
+ * duplicate; otherwise set the bit.
+ *
+ * We shouldn't worry about newly allocated clusters outside the image
+ * because they are created higher than any existing cluster pointed to
+ * by a BAT entry.
+ */
+ bitmap_size = host_cluster_index(s, res->image_end_offset);
+ if (bitmap_size == 0) {
+ return 0;
+ }
+ if (res->image_end_offset % s->cluster_size) {
+ /* An unaligned image end leads to a bitmap shorter by 1 */
+ bitmap_size++;
+ }
+
+ bitmap = bitmap_new(bitmap_size);
+
+ buf = qemu_blockalign(bs, s->cluster_size);
+
+ for (i = 0; i < s->bat_size; i++) {
+ host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+ if (host_off == 0) {
+ continue;
+ }
+
+ cluster_index = host_cluster_index(s, host_off);
+ assert(cluster_index < bitmap_size);
+ if (!test_bit(cluster_index, bitmap)) {
+ bitmap_set(bitmap, cluster_index, 1);
+ continue;
+ }
+
+ /* this cluster duplicates another one */
+ fprintf(stderr, "%s duplicate offset in BAT entry %u\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+ res->corruptions++;
+
+ if (!(fix & BDRV_FIX_ERRORS)) {
+ continue;
+ }
+
+ /*
+ * Reset the entry and allocate a new cluster
+ * for the relevant guest offset. In this way we let
+ * the lower layer place the new cluster properly.
+ * Copy the original cluster to the allocated one.
+ * But first, save the old offset value so that we can
+ * repair the entry if an error occurs.
+ */
+ bat_entry = s->bat_bitmap[i];
+ parallels_set_bat_entry(s, i, 0);
+
+ ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0);
+ if (ret < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+
+ guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS;
+ host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n);
+ if (host_sector < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+ host_off = host_sector << BDRV_SECTOR_BITS;
+
+ ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0);
+ if (ret < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+
+ if (host_off + s->cluster_size > res->image_end_offset) {
+ res->image_end_offset = host_off + s->cluster_size;
+ }
+
+ /*
+ * In the future allocate_clusters() will reuse holes (unused offsets)
+ * inside the image. Keep the used clusters bitmap content
+ * consistent for the new allocated clusters too.
+ *
+ * Note, clusters allocated outside the current image are not
+ * considered, and the bitmap size doesn't change.
+ */
+ cluster_index = host_cluster_index(s, host_off);
+ if (cluster_index < bitmap_size) {
+ bitmap_set(bitmap, cluster_index, 1);
+ }
+
+ fixed = true;
+ res->corruptions_fixed++;
+
+ }
+
+ if (fixed) {
+ /*
+ * When new clusters are allocated, the file size increases by
+ * 128 Mb. We need to truncate the file to the right size. Let
+ * the leak fix code make its job without res changing.
+ */
+ ret = parallels_check_leak(bs, res, fix, false);
+ }
+
+out_free:
+ g_free(buf);
+ g_free(bitmap);
+ return ret;
+/*
+ * We can get here only from places where i and bat_entry have
+ * meaningful values.
+ */
+out_repair_bat:
+ s->bat_bitmap[i] = bat_entry;
+ goto out_free;
+}
+
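
The bitmap scan above reduces to a small, general technique: mark each referenced cluster in a bitmap and flag any cluster seen twice. A minimal, self-contained sketch with made-up data (plain C arrays standing in for QEMU's bitmap helpers):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical BAT: entry 3 points to the same cluster as entry 1. */
        uint32_t bat[] = { 0, 3, 1, 3, 2 };
        bool used[8] = { false };

        for (unsigned i = 0; i < sizeof(bat) / sizeof(bat[0]); i++) {
            if (bat[i] == 0) {
                continue;                   /* unallocated entry */
            }
            if (used[bat[i]]) {
                printf("duplicate offset in BAT entry %u\n", i);
            } else {
                used[bat[i]] = true;        /* first reference wins */
            }
        }
        return 0;
    }
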
static void parallels_collect_statistics(BlockDriverState *bs,
BdrvCheckResult *res,
BdrvCheckMode fix)
@@ -565,12 +788,22 @@ parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
WITH_QEMU_LOCK_GUARD(&s->lock) {
parallels_check_unclean(bs, res, fix);
+ ret = parallels_check_data_off(bs, res, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = parallels_check_outside_image(bs, res, fix);
if (ret < 0) {
return ret;
}
- ret = parallels_check_leak(bs, res, fix);
+ ret = parallels_check_leak(bs, res, fix, true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = parallels_check_duplicate(bs, res, fix);
if (ret < 0) {
return ret;
}
@@ -798,10 +1031,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
BDRVParallelsState *s = bs->opaque;
ParallelsHeader ph;
int ret, size, i;
- int64_t file_nb_sectors;
+ int64_t file_nb_sectors, sector;
+ uint32_t data_start;
QemuOpts *opts = NULL;
Error *local_err = NULL;
char *buf;
+ bool data_off_is_correct;
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
@@ -859,15 +1094,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOMEM;
goto fail;
}
- s->data_end = le32_to_cpu(ph.data_off);
- if (s->data_end == 0) {
- s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
- }
- if (s->data_end < s->header_size) {
- /* there is not enough unused space to fit to block align between BAT
- and actual data. We can't avoid read-modify-write... */
- s->header_size = size;
- }
ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
if (ret < 0) {
@@ -875,33 +1101,20 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
}
s->bat_bitmap = (uint32_t *)(s->header + 1);
- for (i = 0; i < s->bat_size; i++) {
- int64_t off = bat2sect(s, i);
- if (off >= file_nb_sectors) {
- if (flags & BDRV_O_CHECK) {
- continue;
- }
- error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
- "is larger than file size (%" PRIi64 ")",
- off << BDRV_SECTOR_BITS, i,
- file_nb_sectors << BDRV_SECTOR_BITS);
- ret = -EINVAL;
- goto fail;
- }
- if (off >= s->data_end) {
- s->data_end = off + s->tracks;
- }
- }
-
if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
- /* Image was not closed correctly. The check is mandatory */
s->header_unclean = true;
- if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
- error_setg(errp, "parallels: Image was not closed correctly; "
- "cannot be opened read/write");
- ret = -EACCES;
- goto fail;
- }
+ }
+
+ data_off_is_correct = parallels_test_data_off(s, file_nb_sectors,
+ &data_start);
+ s->data_start = data_start;
+ s->data_end = s->data_start;
+ if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+ /*
+ * There is not enough unused space to fit block alignment between
+ * the BAT and the actual data. We can't avoid read-modify-write...
+ */
+ s->header_size = size;
}
opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
@@ -962,10 +1175,41 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
bdrv_get_device_or_node_name(bs));
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
- error_free(s->migration_blocker);
+ error_setg(errp, "Migration blocker error");
goto fail;
}
qemu_co_mutex_init(&s->lock);
+
+ for (i = 0; i < s->bat_size; i++) {
+ sector = bat2sect(s, i);
+ if (sector + s->tracks > s->data_end) {
+ s->data_end = sector + s->tracks;
+ }
+ }
+
+ /*
+ * We don't repair the image here if it's opened for checks. Also we don't
+ * want to change inactive images and can't change read-only images.
+ */
+ if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
+ return 0;
+ }
+
+ /*
+ * Repair the image if it's dirty or
+ * out-of-image corruption was detected.
+ */
+ if (s->data_end > file_nb_sectors || s->header_unclean
+ || !data_off_is_correct) {
+ BdrvCheckResult res;
+ ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not repair corrupted image");
+ migrate_del_blocker(s->migration_blocker);
+ goto fail;
+ }
+ }
+
return 0;
fail_format:
@@ -973,6 +1217,12 @@ fail_format:
fail_options:
ret = -EINVAL;
fail:
+ /*
+ * "s" object was allocated by g_malloc0 so we can safely
+ * try to free its fields even they were not allocated.
+ */
+ error_free(s->migration_blocker);
+ g_free(s->bat_dirty_bmap);
qemu_vfree(s->header);
return ret;
}
diff --git a/block/parallels.h b/block/parallels.h
index f22f43f..4e53e95 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -75,6 +75,7 @@ typedef struct BDRVParallelsState {
uint32_t *bat_bitmap;
unsigned int bat_size;
+ int64_t data_start;
int64_t data_end;
uint64_t prealloc_size;
ParallelsPreallocMode prealloc_mode;
diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125..3d0f621 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -535,7 +535,7 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}
-BlockDriver bdrv_preallocate_filter = {
+static BlockDriver bdrv_preallocate_filter = {
.format_name = "preallocate",
.instance_size = sizeof(BDRVPreallocateState),
diff --git a/block/qapi.c b/block/qapi.c
index f34f95e..1cbb093 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
{
ImageInfo **p_image_info;
ImageInfo *backing_info;
- BlockDriverState *bs0, *backing;
+ BlockDriverState *backing;
BlockDeviceInfo *info;
ERRP_GUARD();
@@ -145,7 +145,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->write_threshold = bdrv_write_threshold_get(bs);
- bs0 = bs;
p_image_info = &info->image;
info->backing_file_depth = 0;
@@ -153,7 +152,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
* Skip automatically inserted nodes that the user isn't aware of for
* query-block (blk != NULL), but not for query-named-block-nodes
*/
- bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+ bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
if (*errp) {
qapi_free_BlockDeviceInfo(info);
return NULL;
@@ -310,33 +309,6 @@ out:
}
/**
- * bdrv_query_block_node_info:
- * @bs: block node to examine
- * @p_info: location to store node information
- * @errp: location to store error information
- *
- * Store image information about @bs in @p_info.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_block_node_info(BlockDriverState *bs,
- BlockNodeInfo **p_info,
- Error **errp)
-{
- BlockNodeInfo *info;
- ERRP_GUARD();
-
- info = g_new0(BlockNodeInfo, 1);
- bdrv_do_query_node_info(bs, info, errp);
- if (*errp) {
- qapi_free_BlockNodeInfo(info);
- return;
- }
-
- *p_info = info;
-}
-
-/**
* bdrv_query_image_info:
* @bs: block node to examine
* @p_info: location to store image information
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 5095e99..996d121 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2645,7 +2645,7 @@ rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
* repeat all this until the reftable stops growing.
*
* (This loop will terminate, because with every cluster the
- * reftable grows, it can accomodate a multitude of more refcounts,
+ * reftable grows, it can accommodate a multitude of more refcounts,
* so that at some point this must be able to cover the reftable
* and all refblocks describing it.)
*
diff --git a/block/qcow2.c b/block/qcow2.c
index c51388e..b48cd9c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5197,6 +5197,7 @@ qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVQcow2State *s = bs->opaque;
bdi->cluster_size = s->cluster_size;
+ bdi->subcluster_size = s->subcluster_size;
bdi->vm_state_offset = qcow2_vm_state_offset(s);
bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
return 0;
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 67ea339..8d4e893 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
*nshared = BLK_PERM_ALL;
}
-BlockDriver bdrv_snapshot_access_drv = {
+static BlockDriver bdrv_snapshot_access_drv = {
.format_name = "snapshot-access",
.bdrv_open = snapshot_access_open,
diff --git a/block/vhdx.c b/block/vhdx.c
index f2c3a80..a67edcc 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1077,7 +1077,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- /* endian convert populated BAT field entires */
+ /* endian convert populated BAT field entries */
for (i = 0; i < s->bat_entries; i++) {
s->bat[i] = le64_to_cpu(s->bat[i]);
}
diff --git a/block/vhdx.h b/block/vhdx.h
index 7db746c..455a627 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -212,7 +212,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive).
see the data descriptor field for the
- other mising bytes */
+ other missing bytes */
uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
@@ -257,7 +257,7 @@ typedef struct QEMU_PACKED VHDXMetadataTableHeader {
#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set,
- otherwise file metdata */
+ otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this
entry to open the file */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
diff --git a/block/vmdk.c b/block/vmdk.c
index 70066c2..58ce290 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1207,7 +1207,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
bs, &child_of_bds, extent_role, false,
&local_err);
g_free(extent_path);
- if (local_err) {
+ if (!extent_file) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
diff --git a/block/vpc.c b/block/vpc.c
index 3810a60..ceb87dd 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
miss sparse read optimization, but it's not a problem in terms of
correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
- uint8_t bitmap[s->bitmap_size];
+ g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
int r;
s->last_bitmap_offset = bitmap_offset;
@@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset)
int64_t bat_offset;
uint32_t index, bat_value;
int ret;
- uint8_t bitmap[s->bitmap_size];
+ g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
/* Check if sector_num is valid */
if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
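
The two vpc.c hunks replace on-stack variable-length arrays, whose size comes from an image header, with heap buffers released automatically through GLib's g_autofree attribute. A minimal sketch of that cleanup pattern (assuming GLib is available):

    #include <glib.h>
    #include <string.h>

    int main(void)
    {
        /* g_autofree calls g_free() automatically when the variable goes
         * out of scope, so no explicit cleanup path is needed. */
        g_autofree guint8 *bitmap = g_malloc(4096);
        memset(bitmap, 0, 4096);
        return 0;
    }
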