author    Peter Maydell <peter.maydell@linaro.org>  2017-10-06 17:43:02 +0100
committer Peter Maydell <peter.maydell@linaro.org>  2017-10-06 17:43:02 +0100
commit  530049bc1dcc24c1178a29d99ca08b6dd08413e0 (patch)
tree    c50588c08260188244b194556b23d8ad19ca0921 /block
parent  5121d81e387bba17496f5908d43fd623a946c645 (diff)
parent  fc3fd63fc0573ffd2ee569591a2e7f6c7310fd18 (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 06 Oct 2017 16:52:59 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (54 commits)
  block/mirror: check backing in bdrv_mirror_top_flush
  qcow2: truncate the tail of the image file after shrinking the image
  qcow2: fix return error code in qcow2_truncate()
  iotests: Fix 195 if IMGFMT is part of TEST_DIR
  block/mirror: check backing in bdrv_mirror_top_refresh_filename
  block: support passthrough of BDRV_REQ_FUA in crypto driver
  block: convert qcrypto_block_encrypt|decrypt to take bytes offset
  block: convert crypto driver to bdrv_co_preadv|pwritev
  block: fix data type casting for crypto payload offset
  crypto: expose encryption sector size in APIs
  block: use 1 MB bounce buffers for crypto instead of 16KB
  iotests: Add test 197 for covering copy-on-read
  block: Perform copy-on-read in loop
  block: Add blkdebug hook for copy-on-read
  iotests: Restore stty settings on completion
  block: Uniform handling of 0-length bdrv_get_block_status()
  qemu-io: Add -C for opening with copy-on-read
  commit: Remove overlay_bs
  qemu-iotests: Test commit block job where top has two parents
  qemu-iotests: Allow QMP pretty printing in common.qemu
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--  block/backup.c            7
-rw-r--r--  block/commit.c           64
-rw-r--r--  block/crypto.c          130
-rw-r--r--  block/dirty-bitmap.c    134
-rw-r--r--  block/io.c              131
-rw-r--r--  block/mirror.c           88
-rw-r--r--  block/qcow.c             11
-rw-r--r--  block/qcow2-bitmap.c     62
-rw-r--r--  block/qcow2-cluster.c     8
-rw-r--r--  block/qcow2-refcount.c   22
-rw-r--r--  block/qcow2.c            53
-rw-r--r--  block/qcow2.h             1
12 files changed, 364 insertions, 347 deletions
diff --git a/block/backup.c b/block/backup.c
index 517c300..06ddbfd 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,10 +372,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
clusters_per_iter = MAX((granularity / job->cluster_size), 1);
- dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
+ dbi = bdrv_dirty_iter_new(job->sync_bitmap);
/* Find the next dirty sector(s) */
- while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
+ while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
cluster = offset / job->cluster_size;
/* Fake progress updates for any clusters we skipped */
@@ -403,8 +403,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
if (granularity < job->cluster_size) {
- bdrv_set_dirty_iter(dbi,
- cluster * job->cluster_size / BDRV_SECTOR_SIZE);
+ bdrv_set_dirty_iter(dbi, cluster * job->cluster_size);
}
last_cluster = cluster - 1;
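
The backup.c hunks are a mechanical part of this series' sector-to-byte conversion: bdrv_dirty_iter_next() now yields byte offsets directly, so the BDRV_SECTOR_SIZE scaling at the call sites drops out. A standalone toy sketch of the resulting loop shape (ToyIter and its contents are illustrative stand-ins, not QEMU types):

    #include <stdint.h>
    #include <stdio.h>

    #define BDRV_SECTOR_SIZE 512LL

    /* Toy iterator: walks a fixed list of dirty *byte* offsets. */
    typedef struct { const int64_t *offs; int pos, len; } ToyIter;

    static int64_t toy_iter_next(ToyIter *it)
    {
        return it->pos < it->len ? it->offs[it->pos++] : -1;
    }

    int main(void)
    {
        const int64_t dirty[] = { 0, 65536, 1048576 };
        ToyIter it = { dirty, 0, 3 };
        int64_t cluster_size = 65536, offset;

        /* Old style: offset = toy_iter_next(&it) * BDRV_SECTOR_SIZE;
         * New style: offsets arrive in bytes, no scaling needed. */
        while ((offset = toy_iter_next(&it)) >= 0) {
            printf("dirty byte %lld -> cluster %lld\n",
                   (long long)offset, (long long)(offset / cluster_size));
        }
        return 0;
    }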
diff --git a/block/commit.c b/block/commit.c
index 8f0e835..5036eec 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,13 +36,11 @@ enum {
typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
- BlockDriverState *active;
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
BlockdevOnError on_error;
int base_flags;
- int orig_overlay_flags;
char *backing_file_str;
} CommitBlockJob;
@@ -81,18 +79,15 @@ static void commit_complete(BlockJob *job, void *opaque)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
CommitCompleteData *data = opaque;
- BlockDriverState *active = s->active;
BlockDriverState *top = blk_bs(s->top);
BlockDriverState *base = blk_bs(s->base);
- BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
+ BlockDriverState *commit_top_bs = s->commit_top_bs;
int ret = data->ret;
bool remove_commit_top_bs = false;
- /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
+ /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
bdrv_ref(top);
- if (overlay_bs) {
- bdrv_ref(overlay_bs);
- }
+ bdrv_ref(commit_top_bs);
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
@@ -100,9 +95,9 @@ static void commit_complete(BlockJob *job, void *opaque)
if (!block_job_is_cancelled(&s->common) && ret == 0) {
/* success */
- ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+ ret = bdrv_drop_intermediate(s->commit_top_bs, base,
s->backing_file_str);
- } else if (overlay_bs) {
+ } else {
/* XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */
@@ -115,9 +110,6 @@ static void commit_complete(BlockJob *job, void *opaque)
if (s->base_flags != bdrv_get_flags(base)) {
bdrv_reopen(base, s->base_flags, NULL);
}
- if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
- bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
- }
g_free(s->backing_file_str);
blk_unref(s->top);
@@ -134,10 +126,13 @@ static void commit_complete(BlockJob *job, void *opaque)
* filter driver from the backing chain. Do this as the final step so that
* the 'consistent read' permission can be granted. */
if (remove_commit_top_bs) {
- bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
+ &error_abort);
+ bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
+ &error_abort);
}
- bdrv_unref(overlay_bs);
+ bdrv_unref(commit_top_bs);
bdrv_unref(top);
}
@@ -283,10 +278,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
- int orig_overlay_flags;
int orig_base_flags;
BlockDriverState *iter;
- BlockDriverState *overlay_bs;
BlockDriverState *commit_top_bs = NULL;
Error *local_err = NULL;
int ret;
@@ -297,31 +290,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
}
- overlay_bs = bdrv_find_overlay(bs, top);
-
- if (overlay_bs == NULL) {
- error_setg(errp, "Could not find overlay image for %s:", top->filename);
- return;
- }
-
s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
if (!s) {
return;
}
- orig_base_flags = bdrv_get_flags(base);
- orig_overlay_flags = bdrv_get_flags(overlay_bs);
-
- /* convert base & overlay_bs to r/w, if necessary */
+ /* convert base to r/w, if necessary */
+ orig_base_flags = bdrv_get_flags(base);
if (!(orig_base_flags & BDRV_O_RDWR)) {
reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
orig_base_flags | BDRV_O_RDWR);
}
- if (!(orig_overlay_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
- orig_overlay_flags | BDRV_O_RDWR);
- }
+
if (reopen_queue) {
bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
if (local_err != NULL) {
@@ -350,7 +331,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
error_propagate(errp, local_err);
goto fail;
}
- bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
+ bdrv_replace_node(top, commit_top_bs, &local_err);
if (local_err) {
bdrv_unref(commit_top_bs);
commit_top_bs = NULL;
@@ -382,14 +363,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
- /* overlay_bs must be blocked because it needs to be modified to
- * update the backing image string. */
- ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
- BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
- if (ret < 0) {
- goto fail;
- }
-
s->base = blk_new(BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
@@ -408,13 +381,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
- s->active = bs;
-
- s->base_flags = orig_base_flags;
- s->orig_overlay_flags = orig_overlay_flags;
-
+ s->base_flags = orig_base_flags;
s->backing_file_str = g_strdup(backing_file_str);
-
s->on_error = on_error;
trace_commit_start(bs, base, top, s);
@@ -429,7 +397,7 @@ fail:
blk_unref(s->top);
}
if (commit_top_bs) {
- bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+ bdrv_replace_node(commit_top_bs, top, &error_abort);
}
block_job_early_fail(&s->common);
}
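
The primitive behind the overlay_bs removal is bdrv_replace_node(), which repoints every parent edge from one node to another; commit no longer needs to track a single overlay because top may have several parents. A minimal toy model of that graph operation (these structs are illustrative only, not QEMU's BdrvChild machinery):

    #include <stdio.h>

    /* Toy BDS graph: each parent holds one child pointer. Replacing a
     * node means repointing every parent edge, which is what lets the
     * commit job drop its explicit overlay_bs bookkeeping. */
    typedef struct Node { const char *name; } Node;
    typedef struct Parent { const char *name; Node *child; } Parent;

    static void replace_node(Parent *parents, int n, Node *from, Node *to)
    {
        for (int i = 0; i < n; i++) {
            if (parents[i].child == from) {
                parents[i].child = to;
            }
        }
    }

    int main(void)
    {
        Node filter = { "commit_top_bs" }, top = { "top" };
        Parent parents[] = { { "guest-device", &filter },
                             { "nbd-export",   &filter } };

        replace_node(parents, 2, &filter, &top);
        for (int i = 0; i < 2; i++) {
            printf("%s -> %s\n", parents[i].name, parents[i].child->name);
        }
        return 0;
    }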
diff --git a/block/crypto.c b/block/crypto.c
index 58ef6f2..60ddf86 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
return -EINVAL;
}
+ bs->supported_write_flags = BDRV_REQ_FUA &
+ bs->file->bs->supported_write_flags;
+
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
@@ -364,8 +367,9 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
- size_t payload_offset =
+ uint64_t payload_offset =
qcrypto_block_get_payload_offset(crypto->block);
+ assert(payload_offset < (INT64_MAX - offset));
offset += payload_offset;
@@ -379,66 +383,65 @@ static void block_crypto_close(BlockDriverState *bs)
}
-#define BLOCK_CRYPTO_MAX_SECTORS 32
+/*
+ * 1 MB bounce buffer gives good performance / memory tradeoff
+ * when using cache=none|directsync.
+ */
+#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
static coroutine_fn int
-block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BlockCrypto *crypto = bs->opaque;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cur_bytes; /* number of bytes in current iteration */
uint64_t bytes_done = 0;
uint8_t *cipher_data = NULL;
QEMUIOVector hd_qiov;
int ret = 0;
- size_t payload_offset =
- qcrypto_block_get_payload_offset(crypto->block) / 512;
+ uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+ uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+
+ assert(!flags);
+ assert(payload_offset < INT64_MAX);
+ assert(QEMU_IS_ALIGNED(offset, sector_size));
+ assert(QEMU_IS_ALIGNED(bytes, sector_size));
qemu_iovec_init(&hd_qiov, qiov->niov);
- /* Bounce buffer so we have a linear mem region for
- * entire sector. XXX optimize so we avoid bounce
- * buffer in case that qiov->niov == 1
+ /* Bounce buffer because we don't wish to expose cipher text
+ * in qiov which points to guest memory.
*/
cipher_data =
- qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
qiov->size));
if (cipher_data == NULL) {
ret = -ENOMEM;
goto cleanup;
}
- while (remaining_sectors) {
- cur_nr_sectors = remaining_sectors;
-
- if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
- cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
- }
+ while (bytes) {
+ cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
- ret = bdrv_co_readv(bs->file,
- payload_offset + sector_num,
- cur_nr_sectors, &hd_qiov);
+ ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done,
+ cur_bytes, &hd_qiov, 0);
if (ret < 0) {
goto cleanup;
}
- if (qcrypto_block_decrypt(crypto->block,
- sector_num,
- cipher_data, cur_nr_sectors * 512,
- NULL) < 0) {
+ if (qcrypto_block_decrypt(crypto->block, offset + bytes_done,
+ cipher_data, cur_bytes, NULL) < 0) {
ret = -EIO;
goto cleanup;
}
- qemu_iovec_from_buf(qiov, bytes_done,
- cipher_data, cur_nr_sectors * 512);
+ qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
+ bytes -= cur_bytes;
+ bytes_done += cur_bytes;
}
cleanup:
@@ -450,63 +453,58 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
static coroutine_fn int
-block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BlockCrypto *crypto = bs->opaque;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cur_bytes; /* number of bytes in current iteration */
uint64_t bytes_done = 0;
uint8_t *cipher_data = NULL;
QEMUIOVector hd_qiov;
int ret = 0;
- size_t payload_offset =
- qcrypto_block_get_payload_offset(crypto->block) / 512;
+ uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+ uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+
+ assert(!(flags & ~BDRV_REQ_FUA));
+ assert(payload_offset < INT64_MAX);
+ assert(QEMU_IS_ALIGNED(offset, sector_size));
+ assert(QEMU_IS_ALIGNED(bytes, sector_size));
qemu_iovec_init(&hd_qiov, qiov->niov);
- /* Bounce buffer so we have a linear mem region for
- * entire sector. XXX optimize so we avoid bounce
- * buffer in case that qiov->niov == 1
+ /* Bounce buffer because we're not permitted to touch
+ * contents of qiov - it points to guest memory.
*/
cipher_data =
- qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
qiov->size));
if (cipher_data == NULL) {
ret = -ENOMEM;
goto cleanup;
}
- while (remaining_sectors) {
- cur_nr_sectors = remaining_sectors;
+ while (bytes) {
+ cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
- if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
- cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
- }
-
- qemu_iovec_to_buf(qiov, bytes_done,
- cipher_data, cur_nr_sectors * 512);
+ qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes);
- if (qcrypto_block_encrypt(crypto->block,
- sector_num,
- cipher_data, cur_nr_sectors * 512,
- NULL) < 0) {
+ if (qcrypto_block_encrypt(crypto->block, offset + bytes_done,
+ cipher_data, cur_bytes, NULL) < 0) {
ret = -EIO;
goto cleanup;
}
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
- ret = bdrv_co_writev(bs->file,
- payload_offset + sector_num,
- cur_nr_sectors, &hd_qiov);
+ ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done,
+ cur_bytes, &hd_qiov, flags);
if (ret < 0) {
goto cleanup;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
+ bytes -= cur_bytes;
+ bytes_done += cur_bytes;
}
cleanup:
@@ -516,13 +514,22 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
return ret;
}
+static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BlockCrypto *crypto = bs->opaque;
+ uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+ bs->bl.request_alignment = sector_size; /* No sub-sector I/O */
+}
+
static int64_t block_crypto_getlength(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
int64_t len = bdrv_getlength(bs->file->bs);
- ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+ uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
+ assert(offset < INT64_MAX);
+ assert(offset < len);
len -= offset;
@@ -613,8 +620,9 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,
- .bdrv_co_readv = block_crypto_co_readv,
- .bdrv_co_writev = block_crypto_co_writev,
+ .bdrv_refresh_limits = block_crypto_refresh_limits,
+ .bdrv_co_preadv = block_crypto_co_preadv,
+ .bdrv_co_pwritev = block_crypto_co_pwritev,
.bdrv_getlength = block_crypto_getlength,
.bdrv_get_info = block_crypto_get_info_luks,
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
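
The crypto driver conversion keeps the old loop shape, just in bytes: split the request into bounce-buffer-sized chunks, cipher each chunk at its guest byte offset, and issue byte-based I/O at payload_offset plus that offset. A self-contained sketch of the write path, with XOR standing in for qcrypto_block_encrypt() (toy code, not real cryptography):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_IO_SIZE (1024 * 1024)   /* mirrors BLOCK_CRYPTO_MAX_IO_SIZE */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Stand-in for qcrypto_block_encrypt(): XOR is NOT real crypto. */
    static void toy_encrypt(uint64_t offset, uint8_t *buf, uint64_t len)
    {
        for (uint64_t i = 0; i < len; i++) {
            buf[i] ^= (uint8_t)(offset + i);
        }
    }

    static void write_encrypted(uint64_t offset, const uint8_t *src,
                                uint64_t bytes, uint64_t payload_offset)
    {
        static uint8_t bounce[MAX_IO_SIZE];
        uint64_t done = 0;

        while (bytes) {
            uint64_t cur = MIN(bytes, (uint64_t)MAX_IO_SIZE);
            memcpy(bounce, src + done, cur);          /* guest -> bounce  */
            toy_encrypt(offset + done, bounce, cur);  /* cipher in place  */
            /* real code: bdrv_co_pwritev(bs->file,
             *     payload_offset + offset + done, cur, &hd_qiov, flags); */
            printf("write %llu bytes at file offset %llu\n",
                   (unsigned long long)cur,
                   (unsigned long long)(payload_offset + offset + done));
            bytes -= cur;
            done += cur;
        }
    }

    int main(void)
    {
        uint8_t data[4096] = { 0 };
        write_encrypted(0, data, sizeof(data), 2 * 1024 * 1024);
        return 0;
    }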
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 30462d4..bd04e99 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -1,7 +1,7 @@
/*
* Block Dirty Bitmap
*
- * Copyright (c) 2016 Red Hat. Inc
+ * Copyright (c) 2016-2017 Red Hat. Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -38,11 +38,11 @@
*/
struct BdrvDirtyBitmap {
QemuMutex *mutex;
- HBitmap *bitmap; /* Dirty sector bitmap implementation */
+ HBitmap *bitmap; /* Dirty bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
char *name; /* Optional non-empty unique ID */
- int64_t size; /* Size of the bitmap (Number of sectors) */
+ int64_t size; /* Size of the bitmap, in bytes */
bool disabled; /* Bitmap is disabled. It ignores all writes to
the device */
int active_iterators; /* How many iterators are active */
@@ -115,17 +115,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
{
int64_t bitmap_size;
BdrvDirtyBitmap *bitmap;
- uint32_t sector_granularity;
- assert((granularity & (granularity - 1)) == 0);
+ assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE);
if (name && bdrv_find_dirty_bitmap(bs, name)) {
error_setg(errp, "Bitmap already exists: %s", name);
return NULL;
}
- sector_granularity = granularity >> BDRV_SECTOR_BITS;
- assert(sector_granularity);
- bitmap_size = bdrv_nb_sectors(bs);
+ bitmap_size = bdrv_getlength(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
@@ -133,7 +130,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
}
bitmap = g_new0(BdrvDirtyBitmap, 1);
bitmap->mutex = &bs->dirty_bitmap_mutex;
- bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+ bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
bitmap->size = bitmap_size;
bitmap->name = g_strdup(name);
bitmap->disabled = false;
@@ -173,45 +170,6 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
qemu_mutex_unlock(bitmap->mutex);
}
-int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, int64_t sector,
- int nb_sectors)
-{
- uint64_t i;
- int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
-
- /* To optimize: we can make hbitmap to internally check the range in a
- * coarse level, or at least do it word by word. */
- for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) {
- if (hbitmap_get(bitmap->meta, i)) {
- return true;
- }
- }
- return false;
-}
-
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, int64_t sector,
- int nb_sectors)
-{
- bool dirty;
-
- qemu_mutex_lock(bitmap->mutex);
- dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
- qemu_mutex_unlock(bitmap->mutex);
-
- return dirty;
-}
-
-void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, int64_t sector,
- int nb_sectors)
-{
- qemu_mutex_lock(bitmap->mutex);
- hbitmap_reset(bitmap->meta, sector, nb_sectors);
- qemu_mutex_unlock(bitmap->mutex);
-}
-
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
{
return bitmap->size;
@@ -341,17 +299,16 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
* Truncates _all_ bitmaps attached to a BDS.
* Called with BQL taken.
*/
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
{
BdrvDirtyBitmap *bitmap;
- uint64_t size = bdrv_nb_sectors(bs);
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
assert(!bdrv_dirty_bitmap_frozen(bitmap));
assert(!bitmap->active_iterators);
- hbitmap_truncate(bitmap->bitmap, size);
- bitmap->size = size;
+ hbitmap_truncate(bitmap->bitmap, bytes);
+ bitmap->size = bytes;
}
bdrv_dirty_bitmaps_unlock(bs);
}
@@ -461,7 +418,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
- info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS;
+ info->count = bdrv_get_dirty_count(bm);
info->granularity = bdrv_dirty_bitmap_granularity(bm);
info->has_name = !!bm->name;
info->name = g_strdup(bm->name);
@@ -476,13 +433,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
-int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
- int64_t sector)
+bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t offset)
{
if (bitmap) {
- return hbitmap_get(bitmap->bitmap, sector);
+ return hbitmap_get(bitmap->bitmap, offset);
} else {
- return 0;
+ return false;
}
}
@@ -508,19 +465,13 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
{
- return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap)
-{
- return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta);
+ return 1U << hbitmap_granularity(bitmap->bitmap);
}
-BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
- uint64_t first_sector)
+BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap)
{
BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
- hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector);
+ hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0);
iter->bitmap = bitmap;
bitmap->active_iterators++;
return iter;
@@ -552,35 +503,35 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors)
+ int64_t offset, int64_t bytes)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ hbitmap_set(bitmap->bitmap, offset, bytes);
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors)
+ int64_t offset, int64_t bytes)
{
bdrv_dirty_bitmap_lock(bitmap);
- bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes);
bdrv_dirty_bitmap_unlock(bitmap);
}
/* Called within bdrv_dirty_bitmap_lock..unlock */
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors)
+ int64_t offset, int64_t bytes)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
- hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+ hbitmap_reset(bitmap->bitmap, offset, bytes);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors)
+ int64_t offset, int64_t bytes)
{
bdrv_dirty_bitmap_lock(bitmap);
- bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+ bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes);
bdrv_dirty_bitmap_unlock(bitmap);
}
@@ -610,42 +561,42 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
}
uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
- uint64_t start, uint64_t count)
+ uint64_t offset, uint64_t bytes)
{
- return hbitmap_serialization_size(bitmap->bitmap, start, count);
+ return hbitmap_serialization_size(bitmap->bitmap, offset, bytes);
}
uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
{
- return hbitmap_serialization_granularity(bitmap->bitmap);
+ return hbitmap_serialization_align(bitmap->bitmap);
}
void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
- uint8_t *buf, uint64_t start,
- uint64_t count)
+ uint8_t *buf, uint64_t offset,
+ uint64_t bytes)
{
- hbitmap_serialize_part(bitmap->bitmap, buf, start, count);
+ hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes);
}
void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
- uint8_t *buf, uint64_t start,
- uint64_t count, bool finish)
+ uint8_t *buf, uint64_t offset,
+ uint64_t bytes, bool finish)
{
- hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish);
+ hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish);
}
void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
- uint64_t start, uint64_t count,
+ uint64_t offset, uint64_t bytes,
bool finish)
{
- hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
+ hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish);
}
void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
- uint64_t start, uint64_t count,
+ uint64_t offset, uint64_t bytes,
bool finish)
{
- hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+ hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish);
}
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
@@ -653,8 +604,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
hbitmap_deserialize_finish(bitmap->bitmap);
}
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
- int64_t nr_sectors)
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvDirtyBitmap *bitmap;
@@ -668,7 +618,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
continue;
}
assert(!bdrv_dirty_bitmap_readonly(bitmap));
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ hbitmap_set(bitmap->bitmap, offset, bytes);
}
bdrv_dirty_bitmaps_unlock(bs);
}
@@ -676,9 +626,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
/**
* Advance a BdrvDirtyBitmapIter to an arbitrary offset.
*/
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num)
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset)
{
- hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num);
+ hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset);
}
int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
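
With dirty bitmaps now sized and indexed in bytes, the HBitmap stores ctz32(granularity) directly and the granularity round-trips as a plain shift. A small standalone check of that arithmetic (ctz32 here is a stand-in using a GCC builtin):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* as used by hbitmap_alloc(bitmap_size, ctz32(granularity)) */
    static int ctz32(uint32_t v) { return __builtin_ctz(v); }

    int main(void)
    {
        uint32_t granularity = 65536;        /* bytes, power of two */
        int log2_gran = ctz32(granularity);  /* what hbitmap stores */
        assert((granularity & (granularity - 1)) == 0);

        /* bdrv_dirty_bitmap_granularity() now recovers it directly: */
        printf("granularity round-trip: %u -> %u\n",
               granularity, 1U << log2_gran);

        /* Before the patch the bitmap was in sectors, so the same value
         * was reconstructed as BDRV_SECTOR_SIZE << (log2_gran - 9). */
        return 0;
    }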
diff --git a/block/io.c b/block/io.c
index 4378ae4..8e41907 100644
--- a/block/io.c
+++ b/block/io.c
@@ -34,6 +34,9 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
+#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -945,68 +948,114 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
BlockDriver *drv = bs->drv;
struct iovec iov;
- QEMUIOVector bounce_qiov;
+ QEMUIOVector local_qiov;
int64_t cluster_offset;
unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+ BDRV_REQUEST_MAX_BYTES);
+ unsigned int progress = 0;
/* FIXME We cannot require callers to have write permissions when all they
* are doing is a read request. If we did things right, write permissions
* would be obtained anyway, but internally by the copy-on-read code. As
- * long as it is implemented here rather than in a separat filter driver,
+ * long as it is implemented here rather than in a separate filter driver,
* the copy-on-read code doesn't have its own BdrvChild, however, for which
* it could request permissions. Therefore we have to bypass the permission
* system for the moment. */
// assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
/* Cover entire cluster so no additional backing file I/O is required when
- * allocating cluster in the image file.
+ * allocating cluster in the image file. Note that this value may exceed
+ * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
+ * is one reason we loop rather than doing it all at once.
*/
bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
+ skip_bytes = offset - cluster_offset;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
cluster_offset, cluster_bytes);
- iov.iov_len = cluster_bytes;
- iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ bounce_buffer = qemu_try_blockalign(bs,
+ MIN(MIN(max_transfer, cluster_bytes),
+ MAX_BOUNCE_BUFFER));
if (bounce_buffer == NULL) {
ret = -ENOMEM;
goto err;
}
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+ while (cluster_bytes) {
+ int64_t pnum;
- ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
- &bounce_qiov, 0);
- if (ret < 0) {
- goto err;
- }
+ ret = bdrv_is_allocated(bs, cluster_offset,
+ MIN(cluster_bytes, max_transfer), &pnum);
+ if (ret < 0) {
+ /* Safe to treat errors in querying allocation as if
+ * unallocated; we'll probably fail again soon on the
+ * read, but at least that will set a decent errno.
+ */
+ pnum = MIN(cluster_bytes, max_transfer);
+ }
- if (drv->bdrv_co_pwrite_zeroes &&
- buffer_is_zero(bounce_buffer, iov.iov_len)) {
- /* FIXME: Should we (perhaps conditionally) be setting
- * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
- * that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
- } else {
- /* This does not change the data on the disk, it is not necessary
- * to flush even in cache=writethrough mode.
- */
- ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
- &bounce_qiov, 0);
- }
+ assert(skip_bytes < pnum);
- if (ret < 0) {
- /* It might be okay to ignore write errors for guest requests. If this
- * is a deliberate copy-on-read then we don't want to ignore the error.
- * Simply report it in all cases.
- */
- goto err;
- }
+ if (ret <= 0) {
+ /* Must copy-on-read; use the bounce buffer */
+ iov.iov_base = bounce_buffer;
+ iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
- skip_bytes = offset - cluster_offset;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
+ ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto err;
+ }
+
+ bdrv_debug_event(bs, BLKDBG_COR_WRITE);
+ if (drv->bdrv_co_pwrite_zeroes &&
+ buffer_is_zero(bounce_buffer, pnum)) {
+ /* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+ } else {
+ /* This does not change the data on the disk, it is not
+ * necessary to flush even in cache=writethrough mode.
+ */
+ ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+ &local_qiov, 0);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest
+ * requests. If this is a deliberate copy-on-read
+ * then we don't want to ignore the error. Simply
+ * report it in all cases.
+ */
+ goto err;
+ }
+
+ qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
+ pnum - skip_bytes);
+ } else {
+ /* Read directly into the destination */
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
+ ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
+ &local_qiov, 0);
+ qemu_iovec_destroy(&local_qiov);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ cluster_offset += pnum;
+ cluster_bytes -= pnum;
+ progress += pnum - skip_bytes;
+ skip_bytes = 0;
+ }
+ ret = 0;
err:
qemu_vfree(bounce_buffer);
@@ -1212,9 +1261,6 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}
-/* Maximum buffer for write zeroes fallback, in bytes */
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
-
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags)
{
@@ -1229,8 +1275,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
bs->bl.request_alignment);
- int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
- MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
assert(alignment % bs->bl.request_alignment == 0);
head = offset % alignment;
@@ -1334,7 +1379,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
bool waited;
int ret;
- int64_t start_sector = offset >> BDRV_SECTOR_BITS;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
uint64_t bytes_remaining = bytes;
int max_transfer;
@@ -1409,7 +1453,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
atomic_inc(&bs->write_gen);
- bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
+ bdrv_set_dirty(bs, offset, bytes);
stat64_max(&bs->wr_highest_offset, offset + bytes);
@@ -1778,6 +1822,10 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
*pnum = 0;
return BDRV_BLOCK_EOF;
}
+ if (!nb_sectors) {
+ *pnum = 0;
+ return 0;
+ }
n = total_sectors - sector_num;
if (n < nb_sectors) {
@@ -2438,8 +2486,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
ret = 0;
out:
atomic_inc(&bs->write_gen);
- bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
- req.bytes >> BDRV_SECTOR_BITS);
+ bdrv_set_dirty(bs, req.offset, req.bytes);
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
return ret;
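
The reworked copy-on-read path bounds its bounce buffer and walks the cluster-aligned region in chunks, tracking the head bytes to skip and the progress through the guest's request. A standalone model of just that bookkeeping (numbers are arbitrary; the real code also consults bdrv_is_allocated() per chunk and clamps pnum accordingly):

    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int64_t cluster = 65536, max_transfer = 32768;
        int64_t offset = 70000, bytes = 100000;

        /* Round the guest request out to cluster boundaries. */
        int64_t cluster_offset = offset / cluster * cluster;
        int64_t cluster_bytes =
            (offset + bytes + cluster - 1) / cluster * cluster - cluster_offset;
        int64_t skip_bytes = offset - cluster_offset;
        int64_t progress = 0;

        while (cluster_bytes) {
            int64_t pnum = MIN(cluster_bytes, max_transfer);
            int64_t to_guest = MIN(pnum - skip_bytes, bytes - progress);
            /* real code reads pnum bytes at cluster_offset, writes them
             * back for COR, then copies to_guest bytes to the qiov */
            printf("chunk @%lld len %lld -> guest %lld\n",
                   (long long)cluster_offset, (long long)pnum,
                   (long long)to_guest);
            cluster_offset += pnum;
            cluster_bytes -= pnum;
            progress += to_guest;
            skip_bytes = 0;   /* only the first chunk skips a head */
        }
        printf("copied %lld of %lld guest bytes\n",
               (long long)progress, (long long)bytes);
        return 0;
    }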
diff --git a/block/mirror.c b/block/mirror.c
index 6f5cb9f..153758c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -141,8 +141,7 @@ static void mirror_write_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
- bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
- op->bytes >> BDRV_SECTOR_BITS);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
action = mirror_error_action(s, false, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -161,8 +160,7 @@ static void mirror_read_complete(void *opaque, int ret)
if (ret < 0) {
BlockErrorAction action;
- bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
- op->bytes >> BDRV_SECTOR_BITS);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
action = mirror_error_action(s, true, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -336,12 +334,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
- offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+ offset = bdrv_dirty_iter_next(s->dbi);
if (offset < 0) {
bdrv_set_dirty_iter(s->dbi, 0);
- offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
- trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
- BDRV_SECTOR_SIZE);
+ offset = bdrv_dirty_iter_next(s->dbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
assert(offset >= 0);
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
@@ -362,19 +359,18 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
int64_t next_offset = offset + nb_chunks * s->granularity;
int64_t next_chunk = next_offset / s->granularity;
if (next_offset >= s->bdev_length ||
- !bdrv_get_dirty_locked(source, s->dirty_bitmap,
- next_offset >> BDRV_SECTOR_BITS)) {
+ !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) {
break;
}
if (test_bit(next_chunk, s->in_flight_bitmap)) {
break;
}
- next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+ next_dirty = bdrv_dirty_iter_next(s->dbi);
if (next_dirty > next_offset || next_dirty < 0) {
/* The bitmap iterator's cache is stale, refresh it */
- bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS);
- next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+ bdrv_set_dirty_iter(s->dbi, next_offset);
+ next_dirty = bdrv_dirty_iter_next(s->dbi);
}
assert(next_dirty == next_offset);
nb_chunks++;
@@ -384,8 +380,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* calling bdrv_get_block_status_above could yield - if some blocks are
* marked dirty in this window, we need to know.
*/
- bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS,
- nb_chunks * sectors_per_chunk);
+ bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
+ nb_chunks * s->granularity);
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
@@ -616,25 +612,23 @@ static void mirror_throttle(MirrorBlockJob *s)
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{
- int64_t sector_num, end;
+ int64_t offset;
BlockDriverState *base = s->base;
BlockDriverState *bs = s->source;
BlockDriverState *target_bs = blk_bs(s->target);
- int ret, n;
+ int ret;
int64_t count;
- end = s->bdev_length / BDRV_SECTOR_SIZE;
-
if (base == NULL && !bdrv_has_zero_init(target_bs)) {
if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
- bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
return 0;
}
s->initial_zeroing_ongoing = true;
- for (sector_num = 0; sector_num < end; ) {
- int nb_sectors = MIN(end - sector_num,
- QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+ for (offset = 0; offset < s->bdev_length; ) {
+ int bytes = MIN(s->bdev_length - offset,
+ QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
mirror_throttle(s);
@@ -650,9 +644,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
continue;
}
- mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE, false);
- sector_num += nb_sectors;
+ mirror_do_zero_or_discard(s, offset, bytes, false);
+ offset += bytes;
}
mirror_wait_for_all_io(s);
@@ -660,10 +653,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
- for (sector_num = 0; sector_num < end; ) {
+ for (offset = 0; offset < s->bdev_length; ) {
/* Just to make sure we are not exceeding int limit. */
- int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
- end - sector_num);
+ int bytes = MIN(s->bdev_length - offset,
+ QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
mirror_throttle(s);
@@ -671,21 +664,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0;
}
- ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE, &count);
+ ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count);
if (ret < 0) {
return ret;
}
- /* TODO: Relax this once bdrv_is_allocated_above and dirty
- * bitmaps no longer require sector alignment. */
- assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
- n = count >> BDRV_SECTOR_BITS;
- assert(n > 0);
+ assert(count);
if (ret == 1) {
- bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
}
- sector_num += n;
+ offset += count;
}
return 0;
}
@@ -796,7 +784,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
assert(!s->dbi);
- s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0);
+ s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
for (;;) {
uint64_t delay_ns = 0;
int64_t cnt, delta;
@@ -811,11 +799,10 @@ static void coroutine_fn mirror_run(void *opaque)
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
- * far, cnt is the number of dirty sectors remaining and
+ * far, cnt is the number of dirty bytes remaining and
* s->bytes_in_flight is the number of bytes currently being
* processed; together those are the current total operation length */
- s->common.len = s->common.offset + s->bytes_in_flight +
- cnt * BDRV_SECTOR_SIZE;
+ s->common.len = s->common.offset + s->bytes_in_flight + cnt;
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -827,8 +814,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
- trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
- s->buf_free_count, s->in_flight);
+ trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
@@ -869,7 +855,7 @@ static void coroutine_fn mirror_run(void *opaque)
* whether to switch to target check one last time if I/O has
* come in the meanwhile, and if not flush the data to disk.
*/
- trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);
+ trace_mirror_before_drain(s, cnt);
bdrv_drained_begin(bs);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -888,8 +874,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
ret = 0;
- trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
- s->synced, delay_ns);
+ trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
if (!s->synced) {
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
if (block_job_is_cancelled(&s->common)) {
@@ -1056,6 +1041,10 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
{
+ if (bs->backing == NULL) {
+ /* we can be here after failed bdrv_append in mirror_start_job */
+ return 0;
+ }
return bdrv_co_flush(bs->backing->bs);
}
@@ -1073,6 +1062,11 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
{
+ if (bs->backing == NULL) {
+ /* we can be here after failed bdrv_attach_child in
+ * bdrv_set_backing_hd */
+ return;
+ }
bdrv_refresh_filename(bs->backing->bs);
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
bs->backing->bs->filename);
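
mirror_dirty_init() now steps through the device in bytes, capping each step at the largest int-sized value that stays aligned to the job granularity. The stepping logic in isolation (device size and granularity are made-up values):

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))  /* like QEMU_ALIGN_DOWN */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int64_t bdev_length = 6LL * 1024 * 1024 * 1024;  /* 6 GiB */
        int64_t granularity = 65536;

        for (int64_t offset = 0; offset < bdev_length; ) {
            /* Largest int-sized, granularity-aligned step. */
            int bytes = MIN(bdev_length - offset,
                            ALIGN_DOWN((int64_t)INT_MAX, granularity));
            printf("process %d bytes at %lld\n", bytes, (long long)offset);
            offset += bytes;
        }
        return 0;
    }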
diff --git a/block/qcow.c b/block/qcow.c
index f450b00..9569dee 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -478,7 +478,9 @@ static int get_cluster_offset(BlockDriverState *bs,
for(i = 0; i < s->cluster_sectors; i++) {
if (i < n_start || i >= n_end) {
memset(s->cluster_data, 0x00, 512);
- if (qcrypto_block_encrypt(s->crypto, start_sect + i,
+ if (qcrypto_block_encrypt(s->crypto,
+ (start_sect + i) *
+ BDRV_SECTOR_SIZE,
s->cluster_data,
BDRV_SECTOR_SIZE,
NULL) < 0) {
@@ -668,7 +670,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
if (bs->encrypted) {
assert(s->crypto);
- if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
+ if (qcrypto_block_decrypt(s->crypto,
+ sector_num * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE, NULL) < 0) {
ret = -EIO;
break;
@@ -740,8 +743,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
}
if (bs->encrypted) {
assert(s->crypto);
- if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
- n * BDRV_SECTOR_SIZE, NULL) < 0) {
+ if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE,
+ buf, n * BDRV_SECTOR_SIZE, NULL) < 0) {
ret = -EIO;
break;
}
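
qcow1 keeps its sector-based internals; only the qcrypto_block_encrypt/decrypt call sites change, scaling to bytes at the boundary. The conversion in isolation (values illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define BDRV_SECTOR_SIZE 512

    int main(void)
    {
        int64_t sector_num = 100, n = 16;  /* 16 sectors from sector 100 */
        /* qcrypto_block_* now takes bytes for both offset and length: */
        printf("decrypt at byte %lld, length %lld\n",
               (long long)(sector_num * BDRV_SECTOR_SIZE),
               (long long)(n * BDRV_SECTOR_SIZE));
        return 0;
    }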
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 14f41d0..f45e46c 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -269,15 +269,16 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
return 0;
}
-/* This function returns the number of disk sectors covered by a single qcow2
- * cluster of bitmap data. */
-static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
- const BdrvDirtyBitmap *bitmap)
+/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
+static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
+ const BdrvDirtyBitmap *bitmap)
{
- uint32_t sector_granularity =
- bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+ uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ uint64_t limit = granularity * (s->cluster_size << 3);
- return (uint64_t)sector_granularity * (s->cluster_size << 3);
+ assert(QEMU_IS_ALIGNED(limit,
+ bdrv_dirty_bitmap_serialization_align(bitmap)));
+ return limit;
}
/* load_bitmap_data
@@ -290,7 +291,7 @@ static int load_bitmap_data(BlockDriverState *bs,
{
int ret = 0;
BDRVQcow2State *s = bs->opaque;
- uint64_t sector, sbc;
+ uint64_t offset, limit;
uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
uint8_t *buf = NULL;
uint64_t i, tab_size =
@@ -302,28 +303,28 @@ static int load_bitmap_data(BlockDriverState *bs,
}
buf = g_malloc(s->cluster_size);
- sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
- for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
- uint64_t count = MIN(bm_size - sector, sbc);
+ limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+ for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+ uint64_t count = MIN(bm_size - offset, limit);
uint64_t entry = bitmap_table[i];
- uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+ uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
assert(check_table_entry(entry, s->cluster_size) == 0);
- if (offset == 0) {
+ if (data_offset == 0) {
if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
- bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count,
+ bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count,
false);
} else {
/* No need to deserialize zeros because the dirty bitmap is
* already cleared */
}
} else {
- ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
+ ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size);
if (ret < 0) {
goto finish;
}
- bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count,
+ bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
false);
}
}
@@ -1071,8 +1072,8 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
{
int ret;
BDRVQcow2State *s = bs->opaque;
- int64_t sector;
- uint64_t sbc;
+ int64_t offset;
+ uint64_t limit;
uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
uint8_t *buf = NULL;
@@ -1095,20 +1096,25 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
return NULL;
}
- dbi = bdrv_dirty_iter_new(bitmap, 0);
+ dbi = bdrv_dirty_iter_new(bitmap);
buf = g_malloc(s->cluster_size);
- sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
- assert(DIV_ROUND_UP(bm_size, sbc) == tb_size);
+ limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+ assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
- while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
- uint64_t cluster = sector / sbc;
+ while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
+ uint64_t cluster = offset / limit;
uint64_t end, write_size;
int64_t off;
- sector = cluster * sbc;
- end = MIN(bm_size, sector + sbc);
- write_size =
- bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector);
+ /*
+ * We found the first dirty offset, but want to write out the
+ * entire cluster of the bitmap that includes that offset,
+ * including any leading zero bits.
+ */
+ offset = QEMU_ALIGN_DOWN(offset, limit);
+ end = MIN(bm_size, offset + limit);
+ write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset,
+ end - offset);
assert(write_size <= s->cluster_size);
off = qcow2_alloc_clusters(bs, s->cluster_size);
@@ -1120,7 +1126,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
}
tb[cluster] = off;
- bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector);
+ bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset);
if (write_size < s->cluster_size) {
memset(buf + write_size, 0, s->cluster_size - write_size);
}
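
The renamed helper makes the units explicit: one qcow2 cluster of serialized bitmap data holds cluster_size * 8 bits, each covering `granularity` guest bytes, and store_bitmap_data() aligns the first dirty offset down so the whole covering cluster is written out, leading zeros included. The arithmetic on its own (sizes are examples):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size = 65536;   /* qcow2 cluster */
        uint64_t granularity  = 65536;   /* guest bytes per bitmap bit */

        /* Guest bytes covered by one cluster of serialized bitmap data: */
        uint64_t limit = granularity * (cluster_size << 3);
        printf("bytes covered per bitmap cluster: %llu\n",
               (unsigned long long)limit);

        /* First dirty offset is rounded down to a covering-cluster
         * boundary before serialization (QEMU_ALIGN_DOWN equivalent): */
        uint64_t first_dirty = 3 * limit + 12345;
        uint64_t start = first_dirty / limit * limit;
        printf("dirty at %llu -> serialize cluster starting %llu\n",
               (unsigned long long)first_dirty, (unsigned long long)start);
        return 0;
    }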
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d2518d1..0e5aec8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -446,15 +446,13 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
{
if (bytes && bs->encrypted) {
BDRVQcow2State *s = bs->opaque;
- int64_t sector = (s->crypt_physical_offset ?
+ int64_t offset = (s->crypt_physical_offset ?
(cluster_offset + offset_in_cluster) :
- (src_cluster_offset + offset_in_cluster))
- >> BDRV_SECTOR_BITS;
+ (src_cluster_offset + offset_in_cluster));
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
assert(s->crypto);
- if (qcrypto_block_encrypt(s->crypto, sector, buffer,
- bytes, NULL) < 0) {
+ if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
return false;
}
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 88d5a3f..aa3fd6c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -3181,3 +3181,25 @@ out:
g_free(reftable_tmp);
return ret;
}
+
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t i;
+
+ for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
+ uint64_t refcount;
+ int ret = qcow2_get_refcount(bs, i, &refcount);
+ if (ret < 0) {
+ fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+ i, strerror(-ret));
+ return ret;
+ }
+ if (refcount > 0) {
+ return i;
+ }
+ }
+ qcow2_signal_corruption(bs, true, -1, -1,
+ "There are no references in the refcount table.");
+ return -EIO;
+}
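
The new qcow2_get_last_cluster() simply scans refcounts backwards from the cluster containing the old file size and returns the last cluster still referenced. A toy model with an in-memory refcount array (the real function reads refcounts via qcow2_get_refcount() and signals image corruption when none are set):

    #include <stdint.h>
    #include <stdio.h>

    static int64_t last_cluster(const uint64_t *refcount, int64_t nb_clusters)
    {
        for (int64_t i = nb_clusters - 1; i >= 0; i--) {
            if (refcount[i] > 0) {
                return i;
            }
        }
        return -1;  /* real code returns -EIO and reports corruption */
    }

    int main(void)
    {
        /* clusters 0-3 in use, 4-7 freed by the shrink */
        uint64_t refcount[8] = { 1, 1, 2, 1, 0, 0, 0, 0 };
        printf("last used cluster: %lld\n",
               (long long)last_cluster(refcount, 8));
        return 0;
    }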
diff --git a/block/qcow2.c b/block/qcow2.c
index 970006f..f63d183 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1811,7 +1811,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
if (qcrypto_block_decrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
- offset) >> BDRV_SECTOR_BITS,
+ offset),
cluster_data,
cur_bytes,
NULL) < 0) {
@@ -1946,7 +1946,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
if (qcrypto_block_encrypt(s->crypto,
(s->crypt_physical_offset ?
cluster_offset + offset_in_cluster :
- offset) >> BDRV_SECTOR_BITS,
+ offset),
cluster_data,
cur_bytes, NULL) < 0) {
ret = -EIO;
@@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
new_l1_size = size_to_l1(s, offset);
if (offset < old_length) {
+ int64_t last_cluster, old_file_size;
if (prealloc != PREALLOC_MODE_OFF) {
error_setg(errp,
"Preallocation can't be used for shrinking an image");
@@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
"Failed to discard unused refblocks");
return ret;
}
+
+ old_file_size = bdrv_getlength(bs->file->bs);
+ if (old_file_size < 0) {
+ error_setg_errno(errp, -old_file_size,
+ "Failed to inquire current file length");
+ return old_file_size;
+ }
+ last_cluster = qcow2_get_last_cluster(bs, old_file_size);
+ if (last_cluster < 0) {
+ error_setg_errno(errp, -last_cluster,
+ "Failed to find the last cluster");
+ return last_cluster;
+ }
+ if ((last_cluster + 1) * s->cluster_size < old_file_size) {
+ ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
+ PREALLOC_MODE_OFF, NULL);
+ if (ret < 0) {
+ warn_report("Failed to truncate the tail of the image: %s",
+ strerror(-ret));
+ ret = 0;
+ }
+ }
} else {
ret = qcow2_grow_l1_table(bs, new_l1_size, true);
if (ret < 0) {
@@ -3167,7 +3190,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
if (old_file_size < 0) {
error_setg_errno(errp, -old_file_size,
"Failed to inquire current file length");
- return ret;
+ return old_file_size;
}
nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
@@ -3196,7 +3219,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
if (allocation_start < 0) {
error_setg_errno(errp, -allocation_start,
"Failed to resize refcount structures");
- return -allocation_start;
+ return allocation_start;
}
clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
@@ -3673,20 +3696,19 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
*/
required = virtual_size;
} else {
- int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE;
- int64_t sector_num;
+ int64_t offset;
int pnum = 0;
- for (sector_num = 0;
- sector_num < ssize / BDRV_SECTOR_SIZE;
- sector_num += pnum) {
- int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num,
- BDRV_REQUEST_MAX_SECTORS);
+ for (offset = 0; offset < ssize;
+ offset += pnum * BDRV_SECTOR_SIZE) {
+ int nb_sectors = MIN(ssize - offset,
+ BDRV_REQUEST_MAX_BYTES) / BDRV_SECTOR_SIZE;
BlockDriverState *file;
int64_t ret;
ret = bdrv_get_block_status_above(in_bs, NULL,
- sector_num, nb_sectors,
+ offset >> BDRV_SECTOR_BITS,
+ nb_sectors,
&pnum, &file);
if (ret < 0) {
error_setg_errno(&local_err, -ret,
@@ -3699,12 +3721,11 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
} else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
(BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
/* Extend pnum to end of cluster for next iteration */
- pnum = ROUND_UP(sector_num + pnum, cluster_sectors) -
- sector_num;
+ pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE,
+ cluster_size) - offset) >> BDRV_SECTOR_BITS;
/* Count clusters we've seen */
- required += (sector_num % cluster_sectors + pnum) *
- BDRV_SECTOR_SIZE;
+ required += offset % cluster_size + pnum * BDRV_SECTOR_SIZE;
}
}
}
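
Tying the two qcow2 pieces together: after a shrink discards unused refblocks, the image file itself is truncated to just past the last referenced cluster, and a failure there is only a warning since the image remains valid either way. The size computation in isolation (numbers are examples):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t cluster_size  = 65536;
        int64_t old_file_size = 100 * cluster_size;
        int64_t last_cluster  = 41;   /* from the backwards refcount scan */

        if ((last_cluster + 1) * cluster_size < old_file_size) {
            int64_t new_size = (last_cluster + 1) * cluster_size;
            /* real code: bdrv_truncate(bs->file, new_size,
             *                          PREALLOC_MODE_OFF, NULL);
             * and only warn_report()s if that truncate fails. */
            printf("truncate image file %lld -> %lld bytes\n",
                   (long long)old_file_size, (long long)new_size);
        }
        return 0;
    }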
diff --git a/block/qcow2.h b/block/qcow2.h
index 5a289a8..782a206 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
BlockDriverAmendStatusCB *status_cb,
void *cb_opaque, Error **errp);
int qcow2_shrink_reftable(BlockDriverState *bs);
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,