author     Peter Maydell <peter.maydell@linaro.org>    2017-10-06 17:43:02 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2017-10-06 17:43:02 +0100
commit     530049bc1dcc24c1178a29d99ca08b6dd08413e0 (patch)
tree       c50588c08260188244b194556b23d8ad19ca0921 /block
parent     5121d81e387bba17496f5908d43fd623a946c645 (diff)
parent     fc3fd63fc0573ffd2ee569591a2e7f6c7310fd18 (diff)
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches
# gpg: Signature made Fri 06 Oct 2017 16:52:59 BST
# gpg: using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream: (54 commits)
block/mirror: check backing in bdrv_mirror_top_flush
qcow2: truncate the tail of the image file after shrinking the image
qcow2: fix return error code in qcow2_truncate()
iotests: Fix 195 if IMGFMT is part of TEST_DIR
block/mirror: check backing in bdrv_mirror_top_refresh_filename
block: support passthrough of BDRV_REQ_FUA in crypto driver
block: convert qcrypto_block_encrypt|decrypt to take bytes offset
block: convert crypto driver to bdrv_co_preadv|pwritev
block: fix data type casting for crypto payload offset
crypto: expose encryption sector size in APIs
block: use 1 MB bounce buffers for crypto instead of 16KB
iotests: Add test 197 for covering copy-on-read
block: Perform copy-on-read in loop
block: Add blkdebug hook for copy-on-read
iotests: Restore stty settings on completion
block: Uniform handling of 0-length bdrv_get_block_status()
qemu-io: Add -C for opening with copy-on-read
commit: Remove overlay_bs
qemu-iotests: Test commit block job where top has two parents
qemu-iotests: Allow QMP pretty printing in common.qemu
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')

 block/backup.c         |   7
 block/commit.c         |  64
 block/crypto.c         | 130
 block/dirty-bitmap.c   | 134
 block/io.c             | 131
 block/mirror.c         |  88
 block/qcow.c           |  11
 block/qcow2-bitmap.c   |  62
 block/qcow2-cluster.c  |   8
 block/qcow2-refcount.c |  22
 block/qcow2.c          |  53
 block/qcow2.h          |   1
 12 files changed, 364 insertions(+), 347 deletions(-)
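Most of the churn in this diffstat comes from one mechanical change: block-layer interfaces that used to take sector_num/nb_sectors pairs now take byte offsets and byte counts, consumed in bounded chunks. The following standalone C sketch (hypothetical names, not QEMU code) shows the loop shape that recurs in the crypto, copy-on-read and mirror changes below:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CHUNK (1024 * 1024)          /* per-iteration cap */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* The recurring post-conversion loop shape: consume a byte range in
     * bounded chunks instead of iterating over 512-byte sectors. */
    static void process_bytes(uint64_t offset, uint64_t bytes)
    {
        uint64_t bytes_done = 0;

        while (bytes) {
            uint64_t cur_bytes = MIN(bytes, MAX_CHUNK);

            /* ... one bounded unit of work at offset + bytes_done ... */
            printf("chunk at %" PRIu64 ", %" PRIu64 " bytes\n",
                   offset + bytes_done, cur_bytes);

            bytes -= cur_bytes;
            bytes_done += cur_bytes;
        }
    }

    int main(void)
    {
        process_bytes(0, 3 * MAX_CHUNK + 4096);  /* four chunks, last one short */
        return 0;
    }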
diff --git a/block/backup.c b/block/backup.c
index 517c300..06ddbfd 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,10 +372,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
     granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
     clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-    dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
+    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
 
     /* Find the next dirty sector(s) */
-    while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
+    while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
         cluster = offset / job->cluster_size;
 
         /* Fake progress updates for any clusters we skipped */
@@ -403,8 +403,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
         /* If the bitmap granularity is smaller than the backup granularity,
          * we need to advance the iterator pointer to the next cluster. */
         if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(dbi,
-                                cluster * job->cluster_size / BDRV_SECTOR_SIZE);
+            bdrv_set_dirty_iter(dbi, cluster * job->cluster_size);
         }
 
         last_cluster = cluster - 1;
diff --git a/block/commit.c b/block/commit.c
index 8f0e835..5036eec 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,13 +36,11 @@ enum {
 typedef struct CommitBlockJob {
     BlockJob common;
     RateLimit limit;
-    BlockDriverState *active;
     BlockDriverState *commit_top_bs;
     BlockBackend *top;
     BlockBackend *base;
     BlockdevOnError on_error;
     int base_flags;
-    int orig_overlay_flags;
     char *backing_file_str;
 } CommitBlockJob;
@@ -81,18 +79,15 @@ static void commit_complete(BlockJob *job, void *opaque)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common);
     CommitCompleteData *data = opaque;
-    BlockDriverState *active = s->active;
     BlockDriverState *top = blk_bs(s->top);
     BlockDriverState *base = blk_bs(s->base);
-    BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
+    BlockDriverState *commit_top_bs = s->commit_top_bs;
     int ret = data->ret;
     bool remove_commit_top_bs = false;
 
-    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
+    /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
     bdrv_ref(top);
-    if (overlay_bs) {
-        bdrv_ref(overlay_bs);
-    }
+    bdrv_ref(commit_top_bs);
 
     /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
      * the normal backing chain can be restored. */
@@ -100,9 +95,9 @@ static void commit_complete(BlockJob *job, void *opaque)
     if (!block_job_is_cancelled(&s->common) && ret == 0) {
         /* success */
-        ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+        ret = bdrv_drop_intermediate(s->commit_top_bs, base,
                                      s->backing_file_str);
-    } else if (overlay_bs) {
+    } else {
         /* XXX Can (or should) we somehow keep 'consistent read' blocked even
          * after the failed/cancelled commit job is gone? If we already wrote
          * something to base, the intermediate images aren't valid any more. */
@@ -115,9 +110,6 @@ static void commit_complete(BlockJob *job, void *opaque)
     if (s->base_flags != bdrv_get_flags(base)) {
         bdrv_reopen(base, s->base_flags, NULL);
     }
-    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
-        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
-    }
     g_free(s->backing_file_str);
     blk_unref(s->top);
@@ -134,10 +126,13 @@ static void commit_complete(BlockJob *job, void *opaque)
      * filter driver from the backing chain. Do this as the final step so that
      * the 'consistent read' permission can be granted.
      */
     if (remove_commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+        bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
+                                &error_abort);
+        bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
+                          &error_abort);
     }
 
-    bdrv_unref(overlay_bs);
+    bdrv_unref(commit_top_bs);
     bdrv_unref(top);
 }
@@ -283,10 +278,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 {
     CommitBlockJob *s;
     BlockReopenQueue *reopen_queue = NULL;
-    int orig_overlay_flags;
     int orig_base_flags;
     BlockDriverState *iter;
-    BlockDriverState *overlay_bs;
     BlockDriverState *commit_top_bs = NULL;
     Error *local_err = NULL;
     int ret;
@@ -297,31 +290,19 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         return;
     }
 
-    overlay_bs = bdrv_find_overlay(bs, top);
-
-    if (overlay_bs == NULL) {
-        error_setg(errp, "Could not find overlay image for %s:", top->filename);
-        return;
-    }
-
     s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
                          speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
     if (!s) {
         return;
     }
 
-    orig_base_flags = bdrv_get_flags(base);
-    orig_overlay_flags = bdrv_get_flags(overlay_bs);
-
-    /* convert base & overlay_bs to r/w, if necessary */
+    /* convert base to r/w, if necessary */
+    orig_base_flags = bdrv_get_flags(base);
     if (!(orig_base_flags & BDRV_O_RDWR)) {
         reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
                                          orig_base_flags | BDRV_O_RDWR);
     }
-    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
-                                         orig_overlay_flags | BDRV_O_RDWR);
-    }
+
     if (reopen_queue) {
         bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
         if (local_err != NULL) {
@@ -350,7 +331,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         error_propagate(errp, local_err);
         goto fail;
     }
-    bdrv_set_backing_hd(overlay_bs, commit_top_bs, &local_err);
+    bdrv_replace_node(top, commit_top_bs, &local_err);
     if (local_err) {
         bdrv_unref(commit_top_bs);
         commit_top_bs = NULL;
@@ -382,14 +363,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         goto fail;
     }
 
-    /* overlay_bs must be blocked because it needs to be modified to
-     * update the backing image string.
-     */
-    ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs,
-                             BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
     s->base = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
@@ -408,13 +381,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
         goto fail;
     }
 
-    s->active = bs;
-
-    s->base_flags = orig_base_flags;
-    s->orig_overlay_flags = orig_overlay_flags;
-
+    s->base_flags = orig_base_flags;
     s->backing_file_str = g_strdup(backing_file_str);
-
     s->on_error = on_error;
 
     trace_commit_start(bs, base, top, s);
@@ -429,7 +397,7 @@ fail:
         blk_unref(s->top);
     }
     if (commit_top_bs) {
-        bdrv_set_backing_hd(overlay_bs, top, &error_abort);
+        bdrv_replace_node(commit_top_bs, top, &error_abort);
     }
     block_job_early_fail(&s->common);
 }
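The commit.c change drops the separate overlay_bs bookkeeping: instead of re-pointing the overlay's backing link with bdrv_set_backing_hd(), the job now calls bdrv_replace_node() to retarget every parent of the filter node at once. A toy model of that graph operation (hypothetical types; the real graph uses BdrvChild edges and permission checks, all omitted here):

    #include <stdio.h>

    /* Toy backing chain: each node points at its backing file. */
    typedef struct Node {
        const char *name;
        struct Node *backing;
    } Node;

    /* Model of bdrv_replace_node(from, to): every parent edge that pointed
     * at 'from' is retargeted to 'to'.  In a single linear chain that is
     * one pointer update in the parent. */
    static void replace_node(Node **parent_link, Node *to)
    {
        *parent_link = to;
    }

    int main(void)
    {
        Node base   = { "base", NULL };
        Node top    = { "top", &base };
        Node filter = { "commit_top", &top };   /* filter inserted above top */
        Node active = { "active", &filter };

        /* Job done: drop the filter by retargeting its parent at its backing. */
        replace_node(&active.backing, filter.backing);
        printf("active now backs onto: %s\n", active.backing->name);
        return 0;
    }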
diff --git a/block/crypto.c b/block/crypto.c
index 58ef6f2..60ddf86 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
         return -EINVAL;
     }
 
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+
     opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
@@ -364,8 +367,9 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
                                  PreallocMode prealloc, Error **errp)
 {
     BlockCrypto *crypto = bs->opaque;
-    size_t payload_offset =
+    uint64_t payload_offset =
         qcrypto_block_get_payload_offset(crypto->block);
+    assert(payload_offset < (INT64_MAX - offset));
 
     offset += payload_offset;
@@ -379,66 +383,65 @@ static void block_crypto_close(BlockDriverState *bs)
 }
 
-#define BLOCK_CRYPTO_MAX_SECTORS 32
+/*
+ * 1 MB bounce buffer gives good performance / memory tradeoff
+ * when using cache=none|directsync.
+ */
+#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
 static coroutine_fn int
-block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
-                      int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                       QEMUIOVector *qiov, int flags)
 {
     BlockCrypto *crypto = bs->opaque;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    uint64_t cur_bytes; /* number of bytes in current iteration */
     uint64_t bytes_done = 0;
     uint8_t *cipher_data = NULL;
     QEMUIOVector hd_qiov;
     int ret = 0;
-    size_t payload_offset =
-        qcrypto_block_get_payload_offset(crypto->block) / 512;
+    uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+    uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+
+    assert(!flags);
+    assert(payload_offset < INT64_MAX);
+    assert(QEMU_IS_ALIGNED(offset, sector_size));
+    assert(QEMU_IS_ALIGNED(bytes, sector_size));
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
-    /* Bounce buffer so we have a linear mem region for
-     * entire sector. XXX optimize so we avoid bounce
-     * buffer in case that qiov->niov == 1
+    /* Bounce buffer because we don't wish to expose cipher text
+     * in qiov which points to guest memory.
      */
     cipher_data =
-        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
                                               qiov->size));
     if (cipher_data == NULL) {
         ret = -ENOMEM;
         goto cleanup;
     }
 
-    while (remaining_sectors) {
-        cur_nr_sectors = remaining_sectors;
-
-        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
-        }
+    while (bytes) {
+        cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+        qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
 
-        ret = bdrv_co_readv(bs->file,
-                            payload_offset + sector_num,
-                            cur_nr_sectors, &hd_qiov);
+        ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done,
+                             cur_bytes, &hd_qiov, 0);
         if (ret < 0) {
             goto cleanup;
         }
 
-        if (qcrypto_block_decrypt(crypto->block,
-                                  sector_num,
-                                  cipher_data, cur_nr_sectors * 512,
-                                  NULL) < 0) {
+        if (qcrypto_block_decrypt(crypto->block, offset + bytes_done,
+                                  cipher_data, cur_bytes, NULL) < 0) {
             ret = -EIO;
             goto cleanup;
         }
 
-        qemu_iovec_from_buf(qiov, bytes_done,
-                            cipher_data, cur_nr_sectors * 512);
+        qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        bytes_done += cur_bytes;
     }
 
 cleanup:
@@ -450,63 +453,58 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
 
 static coroutine_fn int
-block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
-                       int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                        QEMUIOVector *qiov, int flags)
 {
     BlockCrypto *crypto = bs->opaque;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    uint64_t cur_bytes; /* number of bytes in current iteration */
     uint64_t bytes_done = 0;
     uint8_t *cipher_data = NULL;
     QEMUIOVector hd_qiov;
     int ret = 0;
-    size_t payload_offset =
-        qcrypto_block_get_payload_offset(crypto->block) / 512;
+    uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+    uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+
+    assert(!(flags & ~BDRV_REQ_FUA));
+    assert(payload_offset < INT64_MAX);
+    assert(QEMU_IS_ALIGNED(offset, sector_size));
+    assert(QEMU_IS_ALIGNED(bytes, sector_size));
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
-    /* Bounce buffer so we have a linear mem region for
-     * entire sector. XXX optimize so we avoid bounce
-     * buffer in case that qiov->niov == 1
+    /* Bounce buffer because we're not permitted to touch
+     * contents of qiov - it points to guest memory.
      */
     cipher_data =
-        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
                                               qiov->size));
    if (cipher_data == NULL) {
         ret = -ENOMEM;
         goto cleanup;
     }
 
-    while (remaining_sectors) {
-        cur_nr_sectors = remaining_sectors;
+    while (bytes) {
+        cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
 
-        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
-        }
-
-        qemu_iovec_to_buf(qiov, bytes_done,
-                          cipher_data, cur_nr_sectors * 512);
+        qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes);
 
-        if (qcrypto_block_encrypt(crypto->block,
-                                  sector_num,
-                                  cipher_data, cur_nr_sectors * 512,
-                                  NULL) < 0) {
+        if (qcrypto_block_encrypt(crypto->block, offset + bytes_done,
+                                  cipher_data, cur_bytes, NULL) < 0) {
             ret = -EIO;
             goto cleanup;
         }
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+        qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes);
 
-        ret = bdrv_co_writev(bs->file,
-                             payload_offset + sector_num,
-                             cur_nr_sectors, &hd_qiov);
+        ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done,
+                              cur_bytes, &hd_qiov, flags);
         if (ret < 0) {
             goto cleanup;
         }
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        bytes_done += cur_bytes;
     }
 
 cleanup:
@@ -516,13 +514,22 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BlockCrypto *crypto = bs->opaque;
+    uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+    bs->bl.request_alignment = sector_size; /* No sub-sector I/O */
+}
+
 static int64_t block_crypto_getlength(BlockDriverState *bs)
 {
     BlockCrypto *crypto = bs->opaque;
     int64_t len = bdrv_getlength(bs->file->bs);
 
-    ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+    uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
+    assert(offset < INT64_MAX);
+    assert(offset < len);
 
     len -= offset;
@@ -613,8 +620,9 @@ BlockDriver bdrv_crypto_luks = {
     .bdrv_truncate = block_crypto_truncate,
     .create_opts = &block_crypto_create_opts_luks,
 
-    .bdrv_co_readv = block_crypto_co_readv,
-    .bdrv_co_writev = block_crypto_co_writev,
+    .bdrv_refresh_limits = block_crypto_refresh_limits,
+    .bdrv_co_preadv = block_crypto_co_preadv,
+    .bdrv_co_pwritev = block_crypto_co_pwritev,
     .bdrv_getlength = block_crypto_getlength,
     .bdrv_get_info = block_crypto_get_info_luks,
     .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
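Two crypto.c changes interact here: requests now pass through a 1 MB bounce buffer (BLOCK_CRYPTO_MAX_IO_SIZE) rather than a 16 KB one, and encryption never happens in the caller's buffer, which in QEMU maps guest memory. A standalone sketch of that write path, with a placeholder XOR "cipher" standing in for qcrypto_block_encrypt() (everything here is hypothetical model code, not the driver):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define MAX_IO_SIZE (1024 * 1024)        /* 1 MB bounce buffer */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Placeholder cipher; a real driver would call the crypto layer here. */
    static void toy_encrypt(uint8_t *buf, size_t len)
    {
        for (size_t i = 0; i < len; i++) {
            buf[i] ^= 0xAA;
        }
    }

    /* Model of the pwritev path: copy each chunk out of the (guest) source
     * buffer, encrypt it in the bounce buffer, and write it at the payload
     * offset plus the guest offset.  The source is never modified. */
    static int write_encrypted(FILE *f, uint64_t payload_offset, uint64_t offset,
                               const uint8_t *src, uint64_t bytes)
    {
        uint8_t *bounce = malloc(MIN(bytes, MAX_IO_SIZE));
        uint64_t done = 0;

        if (bounce == NULL) {
            return -1;
        }
        while (bytes) {
            uint64_t cur = MIN(bytes, MAX_IO_SIZE);

            memcpy(bounce, src + done, cur);
            toy_encrypt(bounce, cur);
            fseek(f, (long)(payload_offset + offset + done), SEEK_SET);
            if (fwrite(bounce, 1, cur, f) != cur) {
                free(bounce);
                return -1;
            }
            bytes -= cur;
            done += cur;
        }
        free(bounce);
        return 0;
    }

    int main(void)
    {
        uint8_t data[4096] = { 'x' };
        FILE *f = tmpfile();

        if (f == NULL || write_encrypted(f, 2 * 1024 * 1024, 0,
                                         data, sizeof(data)) < 0) {
            return 1;
        }
        fclose(f);
        return 0;
    }

The FUA change in block_crypto_open_generic() is independent of the buffering: because the driver now advertises BDRV_REQ_FUA when its file child supports it, the flag can simply be forwarded to bdrv_co_pwritev() instead of being emulated with a separate flush.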
*/ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; - } - - qemu_iovec_to_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); + qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - if (qcrypto_block_encrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_writev(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, flags); if (ret < 0) { goto cleanup; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -516,13 +514,22 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, return ret; } +static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + bs->bl.request_alignment = sector_size; /* No sub-sector I/O */ +} + static int64_t block_crypto_getlength(BlockDriverState *bs) { BlockCrypto *crypto = bs->opaque; int64_t len = bdrv_getlength(bs->file->bs); - ssize_t offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t offset = qcrypto_block_get_payload_offset(crypto->block); + assert(offset < INT64_MAX); + assert(offset < len); len -= offset; @@ -613,8 +620,9 @@ BlockDriver bdrv_crypto_luks = { .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, - .bdrv_co_readv = block_crypto_co_readv, - .bdrv_co_writev = block_crypto_co_writev, + .bdrv_refresh_limits = block_crypto_refresh_limits, + .bdrv_co_preadv = block_crypto_co_preadv, + .bdrv_co_pwritev = block_crypto_co_pwritev, .bdrv_getlength = block_crypto_getlength, .bdrv_get_info = block_crypto_get_info_luks, .bdrv_get_specific_info = block_crypto_get_specific_info_luks, diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 30462d4..bd04e99 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -1,7 +1,7 @@ /* * Block Dirty Bitmap * - * Copyright (c) 2016 Red Hat. Inc + * Copyright (c) 2016-2017 Red Hat. Inc * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -38,11 +38,11 @@ */ struct BdrvDirtyBitmap { QemuMutex *mutex; - HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *bitmap; /* Dirty bitmap implementation */ HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ - int64_t size; /* Size of the bitmap (Number of sectors) */ + int64_t size; /* Size of the bitmap, in bytes */ bool disabled; /* Bitmap is disabled. 
It ignores all writes to the device */ int active_iterators; /* How many iterators are active */ @@ -115,17 +115,14 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, { int64_t bitmap_size; BdrvDirtyBitmap *bitmap; - uint32_t sector_granularity; - assert((granularity & (granularity - 1)) == 0); + assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE); if (name && bdrv_find_dirty_bitmap(bs, name)) { error_setg(errp, "Bitmap already exists: %s", name); return NULL; } - sector_granularity = granularity >> BDRV_SECTOR_BITS; - assert(sector_granularity); - bitmap_size = bdrv_nb_sectors(bs); + bitmap_size = bdrv_getlength(bs); if (bitmap_size < 0) { error_setg_errno(errp, -bitmap_size, "could not get length of device"); errno = -bitmap_size; @@ -133,7 +130,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, } bitmap = g_new0(BdrvDirtyBitmap, 1); bitmap->mutex = &bs->dirty_bitmap_mutex; - bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity)); bitmap->size = bitmap_size; bitmap->name = g_strdup(name); bitmap->disabled = false; @@ -173,45 +170,6 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) qemu_mutex_unlock(bitmap->mutex); } -int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - uint64_t i; - int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); - - /* To optimize: we can make hbitmap to internally check the range in a - * coarse level, or at least do it word by word. */ - for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { - if (hbitmap_get(bitmap->meta, i)) { - return true; - } - } - return false; -} - -int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - bool dirty; - - qemu_mutex_lock(bitmap->mutex); - dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); - - return dirty; -} - -void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, int64_t sector, - int nb_sectors) -{ - qemu_mutex_lock(bitmap->mutex); - hbitmap_reset(bitmap->meta, sector, nb_sectors); - qemu_mutex_unlock(bitmap->mutex); -} - int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) { return bitmap->size; @@ -341,17 +299,16 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, * Truncates _all_ bitmaps attached to a BDS. * Called with BQL taken. 
*/ -void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes) { BdrvDirtyBitmap *bitmap; - uint64_t size = bdrv_nb_sectors(bs); bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); assert(!bitmap->active_iterators); - hbitmap_truncate(bitmap->bitmap, size); - bitmap->size = size; + hbitmap_truncate(bitmap->bitmap, bytes); + bitmap->size = bytes; } bdrv_dirty_bitmaps_unlock(bs); } @@ -461,7 +418,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS; + info->count = bdrv_get_dirty_count(bm); info->granularity = bdrv_dirty_bitmap_granularity(bm); info->has_name = !!bm->name; info->name = g_strdup(bm->name); @@ -476,13 +433,13 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) } /* Called within bdrv_dirty_bitmap_lock..unlock */ -int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, - int64_t sector) +bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t offset) { if (bitmap) { - return hbitmap_get(bitmap->bitmap, sector); + return hbitmap_get(bitmap->bitmap, offset); } else { - return 0; + return false; } } @@ -508,19 +465,13 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) { - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); -} - -uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) -{ - return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); + return 1U << hbitmap_granularity(bitmap->bitmap); } -BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, - uint64_t first_sector) +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap) { BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); - hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, 0); iter->bitmap = bitmap; bitmap->active_iterators++; return iter; @@ -552,35 +503,35 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset, bytes); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } /* Called within bdrv_dirty_bitmap_lock..unlock */ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, int64_t bytes) { assert(bdrv_dirty_bitmap_enabled(bitmap)); assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_reset(bitmap->bitmap, offset, bytes); } void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int64_t nr_sectors) + int64_t offset, 
int64_t bytes) { bdrv_dirty_bitmap_lock(bitmap); - bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap_locked(bitmap, offset, bytes); bdrv_dirty_bitmap_unlock(bitmap); } @@ -610,42 +561,42 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) } uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count) + uint64_t offset, uint64_t bytes) { - return hbitmap_serialization_size(bitmap->bitmap, start, count); + return hbitmap_serialization_size(bitmap->bitmap, offset, bytes); } uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) { - return hbitmap_serialization_granularity(bitmap->bitmap); + return hbitmap_serialization_align(bitmap->bitmap); } void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count) + uint8_t *buf, uint64_t offset, + uint64_t bytes) { - hbitmap_serialize_part(bitmap->bitmap, buf, start, count); + hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes); } void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, - uint8_t *buf, uint64_t start, - uint64_t count, bool finish) + uint8_t *buf, uint64_t offset, + uint64_t bytes, bool finish) { - hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); + hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); + hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, - uint64_t start, uint64_t count, + uint64_t offset, uint64_t bytes, bool finish) { - hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish); + hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish); } void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) @@ -653,8 +604,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) hbitmap_deserialize_finish(bitmap->bitmap); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int64_t nr_sectors) +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; @@ -668,7 +618,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, continue; } assert(!bdrv_dirty_bitmap_readonly(bitmap)); - hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + hbitmap_set(bitmap->bitmap, offset, bytes); } bdrv_dirty_bitmaps_unlock(bs); } @@ -676,9 +626,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, /** * Advance a BdrvDirtyBitmapIter to an arbitrary offset. 
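After this series a BdrvDirtyBitmap tracks bytes end to end: the HBitmap is allocated with the byte granularity directly (ctz32(granularity)), and all set/reset/query calls take byte offsets and counts. A toy byte-granularity bitmap showing the offset-to-bit mapping (hypothetical and far simpler than QEMU's multi-level HBitmap):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy dirty bitmap: one bit per 'granularity' bytes of the device. */
    typedef struct {
        uint8_t *bits;
        uint64_t size;        /* device size in bytes */
        uint64_t granularity; /* bytes per bit; power of two */
    } DirtyBitmap;

    static void set_dirty(DirtyBitmap *bm, uint64_t offset, uint64_t bytes)
    {
        uint64_t first = offset / bm->granularity;
        uint64_t last = (offset + bytes - 1) / bm->granularity;

        for (uint64_t i = first; i <= last; i++) {
            bm->bits[i / 8] |= 1u << (i % 8);
        }
    }

    static bool get_dirty(const DirtyBitmap *bm, uint64_t offset)
    {
        uint64_t i = offset / bm->granularity;

        return bm->bits[i / 8] & (1u << (i % 8));
    }

    int main(void)
    {
        uint8_t store[16] = { 0 };
        DirtyBitmap bm = { store, 1 << 20, 1 << 16 }; /* 1 MiB disk, 64 KiB grain */

        set_dirty(&bm, 70000, 4096);                  /* byte offset, byte count */
        printf("%d %d\n", get_dirty(&bm, 65536), get_dirty(&bm, 0));
        return 0;
    }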
@@ -610,42 +561,42 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 }
 
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
-                                              uint64_t start, uint64_t count)
+                                              uint64_t offset, uint64_t bytes)
 {
-    return hbitmap_serialization_size(bitmap->bitmap, start, count);
+    return hbitmap_serialization_size(bitmap->bitmap, offset, bytes);
 }
 
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
 {
-    return hbitmap_serialization_granularity(bitmap->bitmap);
+    return hbitmap_serialization_align(bitmap->bitmap);
 }
 
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
-                                      uint8_t *buf, uint64_t start,
-                                      uint64_t count)
+                                      uint8_t *buf, uint64_t offset,
+                                      uint64_t bytes)
 {
-    hbitmap_serialize_part(bitmap->bitmap, buf, start, count);
+    hbitmap_serialize_part(bitmap->bitmap, buf, offset, bytes);
 }
 
 void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
-                                        uint8_t *buf, uint64_t start,
-                                        uint64_t count, bool finish)
+                                        uint8_t *buf, uint64_t offset,
+                                        uint64_t bytes, bool finish)
 {
-    hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish);
+    hbitmap_deserialize_part(bitmap->bitmap, buf, offset, bytes, finish);
 }
 
 void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
-                                          uint64_t start, uint64_t count,
+                                          uint64_t offset, uint64_t bytes,
                                           bool finish)
 {
-    hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
+    hbitmap_deserialize_zeroes(bitmap->bitmap, offset, bytes, finish);
 }
 
 void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
-                                        uint64_t start, uint64_t count,
+                                        uint64_t offset, uint64_t bytes,
                                         bool finish)
 {
-    hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+    hbitmap_deserialize_ones(bitmap->bitmap, offset, bytes, finish);
 }
 
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
@@ -653,8 +604,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
     hbitmap_deserialize_finish(bitmap->bitmap);
 }
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int64_t nr_sectors)
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
     BdrvDirtyBitmap *bitmap;
@@ -668,7 +618,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
             continue;
         }
         assert(!bdrv_dirty_bitmap_readonly(bitmap));
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+        hbitmap_set(bitmap->bitmap, offset, bytes);
     }
     bdrv_dirty_bitmaps_unlock(bs);
 }
@@ -676,9 +626,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
 /**
  * Advance a BdrvDirtyBitmapIter to an arbitrary offset.
  */
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num)
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset)
 {
-    hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num);
+    hbitmap_iter_init(&iter->hbi, iter->hbi.hb, offset);
 }
 
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
diff --git a/block/io.c b/block/io.c
--- a/block/io.c
+++ b/block/io.c
@@ -34,6 +34,9 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
+/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
+#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -945,68 +948,114 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     BlockDriver *drv = bs->drv;
     struct iovec iov;
-    QEMUIOVector bounce_qiov;
+    QEMUIOVector local_qiov;
     int64_t cluster_offset;
     unsigned int cluster_bytes;
     size_t skip_bytes;
     int ret;
+    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+                                    BDRV_REQUEST_MAX_BYTES);
+    unsigned int progress = 0;
 
     /* FIXME We cannot require callers to have write permissions when all they
      * are doing is a read request. If we did things right, write permissions
      * would be obtained anyway, but internally by the copy-on-read code. As
-     * long as it is implemented here rather than in a separat filter driver,
+     * long as it is implemented here rather than in a separate filter driver,
      * the copy-on-read code doesn't have its own BdrvChild, however, for which
      * it could request permissions. Therefore we have to bypass the permission
      * system for the moment. */
     // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
 
     /* Cover entire cluster so no additional backing file I/O is required when
-     * allocating cluster in the image file.
+     * allocating cluster in the image file. Note that this value may exceed
+     * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
+     * is one reason we loop rather than doing it all at once.
      */
     bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
+    skip_bytes = offset - cluster_offset;
 
     trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
                                    cluster_offset, cluster_bytes);
 
-    iov.iov_len = cluster_bytes;
-    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+    bounce_buffer = qemu_try_blockalign(bs,
+                                        MIN(MIN(max_transfer, cluster_bytes),
+                                            MAX_BOUNCE_BUFFER));
     if (bounce_buffer == NULL) {
         ret = -ENOMEM;
         goto err;
     }
 
-    qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+    while (cluster_bytes) {
+        int64_t pnum;
 
-    ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
-                             &bounce_qiov, 0);
-    if (ret < 0) {
-        goto err;
-    }
+        ret = bdrv_is_allocated(bs, cluster_offset,
+                                MIN(cluster_bytes, max_transfer), &pnum);
+        if (ret < 0) {
+            /* Safe to treat errors in querying allocation as if
+             * unallocated; we'll probably fail again soon on the
+             * read, but at least that will set a decent errno.
+             */
+            pnum = MIN(cluster_bytes, max_transfer);
+        }
 
-    if (drv->bdrv_co_pwrite_zeroes &&
-        buffer_is_zero(bounce_buffer, iov.iov_len)) {
-        /* FIXME: Should we (perhaps conditionally) be setting
-         * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
-         * that still correctly reads as zero? */
-        ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
-    } else {
-        /* This does not change the data on the disk, it is not necessary
-         * to flush even in cache=writethrough mode.
-         */
-        ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
-                                  &bounce_qiov, 0);
-    }
+        assert(skip_bytes < pnum);
 
-    if (ret < 0) {
-        /* It might be okay to ignore write errors for guest requests. If this
-         * is a deliberate copy-on-read then we don't want to ignore the error.
-         * Simply report it in all cases.
-         */
-        goto err;
-    }
+        if (ret <= 0) {
+            /* Must copy-on-read; use the bounce buffer */
+            iov.iov_base = bounce_buffer;
+            iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
+            qemu_iovec_init_external(&local_qiov, &iov, 1);
 
-    skip_bytes = offset - cluster_offset;
-    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
+            ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+                                     &local_qiov, 0);
+            if (ret < 0) {
+                goto err;
+            }
+
+            bdrv_debug_event(bs, BLKDBG_COR_WRITE);
+            if (drv->bdrv_co_pwrite_zeroes &&
+                buffer_is_zero(bounce_buffer, pnum)) {
+                /* FIXME: Should we (perhaps conditionally) be setting
+                 * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+                 * that still correctly reads as zero? */
+                ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+            } else {
+                /* This does not change the data on the disk, it is not
+                 * necessary to flush even in cache=writethrough mode.
+                 */
+                ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+                                          &local_qiov, 0);
+            }
+
+            if (ret < 0) {
+                /* It might be okay to ignore write errors for guest
+                 * requests. If this is a deliberate copy-on-read
+                 * then we don't want to ignore the error. Simply
+                 * report it in all cases.
+                 */
+                goto err;
+            }
+
+            qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
+                                pnum - skip_bytes);
+        } else {
+            /* Read directly into the destination */
+            qemu_iovec_init(&local_qiov, qiov->niov);
+            qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
+            ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
+                                     &local_qiov, 0);
+            qemu_iovec_destroy(&local_qiov);
+            if (ret < 0) {
+                goto err;
+            }
+        }
+
+        cluster_offset += pnum;
+        cluster_bytes -= pnum;
+        progress += pnum - skip_bytes;
+        skip_bytes = 0;
+    }
+    ret = 0;
 
 err:
     qemu_vfree(bounce_buffer);
@@ -1212,9 +1261,6 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
     return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
 }
 
-/* Maximum buffer for write zeroes fallback, in bytes */
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
-
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags)
 {
@@ -1229,8 +1275,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
     int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
                         bs->bl.request_alignment);
-    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
-                                    MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+    int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
 
     assert(alignment % bs->bl.request_alignment == 0);
     head = offset % alignment;
@@ -1334,7 +1379,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     bool waited;
     int ret;
 
-    int64_t start_sector = offset >> BDRV_SECTOR_BITS;
     int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
     uint64_t bytes_remaining = bytes;
     int max_transfer;
@@ -1409,7 +1453,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
     atomic_inc(&bs->write_gen);
-    bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
+    bdrv_set_dirty(bs, offset, bytes);
 
     stat64_max(&bs->wr_highest_offset, offset + bytes);
@@ -1778,6 +1822,10 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         *pnum = 0;
         return BDRV_BLOCK_EOF;
     }
+    if (!nb_sectors) {
+        *pnum = 0;
+        return 0;
+    }
 
     n = total_sectors - sector_num;
     if (n < nb_sectors) {
@@ -2438,8 +2486,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
     ret = 0;
 out:
     atomic_inc(&bs->write_gen);
-    bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
-                   req.bytes >> BDRV_SECTOR_BITS);
+    bdrv_set_dirty(bs, req.offset, req.bytes);
     tracked_request_end(&req);
     bdrv_dec_in_flight(bs);
     return ret;
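The reworked bdrv_co_do_copy_on_readv() above no longer reads the whole rounded cluster range in one shot. It walks the range in bounded chunks, querying allocation first, so already-allocated pieces are read straight into the destination while only unallocated pieces go through the bounce buffer and get written back. A standalone model of that dispatch loop (hypothetical names; the real code also handles skip_bytes, zero detection and error paths):

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CHUNK (64 * 1024)                /* stand-in for the bounce-buffer cap */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Toy allocation map: alloc[i] says whether chunk i already exists in
     * the top image. */
    static void copy_on_read(const bool *alloc, uint64_t cluster_offset,
                             uint64_t cluster_bytes)
    {
        while (cluster_bytes) {
            uint64_t pnum = MIN(cluster_bytes, CHUNK);

            if (!alloc[cluster_offset / CHUNK]) {
                /* Unallocated: read through the bounce buffer and write the
                 * data back so the next read is served from the top layer. */
                printf("copy-on-read %" PRIu64 "+%" PRIu64 "\n",
                       cluster_offset, pnum);
            } else {
                /* Already allocated: read straight into the destination. */
                printf("direct read  %" PRIu64 "+%" PRIu64 "\n",
                       cluster_offset, pnum);
            }
            cluster_offset += pnum;
            cluster_bytes -= pnum;
        }
    }

    int main(void)
    {
        bool alloc[4] = { false, true, false, true };

        copy_on_read(alloc, 0, 4 * CHUNK);
        return 0;
    }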
diff --git a/block/mirror.c b/block/mirror.c
index 6f5cb9f..153758c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -141,8 +141,7 @@ static void mirror_write_complete(void *opaque, int ret)
     if (ret < 0) {
         BlockErrorAction action;
 
-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
-                              op->bytes >> BDRV_SECTOR_BITS);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
         action = mirror_error_action(s, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -161,8 +160,7 @@ static void mirror_read_complete(void *opaque, int ret)
     if (ret < 0) {
         BlockErrorAction action;
 
-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
-                              op->bytes >> BDRV_SECTOR_BITS);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset, op->bytes);
         action = mirror_error_action(s, true, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -336,12 +334,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
 
     bdrv_dirty_bitmap_lock(s->dirty_bitmap);
-    offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+    offset = bdrv_dirty_iter_next(s->dbi);
     if (offset < 0) {
         bdrv_set_dirty_iter(s->dbi, 0);
-        offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
-                                     BDRV_SECTOR_SIZE);
+        offset = bdrv_dirty_iter_next(s->dbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
         assert(offset >= 0);
     }
     bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
@@ -362,19 +359,18 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         int64_t next_offset = offset + nb_chunks * s->granularity;
         int64_t next_chunk = next_offset / s->granularity;
         if (next_offset >= s->bdev_length ||
-            !bdrv_get_dirty_locked(source, s->dirty_bitmap,
-                                   next_offset >> BDRV_SECTOR_BITS)) {
+            !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) {
             break;
         }
         if (test_bit(next_chunk, s->in_flight_bitmap)) {
             break;
         }
 
-        next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+        next_dirty = bdrv_dirty_iter_next(s->dbi);
         if (next_dirty > next_offset || next_dirty < 0) {
             /* The bitmap iterator's cache is stale, refresh it */
-            bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS);
-            next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+            bdrv_set_dirty_iter(s->dbi, next_offset);
+            next_dirty = bdrv_dirty_iter_next(s->dbi);
         }
         assert(next_dirty == next_offset);
         nb_chunks++;
@@ -384,8 +380,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
      * calling bdrv_get_block_status_above could yield - if some blocks are
      * marked dirty in this window, we need to know.
      */
-    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS,
-                                   nb_chunks * sectors_per_chunk);
+    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset,
+                                   nb_chunks * s->granularity);
     bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
 
     bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
@@ -616,25 +612,23 @@ static void mirror_throttle(MirrorBlockJob *s)
 
 static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
-    int64_t sector_num, end;
+    int64_t offset;
     BlockDriverState *base = s->base;
     BlockDriverState *bs = s->source;
     BlockDriverState *target_bs = blk_bs(s->target);
-    int ret, n;
+    int ret;
     int64_t count;
 
-    end = s->bdev_length / BDRV_SECTOR_SIZE;
-
     if (base == NULL && !bdrv_has_zero_init(target_bs)) {
         if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
-            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
             return 0;
         }
 
         s->initial_zeroing_ongoing = true;
-        for (sector_num = 0; sector_num < end; ) {
-            int nb_sectors = MIN(end - sector_num,
-                QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+        for (offset = 0; offset < s->bdev_length; ) {
+            int bytes = MIN(s->bdev_length - offset,
+                            QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
 
             mirror_throttle(s);
@@ -650,9 +644,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
                 continue;
             }
 
-            mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
-                                      nb_sectors * BDRV_SECTOR_SIZE, false);
-            sector_num += nb_sectors;
+            mirror_do_zero_or_discard(s, offset, bytes, false);
+            offset += bytes;
         }
 
         mirror_wait_for_all_io(s);
@@ -660,10 +653,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
     }
 
     /* First part, loop on the sectors and initialize the dirty bitmap.  */
-    for (sector_num = 0; sector_num < end; ) {
+    for (offset = 0; offset < s->bdev_length; ) {
         /* Just to make sure we are not exceeding int limit. */
-        int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
-                             end - sector_num);
+        int bytes = MIN(s->bdev_length - offset,
+                        QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
 
         mirror_throttle(s);
@@ -671,21 +664,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             return 0;
         }
 
-        ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
-                                      nb_sectors * BDRV_SECTOR_SIZE, &count);
+        ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count);
         if (ret < 0) {
             return ret;
         }
 
-        /* TODO: Relax this once bdrv_is_allocated_above and dirty
-         * bitmaps no longer require sector alignment.
-         */
-        assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
-        n = count >> BDRV_SECTOR_BITS;
-        assert(n > 0);
+        assert(count);
         if (ret == 1) {
-            bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+            bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
         }
-        sector_num += n;
+        offset += count;
     }
     return 0;
 }
@@ -796,7 +784,7 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     assert(!s->dbi);
-    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0);
+    s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap);
     for (;;) {
         uint64_t delay_ns = 0;
         int64_t cnt, delta;
@@ -811,11 +799,10 @@ static void coroutine_fn mirror_run(void *opaque)
 
         cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         /* s->common.offset contains the number of bytes already processed so
-         * far, cnt is the number of dirty sectors remaining and
+         * far, cnt is the number of dirty bytes remaining and
         * s->bytes_in_flight is the number of bytes currently being
          * processed; together those are the current total operation length */
-        s->common.len = s->common.offset + s->bytes_in_flight +
-                        cnt * BDRV_SECTOR_SIZE;
+        s->common.len = s->common.offset + s->bytes_in_flight + cnt;
 
         /* Note that even when no rate limit is applied we need to yield
          * periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -827,8 +814,7 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
-                                   s->buf_free_count, s->in_flight);
+                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                 mirror_wait_for_io(s);
                 continue;
             } else if (cnt != 0) {
@@ -869,7 +855,7 @@ static void coroutine_fn mirror_run(void *opaque)
              * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
-            trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);
+            trace_mirror_before_drain(s, cnt);
 
             bdrv_drained_begin(bs);
             cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -888,8 +874,7 @@ static void coroutine_fn mirror_run(void *opaque)
         }
 
         ret = 0;
-        trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
-                                  s->synced, delay_ns);
+        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
         if (!s->synced) {
             block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
             if (block_job_is_cancelled(&s->common)) {
@@ -1056,6 +1041,10 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
 
 static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
 {
+    if (bs->backing == NULL) {
+        /* we can be here after failed bdrv_append in mirror_start_job */
+        return 0;
+    }
     return bdrv_co_flush(bs->backing->bs);
 }
@@ -1073,6 +1062,11 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
 
 static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
 {
+    if (bs->backing == NULL) {
+        /* we can be here after failed bdrv_attach_child in
+         * bdrv_set_backing_hd */
+        return;
+    }
     bdrv_refresh_filename(bs->backing->bs);
     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
             bs->backing->bs->filename);
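Besides the byte-based conversion, mirror.c hardens the bdrv_mirror_top_* callbacks against a NULL bs->backing, which can be observed after a failed bdrv_append() or bdrv_attach_child(). A minimal sketch of the defensive pattern (hypothetical types, not QEMU code):

    #include <stdio.h>

    /* Toy filter node: during setup, or after a failed attach, the filter
     * may briefly have no backing child, so every callback that chases
     * node->backing must tolerate NULL. */
    typedef struct Node {
        struct Node *backing;
        const char *filename;
    } Node;

    static int filter_flush(Node *bs)
    {
        if (bs->backing == NULL) {
            /* reachable after a failed attach; nothing to flush yet */
            return 0;
        }
        printf("flushing %s\n", bs->backing->filename);
        return 0;
    }

    int main(void)
    {
        Node filter = { NULL, "mirror_top" };
        Node base = { NULL, "disk.img" };

        filter_flush(&filter);          /* safe: no backing attached yet */
        filter.backing = &base;
        return filter_flush(&filter);
    }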
diff --git a/block/qcow.c b/block/qcow.c
index f450b00..9569dee 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -478,7 +478,9 @@ static int get_cluster_offset(BlockDriverState *bs,
                 for(i = 0; i < s->cluster_sectors; i++) {
                     if (i < n_start || i >= n_end) {
                         memset(s->cluster_data, 0x00, 512);
-                        if (qcrypto_block_encrypt(s->crypto, start_sect + i,
+                        if (qcrypto_block_encrypt(s->crypto,
+                                                  (start_sect + i) *
+                                                  BDRV_SECTOR_SIZE,
                                                   s->cluster_data,
                                                   BDRV_SECTOR_SIZE,
                                                   NULL) < 0) {
@@ -668,7 +670,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
             }
             if (bs->encrypted) {
                 assert(s->crypto);
-                if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
+                if (qcrypto_block_decrypt(s->crypto,
+                                          sector_num * BDRV_SECTOR_SIZE, buf,
                                           n * BDRV_SECTOR_SIZE, NULL) < 0) {
                     ret = -EIO;
                     break;
@@ -740,8 +743,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
         }
         if (bs->encrypted) {
             assert(s->crypto);
-            if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
-                                      n * BDRV_SECTOR_SIZE, NULL) < 0) {
+            if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE,
+                                      buf, n * BDRV_SECTOR_SIZE, NULL) < 0) {
                 ret = -EIO;
                 break;
             }
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 14f41d0..f45e46c 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -269,15 +269,16 @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
     return 0;
 }
 
-/* This function returns the number of disk sectors covered by a single qcow2
- * cluster of bitmap data. */
-static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
-                                                  const BdrvDirtyBitmap *bitmap)
+/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
+static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
+                                                const BdrvDirtyBitmap *bitmap)
 {
-    uint32_t sector_granularity =
-        bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+    uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    uint64_t limit = granularity * (s->cluster_size << 3);
 
-    return (uint64_t)sector_granularity * (s->cluster_size << 3);
+    assert(QEMU_IS_ALIGNED(limit,
+                           bdrv_dirty_bitmap_serialization_align(bitmap)));
+    return limit;
 }
 
 /* load_bitmap_data
@@ -290,7 +291,7 @@ static int load_bitmap_data(BlockDriverState *bs,
 {
     int ret = 0;
     BDRVQcow2State *s = bs->opaque;
-    uint64_t sector, sbc;
+    uint64_t offset, limit;
     uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
     uint8_t *buf = NULL;
     uint64_t i, tab_size =
@@ -302,28 +303,28 @@ static int load_bitmap_data(BlockDriverState *bs,
     }
 
     buf = g_malloc(s->cluster_size);
-    sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
-    for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
-        uint64_t count = MIN(bm_size - sector, sbc);
+    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+        uint64_t count = MIN(bm_size - offset, limit);
         uint64_t entry = bitmap_table[i];
-        uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+        uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
 
         assert(check_table_entry(entry, s->cluster_size) == 0);
 
-        if (offset == 0) {
+        if (data_offset == 0) {
             if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
-                bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count,
+                bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count,
                                                    false);
             } else {
                 /* No need to deserialize zeros because the dirty bitmap is
                  * already cleared */
             }
         } else {
-            ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
+            ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size);
            if (ret < 0) {
                 goto finish;
             }
-            bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count,
+            bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
                                                false);
         }
     }
@@ -1071,8 +1072,8 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
-    int64_t sector;
-    uint64_t sbc;
+    int64_t offset;
+    uint64_t limit;
     uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
     const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
     uint8_t *buf = NULL;
@@ -1095,20 +1096,25 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
         return NULL;
     }
 
-    dbi = bdrv_dirty_iter_new(bitmap, 0);
+    dbi = bdrv_dirty_iter_new(bitmap);
     buf = g_malloc(s->cluster_size);
-    sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
-    assert(DIV_ROUND_UP(bm_size, sbc) == tb_size);
+    limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+    assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
-    while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
-        uint64_t cluster = sector / sbc;
+    while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
+        uint64_t cluster = offset / limit;
         uint64_t end, write_size;
         int64_t off;
 
-        sector = cluster * sbc;
-        end = MIN(bm_size, sector + sbc);
-        write_size =
-            bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector);
+        /*
+         * We found the first dirty offset, but want to write out the
+         * entire cluster of the bitmap that includes that offset,
+         * including any leading zero bits.
+         */
+        offset = QEMU_ALIGN_DOWN(offset, limit);
+        end = MIN(bm_size, offset + limit);
+        write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset,
+                                                          end - offset);
         assert(write_size <= s->cluster_size);
 
         off = qcow2_alloc_clusters(bs, s->cluster_size);
@@ -1120,7 +1126,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
         }
         tb[cluster] = off;
 
-        bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector);
+        bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset);
         if (write_size < s->cluster_size) {
             memset(buf + write_size, 0, s->cluster_size - write_size);
         }
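The renamed bytes_covered_by_bitmap_cluster() computes how many guest bytes one cluster of serialized bitmap data covers: cluster_size * 8 bits, each bit covering one granularity unit. A standalone check of that arithmetic (the cluster size and granularity values below are just illustrative defaults):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Model of bytes_covered_by_bitmap_cluster(): one qcow2 cluster holds
     * cluster_size * 8 bitmap bits, and each bit covers 'granularity'
     * bytes of the guest disk. */
    static uint64_t bytes_covered_by_bitmap_cluster(uint64_t cluster_size,
                                                    uint64_t granularity)
    {
        return granularity * (cluster_size << 3);
    }

    int main(void)
    {
        /* 64 KiB clusters, 64 KiB bitmap granularity */
        uint64_t limit = bytes_covered_by_bitmap_cluster(65536, 65536);

        printf("one bitmap cluster covers %" PRIu64 " GiB\n", limit >> 30);
        return 0;
    }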
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d2518d1..0e5aec8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -446,15 +446,13 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
 {
     if (bytes && bs->encrypted) {
         BDRVQcow2State *s = bs->opaque;
-        int64_t sector = (s->crypt_physical_offset ?
+        int64_t offset = (s->crypt_physical_offset ?
                           (cluster_offset + offset_in_cluster) :
-                          (src_cluster_offset + offset_in_cluster))
-                         >> BDRV_SECTOR_BITS;
+                          (src_cluster_offset + offset_in_cluster));
         assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
         assert((bytes & ~BDRV_SECTOR_MASK) == 0);
         assert(s->crypto);
-        if (qcrypto_block_encrypt(s->crypto, sector, buffer,
-                                  bytes, NULL) < 0) {
+        if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
             return false;
         }
     }
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 88d5a3f..aa3fd6c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -3181,3 +3181,25 @@ out:
     g_free(reftable_tmp);
     return ret;
 }
+
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t i;
+
+    for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
+        uint64_t refcount;
+        int ret = qcow2_get_refcount(bs, i, &refcount);
+        if (ret < 0) {
+            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+                    i, strerror(-ret));
+            return ret;
+        }
+        if (refcount > 0) {
+            return i;
+        }
+    }
+    qcow2_signal_corruption(bs, true, -1, -1,
+                            "There are no references in the refcount table.");
+    return -EIO;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 970006f..f63d183 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1811,7 +1811,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                 if (qcrypto_block_decrypt(s->crypto,
                                           (s->crypt_physical_offset ?
                                            cluster_offset + offset_in_cluster :
-                                           offset) >> BDRV_SECTOR_BITS,
+                                           offset),
                                           cluster_data,
                                           cur_bytes,
                                           NULL) < 0) {
@@ -1946,7 +1946,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
             if (qcrypto_block_encrypt(s->crypto,
                                       (s->crypt_physical_offset ?
                                        cluster_offset + offset_in_cluster :
-                                       offset) >> BDRV_SECTOR_BITS,
+                                       offset),
                                       cluster_data,
                                       cur_bytes, NULL) < 0) {
                 ret = -EIO;
@@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
     new_l1_size = size_to_l1(s, offset);
 
     if (offset < old_length) {
+        int64_t last_cluster, old_file_size;
         if (prealloc != PREALLOC_MODE_OFF) {
             error_setg(errp,
                        "Preallocation can't be used for shrinking an image");
@@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
                              "Failed to discard unused refblocks");
             return ret;
         }
+
+        old_file_size = bdrv_getlength(bs->file->bs);
+        if (old_file_size < 0) {
+            error_setg_errno(errp, -old_file_size,
+                             "Failed to inquire current file length");
+            return old_file_size;
+        }
+        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
+        if (last_cluster < 0) {
+            error_setg_errno(errp, -last_cluster,
+                             "Failed to find the last cluster");
+            return last_cluster;
+        }
+        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
+            ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
+                                PREALLOC_MODE_OFF, NULL);
+            if (ret < 0) {
+                warn_report("Failed to truncate the tail of the image: %s",
+                            strerror(-ret));
+                ret = 0;
+            }
+        }
     } else {
         ret = qcow2_grow_l1_table(bs, new_l1_size, true);
         if (ret < 0) {
@@ -3167,7 +3190,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
         if (old_file_size < 0) {
             error_setg_errno(errp, -old_file_size,
                              "Failed to inquire current file length");
-            return ret;
+            return old_file_size;
         }
 
         nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
@@ -3196,7 +3219,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
         if (allocation_start < 0) {
             error_setg_errno(errp, -allocation_start,
                              "Failed to resize refcount structures");
-            return -allocation_start;
+            return allocation_start;
         }
 
         clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
@@ -3673,20 +3696,19 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
          */
         required = virtual_size;
     } else {
-        int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE;
-        int64_t sector_num;
+        int64_t offset;
         int pnum = 0;
 
-        for (sector_num = 0;
-             sector_num < ssize / BDRV_SECTOR_SIZE;
-             sector_num += pnum) {
-            int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num,
-                                 BDRV_REQUEST_MAX_SECTORS);
+        for (offset = 0; offset < ssize;
+             offset += pnum * BDRV_SECTOR_SIZE) {
+            int nb_sectors = MIN(ssize - offset,
+                                 BDRV_REQUEST_MAX_BYTES) / BDRV_SECTOR_SIZE;
             BlockDriverState *file;
             int64_t ret;
 
             ret = bdrv_get_block_status_above(in_bs, NULL,
-                                              sector_num, nb_sectors,
+                                              offset >> BDRV_SECTOR_BITS,
+                                              nb_sectors,
                                               &pnum, &file);
             if (ret < 0) {
                 error_setg_errno(&local_err, -ret,
@@ -3699,12 +3721,11 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
             } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
                        (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
                 /* Extend pnum to end of cluster for next iteration */
-                pnum = ROUND_UP(sector_num + pnum, cluster_sectors) -
-                       sector_num;
+                pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE,
+                                 cluster_size) - offset) >> BDRV_SECTOR_BITS;
 
                 /* Count clusters we've seen */
-                required += (sector_num % cluster_sectors + pnum) *
-                            BDRV_SECTOR_SIZE;
+                required += offset % cluster_size + pnum * BDRV_SECTOR_SIZE;
             }
         }
     }
diff --git a/block/qcow2.h b/block/qcow2.h
index 5a289a8..782a206 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                 BlockDriverAmendStatusCB *status_cb,
                                 void *cb_opaque, Error **errp);
 int qcow2_shrink_reftable(BlockDriverState *bs);
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
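The new qcow2_get_last_cluster() added above supports the shrink path in qcow2_truncate(): after unused refblocks are discarded, the image file itself is truncated just past the last referenced cluster. A standalone model of the backward refcount scan and the resulting truncation size (toy in-memory refcount table, not the on-disk format):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Model of qcow2_get_last_cluster(): walk the refcount table backwards
     * and return the last cluster that is still referenced. */
    static int64_t get_last_cluster(const uint16_t *refcount, int64_t nb_clusters)
    {
        for (int64_t i = nb_clusters - 1; i >= 0; i--) {
            if (refcount[i] > 0) {
                return i;
            }
        }
        return -1; /* corrupt: nothing referenced at all */
    }

    int main(void)
    {
        uint16_t refcount[8] = { 1, 1, 1, 2, 1, 0, 0, 0 };
        int64_t last = get_last_cluster(refcount, 8);
        int64_t cluster_size = 65536;

        if (last >= 0) {
            /* file tail beyond the last referenced cluster can be dropped */
            printf("truncate file to %" PRId64 " bytes\n",
                   (last + 1) * cluster_size);
        }
        return 0;
    }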