aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Wolf <kwolf@redhat.com>2018-05-15 16:19:53 +0200
committerKevin Wolf <kwolf@redhat.com>2018-05-15 16:19:53 +0200
commit1fce860ea5eba1ca00a67911fc0b8a5d80009514 (patch)
tree3dcbee27fc48c02f8008b5b098aacedeaf2e373a
parentbd21935b50d100d8da8c05cd3c2009f0f3432cb4 (diff)
parent4e7d73c5fbd97e55ffe5af02f24d1f7dbe3bbf20 (diff)
downloadqemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.zip
qemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.tar.gz
qemu-1fce860ea5eba1ca00a67911fc0b8a5d80009514.tar.bz2
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-05-15' into queue-block
- Copy-on-read block driver - The qcow2 default refcount cache size has been decreased - Various bug fixes # gpg: Signature made Tue May 15 16:18:25 2018 CEST # gpg: using RSA key F407DB0061D5CF40 # gpg: Good signature from "Max Reitz <mreitz@redhat.com>" # Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40 * mreitz/tags/pull-block-2018-05-15: (21 commits) iotests: Add test for -U/force-share conflicts qemu-img: Use only string options in img_open_opts qemu-io: Use purely string blockdev options block: Document BDRV_REQ_WRITE_UNCHANGED support qemu-img: Check post-truncation size iotests: Add test for COR across nodes iotests: Copy 197 for COR filter driver iotests: Clean up wrap image in 197 block: Support BDRV_REQ_WRITE_UNCHANGED in filters block/quorum: Support BDRV_REQ_WRITE_UNCHANGED block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes block: Add BDRV_REQ_WRITE_UNCHANGED flag block: BLK_PERM_WRITE includes ..._UNCHANGED block: Add COR filter driver iotests: Skip 181 and 201 without userfaultfd iotests: Add failure matching to common.qemu docs: Document the new default sizes of the qcow2 caches qcow2: Give the refcount cache the minimum possible size by default specs/qcow2: Clarify that compressed clusters have the COPIED bit reset Fix error message about compressed clusters with OFLAG_COPIED ... Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/blkdebug.c9
-rwxr-xr-xblock/blkreplay.c3
-rw-r--r--block/blkverify.c3
-rw-r--r--block/copy-on-read.c173
-rw-r--r--block/io.c12
-rw-r--r--block/mirror.c2
-rw-r--r--block/qcow2-refcount.c4
-rw-r--r--block/qcow2.c31
-rw-r--r--block/qcow2.h4
-rw-r--r--block/quorum.c19
-rw-r--r--block/raw-format.c9
-rw-r--r--block/throttle.c6
-rw-r--r--docs/interop/qcow2.txt8
-rw-r--r--docs/qcow2-cache.txt33
-rw-r--r--include/block/block.h9
-rw-r--r--include/block/block_int.h18
-rw-r--r--qapi/block-core.json5
-rw-r--r--qemu-img.c43
-rw-r--r--qemu-io.c4
-rwxr-xr-xtests/qemu-iotests/12247
-rw-r--r--tests/qemu-iotests/122.out33
-rw-r--r--tests/qemu-iotests/137.out2
-rwxr-xr-xtests/qemu-iotests/15317
-rw-r--r--tests/qemu-iotests/153.out16
-rwxr-xr-xtests/qemu-iotests/18113
-rwxr-xr-xtests/qemu-iotests/1971
-rwxr-xr-xtests/qemu-iotests/20113
-rwxr-xr-xtests/qemu-iotests/21497
-rw-r--r--tests/qemu-iotests/214.out35
-rwxr-xr-xtests/qemu-iotests/215120
-rw-r--r--tests/qemu-iotests/215.out26
-rwxr-xr-xtests/qemu-iotests/216115
-rw-r--r--tests/qemu-iotests/216.out28
-rw-r--r--tests/qemu-iotests/common.qemu58
-rw-r--r--tests/qemu-iotests/group3
36 files changed, 862 insertions, 159 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index d644bac..899bfb5 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-y += throttle.o
+block-obj-y += throttle.o copy-on-read.o
block-obj-y += crypto.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 053372c..526af2a 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
- bs->supported_write_flags = BDRV_REQ_FUA &
- bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
- bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
ret = -EINVAL;
/* Set alignment overrides */
diff --git a/block/blkreplay.c b/block/blkreplay.c
index fe5a9b4..b016dbe 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
ret = 0;
fail:
return ret;
diff --git a/block/blkverify.c b/block/blkverify.c
index 754cc9e..da97ee5 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
new file mode 100644
index 0000000..6a97208
--- /dev/null
+++ b/block/copy-on-read.c
@@ -0,0 +1,173 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author:
+ * Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+
+
+static int cor_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
+ errp);
+ if (!bs->file) {
+ return -EINVAL;
+ }
+
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA &
+ bs->file->bs->supported_write_flags);
+
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
+
+ return 0;
+}
+
+
+static void cor_close(BlockDriverState *bs)
+{
+}
+
+
+#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+ | BLK_PERM_WRITE \
+ | BLK_PERM_RESIZE)
+#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
+
+static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ if (c == NULL) {
+ *nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED;
+ *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
+ return;
+ }
+
+ *nperm = (perm & PERM_PASSTHROUGH) |
+ (c->perm & PERM_UNCHANGED);
+ *nshared = (shared & PERM_PASSTHROUGH) |
+ (c->shared_perm & PERM_UNCHANGED);
+}
+
+
+static int64_t cor_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+
+static int cor_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
+{
+ return bdrv_truncate(bs->file, offset, prealloc, errp);
+}
+
+
+static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov,
+ flags | BDRV_REQ_COPY_ON_READ);
+}
+
+
+static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+}
+
+
+static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int bytes,
+ BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+
+static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int bytes)
+{
+ return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+}
+
+
+static void cor_eject(BlockDriverState *bs, bool eject_flag)
+{
+ bdrv_eject(bs->file->bs, eject_flag);
+}
+
+
+static void cor_lock_medium(BlockDriverState *bs, bool locked)
+{
+ bdrv_lock_medium(bs->file->bs, locked);
+}
+
+
+static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
+ BlockDriverState *candidate)
+{
+ return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
+}
+
+
+BlockDriver bdrv_copy_on_read = {
+ .format_name = "copy-on-read",
+
+ .bdrv_open = cor_open,
+ .bdrv_close = cor_close,
+ .bdrv_child_perm = cor_child_perm,
+
+ .bdrv_getlength = cor_getlength,
+ .bdrv_truncate = cor_truncate,
+
+ .bdrv_co_preadv = cor_co_preadv,
+ .bdrv_co_pwritev = cor_co_pwritev,
+ .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = cor_co_pdiscard,
+
+ .bdrv_eject = cor_eject,
+ .bdrv_lock_medium = cor_lock_medium,
+
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
+
+ .bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter,
+
+ .has_variable_length = true,
+ .is_filter = true,
+};
+
+static void bdrv_copy_on_read_init(void)
+{
+ bdrv_register(&bdrv_copy_on_read);
+}
+
+block_init(bdrv_copy_on_read_init);
diff --git a/block/io.c b/block/io.c
index 4fad5ac..ca96b48 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1118,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+ BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
- &local_qiov, 0);
+ &local_qiov,
+ BDRV_REQ_WRITE_UNCHANGED);
}
if (ret < 0) {
@@ -1504,7 +1506,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
- assert(child->perm & BLK_PERM_WRITE);
+ if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+ assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+ } else {
+ assert(child->perm & BLK_PERM_WRITE);
+ }
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
diff --git a/block/mirror.c b/block/mirror.c
index 6aa38db..a4197bb 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1134,6 +1134,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->implicit = true;
}
mirror_top_bs->total_sectors = bs->total_sectors;
+ mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+ mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6b8b635..2dc2300 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
case QCOW2_CLUSTER_COMPRESSED:
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
if (l2_entry & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
"copied flag must never be set for compressed "
- "clusters\n", l2_entry >> s->cluster_bits);
+ "clusters\n", l2_entry & s->cluster_offset_mask);
l2_entry &= ~QCOW_OFLAG_COPIED;
res->corruptions++;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 2f36e63..6d53247 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
} else if (refcount_cache_size_set) {
*l2_cache_size = combined_cache_size - *refcount_cache_size;
} else {
- *refcount_cache_size = combined_cache_size
- / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
- *l2_cache_size = combined_cache_size - *refcount_cache_size;
+ uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+ uint64_t min_refcount_cache =
+ (uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
+
+ /* Assign as much memory as possible to the L2 cache, and
+ * use the remainder for the refcount cache */
+ if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
+ *l2_cache_size = max_l2_cache;
+ *refcount_cache_size = combined_cache_size - *l2_cache_size;
+ } else {
+ *refcount_cache_size =
+ MIN(combined_cache_size, min_refcount_cache);
+ *l2_cache_size = combined_cache_size - *refcount_cache_size;
+ }
}
} else {
- if (!l2_cache_size_set && !refcount_cache_size_set) {
+ if (!l2_cache_size_set) {
*l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
(uint64_t)DEFAULT_L2_CACHE_CLUSTERS
* s->cluster_size);
- *refcount_cache_size = *l2_cache_size
- / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
- } else if (!l2_cache_size_set) {
- *l2_cache_size = *refcount_cache_size
- * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
- } else if (!refcount_cache_size_set) {
- *refcount_cache_size = *l2_cache_size
- / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
+ }
+ if (!refcount_cache_size_set) {
+ *refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
}
}
diff --git a/block/qcow2.h b/block/qcow2.h
index adf5c39..01b5250 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -77,10 +77,6 @@
#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
-
#define DEFAULT_CLUSTER_SIZE 65536
diff --git a/block/quorum.c b/block/quorum.c
index a5051da..e448d7e 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -115,6 +115,7 @@ struct QuorumAIOCB {
/* Request metadata */
uint64_t offset;
uint64_t bytes;
+ int flags;
QEMUIOVector *qiov; /* calling IOV */
@@ -157,7 +158,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t offset,
- uint64_t bytes)
+ uint64_t bytes,
+ int flags)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@@ -168,6 +170,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
.bs = bs,
.offset = offset,
.bytes = bytes,
+ .flags = flags,
.qiov = qiov,
.votes.compare = quorum_sha256_compare,
.votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
@@ -271,9 +274,11 @@ static void quorum_rewrite_entry(void *opaque)
BDRVQuorumState *s = acb->bs->opaque;
/* Ignore any errors, it's just a correction attempt for already
- * corrupted data. */
+ * corrupted data.
+ * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
+ * area with different data from the other children. */
bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
- acb->qiov, 0);
+ acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);
/* Wake up the caller after the last rewrite */
acb->rewrite_count--;
@@ -673,7 +678,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int ret;
acb->is_read = true;
@@ -699,7 +704,7 @@ static void write_quorum_entry(void *opaque)
sacb->bs = s->children[i]->bs;
sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
- acb->qiov, 0);
+ acb->qiov, acb->flags);
if (sacb->ret == 0) {
acb->success_count++;
} else {
@@ -719,7 +724,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
int i, ret;
for (i = 0; i < s->num_children; i++) {
@@ -961,6 +966,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->next_child_index = s->num_children;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+
g_free(opened);
goto exit;
diff --git a/block/raw-format.c b/block/raw-format.c
index a378547..fe33693 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -415,10 +415,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
bs->sg = bs->file->bs->sg;
- bs->supported_write_flags = BDRV_REQ_FUA &
- bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
- bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+ bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags);
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
diff --git a/block/throttle.c b/block/throttle.c
index 95ed06a..e298827 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -81,8 +81,10 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
if (!bs->file) {
return -EINVAL;
}
- bs->supported_write_flags = bs->file->bs->supported_write_flags;
- bs->supported_zero_flags = bs->file->bs->supported_zero_flags;
+ bs->supported_write_flags = bs->file->bs->supported_write_flags |
+ BDRV_REQ_WRITE_UNCHANGED;
+ bs->supported_zero_flags = bs->file->bs->supported_zero_flags |
+ BDRV_REQ_WRITE_UNCHANGED;
return throttle_configure_tgm(bs, tgm, options, errp);
}
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index feb711f..8e1547d 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -400,10 +400,10 @@ L2 table entry:
62: 0 for standard clusters
1 for compressed clusters
- 63: 0 for a cluster that is unused or requires COW, 1 if its
- refcount is exactly one. This information is only accurate
- in L2 tables that are reachable from the active L1
- table.
+ 63: 0 for clusters that are unused, compressed or require COW.
+ 1 for standard clusters whose refcount is exactly one.
+ This information is only accurate in L2 tables
+ that are reachable from the active L1 table.
Standard Cluster Descriptor:
diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt
index 170191a..8a09a5c 100644
--- a/docs/qcow2-cache.txt
+++ b/docs/qcow2-cache.txt
@@ -116,31 +116,30 @@ There are three options available, and all of them take bytes:
"refcount-cache-size": maximum size of the refcount block cache
"cache-size": maximum size of both caches combined
-There are two things that need to be taken into account:
+There are a few things that need to be taken into account:
- Both caches must have a size that is a multiple of the cluster size
(or the cache entry size: see "Using smaller cache sizes" below).
- - If you only set one of the options above, QEMU will automatically
- adjust the others so that the L2 cache is 4 times bigger than the
- refcount cache.
+ - The default L2 cache size is 8 clusters or 1MB (whichever is more),
+ and the minimum is 2 clusters (or 2 cache entries, see below).
-This means that these options are equivalent:
+ - The default (and minimum) refcount cache size is 4 clusters.
- -drive file=hd.qcow2,l2-cache-size=2097152
- -drive file=hd.qcow2,refcount-cache-size=524288
- -drive file=hd.qcow2,cache-size=2621440
+ - If only "cache-size" is specified then QEMU will assign as much
+ memory as possible to the L2 cache before increasing the refcount
+ cache size.
-The reason for this 1/4 ratio is to ensure that both caches cover the
-same amount of disk space. Note however that this is only valid with
-the default value of refcount_bits (16). If you are using a different
-value you might want to calculate both cache sizes yourself since QEMU
-will always use the same 1/4 ratio.
+Unlike L2 tables, refcount blocks are not used during normal I/O but
+only during allocations and internal snapshots. In most cases they are
+accessed sequentially (even during random guest I/O) so increasing the
+refcount cache size won't have any measurable effect in performance
+(this can change if you are using internal snapshots, so you may want
+to think about increasing the cache size if you use them heavily).
-It's also worth mentioning that there's no strict need for both caches
-to cover the same amount of disk space. The refcount cache is used
-much less often than the L2 cache, so it's perfectly reasonable to
-keep it small.
+Before QEMU 2.12 the refcount cache had a default size of 1/4 of the
+L2 cache size. This resulted in unnecessarily large caches, so now the
+refcount cache is as small as possible unless overridden by the user.
Using smaller cache entries
diff --git a/include/block/block.h b/include/block/block.h
index cdec363..3894edd 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -54,8 +54,12 @@ typedef enum {
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
+ /* Signifies that this write request will not change the visible disk
+ * content. */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
/* Mask of valid flags */
- BDRV_REQ_MASK = 0x3f,
+ BDRV_REQ_MASK = 0x7f,
} BdrvRequestFlags;
typedef struct BlockSizes {
@@ -205,6 +209,9 @@ enum {
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e3d6219..76b589d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -656,10 +656,24 @@ struct BlockDriverState {
/* I/O Limits */
BlockLimits bl;
- /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
+ /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_WRITE_UNCHANGED).
+ * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
+ * writes will be issued as normal writes without the flag set.
+ * This is important to note for drivers that do not explicitly
+ * request a WRITE permission for their children and instead take
+ * the same permissions as their parent did (this is commonly what
+ * block filters do). Such drivers have to be aware that the
+ * parent may have taken a WRITE_UNCHANGED permission only and is
+ * issuing such requests. Drivers either must make sure that
+ * these requests do not result in plain WRITE accesses (usually
+ * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
+ * every incoming write request as-is, including potentially that
+ * flag), or they have to explicitly take the WRITE permission for
+ * their children. */
unsigned int supported_write_flags;
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
- * BDRV_REQ_MAY_UNMAP) */
+ * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
unsigned int supported_zero_flags;
/* the following member gives a name to every node on the bs graph. */
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 17ffd44..55728cb 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2510,11 +2510,12 @@
# @vxhs: Since 2.10
# @throttle: Since 2.11
# @nvme: Since 2.12
+# @copy-on-read: Since 2.13
#
# Since: 2.9
##
{ 'enum': 'BlockdevDriver',
- 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop',
+ 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read',
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
@@ -3531,6 +3532,7 @@
'blkverify': 'BlockdevOptionsBlkverify',
'bochs': 'BlockdevOptionsGenericFormat',
'cloop': 'BlockdevOptionsGenericFormat',
+ 'copy-on-read':'BlockdevOptionsGenericFormat',
'dmg': 'BlockdevOptionsGenericFormat',
'file': 'BlockdevOptionsFile',
'ftp': 'BlockdevOptionsCurlFtp',
@@ -4058,6 +4060,7 @@
'blkverify': 'BlockdevCreateNotSupported',
'bochs': 'BlockdevCreateNotSupported',
'cloop': 'BlockdevCreateNotSupported',
+ 'copy-on-read': 'BlockdevCreateNotSupported',
'dmg': 'BlockdevCreateNotSupported',
'file': 'BlockdevCreateOptionsFile',
'ftp': 'BlockdevCreateNotSupported',
diff --git a/qemu-img.c b/qemu-img.c
index ea62d2d..60e45ec 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -277,12 +277,12 @@ static BlockBackend *img_open_opts(const char *optstr,
options = qemu_opts_to_qdict(opts, NULL);
if (force_share) {
if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
- && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) {
+ && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
error_report("--force-share/-U conflicts with image options");
qobject_unref(options);
return NULL;
}
- qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
+ qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
}
blk = blk_new_open(NULL, NULL, options, flags, &local_err);
if (!blk) {
@@ -3381,7 +3381,7 @@ static int img_resize(int argc, char **argv)
Error *err = NULL;
int c, ret, relative;
const char *filename, *fmt, *size;
- int64_t n, total_size, current_size;
+ int64_t n, total_size, current_size, new_size;
bool quiet = false;
BlockBackend *blk = NULL;
PreallocMode prealloc = PREALLOC_MODE_OFF;
@@ -3557,11 +3557,42 @@ static int img_resize(int argc, char **argv)
}
ret = blk_truncate(blk, total_size, prealloc, &err);
- if (!ret) {
- qprintf(quiet, "Image resized.\n");
- } else {
+ if (ret < 0) {
error_report_err(err);
+ goto out;
+ }
+
+ new_size = blk_getlength(blk);
+ if (new_size < 0) {
+ error_report("Failed to verify truncated image length: %s",
+ strerror(-new_size));
+ ret = -1;
+ goto out;
}
+
+ /* Some block drivers implement a truncation method, but only so
+ * the user can cause qemu to refresh the image's size from disk.
+ * The idea is that the user resizes the image outside of qemu and
+ * then invokes block_resize to inform qemu about it.
+ * (This includes iscsi and file-posix for device files.)
+ * Of course, that is not the behavior someone invoking
+ * qemu-img resize would find useful, so we catch that behavior
+ * here and tell the user. */
+ if (new_size != total_size && new_size == current_size) {
+ error_report("Image was not resized; resizing may not be supported "
+ "for this image");
+ ret = -1;
+ goto out;
+ }
+
+ if (new_size != total_size) {
+ warn_report("Image should have been resized to %" PRIi64
+ " bytes, but was resized to %" PRIi64 " bytes",
+ total_size, new_size);
+ }
+
+ qprintf(quiet, "Image resized.\n");
+
out:
blk_unref(blk);
if (ret) {
diff --git a/qemu-io.c b/qemu-io.c
index 72fee0d..73c638f 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -95,12 +95,12 @@ static int openfile(char *name, int flags, bool writethrough, bool force_share,
opts = qdict_new();
}
if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE)
- && !qdict_get_bool(opts, BDRV_OPT_FORCE_SHARE)) {
+ && strcmp(qdict_get_str(opts, BDRV_OPT_FORCE_SHARE), "on")) {
error_report("-U conflicts with image options");
qobject_unref(opts);
return 1;
}
- qdict_put_bool(opts, BDRV_OPT_FORCE_SHARE, true);
+ qdict_put_str(opts, BDRV_OPT_FORCE_SHARE, "on");
}
qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err);
if (!qemuio_blk) {
diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
index 6cf4fcb..45b359c 100755
--- a/tests/qemu-iotests/122
+++ b/tests/qemu-iotests/122
@@ -130,53 +130,6 @@ $QEMU_IO -c "read -P 0 1024k 1022k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _fil
echo
-echo "=== Corrupted size field in compressed cluster descriptor ==="
-echo
-# Create an empty image and fill half of it with compressed data.
-# The L2 entries of the two compressed clusters are located at
-# 0x800000 and 0x800008, their original values are 0x4008000000a00000
-# and 0x4008000000a00802 (5 sectors for compressed data each).
-_make_test_img 8M -o cluster_size=2M
-$QEMU_IO -c "write -c -P 0x11 0 2M" -c "write -c -P 0x11 2M 2M" "$TEST_IMG" \
- 2>&1 | _filter_qemu_io | _filter_testdir
-
-# Reduce size of compressed data to 4 sectors: this corrupts the image.
-poke_file "$TEST_IMG" $((0x800000)) "\x40\x06"
-$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-
-# 'qemu-img check' however doesn't see anything wrong because it
-# doesn't try to decompress the data and the refcounts are consistent.
-# TODO: update qemu-img so this can be detected.
-_check_test_img
-
-# Increase size of compressed data to the maximum (8192 sectors).
-# This makes QEMU read more data (8192 sectors instead of 5, host
-# addresses [0xa00000, 0xdfffff]), but the decompression algorithm
-# stops once we have enough to restore the uncompressed cluster, so
-# the rest of the data is ignored.
-poke_file "$TEST_IMG" $((0x800000)) "\x7f\xfe"
-# Do it also for the second compressed cluster (L2 entry at 0x800008).
-# In this case the compressed data would span 3 host clusters
-# (host addresses: [0xa00802, 0xe00801])
-poke_file "$TEST_IMG" $((0x800008)) "\x7f\xfe"
-
-# Here the image is too small so we're asking QEMU to read beyond the
-# end of the image.
-$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-# But if we grow the image we won't be reading beyond its end anymore.
-$QEMU_IO -c "write -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-
-# The refcount data is however wrong because due to the increased size
-# of the compressed data it now reaches the following host clusters.
-# This can be repaired by qemu-img check by increasing the refcount of
-# those clusters.
-# TODO: update qemu-img to correct the compressed cluster size instead.
-_check_test_img -r all
-$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-$QEMU_IO -c "read -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
-
-echo
echo "=== Full allocation with -S 0 ==="
echo
diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
index a6b7fe0..47d8656 100644
--- a/tests/qemu-iotests/122.out
+++ b/tests/qemu-iotests/122.out
@@ -99,39 +99,6 @@ read 1024/1024 bytes at offset 1047552
read 1046528/1046528 bytes at offset 1048576
1022 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-=== Corrupted size field in compressed cluster descriptor ===
-
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608
-wrote 2097152/2097152 bytes at offset 0
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 2097152/2097152 bytes at offset 2097152
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read failed: Input/output error
-No errors were found on the image.
-read 4194304/4194304 bytes at offset 0
-4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 4194304/4194304 bytes at offset 4194304
-4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 4194304/4194304 bytes at offset 0
-4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-ERROR cluster 6 refcount=1 reference=3
-ERROR cluster 7 refcount=1 reference=2
-Repairing cluster 6 refcount=1 reference=3
-Repairing cluster 7 refcount=1 reference=2
-Repairing OFLAG_COPIED data cluster: l2_entry=8000000000c00000 refcount=3
-Repairing OFLAG_COPIED data cluster: l2_entry=8000000000e00000 refcount=2
-The following inconsistencies were found and repaired:
-
- 0 leaked clusters
- 4 corruptions
-
-Double checking the fixed image now...
-No errors were found on the image.
-read 4194304/4194304 bytes at offset 0
-4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 4194304/4194304 bytes at offset 4194304
-4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-
=== Full allocation with -S 0 ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out
index e28e1ea..96724a6 100644
--- a/tests/qemu-iotests/137.out
+++ b/tests/qemu-iotests/137.out
@@ -22,7 +22,7 @@ refcount-cache-size may not exceed cache-size
L2 cache size too big
L2 cache entry size must be a power of two between 512 and the cluster size (65536)
L2 cache entry size must be a power of two between 512 and the cluster size (65536)
-L2 cache size too big
+Refcount cache size too big
Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all')
Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all
Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all
diff --git a/tests/qemu-iotests/153 b/tests/qemu-iotests/153
index a0fd815..ec508c7 100755
--- a/tests/qemu-iotests/153
+++ b/tests/qemu-iotests/153
@@ -242,6 +242,23 @@ _run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512'
_cleanup_qemu
+echo
+echo "== Detecting -U and force-share conflicts =="
+
+echo
+echo 'No conflict:'
+$QEMU_IMG info -U --image-opts driver=null-co,force-share=on
+echo
+echo 'Conflict:'
+$QEMU_IMG info -U --image-opts driver=null-co,force-share=off
+
+echo
+echo 'No conflict:'
+$QEMU_IO -c 'open -r -U -o driver=null-co,force-share=on'
+echo
+echo 'Conflict:'
+$QEMU_IO -c 'open -r -U -o driver=null-co,force-share=off'
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out
index bb721cb..2510762 100644
--- a/tests/qemu-iotests/153.out
+++ b/tests/qemu-iotests/153.out
@@ -399,4 +399,20 @@ Is another process using the image?
Closing the other
_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512
+
+== Detecting -U and force-share conflicts ==
+
+No conflict:
+image: null-co://
+file format: null-co
+virtual size: 1.0G (1073741824 bytes)
+disk size: unavailable
+
+Conflict:
+qemu-img: --force-share/-U conflicts with image options
+
+No conflict:
+
+Conflict:
+-U conflicts with image options
*** done
diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181
index 5e767c6..e029793 100755
--- a/tests/qemu-iotests/181
+++ b/tests/qemu-iotests/181
@@ -96,6 +96,19 @@ echo
# Enable postcopy-ram capability both on source and destination
silent=yes
_send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)"
+
+qemu_error_no_exit=yes success_or_failure=yes \
+ _send_qemu_cmd $dest '' "(qemu)" "Postcopy is not supported"
+if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
+ _send_qemu_cmd $dest '' "(qemu)"
+
+ _send_qemu_cmd $src 'quit' ""
+ _send_qemu_cmd $dest 'quit' ""
+ wait=1 _cleanup_qemu
+
+ _notrun 'Postcopy is not supported'
+fi
+
_send_qemu_cmd $src 'migrate_set_speed 4k' "(qemu)"
_send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
_send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index 5e869fe..3ae4975 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -44,6 +44,7 @@ esac
_cleanup()
{
_cleanup_test_img
+ rm -f "$TEST_WRAP"
rm -f "$BLKDBG_CONF"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
diff --git a/tests/qemu-iotests/201 b/tests/qemu-iotests/201
index 11f640f..c1a1e00 100755
--- a/tests/qemu-iotests/201
+++ b/tests/qemu-iotests/201
@@ -82,6 +82,19 @@ echo
silent=yes
_send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)"
+
+qemu_error_no_exit=yes success_or_failure=yes \
+ _send_qemu_cmd $dest '' "(qemu)" "Postcopy is not supported"
+if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
+ _send_qemu_cmd $dest '' "(qemu)"
+
+ _send_qemu_cmd $src 'quit' ""
+ _send_qemu_cmd $dest 'quit' ""
+ wait=1 _cleanup_qemu
+
+ _notrun 'Postcopy is not supported'
+fi
+
_send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
_send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"
diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
new file mode 100755
index 0000000..c46ca2a
--- /dev/null
+++ b/tests/qemu-iotests/214
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# Test qcow2 image compression
+#
+# Copyright (C) 2018 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq=$(basename "$0")
+echo "QA output created by $seq"
+
+here=$PWD
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# Repairing the corrupted image requires qemu-img check to store a
+# refcount up to 3, which requires at least two refcount bits.
+_unsupported_imgopts 'refcount_bits=1[^0-9]'
+
+
+echo
+echo "=== Corrupted size field in compressed cluster descriptor ==="
+echo
+# Create an empty image and fill half of it with compressed data.
+# The L2 entries of the two compressed clusters are located at
+# 0x800000 and 0x800008, their original values are 0x4008000000a00000
+# and 0x4008000000a00802 (5 sectors for compressed data each).
+_make_test_img 8M -o cluster_size=2M
+$QEMU_IO -c "write -c -P 0x11 0 2M" -c "write -c -P 0x11 2M 2M" "$TEST_IMG" \
+ 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Reduce size of compressed data to 4 sectors: this corrupts the image.
+poke_file "$TEST_IMG" $((0x800000)) "\x40\x06"
+$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# 'qemu-img check' however doesn't see anything wrong because it
+# doesn't try to decompress the data and the refcounts are consistent.
+# TODO: update qemu-img so this can be detected.
+_check_test_img
+
+# Increase size of compressed data to the maximum (8192 sectors).
+# This makes QEMU read more data (8192 sectors instead of 5, host
+# addresses [0xa00000, 0xdfffff]), but the decompression algorithm
+# stops once we have enough to restore the uncompressed cluster, so
+# the rest of the data is ignored.
+poke_file "$TEST_IMG" $((0x800000)) "\x7f\xfe"
+# Do it also for the second compressed cluster (L2 entry at 0x800008).
+# In this case the compressed data would span 3 host clusters
+# (host addresses: [0xa00802, 0xe00801])
+poke_file "$TEST_IMG" $((0x800008)) "\x7f\xfe"
+
+# Here the image is too small so we're asking QEMU to read beyond the
+# end of the image.
+$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+# But if we grow the image we won't be reading beyond its end anymore.
+$QEMU_IO -c "write -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# The refcount data is however wrong because due to the increased size
+# of the compressed data it now reaches the following host clusters.
+# This can be repaired by qemu-img check by increasing the refcount of
+# those clusters.
+# TODO: update qemu-img to correct the compressed cluster size instead.
+_check_test_img -r all
+$QEMU_IO -c "read -P 0x11 0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/214.out b/tests/qemu-iotests/214.out
new file mode 100644
index 0000000..0fcd8dc
--- /dev/null
+++ b/tests/qemu-iotests/214.out
@@ -0,0 +1,35 @@
+QA output created by 214
+
+=== Corrupted size field in compressed cluster descriptor ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608
+wrote 2097152/2097152 bytes at offset 0
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Input/output error
+No errors were found on the image.
+read 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 4194304/4194304 bytes at offset 4194304
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+ERROR cluster 6 refcount=1 reference=3
+ERROR cluster 7 refcount=1 reference=2
+Repairing cluster 6 refcount=1 reference=3
+Repairing cluster 7 refcount=1 reference=2
+Repairing OFLAG_COPIED data cluster: l2_entry=8000000000c00000 refcount=3
+Repairing OFLAG_COPIED data cluster: l2_entry=8000000000e00000 refcount=2
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 4 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+read 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4194304/4194304 bytes at offset 4194304
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/215 b/tests/qemu-iotests/215
new file mode 100755
index 0000000..2e616ed
--- /dev/null
+++ b/tests/qemu-iotests/215
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Test case for copy-on-read into qcow2, using the COR filter driver
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+TEST_WRAP="$TEST_DIR/t.wrap.qcow2"
+BLKDBG_CONF="$TEST_DIR/blkdebug.conf"
+
+# Sanity check: our use of blkdebug fails if $TEST_DIR contains spaces
+# or other problems
+case "$TEST_DIR" in
+ *[^-_a-zA-Z0-9/]*)
+ _notrun "Suspicious TEST_DIR='$TEST_DIR', cowardly refusing to run" ;;
+esac
+
+_cleanup()
+{
+ _cleanup_test_img
+ rm -f "$TEST_WRAP"
+ rm -f "$BLKDBG_CONF"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# Test is supported for any backing file; but we force qcow2 for our wrapper.
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+# LUKS support may be possible, but it complicates things.
+_unsupported_fmt luks
+
+echo
+echo '=== Copy-on-read ==='
+echo
+
+# Prep the images
+# VPC rounds image sizes to a specific geometry, force a specific size.
+if [ "$IMGFMT" = "vpc" ]; then
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
+fi
+_make_test_img 4G
+$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
+IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
+ _make_test_img -F "$IMGFMT" -b "$TEST_IMG" | _filter_img_create
+$QEMU_IO -f qcow2 -c "write -z -u 1M 64k" "$TEST_WRAP" | _filter_qemu_io
+
+# Ensure that a read of two clusters, but where one is already allocated,
+# does not re-write the allocated cluster
+cat > "$BLKDBG_CONF" <<EOF
+[inject-error]
+event = "cor_write"
+sector = "2048"
+EOF
+$QEMU_IO -c "open \
+ -o driver=copy-on-read,file.driver=blkdebug,file.config=$BLKDBG_CONF,file.image.driver=qcow2 $TEST_WRAP" \
+ -c "read -P 0 1M 128k" | _filter_qemu_io
+
+# Read the areas we want copied. A zero-length read should still be a
+# no-op. The next read is under 2G, but aligned so that rounding to
+# clusters copies more than 2G of zeroes. The final read will pick up
+# the non-zero data in the same cluster. Since a 2G read may exhaust
+# memory on some machines (particularly 32-bit), we skip the test if
+# that fails due to memory pressure.
+$QEMU_IO \
+ -c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+ -c "read 0 0" \
+ | _filter_qemu_io
+output=$($QEMU_IO \
+ -c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+ -c "read -P 0 1k $((2*1024*1024*1024 - 512))" \
+ 2>&1 | _filter_qemu_io)
+case $output in
+ *allocate*)
+ _notrun "Insufficent memory to run test" ;;
+ *) printf '%s\n' "$output" ;;
+esac
+$QEMU_IO \
+ -c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+ -c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \
+ | _filter_qemu_io
+
+# Copy-on-read is incompatible with read-only
+$QEMU_IO \
+ -c "open -r -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+ 2>&1 | _filter_testdir
+
+# Break the backing chain, and show that images are identical, and that
+# we properly copied over explicit zeros.
+$QEMU_IMG rebase -u -b "" -f qcow2 "$TEST_WRAP"
+$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
+_check_test_img
+$QEMU_IMG compare -f $IMGFMT -F qcow2 "$TEST_IMG" "$TEST_WRAP"
+
+# success, all done
+echo '*** done'
+status=0
diff --git a/tests/qemu-iotests/215.out b/tests/qemu-iotests/215.out
new file mode 100644
index 0000000..70b0f5f
--- /dev/null
+++ b/tests/qemu-iotests/215.out
@@ -0,0 +1,26 @@
+QA output created by 215
+
+=== Copy-on-read ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296
+wrote 1024/1024 bytes at offset 3221225472
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=4294967296 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
+wrote 65536/65536 bytes at offset 1048576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 1048576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 0/0 bytes at offset 0
+0 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2147483136/2147483136 bytes at offset 1024
+2 GiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3221226496
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+can't open device TEST_DIR/t.wrap.qcow2: Block node is read-only
+2 GiB (0x80010000) bytes allocated at offset 0 bytes (0x0)
+1023.938 MiB (0x3fff0000) bytes not allocated at offset 2 GiB (0x80010000)
+64 KiB (0x10000) bytes allocated at offset 3 GiB (0xc0000000)
+1023.938 MiB (0x3fff0000) bytes not allocated at offset 3 GiB (0xc0010000)
+No errors were found on the image.
+Images are identical.
+*** done
diff --git a/tests/qemu-iotests/216 b/tests/qemu-iotests/216
new file mode 100755
index 0000000..ca9b47a
--- /dev/null
+++ b/tests/qemu-iotests/216
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+#
+# Copy-on-read tests using a COR filter node
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Max Reitz <mreitz@redhat.com>
+
+import iotests
+from iotests import log, qemu_img_pipe, qemu_io, filter_qemu_io
+
+# Need backing file support
+iotests.verify_image_format(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'])
+iotests.verify_platform(['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# The old copy-on-read mechanism without a filter node cannot request
+# WRITE_UNCHANGED permissions for its child. Therefore it just tries
+# to sneak its write by the usual permission system and holds its
+# fingers crossed. However, that sneaking does not work so well when
+# there is a filter node in the way: That will receive the write
+# request and re-issue a new one to its child, which this time is a
+# proper write request that will make the permission system cough --
+# unless there is someone at the top (like a guest device) that has
+# requested write permissions.
+#
+# A COR filter node, however, can request the proper permissions for
+# its child and therefore is not hit by this issue.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+ log('--- Setting up images ---')
+ log('')
+
+ qemu_img_pipe('create', '-f', iotests.imgfmt, base_img_path, '64M')
+
+ log(filter_qemu_io(qemu_io(base_img_path, '-c', 'write -P 1 0M 1M')))
+
+ qemu_img_pipe('create', '-f', iotests.imgfmt, '-b', base_img_path,
+ top_img_path)
+
+ log(filter_qemu_io(qemu_io(top_img_path, '-c', 'write -P 2 1M 1M')))
+
+ log('')
+ log('--- Doing COR ---')
+ log('')
+
+ # Compare with e.g. the following:
+ # vm.add_drive_raw('if=none,node-name=node0,copy-on-read=on,driver=raw,' \
+ # 'file.driver=%s,file.file.filename=%s' %
+ # (iotests.imgfmt, top_img_path))
+ # (Remove the blockdev-add instead.)
+ # ((Not tested here because it hits an assertion in the permission
+ # system.))
+
+ vm.launch()
+
+ log(vm.qmp('blockdev-add',
+ node_name='node0',
+ driver='copy-on-read',
+ file={
+ 'driver': 'raw',
+ 'file': {
+ 'driver': 'copy-on-read',
+ 'file': {
+ 'driver': 'raw',
+ 'file': {
+ 'driver': iotests.imgfmt,
+ 'file': {
+ 'driver': 'file',
+ 'filename': top_img_path
+ },
+ 'backing': {
+ 'driver': iotests.imgfmt,
+ 'file': {
+ 'driver': 'file',
+ 'filename': base_img_path
+ }
+ }
+ }
+ }
+ }
+ }))
+
+ # Trigger COR
+ log(vm.qmp('human-monitor-command',
+ command_line='qemu-io node0 "read 0 64M"'))
+
+ vm.shutdown()
+
+ log('')
+ log('--- Checking COR result ---')
+ log('')
+
+ log(filter_qemu_io(qemu_io(base_img_path, '-c', 'discard 0 64M')))
+ log(filter_qemu_io(qemu_io(top_img_path, '-c', 'read -P 1 0M 1M')))
+ log(filter_qemu_io(qemu_io(top_img_path, '-c', 'read -P 2 1M 1M')))
diff --git a/tests/qemu-iotests/216.out b/tests/qemu-iotests/216.out
new file mode 100644
index 0000000..d3fc590
--- /dev/null
+++ b/tests/qemu-iotests/216.out
@@ -0,0 +1,28 @@
+
+=== Copy-on-read across nodes ===
+
+--- Setting up images ---
+
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+
+--- Doing COR ---
+
+{u'return': {}}
+{u'return': u''}
+
+--- Checking COR result ---
+
+discard 67108864/67108864 bytes at offset 0
+64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 85f66b8..f285484 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -52,11 +52,29 @@ _in_fd=4
# response is not echoed out.
# If $mismatch_only is set, only non-matching responses will
# be echoed.
+#
+# If $success_or_failure is set, the meaning of the arguments is
+# changed as follows:
+# $2: A string to search for in the response; if found, this indicates
+# success and ${QEMU_STATUS[$1]} is set to 0.
+# $3: A string to search for in the response; if found, this indicates
+# failure and the test is either aborted (if $qemu_error_no_exit
+# is not set) or ${QEMU_STATUS[$1]} is set to -1 (otherwise).
function _timed_wait_for()
{
local h=${1}
shift
+ if [ -z "${success_or_failure}" ]; then
+ success_match=${*}
+ failure_match=
+ else
+ success_match=${1}
+ failure_match=${2}
+ fi
+
+ timeout=yes
+
QEMU_STATUS[$h]=0
while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
do
@@ -64,10 +82,18 @@ function _timed_wait_for()
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
- grep -q "${*}" < <(echo "${resp}")
+ if [ -n "${failure_match}" ]; then
+ grep -q "${failure_match}" < <(echo "${resp}")
+ if [ $? -eq 0 ]; then
+ timeout=
+ break
+ fi
+ fi
+ grep -q "${success_match}" < <(echo "${resp}")
if [ $? -eq 0 ]; then
return
- elif [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
+ fi
+ if [ -z "${silent}" ] && [ -n "${mismatch_only}" ]; then
echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
fi
@@ -75,8 +101,12 @@ function _timed_wait_for()
done
QEMU_STATUS[$h]=-1
if [ -z "${qemu_error_no_exit}" ]; then
- echo "Timeout waiting for ${*} on handle ${h}"
- exit 1 # Timeout means the test failed
+ if [ -n "${timeout}" ]; then
+ echo "Timeout waiting for ${success_match} on handle ${h}"
+ else
+ echo "Wrong response matching ${failure_match} on handle ${h}"
+ fi
+ exit 1 # Timeout or wrong match mean the test failed
fi
}
@@ -96,6 +126,11 @@ function _timed_wait_for()
# If $qemu_error_no_exit is set, then even if the expected response
# is not seen, we will not exit. $QEMU_STATUS[$1] will be set it -1 in
# that case.
+#
+# If $success_or_failure is set, then the last two strings are the
+# strings the response will be scanned for. The first of the two
+# indicates success, the latter indicates failure. Failure is handled
+# like a timeout.
function _send_qemu_cmd()
{
local h=${1}
@@ -109,14 +144,23 @@ function _send_qemu_cmd()
use_error="no"
fi
# This array element extraction is done to accommodate pathnames with spaces
- cmd=${@: 1:${#@}-1}
- shift $(($# - 1))
+ if [ -z "${success_or_failure}" ]; then
+ cmd=${@: 1:${#@}-1}
+ shift $(($# - 1))
+ else
+ cmd=${@: 1:${#@}-2}
+ shift $(($# - 2))
+ fi
while [ ${count} -gt 0 ]
do
echo "${cmd}" >&${QEMU_IN[${h}]}
if [ -n "${1}" ]; then
- qemu_error_no_exit=${use_error} _timed_wait_for ${h} "${1}"
+ if [ -z "${success_or_failure}" ]; then
+ qemu_error_no_exit=${use_error} _timed_wait_for ${h} "${1}"
+ else
+ qemu_error_no_exit=${use_error} _timed_wait_for ${h} "${1}" "${2}"
+ fi
if [ ${QEMU_STATUS[$h]} -eq 0 ]; then
return
fi
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 5daef24..cc8cd8c 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -212,4 +212,7 @@
211 rw auto quick
212 rw auto quick
213 rw auto quick
+214 rw auto
+215 rw auto quick
+216 rw auto quick
218 rw auto quick