From 808c4b6f30d77295292cef8ee38c462957a6b9ca Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 24 Oct 2014 15:57:30 +0200 Subject: qcow2: Allow "full" discard Normally, discarded sectors should read back as zero. However, there are cases in which a sector (or rather cluster) should be discarded as if it were never written in the first place, that is, reading it should fall through to the backing file again. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-id: 1414159063-25977-2-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/qcow2.c') diff --git a/block/qcow2.c b/block/qcow2.c index d031515..d64a4ba 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2089,7 +2089,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors, QCOW2_DISCARD_REQUEST); + nb_sectors, QCOW2_DISCARD_REQUEST, false); qemu_co_mutex_unlock(&s->lock); return ret; } -- cgit v1.1 From 491d27e2af4f6e157c4b29d43269c5cb0d191171 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 24 Oct 2014 15:57:31 +0200 Subject: qcow2: Implement bdrv_make_empty() Implement this function by making all clusters in the image file fall through to the backing file (by using the recently extended discard). 
Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-id: 1414159063-25977-3-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'block/qcow2.c') diff --git a/block/qcow2.c b/block/qcow2.c index d64a4ba..bf871d5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2230,6 +2230,32 @@ fail: return ret; } +static int qcow2_make_empty(BlockDriverState *bs) +{ + int ret = 0; + uint64_t start_sector; + int sector_step = INT_MAX / BDRV_SECTOR_SIZE; + + for (start_sector = 0; start_sector < bs->total_sectors; + start_sector += sector_step) + { + /* As this function is generally used after committing an external + * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the + * default action for this kind of discard is to pass the discard, + * which will ideally result in an actually smaller image file, as + * is probably desired. */ + ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE, + MIN(sector_step, + bs->total_sectors - start_sector), + QCOW2_DISCARD_SNAPSHOT, true); + if (ret < 0) { + break; + } + } + + return ret; +} + static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; @@ -2676,6 +2702,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, .bdrv_write_compressed = qcow2_write_compressed, + .bdrv_make_empty = qcow2_make_empty, .bdrv_snapshot_create = qcow2_snapshot_create, .bdrv_snapshot_goto = qcow2_snapshot_goto, -- cgit v1.1 From 94054183daffaa41cd77ced9301c01a01027923a Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 24 Oct 2014 15:57:32 +0200 Subject: qcow2: Optimize bdrv_make_empty() bdrv_make_empty() is currently only called if the current image represents an external snapshot that has been committed to its base image; it is therefore unlikely to have internal snapshots. 
In this case, bdrv_make_empty() can be greatly sped up by emptying the L1 and refcount table (while having the dirty flag set, which only works for compat=1.1) and creating a trivial refcount structure. If there are snapshots or for compat=0.10, fall back to the simple implementation (discard all clusters). [Applied s/clusters/cluster/ typo fix suggested by Eric Blake --Stefan] Signed-off-by: Max Reitz Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Message-id: 1414159063-25977-4-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 1 deletion(-) (limited to 'block/qcow2.c') diff --git a/block/qcow2.c b/block/qcow2.c index bf871d5..7ec7830 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2230,12 +2230,175 @@ fail: return ret; } +static int make_completely_empty(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int ret, l1_clusters; + int64_t offset; + uint64_t *new_reftable = NULL; + uint64_t rt_entry, l1_size2; + struct { + uint64_t l1_offset; + uint64_t reftable_offset; + uint32_t reftable_clusters; + } QEMU_PACKED l1_ofs_rt_ofs_cls; + + ret = qcow2_cache_empty(bs, s->l2_table_cache); + if (ret < 0) { + goto fail; + } + + ret = qcow2_cache_empty(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } + + /* Refcounts will be broken utterly */ + ret = qcow2_mark_dirty(bs); + if (ret < 0) { + goto fail; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); + l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); + + /* After this call, neither the in-memory nor the on-disk refcount + * information accurately describe the actual references */ + + ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE, + l1_clusters * s->cluster_sectors, 0); + if (ret < 0) { + goto fail_broken_refcounts; + } + memset(s->l1_table, 0, l1_size2); + + 
BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); + + /* Overwrite enough clusters at the beginning of the sectors to place + * the refcount table, a refcount block and the L1 table in; this may + * overwrite parts of the existing refcount and L1 table, which is not + * an issue because the dirty flag is set, complete data loss is in fact + * desired and partial data loss is consequently fine as well */ + ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE, + (2 + l1_clusters) * s->cluster_size / + BDRV_SECTOR_SIZE, 0); + /* This call (even if it failed overall) may have overwritten on-disk + * refcount structures; in that case, the in-memory refcount information + * will probably differ from the on-disk information which makes the BDS + * unusable */ + if (ret < 0) { + goto fail_broken_refcounts; + } + + BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); + BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); + + /* "Create" an empty reftable (one cluster) directly after the image + * header and an empty L1 table three clusters after the image header; + * the cluster between those two will be used as the first refblock */ + cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size); + cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size); + cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), + &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); + if (ret < 0) { + goto fail_broken_refcounts; + } + + s->l1_table_offset = 3 * s->cluster_size; + + new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); + if (!new_reftable) { + ret = -ENOMEM; + goto fail_broken_refcounts; + } + + s->refcount_table_offset = s->cluster_size; + s->refcount_table_size = s->cluster_size / sizeof(uint64_t); + + g_free(s->refcount_table); + s->refcount_table = new_reftable; + new_reftable = NULL; + + /* Now the in-memory refcount information again corresponds to the on-disk + * 
information (reftable is empty and no refblocks (the refblock cache is + * empty)); however, this means some clusters (e.g. the image header) are + * referenced, but not refcounted, but the normal qcow2 code assumes that + * the in-memory information is always correct */ + + BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); + + /* Enter the first refblock into the reftable */ + rt_entry = cpu_to_be64(2 * s->cluster_size); + ret = bdrv_pwrite_sync(bs->file, s->cluster_size, + &rt_entry, sizeof(rt_entry)); + if (ret < 0) { + goto fail_broken_refcounts; + } + s->refcount_table[0] = 2 * s->cluster_size; + + s->free_cluster_index = 0; + assert(3 + l1_clusters <= s->refcount_block_size); + offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); + if (offset < 0) { + ret = offset; + goto fail_broken_refcounts; + } else if (offset > 0) { + error_report("First cluster in emptied image is in use"); + abort(); + } + + /* Now finally the in-memory information corresponds to the on-disk + * structures and is correct */ + ret = qcow2_mark_clean(bs); + if (ret < 0) { + goto fail; + } + + ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size); + if (ret < 0) { + goto fail; + } + + return 0; + +fail_broken_refcounts: + /* The BDS is unusable at this point. If we wanted to make it usable, we + * would have to call qcow2_refcount_close(), qcow2_refcount_init(), + * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() + * again. However, because the functions which could have caused this error + * path to be taken are used by those functions as well, it's very likely + * that that sequence will fail as well. Therefore, just eject the BDS. 
*/ + bs->drv = NULL; + +fail: + g_free(new_reftable); + return ret; +} + static int qcow2_make_empty(BlockDriverState *bs) { - int ret = 0; + BDRVQcowState *s = bs->opaque; uint64_t start_sector; int sector_step = INT_MAX / BDRV_SECTOR_SIZE; + int l1_clusters, ret = 0; + + l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); + + if (s->qcow_version >= 3 && !s->snapshots && + 3 + l1_clusters <= s->refcount_block_size) { + /* The following function only works for qcow2 v3 images (it requires + * the dirty flag) and only as long as there are no snapshots (because + * it completely empties the image). Furthermore, the L1 table and three + * additional clusters (image header, refcount table, one refcount + * block) have to fit inside one refcount block. */ + return make_completely_empty(bs); + } + /* This fallback code simply discards every active cluster; this is slow, + * but works in all cases */ for (start_sector = 0; start_sector < bs->total_sectors; start_sector += sector_step) { -- cgit v1.1 From 77485434206bbbfbb7f6a446866f6a327b062d5e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 27 Oct 2014 11:12:50 +0100 Subject: block: Add status callback to bdrv_amend_options() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on the changed options and the image format, bdrv_amend_options() may take a significant amount of time. In these cases, a way to be informed about the operation's status is desirable. Since the operation is rather complex and may fundamentally change the image, implementing it as AIO or a coroutine does not seem feasible. On the other hand, implementing it as a block job would be significantly more difficult than a simple callback and would not add benefits other than progress report to the amending operation, because it should not actually be run as a block job at all. A callback may not be very pretty, but it's very easy to implement and perfectly fits its purpose here. 
Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Benoît Canet Reviewed-by: Kevin Wolf Message-id: 1414404776-4919-2-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'block/qcow2.c') diff --git a/block/qcow2.c b/block/qcow2.c index 7ec7830..0242793 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2613,7 +2613,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version) return 0; } -static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts) +static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; -- cgit v1.1 From 4057a2b24a5f5dae826d438217f77332c3a6842e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 27 Oct 2014 11:12:53 +0100 Subject: block/qcow2: Implement status CB for amend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only really time-consuming operation potentially performed by qcow2_amend_options() is zero cluster expansion when downgrading qcow2 images from compat=1.1 to compat=0.10, so report status of that operation and that operation only through the status CB. For this, approximate the progress as the number of L1 entries visited during the operation. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Benoît Canet Reviewed-by: Kevin Wolf Reviewed-by: Benoit Canet Message-id: 1414404776-4919-5-git-send-email-mreitz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/qcow2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'block/qcow2.c') diff --git a/block/qcow2.c b/block/qcow2.c index 0242793..d120494 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2550,7 +2550,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, * Downgrades an image's version. 
To achieve this, any incompatible features * have to be removed. */ -static int qcow2_downgrade(BlockDriverState *bs, int target_version) +static int qcow2_downgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb) { BDRVQcowState *s = bs->opaque; int current_version = s->qcow_version; @@ -2599,7 +2600,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version) /* clearing autoclear features is trivial */ s->autoclear_features = 0; - ret = qcow2_expand_zero_clusters(bs); + ret = qcow2_expand_zero_clusters(bs, status_cb); if (ret < 0) { return ret; } @@ -2692,7 +2693,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, return ret; } } else { - ret = qcow2_downgrade(bs, new_version); + ret = qcow2_downgrade(bs, new_version, status_cb); if (ret < 0) { return ret; } -- cgit v1.1