aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Reitz <mreitz@redhat.com>2013-09-03 10:09:52 +0200
committerKevin Wolf <kwolf@redhat.com>2013-09-12 10:12:46 +0200
commit32b6444d23d0ff618d73e5b766600cd258066169 (patch)
tree423a268eaa2281d71e0d7d1d75ade22e153510aa
parente7108feaace8e02b3a4bf010448fc2744f753381 (diff)
downloadqemu-32b6444d23d0ff618d73e5b766600cd258066169.zip
qemu-32b6444d23d0ff618d73e5b766600cd258066169.tar.gz
qemu-32b6444d23d0ff618d73e5b766600cd258066169.tar.bz2
qcow2-cluster: Expand zero clusters
Add functionality for expanding zero clusters. This is necessary for downgrading the image version to one without zero cluster support. For non-backed images, this function may also just discard zero clusters instead of truly expanding them. Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-rw-r--r--block/qcow2-cluster.c233
-rw-r--r--block/qcow2-refcount.c29
-rw-r--r--block/qcow2.h5
3 files changed, 253 insertions, 14 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b0d688e..738ff73 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1497,3 +1497,236 @@ fail:
return ret;
}
+
+/*
+ * Expands all zero clusters in a specific L1 table (or deallocates them, for
+ * non-backed non-pre-allocated zero clusters).
+ *
+ * expanded_clusters is a bitmap where every bit corresponds to one cluster in
+ * the image file; a bit gets set if the corresponding cluster has been used for
+ * zero expansion (i.e., has been filled with zeroes and is referenced from an
+ * L2 table). nb_clusters contains the total cluster count of the image file,
+ * i.e., the number of bits in expanded_clusters.
+ */
+static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
+ int l1_size, uint8_t *expanded_clusters,
+ uint64_t nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ bool is_active_l1 = (l1_table == s->l1_table);
+ uint64_t *l2_table = NULL;
+ int ret;
+ int i, j;
+
+ if (!is_active_l1) {
+ /* inactive L2 tables require a buffer to be stored in when loading
+ * them from disk */
+ l2_table = qemu_blockalign(bs, s->cluster_size);
+ }
+
+ for (i = 0; i < l1_size; i++) {
+ uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
+ bool l2_dirty = false;
+
+ if (!l2_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (is_active_l1) {
+ /* get active L2 tables from cache */
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void **)&l2_table);
+ } else {
+ /* load inactive L2 tables from disk */
+ ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
+ }
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+ int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
+ int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+ if (cluster_type == QCOW2_CLUSTER_NORMAL) {
+ cluster_index = offset >> s->cluster_bits;
+ assert((cluster_index >= 0) && (cluster_index < nb_clusters));
+ if (expanded_clusters[cluster_index / 8] &
+ (1 << (cluster_index % 8))) {
+ /* Probably a shared L2 table; this cluster was a zero
+ * cluster which has been expanded, its refcount
+ * therefore most likely requires an update. */
+ ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ goto fail;
+ }
+ /* Since we just increased the refcount, the COPIED flag may
+ * no longer be set. */
+ l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
+ l2_dirty = true;
+ }
+ continue;
+ }
+ else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
+ continue;
+ }
+
+ if (!offset) {
+ /* not preallocated */
+ if (!bs->backing_hd) {
+ /* not backed; therefore we can simply deallocate the
+ * cluster */
+ l2_table[j] = 0;
+ l2_dirty = true;
+ continue;
+ }
+
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ ret = offset;
+ goto fail;
+ }
+ }
+
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ offset, s->cluster_size);
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ goto fail;
+ }
+
+ ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+ s->cluster_sectors);
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ goto fail;
+ }
+
+ l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+ l2_dirty = true;
+
+ cluster_index = offset >> s->cluster_bits;
+ assert((cluster_index >= 0) && (cluster_index < nb_clusters));
+ expanded_clusters[cluster_index / 8] |= 1 << (cluster_index % 8);
+ }
+
+ if (is_active_l1) {
+ if (l2_dirty) {
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
+ }
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ if (ret < 0) {
+ l2_table = NULL;
+ goto fail;
+ }
+ } else {
+ if (l2_dirty) {
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT &
+ ~(QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2), l2_offset,
+ s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ }
+ }
+
+ ret = 0;
+
+fail:
+ if (l2_table) {
+ if (!is_active_l1) {
+ qemu_vfree(l2_table);
+ } else {
+ if (ret < 0) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ } else {
+ ret = qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2_table);
+ }
+ }
+ }
+ return ret;
+}
+
+/*
+ * For backed images, expands all zero clusters on the image. For non-backed
+ * images, deallocates all non-pre-allocated zero clusters (and claims the
+ * allocation for pre-allocated ones). This is important for downgrading to a
+ * qcow2 version which doesn't yet support metadata zero clusters.
+ */
+int qcow2_expand_zero_clusters(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table = NULL;
+ int cluster_to_sector_bits = s->cluster_bits - BDRV_SECTOR_BITS;
+ uint64_t nb_clusters;
+ uint8_t *expanded_clusters;
+ int ret;
+ int i, j;
+
+ nb_clusters = (bs->total_sectors + (1 << cluster_to_sector_bits) - 1)
+ >> cluster_to_sector_bits;
+ expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
+
+ ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
+ expanded_clusters, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Inactive L1 tables may point to active L2 tables - therefore it is
+ * necessary to flush the L2 table cache before trying to access the L2
+ * tables pointed to by inactive L1 entries (else we might try to expand
+ * zero clusters that have already been expanded); furthermore, it is also
+ * necessary to empty the L2 table cache, since it may contain tables which
+ * are now going to be modified directly on disk, bypassing the cache.
+ * qcow2_cache_empty() does both for us. */
+ ret = qcow2_cache_empty(bs, s->l2_table_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (i = 0; i < s->nb_snapshots; i++) {
+ int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
+ BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
+
+ l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+
+ ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
+ BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for (j = 0; j < s->snapshots[i].l1_size; j++) {
+ be64_to_cpus(&l1_table[j]);
+ }
+
+ ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
+ expanded_clusters, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+
+fail:
+ g_free(expanded_clusters);
+ g_free(l1_table);
+ return ret;
+}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index ba129de..4264148 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -601,10 +601,10 @@ fail:
* If the return value is non-negative, it is the new refcount of the cluster.
* If it is negative, it is -errno and indicates an error.
*/
-static int update_cluster_refcount(BlockDriverState *bs,
- int64_t cluster_index,
- int addend,
- enum qcow2_discard_type type)
+int qcow2_update_cluster_refcount(BlockDriverState *bs,
+ int64_t cluster_index,
+ int addend,
+ enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
int ret;
@@ -733,8 +733,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if (free_in_cluster == 0)
s->free_byte_offset = 0;
if ((offset & (s->cluster_size - 1)) != 0)
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
+ qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
} else {
offset = qcow2_alloc_clusters(bs, s->cluster_size);
if (offset < 0) {
@@ -744,8 +744,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if ((cluster_offset + s->cluster_size) == offset) {
/* we are lucky: contiguous data */
offset = s->free_byte_offset;
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
+ qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
s->free_byte_offset += size;
} else {
s->free_byte_offset = offset;
@@ -754,8 +754,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
}
/* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by update_cluster_refcount(). Refcount blocks must be
- * flushed before the caller's L2 table updates.
+ * or explicitly by qcow2_update_cluster_refcount(). Refcount blocks must
+ * be flushed before the caller's L2 table updates.
*/
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
return offset;
@@ -896,8 +896,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
break;
}
if (addend != 0) {
- refcount = update_cluster_refcount(bs, cluster_index, addend,
- QCOW2_DISCARD_SNAPSHOT);
+ refcount = qcow2_update_cluster_refcount(bs,
+ cluster_index, addend,
+ QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, cluster_index);
}
@@ -936,8 +937,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (addend != 0) {
- refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
- QCOW2_DISCARD_SNAPSHOT);
+ refcount = qcow2_update_cluster_refcount(bs, l2_offset >>
+ s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT);
} else {
refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 35c822b..48080fd 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -411,6 +411,9 @@ int qcow2_update_header(BlockDriverState *bs);
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
+int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
+ int addend, enum qcow2_discard_type type);
+
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int nb_clusters);
@@ -458,6 +461,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type);
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+int qcow2_expand_zero_clusters(BlockDriverState *bs);
+
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);