Diffstat (limited to 'block')
-rw-r--r--  block/qcow2-cache.c      18
-rw-r--r--  block/qcow2-cluster.c   279
-rw-r--r--  block/qcow2-refcount.c   28
-rw-r--r--  block/qcow2.c            12
-rw-r--r--  block/qcow2.h             3
-rw-r--r--  block/qed-l2-cache.c     22
6 files changed, 278 insertions(+), 84 deletions(-)
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 340a6f2..710d4b1 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -25,6 +25,7 @@
#include "block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
+#include "trace.h"
typedef struct Qcow2CachedTable {
void* table;
@@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return 0;
}
+ trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
if (c->depends) {
ret = qcow2_cache_flush_dependency(bs, c);
} else if (c->depends_on_flush) {
@@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
+ BDRVQcowState *s = bs->opaque;
int result = 0;
int ret;
int i;
+ trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
for (i = 0; i < c->size; i++) {
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0 && result != -ENOSPC) {
@@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
int i;
int ret;
+ trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+ offset, read_from_disk);
+
/* Check if the table is already cached */
for (i = 0; i < c->size; i++) {
if (c->entries[i].offset == offset) {
@@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
/* If not, write a table back and replace it */
i = qcow2_cache_find_entry_to_replace(c);
+ trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
if (i < 0) {
return i;
}
@@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
return ret;
}
+ trace_qcow2_cache_get_read(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
c->entries[i].offset = 0;
if (read_from_disk) {
if (c == s->l2_table_cache) {
@@ -258,6 +272,10 @@ found:
c->entries[i].cache_hits++;
c->entries[i].ref++;
*table = c->entries[i].table;
+
+ trace_qcow2_cache_get_done(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
return 0;
}
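
A note on the trace arguments: every new cache trace point passes "c == s->l2_table_cache" rather than a cache name. qcow2 keeps exactly two Qcow2Cache instances (L2 tables and refcount blocks), so comparing against s->l2_table_cache yields a boolean that tells the two apart without adding a type field to the cache. A minimal self-contained sketch of the pattern; the struct and the trace function below are stand-ins for illustration, not QEMU API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for Qcow2Cache and a generated trace_* helper. */
    struct cache {
        int size;
    };

    static void trace_cache_get(bool is_l2_cache, int index)
    {
        printf("cache_get is_l2 %d index %d\n", is_l2_cache, index);
    }

    int main(void)
    {
        struct cache l2_cache = { 16 }, refcount_cache = { 4 };
        struct cache *l2_table_cache = &l2_cache;  /* as in BDRVQcowState */

        /* Pointer identity is all the callee needs: */
        trace_cache_get(&refcount_cache == l2_table_cache, 0);  /* is_l2 0 */
        trace_cache_get(&l2_cache == l2_table_cache, 3);        /* is_l2 1 */
        return 0;
    }
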
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 07a2e93..e0fb907 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "block_int.h"
#include "block/qcow2.h"
+#include "trace.h"
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
{
@@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
old_l2_offset = s->l1_table[l1_index];
+ trace_qcow2_l2_allocate(bs, l1_index);
+
/* allocate a new l2 entry */
l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
@@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* allocate a new entry in the l2 cache */
+ trace_qcow2_l2_allocate_get_empty(bs, l1_index);
ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
if (ret < 0) {
return ret;
@@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* write the l2 table to the file */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+ trace_qcow2_l2_allocate_write_l2(bs, l1_index);
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
@@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
}
/* update the L1 entry */
+ trace_qcow2_l2_allocate_write_l1(bs, l1_index);
s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
ret = write_l1_entry(bs, l1_index);
if (ret < 0) {
@@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
}
*table = l2_table;
+ trace_qcow2_l2_allocate_done(bs, l1_index, 0);
return 0;
fail:
+ trace_qcow2_l2_allocate_done(bs, l1_index, ret);
qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
s->l1_table[l1_index] = old_l2_offset;
return ret;
@@ -581,9 +589,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
- uint64_t cluster_offset = m->cluster_offset;
+ uint64_t cluster_offset = m->alloc_offset;
bool cow = false;
+ trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+
if (m->nb_clusters == 0)
return 0;
@@ -667,64 +677,15 @@ err:
}
/*
- * alloc_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in qcow2 file.
- * If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0,
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but which require COW to be performed (this includes space that is
+ * not yet allocated and must be copied from the backing file)
*/
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, QCowL2Meta *m)
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+ uint64_t *l2_table, int l2_index)
{
- BDRVQcowState *s = bs->opaque;
- int l2_index, ret;
- uint64_t l2_offset, *l2_table;
- int64_t cluster_offset;
- unsigned int nb_clusters, i = 0;
- QCowL2Meta *old_alloc;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
-again:
- nb_clusters = size_to_clusters(s, n_end << 9);
-
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
- /* We keep all QCOW_OFLAG_COPIED clusters */
-
- if (cluster_offset & QCOW_OFLAG_COPIED) {
- nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0, 0);
-
- cluster_offset &= ~QCOW_OFLAG_COPIED;
- m->nb_clusters = 0;
-
- goto out;
- }
-
- /* for the moment, multiple compressed clusters are not managed */
-
- if (cluster_offset & QCOW_OFLAG_COMPRESSED)
- nb_clusters = 1;
-
- /* how many available clusters ? */
+ int i = 0;
+ uint64_t cluster_offset;
while (i < nb_clusters) {
i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
@@ -745,8 +706,39 @@ again:
(cluster_offset & QCOW_OFLAG_COMPRESSED))
break;
}
+
assert(i <= nb_clusters);
- nb_clusters = i;
+ return i;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t cluster_offset;
+ QCowL2Meta *old_alloc;
+
+ trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+ *host_offset, *nb_clusters);
/*
* Check if there already is an AIO write request in flight which allocates
@@ -755,8 +747,8 @@ again:
*/
QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
- uint64_t start = offset >> s->cluster_bits;
- uint64_t end = start + nb_clusters;
+ uint64_t start = guest_offset >> s->cluster_bits;
+ uint64_t end = start + *nb_clusters;
uint64_t old_start = old_alloc->offset >> s->cluster_bits;
uint64_t old_end = old_start + old_alloc->nb_clusters;
@@ -765,58 +757,185 @@ again:
} else {
if (start < old_start) {
/* Stop at the start of a running allocation */
- nb_clusters = old_start - start;
+ *nb_clusters = old_start - start;
} else {
- nb_clusters = 0;
+ *nb_clusters = 0;
}
- if (nb_clusters == 0) {
+ if (*nb_clusters == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
qemu_co_mutex_unlock(&s->lock);
qemu_co_queue_wait(&old_alloc->dependent_requests);
qemu_co_mutex_lock(&s->lock);
- goto again;
+ return -EAGAIN;
}
}
}
- if (!nb_clusters) {
+ if (!*nb_clusters) {
abort();
}
- /* save info needed for meta data update */
- m->offset = offset;
- m->n_start = n_start;
- m->nb_clusters = nb_clusters;
+ /* Allocate new clusters */
+ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+ if (*host_offset == 0) {
+ cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+ } else {
+ cluster_offset = *host_offset;
+ *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters);
+ }
+
+ if (cluster_offset < 0) {
+ return cluster_offset;
+ }
+ *host_offset = cluster_offset;
+ return 0;
+}
- QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+/*
+ * alloc_cluster_offset
+ *
+ * For a given offset on the virtual disk, find the cluster offset in the
+ * qcow2 file. If the offset is not found, allocate a new cluster.
+ *
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
+ * other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
+ *
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
+ *
+ * Return 0 on success and -errno in error cases
+ */
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, QCowL2Meta *m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index, ret, sectors;
+ uint64_t l2_offset, *l2_table;
+ unsigned int nb_clusters, keep_clusters;
+ uint64_t cluster_offset;
- /* allocate a new cluster */
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+ n_start, n_end);
- cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
- if (cluster_offset < 0) {
- ret = cluster_offset;
- goto fail;
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+ if (ret < 0) {
+ return ret;
}
-out:
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+again:
+ nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
+ s->l2_size - l2_index);
+
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+ /*
+ * Check how many clusters are already allocated and don't need COW, and how
+ * many need a new allocation.
+ */
+ if (cluster_offset & QCOW_OFLAG_COPIED) {
+ /* We keep all QCOW_OFLAG_COPIED clusters */
+ keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0, 0);
+ assert(keep_clusters <= nb_clusters);
+ nb_clusters -= keep_clusters;
+ } else {
+ /* For the moment, overwrite compressed clusters one by one */
+ if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ nb_clusters = 1;
+ } else {
+ nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+ }
+
+ keep_clusters = 0;
+ cluster_offset = 0;
+ }
+
+ cluster_offset &= ~QCOW_OFLAG_COPIED;
+
+ /* If there is something left to allocate, do that now */
+ *m = (QCowL2Meta) {
+ .cluster_offset = cluster_offset,
+ .nb_clusters = 0,
+ };
+ qemu_co_queue_init(&m->dependent_requests);
+
+ if (nb_clusters > 0) {
+ uint64_t alloc_offset;
+ uint64_t alloc_cluster_offset;
+ uint64_t keep_bytes = keep_clusters * s->cluster_size;
+
+ /* Calculate start and size of allocation */
+ alloc_offset = offset + keep_bytes;
+
+ if (keep_clusters == 0) {
+ alloc_cluster_offset = 0;
+ } else {
+ alloc_cluster_offset = cluster_offset + keep_bytes;
+ }
+
+ /* Allocate, if necessary, at a given offset in the image file */
+ ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
+ &nb_clusters, l2_table);
+ if (ret == -EAGAIN) {
+ goto again;
+ } else if (ret < 0) {
+ goto fail;
+ }
+
+ /* save info needed for meta data update */
+ if (nb_clusters > 0) {
+ int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
+ int avail_sectors = (keep_clusters + nb_clusters)
+ << (s->cluster_bits - BDRV_SECTOR_BITS);
+
+ *m = (QCowL2Meta) {
+ .cluster_offset = keep_clusters == 0 ?
+ alloc_cluster_offset : cluster_offset,
+ .alloc_offset = alloc_cluster_offset,
+ .offset = alloc_offset,
+ .n_start = keep_clusters == 0 ? n_start : 0,
+ .nb_clusters = nb_clusters,
+ .nb_available = MIN(requested_sectors, avail_sectors),
+ };
+ qemu_co_queue_init(&m->dependent_requests);
+ QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+ }
+ }
+
+ /* Some cleanup work */
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) {
goto fail_put;
}
- m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
- m->cluster_offset = cluster_offset;
+ sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
+ if (sectors > n_end) {
+ sectors = n_end;
+ }
- *num = m->nb_available - n_start;
+ assert(sectors > n_start);
+ *num = sectors - n_start;
return 0;
fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
fail_put:
- QLIST_REMOVE(m, next_in_flight);
+ if (nb_clusters > 0) {
+ QLIST_REMOVE(m, next_in_flight);
+ }
return ret;
}
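
The rewritten qcow2_alloc_cluster_offset() first measures the run of clusters it can keep (those with QCOW_OFLAG_COPIED), then allocates the remainder; when anything is kept, the new clusters are requested directly behind the kept run in the image file so the whole request stays contiguous, and -EAGAIN from do_alloc_cluster_offset() simply restarts the calculation after a conflicting in-flight allocation completes. A self-contained sketch of that arithmetic with made-up numbers (64 KiB clusters assumed):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t cluster_size = 64 * 1024;
        uint64_t offset = 0x100000;         /* guest offset of the request */
        uint64_t cluster_offset = 0x50000;  /* host offset of first kept cluster */
        unsigned keep_clusters = 2;         /* leading QCOW_OFLAG_COPIED run */

        uint64_t keep_bytes = keep_clusters * cluster_size;

        /* Guest offset where the allocating part of the request begins */
        uint64_t alloc_offset = offset + keep_bytes;

        /* With kept clusters in front, the new clusters must continue the
         * run in the image file, so a fixed host offset is passed down to
         * do_alloc_cluster_offset() (zero would mean "allocate anywhere"). */
        uint64_t alloc_cluster_offset =
            keep_clusters == 0 ? 0 : cluster_offset + keep_bytes;

        printf("allocate at guest 0x%" PRIx64 ", host 0x%" PRIx64 "\n",
               alloc_offset, alloc_cluster_offset);
        return 0;
    }
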
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2db2ede..f39928a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
return offset;
}
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t cluster_index;
+ int i, refcount, ret;
+
+ /* Check how many free clusters there are */
+ cluster_index = offset >> s->cluster_bits;
+ for(i = 0; i < nb_clusters; i++) {
+ refcount = get_refcount(bs, cluster_index++);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
+ break;
+ }
+ }
+
+ /* And then allocate them */
+ ret = update_refcount(bs, offset, i << s->cluster_bits, 1);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return i;
+}
+
/* only used to allocate compressed sectors. We try to allocate
contiguous sectors. size must be <= cluster_size */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
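
Note that qcow2_alloc_clusters_at() deliberately returns a count rather than an error when it cannot satisfy the full request: it scans refcounts upward from the given offset, stops at the first in-use cluster, and allocates only the free prefix (possibly zero clusters); the caller must cope with a shortened allocation. A toy model of the scan, with a plain array standing in for get_refcount():

    #include <stdio.h>

    static int alloc_clusters_at(const int *refcount, int cluster_index,
                                 int nb_clusters)
    {
        int i;

        /* Find the free prefix: stop at the first nonzero refcount. */
        for (i = 0; i < nb_clusters; i++) {
            if (refcount[cluster_index + i] != 0) {
                break;
            }
        }

        /* The real code now calls update_refcount() on clusters [0, i)
         * and returns how many it actually allocated. */
        return i;
    }

    int main(void)
    {
        int refcount[] = { 0, 0, 0, 1, 0 };   /* cluster 3 already in use */

        printf("got %d of 5 clusters\n", alloc_clusters_at(refcount, 0, 5));
        return 0;
    }
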
diff --git a/block/qcow2.c b/block/qcow2.c
index eb5ea48..7aece65 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -29,6 +29,7 @@
#include "block/qcow2.h"
#include "qemu-error.h"
#include "qerror.h"
+#include "trace.h"
/*
Differences with QCOW:
@@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
.nb_clusters = 0,
};
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+ remaining_sectors);
+
qemu_co_queue_init(&l2meta.dependent_requests);
qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
while (remaining_sectors != 0) {
+ trace_qcow2_writev_start_part(qemu_coroutine_self());
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n_end = index_in_cluster + remaining_sectors;
if (s->crypt_method &&
@@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
qemu_co_mutex_unlock(&s->lock);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ (cluster_offset >> 9) + index_in_cluster);
ret = bdrv_co_writev(bs->file,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
@@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
remaining_sectors -= cur_nr_sectors;
sector_num += cur_nr_sectors;
bytes_done += cur_nr_sectors * 512;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
}
ret = 0;
@@ -647,6 +655,7 @@ fail:
qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
return ret;
}
@@ -1111,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
int ret, new_l1_size;
if (offset & 511) {
+ error_report("The new size must be a multiple of 512");
return -EINVAL;
}
/* cannot proceed if image has snapshots */
if (s->nb_snapshots) {
+ error_report("Can't resize an image which has snapshots");
return -ENOTSUP;
}
/* shrinking is currently not supported */
if (offset < bs->total_sectors * 512) {
+ error_report("qcow2 doesn't support shrinking images yet");
return -ENOTSUP;
}
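
The new error_report() calls make the truncate constraints visible to the user instead of failing with a bare errno. The alignment test itself relies on 512 being a power of two, so "offset & 511" is equivalent to "offset % 512"; a trivial standalone check of that identity:

    #include <stdio.h>

    int main(void)
    {
        long long offset = 1000;   /* not a multiple of 512 */

        /* For a power of two b, (x & (b - 1)) == x % b. */
        printf("%s\n", (offset & 511) ? "rejected: not a multiple of 512"
                                      : "accepted");
        return 0;
    }
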
diff --git a/block/qcow2.h b/block/qcow2.h
index fc35838..e4ac366 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -155,6 +155,7 @@ typedef struct QCowL2Meta
{
uint64_t offset;
uint64_t cluster_offset;
+ uint64_t alloc_offset;
int n_start;
int nb_available;
int nb_clusters;
@@ -193,6 +194,8 @@ int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size);
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index 02b81a2..e9b2aae 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
return;
}
+ /* Evict an unused cache entry so we have space. If all entries are in use,
+ * we can grow the cache temporarily and try to shrink it back down later.
+ */
if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
- entry = QTAILQ_FIRST(&l2_cache->entries);
- QTAILQ_REMOVE(&l2_cache->entries, entry, node);
- l2_cache->n_entries--;
- qed_unref_l2_cache_entry(entry);
+ CachedL2Table *next;
+ QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+ if (entry->ref > 1) {
+ continue;
+ }
+
+ QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+ l2_cache->n_entries--;
+ qed_unref_l2_cache_entry(entry);
+
+ /* Stop evicting when we've shrunk back to max size */
+ if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+ break;
+ }
+ }
}
l2_cache->n_entries++;
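
The QED change turns eviction from "always drop the oldest entry" into "drop the oldest entries that nobody holds a reference to", so a table pinned by an in-flight request can no longer be evicted out from under its user. A self-contained model of the new loop, with an array standing in for the QTAILQ (ref > 1 means a caller still holds the entry, since the cache itself owns one reference):

    #include <stdio.h>

    #define MAX_CACHE_SIZE 2

    struct entry {
        int ref;    /* 1 = only the cache; >1 = also held by a caller */
        int live;
    };

    int main(void)
    {
        struct entry e[] = { { 2, 1 }, { 1, 1 }, { 1, 1 } };
        int n_entries = 3;
        int i;

        /* Evict unpinned entries until we are back under the size limit. */
        for (i = 0; i < 3 && n_entries >= MAX_CACHE_SIZE; i++) {
            if (e[i].ref > 1) {
                continue;      /* pinned by a caller: skip, don't evict */
            }
            e[i].live = 0;     /* evict: drop the cache's reference */
            n_entries--;
        }

        /* The pinned entry survives even though it was the oldest. */
        printf("entries left: %d, pinned entry live: %d\n",
               n_entries, e[0].live);
        return 0;
    }
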