diff options
Diffstat (limited to 'block')
-rw-r--r--  block/qcow2-cache.c    |  18
-rw-r--r--  block/qcow2-cluster.c  | 279
-rw-r--r--  block/qcow2-refcount.c |  28
-rw-r--r--  block/qcow2.c          |  12
-rw-r--r--  block/qcow2.h          |   3
-rw-r--r--  block/qed-l2-cache.c   |  22
6 files changed, 278 insertions, 84 deletions
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 340a6f2..710d4b1 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -25,6 +25,7 @@ #include "block_int.h" #include "qemu-common.h" #include "qcow2.h" +#include "trace.h" typedef struct Qcow2CachedTable { void* table; @@ -100,6 +101,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) return 0; } + trace_qcow2_cache_entry_flush(qemu_coroutine_self(), + c == s->l2_table_cache, i); + if (c->depends) { ret = qcow2_cache_flush_dependency(bs, c); } else if (c->depends_on_flush) { @@ -132,10 +136,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) { + BDRVQcowState *s = bs->opaque; int result = 0; int ret; int i; + trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); + for (i = 0; i < c->size; i++) { ret = qcow2_cache_entry_flush(bs, c, i); if (ret < 0 && result != -ENOSPC) { @@ -218,6 +225,9 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, int i; int ret; + trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, + offset, read_from_disk); + /* Check if the table is already cached */ for (i = 0; i < c->size; i++) { if (c->entries[i].offset == offset) { @@ -227,6 +237,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, /* If not, write a table back and replace it */ i = qcow2_cache_find_entry_to_replace(c); + trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), + c == s->l2_table_cache, i); if (i < 0) { return i; } @@ -236,6 +248,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, return ret; } + trace_qcow2_cache_get_read(qemu_coroutine_self(), + c == s->l2_table_cache, i); c->entries[i].offset = 0; if (read_from_disk) { if (c == s->l2_table_cache) { @@ -258,6 +272,10 @@ found: c->entries[i].cache_hits++; c->entries[i].ref++; *table = c->entries[i].table; + + 
trace_qcow2_cache_get_done(qemu_coroutine_self(), + c == s->l2_table_cache, i); + return 0; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 07a2e93..e0fb907 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include "block_int.h" #include "block/qcow2.h" +#include "trace.h" int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) { @@ -170,6 +171,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) old_l2_offset = s->l1_table[l1_index]; + trace_qcow2_l2_allocate(bs, l1_index); + /* allocate a new l2 entry */ l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); @@ -184,6 +187,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new entry in the l2 cache */ + trace_qcow2_l2_allocate_get_empty(bs, l1_index); ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); if (ret < 0) { return ret; @@ -216,6 +220,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* write the l2 table to the file */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); + trace_qcow2_l2_allocate_write_l2(bs, l1_index); qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { @@ -223,6 +228,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } /* update the L1 entry */ + trace_qcow2_l2_allocate_write_l1(bs, l1_index); s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; ret = write_l1_entry(bs, l1_index); if (ret < 0) { @@ -230,9 +236,11 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } *table = l2_table; + trace_qcow2_l2_allocate_done(bs, l1_index, 0); return 0; fail: + trace_qcow2_l2_allocate_done(bs, l1_index, ret); qcow2_cache_put(bs, s->l2_table_cache, (void**) table); s->l1_table[l1_index] = old_l2_offset; return ret; @@ -581,9 +589,11 
@@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; uint64_t *old_cluster, start_sect, l2_offset, *l2_table; - uint64_t cluster_offset = m->cluster_offset; + uint64_t cluster_offset = m->alloc_offset; bool cow = false; + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + if (m->nb_clusters == 0) return 0; @@ -667,64 +677,15 @@ err: } /* - * alloc_cluster_offset - * - * For a given offset of the disk image, return cluster offset in qcow2 file. - * If the offset is not found, allocate a new cluster. - * - * If the cluster was already allocated, m->nb_clusters is set to 0, - * other fields in m are meaningless. - * - * If the cluster is newly allocated, m->nb_clusters is set to the number of - * contiguous clusters that have been allocated. In this case, the other - * fields of m are valid and contain information about the first allocated - * cluster. - * - * If the request conflicts with another write request in flight, the coroutine - * is queued and will be reentered when the dependency has completed. 
- * - * Return 0 on success and -errno in error cases + * Returns the number of contiguous clusters that can be used for an allocating + * write, but require COW to be performed (this includes yet unallocated space, + * which must copy from the backing file) */ -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) +static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, + uint64_t *l2_table, int l2_index) { - BDRVQcowState *s = bs->opaque; - int l2_index, ret; - uint64_t l2_offset, *l2_table; - int64_t cluster_offset; - unsigned int nb_clusters, i = 0; - QCowL2Meta *old_alloc; - - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); - if (ret < 0) { - return ret; - } - -again: - nb_clusters = size_to_clusters(s, n_end << 9); - - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - - cluster_offset = be64_to_cpu(l2_table[l2_index]); - - /* We keep all QCOW_OFLAG_COPIED clusters */ - - if (cluster_offset & QCOW_OFLAG_COPIED) { - nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, 0); - - cluster_offset &= ~QCOW_OFLAG_COPIED; - m->nb_clusters = 0; - - goto out; - } - - /* for the moment, multiple compressed clusters are not managed */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) - nb_clusters = 1; - - /* how many available clusters ? */ + int i = 0; + uint64_t cluster_offset; while (i < nb_clusters) { i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, @@ -745,8 +706,39 @@ again: (cluster_offset & QCOW_OFLAG_COMPRESSED)) break; } + assert(i <= nb_clusters); - nb_clusters = i; + return i; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. 
+ * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table) +{ + BDRVQcowState *s = bs->opaque; + int64_t cluster_offset; + QCowL2Meta *old_alloc; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); /* * Check if there already is an AIO write request in flight which allocates @@ -755,8 +747,8 @@ again: */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - uint64_t start = offset >> s->cluster_bits; - uint64_t end = start + nb_clusters; + uint64_t start = guest_offset >> s->cluster_bits; + uint64_t end = start + *nb_clusters; uint64_t old_start = old_alloc->offset >> s->cluster_bits; uint64_t old_end = old_start + old_alloc->nb_clusters; @@ -765,58 +757,185 @@ again: } else { if (start < old_start) { /* Stop at the start of a running allocation */ - nb_clusters = old_start - start; + *nb_clusters = old_start - start; } else { - nb_clusters = 0; + *nb_clusters = 0; } - if (nb_clusters == 0) { + if (*nb_clusters == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. 
*/ qemu_co_mutex_unlock(&s->lock); qemu_co_queue_wait(&old_alloc->dependent_requests); qemu_co_mutex_lock(&s->lock); - goto again; + return -EAGAIN; } } } - if (!nb_clusters) { + if (!*nb_clusters) { abort(); } - /* save info needed for meta data update */ - m->offset = offset; - m->n_start = n_start; - m->nb_clusters = nb_clusters; + /* Allocate new clusters */ + trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); + if (*host_offset == 0) { + cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); + } else { + cluster_offset = *host_offset; + *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters); + } + + if (cluster_offset < 0) { + return cluster_offset; + } + *host_offset = cluster_offset; + return 0; +} - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); +/* + * alloc_cluster_offset + * + * For a given offset on the virtual disk, find the cluster offset in qcow2 + * file. If the offset is not found, allocate a new cluster. + * + * If the cluster was already allocated, m->nb_clusters is set to 0 and + * other fields in m are meaningless. + * + * If the cluster is newly allocated, m->nb_clusters is set to the number of + * contiguous clusters that have been allocated. In this case, the other + * fields of m are valid and contain information about the first allocated + * cluster. + * + * If the request conflicts with another write request in flight, the coroutine + * is queued and will be reentered when the dependency has completed. 
+ * + * Return 0 on success and -errno in error cases + */ +int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, + int n_start, int n_end, int *num, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index, ret, sectors; + uint64_t l2_offset, *l2_table; + unsigned int nb_clusters, keep_clusters; + uint64_t cluster_offset; - /* allocate a new cluster */ + trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, + n_start, n_end); - cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); - if (cluster_offset < 0) { - ret = cluster_offset; - goto fail; + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + if (ret < 0) { + return ret; } -out: + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ +again: + nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS), + s->l2_size - l2_index); + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* + * Check how many clusters are already allocated and don't need COW, and how + * many need a new allocation. 
+ */ + if (cluster_offset & QCOW_OFLAG_COPIED) { + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, 0); + assert(keep_clusters <= nb_clusters); + nb_clusters -= keep_clusters; + } else { + /* For the moment, overwrite compressed clusters one by one */ + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + nb_clusters = 1; + } else { + nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + } + + keep_clusters = 0; + cluster_offset = 0; + } + + cluster_offset &= ~QCOW_OFLAG_COPIED; + + /* If there is something left to allocate, do that now */ + *m = (QCowL2Meta) { + .cluster_offset = cluster_offset, + .nb_clusters = 0, + }; + qemu_co_queue_init(&m->dependent_requests); + + if (nb_clusters > 0) { + uint64_t alloc_offset; + uint64_t alloc_cluster_offset; + uint64_t keep_bytes = keep_clusters * s->cluster_size; + + /* Calculate start and size of allocation */ + alloc_offset = offset + keep_bytes; + + if (keep_clusters == 0) { + alloc_cluster_offset = 0; + } else { + alloc_cluster_offset = cluster_offset + keep_bytes; + } + + /* Allocate, if necessary at a given offset in the image file */ + ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset, + &nb_clusters, l2_table); + if (ret == -EAGAIN) { + goto again; + } else if (ret < 0) { + goto fail; + } + + /* save info needed for meta data update */ + if (nb_clusters > 0) { + int requested_sectors = n_end - keep_clusters * s->cluster_sectors; + int avail_sectors = (keep_clusters + nb_clusters) + << (s->cluster_bits - BDRV_SECTOR_BITS); + + *m = (QCowL2Meta) { + .cluster_offset = keep_clusters == 0 ? + alloc_cluster_offset : cluster_offset, + .alloc_offset = alloc_cluster_offset, + .offset = alloc_offset, + .n_start = keep_clusters == 0 ? 
n_start : 0, + .nb_clusters = nb_clusters, + .nb_available = MIN(requested_sectors, avail_sectors), + }; + qemu_co_queue_init(&m->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + } + } + + /* Some cleanup work */ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); if (ret < 0) { goto fail_put; } - m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); - m->cluster_offset = cluster_offset; + sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9); + if (sectors > n_end) { + sectors = n_end; + } - *num = m->nb_available - n_start; + assert(sectors > n_start); + *num = sectors - n_start; return 0; fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); fail_put: - QLIST_REMOVE(m, next_in_flight); + if (nb_clusters > 0) { + QLIST_REMOVE(m, next_in_flight); + } return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 2db2ede..f39928a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -582,6 +582,34 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) return offset; } +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + uint64_t cluster_index; + int i, refcount, ret; + + /* Check how many clusters there are free */ + cluster_index = offset >> s->cluster_bits; + for(i = 0; i < nb_clusters; i++) { + refcount = get_refcount(bs, cluster_index++); + + if (refcount < 0) { + return refcount; + } else if (refcount != 0) { + break; + } + } + + /* And then allocate them */ + ret = update_refcount(bs, offset, i << s->cluster_bits, 1); + if (ret < 0) { + return ret; + } + + return i; +} + /* only used to allocate compressed sectors. We try to allocate contiguous sectors. 
size must be <= cluster_size */ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) diff --git a/block/qcow2.c b/block/qcow2.c index eb5ea48..7aece65 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -29,6 +29,7 @@ #include "block/qcow2.h" #include "qemu-error.h" #include "qerror.h" +#include "trace.h" /* Differences with QCOW: @@ -569,6 +570,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, .nb_clusters = 0, }; + trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, + remaining_sectors); + qemu_co_queue_init(&l2meta.dependent_requests); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -579,6 +583,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; if (s->crypt_method && @@ -619,6 +624,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + trace_qcow2_writev_data(qemu_coroutine_self(), + (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); @@ -637,6 +644,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; bytes_done += cur_nr_sectors * 512; + trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); } ret = 0; @@ -647,6 +655,7 @@ fail: qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); return ret; } @@ -1111,16 +1120,19 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) int ret, new_l1_size; if (offset & 511) { + error_report("The new size must be a multiple of 512"); return -EINVAL; } /* cannot proceed if image has snapshots */ if (s->nb_snapshots) { + error_report("Can't resize an image 
which has snapshots"); return -ENOTSUP; } /* shrinking is currently not supported */ if (offset < bs->total_sectors * 512) { + error_report("qcow2 doesn't support shrinking images yet"); return -ENOTSUP; } diff --git a/block/qcow2.h b/block/qcow2.h index fc35838..e4ac366 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -155,6 +155,7 @@ typedef struct QCowL2Meta { uint64_t offset; uint64_t cluster_offset; + uint64_t alloc_offset; int n_start; int nb_available; int nb_clusters; @@ -193,6 +194,8 @@ int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size); +int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int nb_clusters); int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); void qcow2_free_clusters(BlockDriverState *bs, int64_t offset, int64_t size); diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c index 02b81a2..e9b2aae 100644 --- a/block/qed-l2-cache.c +++ b/block/qed-l2-cache.c @@ -161,11 +161,25 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table) return; } + /* Evict an unused cache entry so we have space. If all entries are in use + * we can grow the cache temporarily and we try to shrink back down later. + */ if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) { - entry = QTAILQ_FIRST(&l2_cache->entries); - QTAILQ_REMOVE(&l2_cache->entries, entry, node); - l2_cache->n_entries--; - qed_unref_l2_cache_entry(entry); + CachedL2Table *next; + QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) { + if (entry->ref > 1) { + continue; + } + + QTAILQ_REMOVE(&l2_cache->entries, entry, node); + l2_cache->n_entries--; + qed_unref_l2_cache_entry(entry); + + /* Stop evicting when we've shrunk back to max size */ + if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) { + break; + } + } } l2_cache->n_entries++; |