aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@us.ibm.com>2011-12-05 09:39:25 -0600
committerAnthony Liguori <aliguori@us.ibm.com>2011-12-05 09:39:25 -0600
commiteb5d5beaebd102599a915f6c4813d445ddc9dc84 (patch)
tree12ce2331571a30c67bde0b8f4ddb55996dd0ba65 /block
parentf6480ca3f3423be5bee8b673ee6f5cc387659def (diff)
parent922453bca6a927bb527068ae8679d587cfa45dbc (diff)
downloadqemu-eb5d5beaebd102599a915f6c4813d445ddc9dc84.zip
qemu-eb5d5beaebd102599a915f6c4813d445ddc9dc84.tar.gz
qemu-eb5d5beaebd102599a915f6c4813d445ddc9dc84.tar.bz2
Merge remote-tracking branch 'kwolf/for-anthony' into staging
Diffstat (limited to 'block')
-rw-r--r--block/cow.c46
-rw-r--r--block/qcow.c12
-rw-r--r--block/qcow2-cluster.c115
-rw-r--r--block/qcow2-refcount.c7
-rw-r--r--block/qcow2-snapshot.c330
-rw-r--r--block/qcow2.c28
-rw-r--r--block/qed-table.c6
-rw-r--r--block/qed.c15
-rw-r--r--block/sheepdog.c4
-rw-r--r--block/vdi.c6
-rw-r--r--block/vmdk.c8
-rw-r--r--block/vvfat.c4
12 files changed, 362 insertions, 219 deletions
diff --git a/block/cow.c b/block/cow.c
index 3448296..3c52735 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -132,8 +132,8 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
/* Return true if first block has been changed (ie. current version is
* in COW file). Set the number of continuous blocks for which that
* is true. */
-static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *num_same)
+static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *num_same)
{
int changed;
@@ -171,14 +171,14 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
return error;
}
-static int cow_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
{
BDRVCowState *s = bs->opaque;
int ret, n;
while (nb_sectors > 0) {
- if (cow_is_allocated(bs, sector_num, nb_sectors, &n)) {
+ if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
ret = bdrv_pread(bs->file,
s->cow_sectors_offset + sector_num * 512,
buf, n * 512);
@@ -243,12 +243,12 @@ static void cow_close(BlockDriverState *bs)
static int cow_create(const char *filename, QEMUOptionParameter *options)
{
- int fd, cow_fd;
struct cow_header_v2 cow_header;
struct stat st;
int64_t image_sectors = 0;
const char *image_filename = NULL;
int ret;
+ BlockDriverState *cow_bs;
/* Read out options */
while (options && options->name) {
@@ -260,10 +260,16 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
options++;
}
- cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
- 0644);
- if (cow_fd < 0)
- return -errno;
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&cow_bs, filename, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
memset(&cow_header, 0, sizeof(cow_header));
cow_header.magic = cpu_to_be32(COW_MAGIC);
cow_header.version = cpu_to_be32(COW_VERSION);
@@ -271,16 +277,9 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
/* Note: if no file, we put a dummy mtime */
cow_header.mtime = cpu_to_be32(0);
- fd = open(image_filename, O_RDONLY | O_BINARY);
- if (fd < 0) {
- close(cow_fd);
- goto mtime_fail;
- }
- if (fstat(fd, &st) != 0) {
- close(fd);
+ if (stat(image_filename, &st) != 0) {
goto mtime_fail;
}
- close(fd);
cow_header.mtime = cpu_to_be32(st.st_mtime);
mtime_fail:
pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
@@ -288,21 +287,20 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
}
cow_header.sectorsize = cpu_to_be32(512);
cow_header.size = cpu_to_be64(image_sectors * 512);
- ret = qemu_write_full(cow_fd, &cow_header, sizeof(cow_header));
+ ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
if (ret != sizeof(cow_header)) {
- ret = -errno;
goto exit;
}
/* resize to include at least all the bitmap */
- ret = ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
+ ret = bdrv_truncate(cow_bs,
+ sizeof(cow_header) + ((image_sectors + 7) >> 3));
if (ret) {
- ret = -errno;
goto exit;
}
exit:
- close(cow_fd);
+ bdrv_delete(cow_bs);
return ret;
}
@@ -337,7 +335,7 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_flush_to_disk = cow_co_flush,
- .bdrv_is_allocated = cow_is_allocated,
+ .bdrv_co_is_allocated = cow_co_is_allocated,
.create_options = cow_create_options,
};
diff --git a/block/qcow.c b/block/qcow.c
index 4814ed0..b16955d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -368,14 +368,16 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
-static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n;
uint64_t cluster_offset;
+ qemu_co_mutex_lock(&s->lock);
cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ qemu_co_mutex_unlock(&s->lock);
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -433,7 +435,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
return 0;
}
-static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@@ -531,7 +533,7 @@ fail:
goto done;
}
-static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@@ -844,7 +846,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
.bdrv_co_flush_to_disk = qcow_co_flush,
- .bdrv_is_allocated = qcow_is_allocated,
+ .bdrv_co_is_allocated = qcow_co_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4e049f..07a2e93 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -289,89 +289,62 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
}
}
-
-static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+ uint64_t start_sect,
+ uint64_t cluster_offset,
+ int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
- int ret, index_in_cluster, n, n1;
- uint64_t cluster_offset;
- struct iovec iov;
QEMUIOVector qiov;
+ struct iovec iov;
+ int n, ret;
- while (nb_sectors > 0) {
- n = nb_sectors;
-
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
- &cluster_offset);
- if (ret < 0) {
- return ret;
- }
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- if (!cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- iov.iov_base = buf;
- iov.iov_len = n * 512;
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
- if (ret < 0)
- return -1;
- }
- } else {
- memset(buf, 0, 512 * n);
- }
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
- return -1;
- memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
- } else {
- BLKDBG_EVENT(bs->file, BLKDBG_READ);
- ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
- if (ret != n * 512)
- return -1;
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
- &s->aes_decrypt_key);
- }
- }
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ /*
+ * If this is the last cluster and it is only partially used, we must only
+ * copy until the end of the image, or bdrv_check_request will fail for the
+ * bdrv_read/write calls below.
+ */
+ if (start_sect + n_end > bs->total_sectors) {
+ n_end = bs->total_sectors - start_sect;
}
- return 0;
-}
-
-static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
- uint64_t cluster_offset, int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- int n, ret;
n = n_end - n_start;
- if (n <= 0)
+ if (n <= 0) {
return 0;
+ }
+
+ iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
- ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
- if (ret < 0)
- return ret;
+
+ /* Call .bdrv_co_readv() directly instead of using the public block-layer
+ * interface. This avoids double I/O throttling and request tracking,
+ * which can lead to deadlock when block layer copy-on-read is enabled.
+ */
+ ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
if (s->crypt_method) {
qcow2_encrypt_sectors(s, start_sect + n_start,
- s->cluster_data,
- s->cluster_data, n, 1,
+ iov.iov_base, iov.iov_base, n, 1,
&s->aes_encrypt_key);
}
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
- s->cluster_data, n);
- if (ret < 0)
- return ret;
- return 0;
+ ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ qemu_vfree(iov.iov_base);
+ return ret;
}
@@ -620,7 +593,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
if (m->n_start) {
cow = true;
+ qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+ qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}
@@ -628,8 +603,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
if (m->nb_available & (s->cluster_sectors - 1)) {
uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
cow = true;
+ qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
m->nb_available - end, s->cluster_sectors);
+ qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9605367..2db2ede 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -700,6 +700,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l2_table = NULL;
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
+
+ /* WARNING: qcow2_snapshot_goto relies on this function not using the
+ * l1_table_offset when it is the current s->l1_table_offset! Be careful
+ * when changing this! */
if (l1_table_offset != s->l1_table_offset) {
if (l1_size2 != 0) {
l1_table = g_malloc0(align_offset(l1_size2, 512));
@@ -819,7 +823,8 @@ fail:
qcow2_cache_set_writethrough(bs, s->refcount_block_cache,
old_refcount_writethrough);
- if (l1_modified) {
+ /* Update L1 only if it isn't deleted anyway (addend = -1) */
+ if (addend >= 0 && l1_modified) {
for(i = 0; i < l1_size; i++)
cpu_to_be64s(&l1_table[i]);
if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index bdc33ba..c3112bf 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -68,6 +68,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
int i, id_str_size, name_size;
int64_t offset;
uint32_t extra_data_size;
+ int ret;
if (!s->nb_snapshots) {
s->snapshots = NULL;
@@ -77,10 +78,15 @@ int qcow2_read_snapshots(BlockDriverState *bs)
offset = s->snapshots_offset;
s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
+
for(i = 0; i < s->nb_snapshots; i++) {
+ /* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
- if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h))
+ ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
goto fail;
+ }
+
offset += sizeof(h);
sn = s->snapshots + i;
sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
@@ -94,25 +100,34 @@ int qcow2_read_snapshots(BlockDriverState *bs)
id_str_size = be16_to_cpu(h.id_str_size);
name_size = be16_to_cpu(h.name_size);
+ /* Skip extra data */
offset += extra_data_size;
+ /* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
- if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size)
+ ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
goto fail;
+ }
offset += id_str_size;
sn->id_str[id_str_size] = '\0';
+ /* Read snapshot name */
sn->name = g_malloc(name_size + 1);
- if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size)
+ ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
goto fail;
+ }
offset += name_size;
sn->name[name_size] = '\0';
}
+
s->snapshots_size = offset - s->snapshots_offset;
return 0;
- fail:
+
+fail:
qcow2_free_snapshots(bs);
- return -1;
+ return ret;
}
/* add at the end of the file a new list of snapshots */
@@ -122,9 +137,12 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
QCowSnapshot *sn;
QCowSnapshotHeader h;
int i, name_size, id_str_size, snapshots_size;
- uint64_t data64;
- uint32_t data32;
+ struct {
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+ } QEMU_PACKED header_data;
int64_t offset, snapshots_offset;
+ int ret;
/* compute the size of the snapshots */
offset = 0;
@@ -137,6 +155,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
}
snapshots_size = offset;
+ /* Allocate space for the new snapshot list */
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
bdrv_flush(bs->file);
offset = snapshots_offset;
@@ -144,6 +163,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
return offset;
}
+ /* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
memset(&h, 0, sizeof(h));
@@ -159,34 +179,55 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
- if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0)
+
+ ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
goto fail;
+ }
offset += sizeof(h);
- if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0)
+
+ ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
goto fail;
+ }
offset += id_str_size;
- if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0)
+
+ ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
goto fail;
+ }
offset += name_size;
}
- /* update the various header fields */
- data64 = cpu_to_be64(snapshots_offset);
- if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset),
- &data64, sizeof(data64)) < 0)
+ /*
+ * Update the header to point to the new snapshot table. This requires the
+ * new table and its refcounts to be stable on disk.
+ */
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
goto fail;
- data32 = cpu_to_be32(s->nb_snapshots);
- if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
- &data32, sizeof(data32)) < 0)
+ }
+
+ QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
+ offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
+
+ header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
+ header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
+
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ &header_data, sizeof(header_data));
+ if (ret < 0) {
goto fail;
+ }
/* free the old snapshot table */
qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size);
s->snapshots_offset = snapshots_offset;
s->snapshots_size = snapshots_size;
return 0;
- fail:
- return -1;
+
+fail:
+ return ret;
}
static void find_new_snapshot_id(BlockDriverState *bs,
@@ -236,72 +277,92 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
BDRVQcowState *s = bs->opaque;
- QCowSnapshot *snapshots1, sn1, *sn = &sn1;
+ QCowSnapshot *new_snapshot_list = NULL;
+ QCowSnapshot *old_snapshot_list = NULL;
+ QCowSnapshot sn1, *sn = &sn1;
int i, ret;
uint64_t *l1_table = NULL;
int64_t l1_table_offset;
memset(sn, 0, sizeof(*sn));
+ /* Generate an ID if it wasn't passed */
if (sn_info->id_str[0] == '\0') {
- /* compute a new id */
find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
}
- /* check that the ID is unique */
- if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
+ /* Check that the ID is unique */
+ if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
return -ENOENT;
+ }
+ /* Populate sn with passed data */
sn->id_str = g_strdup(sn_info->id_str);
- if (!sn->id_str)
- goto fail;
sn->name = g_strdup(sn_info->name);
- if (!sn->name)
- goto fail;
+
sn->vm_state_size = sn_info->vm_state_size;
sn->date_sec = sn_info->date_sec;
sn->date_nsec = sn_info->date_nsec;
sn->vm_clock_nsec = sn_info->vm_clock_nsec;
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
- if (ret < 0)
- goto fail;
-
- /* create the L1 table of the snapshot */
+ /* Allocate the L1 table of the snapshot and copy the current one there. */
l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
if (l1_table_offset < 0) {
+ ret = l1_table_offset;
goto fail;
}
- bdrv_flush(bs->file);
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
- if (s->l1_size != 0) {
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
- } else {
- l1_table = NULL;
- }
-
+ l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
- if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset,
- l1_table, s->l1_size * sizeof(uint64_t)) < 0)
+
+ ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
goto fail;
+ }
+
g_free(l1_table);
l1_table = NULL;
- snapshots1 = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
+ /*
+ * Increase the refcounts of all clusters and make sure everything is
+ * stable on disk before updating the snapshot table to contain a pointer
+ * to the new L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Append the new snapshot to the snapshot list */
+ new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
if (s->snapshots) {
- memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
- g_free(s->snapshots);
+ memcpy(new_snapshot_list, s->snapshots,
+ s->nb_snapshots * sizeof(QCowSnapshot));
+ old_snapshot_list = s->snapshots;
}
- s->snapshots = snapshots1;
+ s->snapshots = new_snapshot_list;
s->snapshots[s->nb_snapshots++] = *sn;
- if (qcow2_write_snapshots(bs) < 0)
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ g_free(s->snapshots);
+ s->snapshots = old_snapshot_list;
goto fail;
+ }
+
+ g_free(old_snapshot_list);
+
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -309,10 +370,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
}
#endif
return 0;
- fail:
+
+fail:
+ g_free(sn->id_str);
g_free(sn->name);
g_free(l1_table);
- return -1;
+
+ return ret;
}
/* copy the snapshot 'snapshot_name' into the current disk image */
@@ -322,38 +386,92 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
+ int ret;
+ uint64_t *sn_l1_table = NULL;
+ /* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0)
+ if (snapshot_index < 0) {
return -ENOENT;
+ }
sn = &s->snapshots[snapshot_index];
- if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
- goto fail;
-
- if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0)
+ /*
+ * Make sure that the current L1 table is big enough to contain the whole
+ * L1 table of the snapshot. If the snapshot L1 table is smaller, the
+ * current one must be padded with zeros.
+ */
+ ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
+ if (ret < 0) {
goto fail;
+ }
cur_l1_bytes = s->l1_size * sizeof(uint64_t);
sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
- if (cur_l1_bytes > sn_l1_bytes) {
- memset(s->l1_table + sn->l1_size, 0, cur_l1_bytes - sn_l1_bytes);
+ /*
+ * Copy the snapshot L1 table to the current L1 table.
+ *
+ * Before overwriting the old current L1 table on disk, make sure to
+ * increase all refcounts for the clusters referenced by the new one.
+ * Decrease the refcount referenced by the old one only when the L1
+ * table is overwritten.
+ */
+ sn_l1_table = g_malloc0(cur_l1_bytes);
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ if (ret < 0) {
+ goto fail;
}
- /* copy the snapshot l1 table to the current l1 table */
- if (bdrv_pread(bs->file, sn->l1_table_offset,
- s->l1_table, sn_l1_bytes) < 0)
+ ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
+ sn->l1_size, 1);
+ if (ret < 0) {
goto fail;
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset,
- s->l1_table, cur_l1_bytes) < 0)
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ cur_l1_bytes);
+ if (ret < 0) {
goto fail;
+ }
+
+ /*
+ * Decrease refcount of clusters of current L1 table.
+ *
+ * At this point, the in-memory s->l1_table points to the old L1 table,
+ * whereas on disk we already have the new one.
+ *
+ * qcow2_update_snapshot_refcount special cases the current L1 table to use
+ * the in-memory data instead of really using the offset to load a new one,
+ * which is why this works.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
+ s->l1_size, -1);
+
+ /*
+ * Now update the in-memory L1 table to be in sync with the on-disk one. We
+ * need to do this even if updating refcounts failed.
+ */
for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
+ s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
}
- if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
+ if (ret < 0) {
goto fail;
+ }
+
+ g_free(sn_l1_table);
+ sn_l1_table = NULL;
+
+ /*
+ * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
+ * when we decreased the refcount of the old snapshot.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
#ifdef DEBUG_ALLOC
{
@@ -362,39 +480,59 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
}
#endif
return 0;
- fail:
- return -EIO;
+
+fail:
+ g_free(sn_l1_table);
+ return ret;
}
int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
+ QCowSnapshot sn;
int snapshot_index, ret;
+ /* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0)
+ if (snapshot_index < 0) {
return -ENOENT;
- sn = &s->snapshots[snapshot_index];
+ }
+ sn = s->snapshots[snapshot_index];
+
+ /* Remove it from the snapshot list */
+ memmove(s->snapshots + snapshot_index,
+ s->snapshots + snapshot_index + 1,
+ (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
+ s->nb_snapshots--;
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ return ret;
+ }
- ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
- if (ret < 0)
+ /*
+ * The snapshot is now unused, clean up. If we fail after this point, we
+ * won't recover but just leak clusters.
+ */
+ g_free(sn.id_str);
+ g_free(sn.name);
+
+ /*
+ * Now decrease the refcounts of clusters referenced by the snapshot and
+ * free the L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
+ sn.l1_size, -1);
+ if (ret < 0) {
return ret;
+ }
+ qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t));
+
/* must update the copied flag on the current cluster offsets */
ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0)
- return ret;
- qcow2_free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
-
- g_free(sn->id_str);
- g_free(sn->name);
- memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn));
- s->nb_snapshots--;
- ret = qcow2_write_snapshots(bs);
if (ret < 0) {
- /* XXX: restore snapshot if error ? */
return ret;
}
+
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -435,32 +573,42 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
{
- int i, snapshot_index, l1_size2;
+ int i, snapshot_index;
BDRVQcowState *s = bs->opaque;
QCowSnapshot *sn;
+ uint64_t *new_l1_table;
+ int new_l1_bytes;
+ int ret;
+
+ assert(bs->read_only);
+ /* Search the snapshot */
snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
if (snapshot_index < 0) {
return -ENOENT;
}
-
sn = &s->snapshots[snapshot_index];
- s->l1_size = sn->l1_size;
- l1_size2 = s->l1_size * sizeof(uint64_t);
- if (s->l1_table != NULL) {
- g_free(s->l1_table);
- }
- s->l1_table_offset = sn->l1_table_offset;
- s->l1_table = g_malloc0(align_offset(l1_size2, 512));
+ /* Allocate and read in the snapshot's L1 table */
+ new_l1_bytes = s->l1_size * sizeof(uint64_t);
+ new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
- if (bdrv_pread(bs->file, sn->l1_table_offset,
- s->l1_table, l1_size2) != l1_size2) {
- return -1;
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ if (ret < 0) {
+ g_free(new_l1_table);
+ return ret;
}
+ /* Switch the L1 table */
+ g_free(s->l1_table);
+
+ s->l1_size = sn->l1_size;
+ s->l1_table_offset = sn->l1_table_offset;
+ s->l1_table = new_l1_table;
+
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
+
return 0;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 9e1b1eb..37cd442 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -273,8 +273,9 @@ static int qcow2_open(BlockDriverState *bs, int flags)
}
bs->backing_file[len] = '\0';
}
- if (qcow2_read_snapshots(bs) < 0) {
- ret = -EINVAL;
+
+ ret = qcow2_read_snapshots(bs);
+ if (ret < 0) {
goto fail;
}
@@ -343,16 +344,19 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
return 0;
}
-static int qcow2_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
{
+ BDRVQcowState *s = bs->opaque;
uint64_t cluster_offset;
int ret;
*pnum = nb_sectors;
- /* FIXME We can get errors here, but the bdrv_is_allocated interface can't
- * pass them on today */
+ /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
+ * can't pass them on today */
+ qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
*pnum = 0;
}
@@ -377,7 +381,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
return n1;
}
-static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
BDRVQcowState *s = bs->opaque;
@@ -512,12 +516,12 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
/* Restart all dependent requests */
if (!qemu_co_queue_empty(&m->dependent_requests)) {
qemu_co_mutex_unlock(&s->lock);
- while(qemu_co_queue_next(&m->dependent_requests));
+ qemu_co_queue_restart_all(&m->dependent_requests);
qemu_co_mutex_lock(&s->lock);
}
}
-static int qcow2_co_writev(BlockDriverState *bs,
+static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
QEMUIOVector *qiov)
@@ -1137,7 +1141,7 @@ fail:
return ret;
}
-static int qcow2_co_flush_to_os(BlockDriverState *bs)
+static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
int ret;
@@ -1159,7 +1163,7 @@ static int qcow2_co_flush_to_os(BlockDriverState *bs)
return 0;
}
-static int qcow2_co_flush_to_disk(BlockDriverState *bs)
+static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs)
{
return bdrv_co_flush(bs->file);
}
@@ -1276,7 +1280,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
.bdrv_create = qcow2_create,
- .bdrv_is_allocated = qcow2_is_allocated,
+ .bdrv_co_is_allocated = qcow2_co_is_allocated,
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/qed-table.c b/block/qed-table.c
index f31f9ff..8ee8443 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -29,7 +29,7 @@ static void qed_read_table_cb(void *opaque, int ret)
{
QEDReadTableCB *read_table_cb = opaque;
QEDTable *table = read_table_cb->table;
- int noffsets = read_table_cb->iov.iov_len / sizeof(uint64_t);
+ int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
int i;
/* Handle I/O error */
@@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
aiocb = bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
- read_table_cb->iov.iov_len / BDRV_SECTOR_SIZE,
+ qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
if (!aiocb) {
qed_read_table_cb(read_table_cb, -EIO);
@@ -160,7 +160,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
aiocb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
- write_table_cb->iov.iov_len / BDRV_SECTOR_SIZE,
+ write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
if (!aiocb) {
qed_write_table_cb(write_table_cb, -EIO);
diff --git a/block/qed.c b/block/qed.c
index 7e22e77..22e4672 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -661,6 +661,7 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
}
typedef struct {
+ Coroutine *co;
int is_allocated;
int *pnum;
} QEDIsAllocatedCB;
@@ -670,10 +671,14 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
QEDIsAllocatedCB *cb = opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
}
-static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
@@ -686,8 +691,10 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+ /* Now sleep if the callback wasn't invoked immediately */
while (cb.is_allocated == -1) {
- qemu_aio_wait();
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
@@ -1485,7 +1492,7 @@ static BlockDriver bdrv_qed = {
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_create = bdrv_qed_create,
- .bdrv_is_allocated = bdrv_qed_is_allocated,
+ .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 62f1f3a..aa9707f 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1715,7 +1715,7 @@ out:
return 1;
}
-static int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
+static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
@@ -1744,7 +1744,7 @@ static int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
-static int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
+static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
SheepdogAIOCB *acb;
diff --git a/block/vdi.c b/block/vdi.c
index 02da6b4..e1d8cff 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -472,8 +472,8 @@ static int vdi_open(BlockDriverState *bs, int flags)
return -1;
}
-static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
@@ -996,7 +996,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_close = vdi_close,
.bdrv_create = vdi_create,
.bdrv_co_flush_to_disk = vdi_co_flush,
- .bdrv_is_allocated = vdi_is_allocated,
+ .bdrv_co_is_allocated = vdi_co_is_allocated,
.bdrv_make_empty = vdi_make_empty,
.bdrv_aio_readv = vdi_aio_readv,
diff --git a/block/vmdk.c b/block/vmdk.c
index f544159..5623ac1 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -861,8 +861,8 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
return NULL;
}
-static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@@ -873,8 +873,10 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return 0;
}
+ qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
sector_num * 512, 0, &offset);
+ qemu_co_mutex_unlock(&s->lock);
/* get_cluster_offset returning 0 means success */
ret = !ret;
@@ -1596,7 +1598,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
- .bdrv_is_allocated = vmdk_is_allocated,
+ .bdrv_co_is_allocated = vmdk_co_is_allocated,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.create_options = vmdk_create_options,
diff --git a/block/vvfat.c b/block/vvfat.c
index a310ce8..eeffc4a 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2758,7 +2758,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
-static int vvfat_is_allocated(BlockDriverState *bs,
+static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int* n)
{
BDRVVVFATState* s = bs->opaque;
@@ -2855,7 +2855,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
.bdrv_close = vvfat_close,
- .bdrv_is_allocated = vvfat_is_allocated,
+ .bdrv_co_is_allocated = vvfat_co_is_allocated,
.protocol_name = "fat",
};