aboutsummaryrefslogtreecommitdiff
path: root/block/vmdk.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2014-08-15 14:49:50 +0100
committerPeter Maydell <peter.maydell@linaro.org>2014-08-15 14:49:50 +0100
commitf2fb1da9412ae7b4cb512cfbd86c0185f191e2f9 (patch)
tree94e077ff9f6c344af73d34bd66468b8852ce2c37 /block/vmdk.c
parentf083201667fddd51055c2ac67f67221e82298a35 (diff)
parent908bcd540f489f7adf2d804347905b0025d808d3 (diff)
downloadqemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.zip
qemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.tar.gz
qemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.tar.bz2
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block patches # gpg: Signature made Fri 15 Aug 2014 14:07:42 BST using RSA key ID C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" * remotes/kevin/tags/for-upstream: (59 commits) block: Catch !bs->drv in bdrv_check() iotests: Add test for image header overlap qcow2: Catch !*host_offset for data allocation qcow2: Return useful error code in refcount_init() mirror: Handle failure for potentially large allocations vpc: Handle failure for potentially large allocations vmdk: Handle failure for potentially large allocations vhdx: Handle failure for potentially large allocations vdi: Handle failure for potentially large allocations rbd: Handle failure for potentially large allocations raw-win32: Handle failure for potentially large allocations raw-posix: Handle failure for potentially large allocations qed: Handle failure for potentially large allocations qcow2: Handle failure for potentially large allocations qcow1: Handle failure for potentially large allocations parallels: Handle failure for potentially large allocations nfs: Handle failure for potentially large allocations iscsi: Handle failure for potentially large allocations dmg: Handle failure for potentially large allocations curl: Handle failure for potentially large allocations ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block/vmdk.c')
-rw-r--r--block/vmdk.c239
1 files changed, 152 insertions, 87 deletions
diff --git a/block/vmdk.c b/block/vmdk.c
index 0517bba..01412a8 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -106,6 +106,7 @@ typedef struct VmdkExtent {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
int64_t cluster_sectors;
+ int64_t next_cluster_sector;
char *type;
} VmdkExtent;
@@ -124,7 +125,6 @@ typedef struct BDRVVmdkState {
} BDRVVmdkState;
typedef struct VmdkMetaData {
- uint32_t offset;
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
@@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
+ int64_t length;
if (cluster_sectors > 0x200000) {
/* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
+ length = bdrv_getlength(file);
+ if (length < 0) {
+ return length;
+ }
+
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l1_entry_sectors = l2_size * cluster_sectors;
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
+ extent->next_cluster_sector =
+ ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
/* read the L1 table */
l1_size = extent->l1_size * sizeof(uint32_t);
- extent->l1_table = g_malloc(l1_size);
+ extent->l1_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_table == NULL) {
+ return -ENOMEM;
+ }
+
ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
@@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
if (extent->l1_backup_table_offset) {
- extent->l1_backup_table = g_malloc(l1_size);
+ extent->l1_backup_table = g_try_malloc(l1_size);
+ if (l1_size && extent->l1_backup_table == NULL) {
+ ret = -ENOMEM;
+ goto fail_l1;
+ }
ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
@@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_getlength(file) <
- le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+ if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -952,57 +967,97 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
+/**
+ * get_whole_cluster
+ *
+ * Copy backing file's cluster that covers @sector_num, otherwise write zero,
+ * to the cluster at @cluster_sector_num.
+ *
+ * If @skip_start_sector < @skip_end_sector, the relative range
+ * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
+ * it for call to write user data in the request.
+ */
static int get_whole_cluster(BlockDriverState *bs,
- VmdkExtent *extent,
- uint64_t cluster_offset,
- uint64_t offset,
- bool allocate)
+ VmdkExtent *extent,
+ uint64_t cluster_sector_num,
+ uint64_t sector_num,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
int ret = VMDK_OK;
- uint8_t *whole_grain = NULL;
+ int64_t cluster_bytes;
+ uint8_t *whole_grain;
+ /* For COW, align request sector_num to cluster start */
+ sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
+ cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ whole_grain = qemu_blockalign(bs, cluster_bytes);
+
+ if (!bs->backing_hd) {
+ memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
+ memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
+ cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ }
+
+ assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd) {
- whole_grain =
- qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
- if (!vmdk_is_cid_valid(bs)) {
- ret = VMDK_ERROR;
- goto exit;
- }
+ if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
- /* floor offset to cluster */
- offset -= offset % (extent->cluster_sectors * 512);
- ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
- extent->cluster_sectors);
+ /* Read backing data before skip range */
+ if (skip_start_sector > 0) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num,
+ whole_grain, skip_start_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+ skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
-
- /* Write grain only into the active image */
- ret = bdrv_write(extent->file, cluster_offset, whole_grain,
- extent->cluster_sectors);
+ }
+ /* Read backing data after skip range */
+ if (skip_end_sector < extent->cluster_sectors) {
+ if (bs->backing_hd) {
+ ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
+ if (ret < 0) {
+ ret = VMDK_ERROR;
+ goto exit;
+ }
+ }
+ ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+ whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
+ extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+
exit:
qemu_vfree(whole_grain);
return ret;
}
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
+ uint32_t offset)
{
- uint32_t offset;
- QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
- offset = cpu_to_le32(m_data->offset);
+ offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1012,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
- + (m_data->l2_index * sizeof(m_data->offset)),
+ + (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@@ -1024,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
return VMDK_OK;
}
+/**
+ * get_cluster_offset
+ *
+ * Look up cluster offset in extent file by sector number, and store in
+ * @cluster_offset.
+ *
+ * For flat extents, the start offset as parsed from the description file is
+ * returned.
+ *
+ * For sparse extents, look up in L1, L2 table. If allocate is true, return an
+ * offset for a new cluster and update L2 cache. If there is a backing file,
+ * COW is done before returning; otherwise, zeroes are written to the allocated
+ * cluster. Both COW and zero writing skips the sector range
+ * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
+ * has new data to write there.
+ *
+ * Returns: VMDK_OK if cluster exists and mapped in the image.
+ * VMDK_UNALLOC if cluster is not mapped and @allocate is false.
+ * VMDK_ERROR if failed.
+ */
static int get_cluster_offset(BlockDriverState *bs,
- VmdkExtent *extent,
- VmdkMetaData *m_data,
- uint64_t offset,
- int allocate,
- uint64_t *cluster_offset)
+ VmdkExtent *extent,
+ VmdkMetaData *m_data,
+ uint64_t offset,
+ bool allocate,
+ uint64_t *cluster_offset,
+ uint64_t skip_start_sector,
+ uint64_t skip_end_sector)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table;
bool zeroed = false;
+ int64_t ret;
+ int32_t cluster_sector;
if (m_data) {
m_data->valid = 0;
@@ -1088,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
- *cluster_offset = le32_to_cpu(l2_table[l2_index]);
+ cluster_sector = le32_to_cpu(l2_table[l2_index]);
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
- m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
- if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
- if (!*cluster_offset || zeroed) {
+ if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
- /* Avoid the L2 tables update for the images that have snapshots. */
- *cluster_offset = bdrv_getlength(extent->file);
- if (!extent->compressed) {
- bdrv_truncate(
- extent->file,
- *cluster_offset + (extent->cluster_sectors << 9)
- );
- }
-
- *cluster_offset >>= 9;
- l2_table[l2_index] = cpu_to_le32(*cluster_offset);
+ cluster_sector = extent->next_cluster_sector;
+ extent->next_cluster_sector += extent->cluster_sectors;
/* First of all we write grain itself, to avoid race condition
* that may to corrupt the image.
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- if (get_whole_cluster(
- bs, extent, *cluster_offset, offset, allocate) == -1) {
- return VMDK_ERROR;
- }
-
- if (m_data) {
- m_data->offset = *cluster_offset;
+ ret = get_whole_cluster(bs, extent,
+ cluster_sector,
+ offset >> BDRV_SECTOR_BITS,
+ skip_start_sector, skip_end_sector);
+ if (ret) {
+ return ret;
}
}
- *cluster_offset <<= 9;
+ *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
}
@@ -1168,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
- sector_num * 512, 0, &offset);
+ sector_num * 512, false, &offset,
+ 0, 0);
qemu_co_mutex_unlock(&s->lock);
switch (ret) {
@@ -1321,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs, extent, NULL,
- sector_num << 9, 0, &cluster_offset);
+ ret = get_cluster_offset(bs, extent, NULL,
+ sector_num << 9, false, &cluster_offset,
+ 0, 0);
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@@ -1404,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, !extent->compressed,
- &cluster_offset);
+ extent_begin_sector = extent->end_sector - extent->sectors;
+ extent_relative_sector_num = sector_num - extent_begin_sector;
+ index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
+ n = extent->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ !(extent->compressed || zeroed),
+ &cluster_offset,
+ index_in_cluster, index_in_cluster + n);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1418,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(
- bs,
- extent,
- &m_data,
- sector_num << 9, 1,
- &cluster_offset);
+ ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ true, &cluster_offset, 0, 0);
}
}
if (ret == VMDK_ERROR) {
return -EINVAL;
}
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
@@ -1443,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
- m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1461,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
if (m_data.valid) {
/* update L2 tables */
- if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+ if (vmdk_L2update(extent, &m_data,
+ cluster_offset >> BDRV_SECTOR_BITS)
+ != VMDK_OK) {
return -EIO;
}
}
@@ -1999,7 +2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int64_t sector_num = 0;
- int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
+ int64_t total_sectors = bdrv_nb_sectors(bs);
int ret;
uint64_t cluster_offset;
@@ -2020,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
}
ret = get_cluster_offset(bs, extent, NULL,
sector_num << BDRV_SECTOR_BITS,
- 0, &cluster_offset);
+ false, &cluster_offset, 0, 0);
if (ret == VMDK_ERROR) {
fprintf(stderr,
"ERROR: could not get cluster_offset for sector %"