diff options
Diffstat (limited to 'block/io.c')
-rw-r--r-- | block/io.c | 134 |
1 files changed, 98 insertions, 36 deletions
@@ -38,10 +38,14 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "system/replay.h" +#include "qemu/units.h" /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +/* Maximum read size for checking if data reads as zero, in bytes */ +#define MAX_ZERO_CHECK_BUFFER (128 * KiB) + static void coroutine_fn GRAPH_RDLOCK bdrv_parent_cb_resize(BlockDriverState *bs); @@ -357,7 +361,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, GLOBAL_STATE_CODE(); /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + if (bs->quiesce_counter++ == 0) { GRAPH_RDLOCK_GUARD_MAINLOOP(); bdrv_parent_drained_begin(bs, parent); if (bs->drv && bs->drv->bdrv_drain_begin) { @@ -397,8 +401,6 @@ bdrv_drained_begin(BlockDriverState *bs) */ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) { - int old_quiesce_counter; - IO_OR_GS_CODE(); if (qemu_in_coroutine()) { @@ -409,11 +411,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) /* At this point, we should be always running in the main loop. */ GLOBAL_STATE_CODE(); assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); /* Re-enable things in child-to-parent order */ - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { + if (--bs->quiesce_counter == 0) { GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bs->drv && bs->drv->bdrv_drain_end) { bs->drv->bdrv_drain_end(bs); @@ -2364,10 +2364,8 @@ int bdrv_flush_all(void) * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'want_zero' is true, the caller is querying for mapping - * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and - * _ZERO where possible; otherwise, the result favors larger 'pnum', - * with a focus on accurate BDRV_BLOCK_ALLOCATED. + * 'mode' serves as a hint as to which results are favored; see the + * BDRV_WANT_* macros for details. * * If 'offset' is beyond the end of the disk image the return value is * BDRV_BLOCK_EOF and 'pnum' is set to 0. @@ -2387,7 +2385,7 @@ int bdrv_flush_all(void) * set to the host mapping and BDS corresponding to the guest offset. */ static int coroutine_fn GRAPH_RDLOCK -bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, +bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -2476,7 +2474,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, local_file = bs; local_map = aligned_offset; } else { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset, aligned_bytes, pnum, &local_map, &local_file); @@ -2488,10 +2486,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, * the cache requires an RCU update, so double check here to avoid * such an update if possible. * - * Check want_zero, because we only want to update the cache when we + * Check mode, because we only want to update the cache when we * have accurate information about what is zero and what is data. */ - if (want_zero && + if (mode == BDRV_WANT_PRECISE && ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && QLIST_EMPTY(&bs->children)) { @@ -2548,7 +2546,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); - ret = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, pnum, &local_map, &local_file); goto out; } @@ -2560,7 +2558,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (!cow_bs) { ret |= BDRV_BLOCK_ZERO; - } else if (want_zero) { + } else if (mode == BDRV_WANT_PRECISE) { int64_t size2 = bdrv_co_getlength(cow_bs); if (size2 >= 0 && offset >= size2) { @@ -2569,14 +2567,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, } } - if (want_zero && ret & BDRV_BLOCK_RECURSE && + if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE && local_file && local_file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { int64_t file_pnum; int ret2; - ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret2 = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, &file_pnum, NULL, NULL); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it @@ -2627,7 +2625,7 @@ int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -2654,7 +2652,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, return 0; } - ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { @@ -2671,7 +2669,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; p = bdrv_filter_or_cow_bs(p)) { - ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0) { @@ -2734,7 +2732,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState **file) { IO_CODE(); - return bdrv_co_common_block_status_above(bs, base, false, true, offset, + return bdrv_co_common_block_status_above(bs, base, false, + BDRV_WANT_PRECISE, offset, bytes, pnum, map, file, NULL); } @@ -2752,27 +2751,89 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset, * by @offset and @bytes is known to read as zeroes. * Return 1 if that is the case, 0 otherwise and -errno on error. * This test is meant to be fast rather than accurate so returning 0 - * does not guarantee non-zero data. + * does not guarantee non-zero data; but a return of 1 is reliable. */ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes) { int ret; - int64_t pnum = bytes; + int64_t pnum; IO_CODE(); - if (!bytes) { - return 1; + while (bytes) { + ret = bdrv_co_common_block_status_above(bs, NULL, false, + BDRV_WANT_ZERO, offset, bytes, + &pnum, NULL, NULL, NULL); + + if (ret < 0) { + return ret; + } + if (!(ret & BDRV_BLOCK_ZERO)) { + return 0; + } + offset += pnum; + bytes -= pnum; } - ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, - bytes, &pnum, NULL, NULL, NULL); + return 1; +} +/* + * Check @bs (and its backing chain) to see if the entire image is known + * to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. + * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data; however, a return of 1 is reliable, + * and this function can report 1 in more cases than bdrv_co_is_zero_fast. + */ +int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs) +{ + int ret; + int64_t pnum, bytes; + char *buf; + QEMUIOVector local_qiov; + IO_CODE(); + + bytes = bdrv_co_getlength(bs); + if (bytes < 0) { + return bytes; + } + + /* First probe - see if the entire image reads as zero */ + ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, + 0, bytes, &pnum, NULL, NULL, + NULL); if (ret < 0) { return ret; } + if (ret & BDRV_BLOCK_ZERO) { + return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + } - return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); + /* + * Because of the way 'blockdev-create' works, raw files tend to + * be created with a non-sparse region at the front to make + * alignment probing easier. If the block starts with only a + * small allocated region, it is still worth the effort to see if + * the rest of the image is still sparse, coupled with manually + * reading the first region to see if it reads zero after all. + */ + if (pnum > MAX_ZERO_CHECK_BUFFER) { + return 0; + } + ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + if (ret <= 0) { + return ret; + } + /* Only the head of the image is unknown, and it's small. Read it. */ + buf = qemu_blockalign(bs, pnum); + qemu_iovec_init_buf(&local_qiov, buf, pnum); + ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0); + if (ret >= 0) { + ret = buffer_is_zero(buf, pnum); + } + qemu_vfree(buf); + return ret; } int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, @@ -2782,9 +2843,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t dummy; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, - bytes, pnum ? pnum : &dummy, NULL, - NULL, NULL); + ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED, + offset, bytes, pnum ? pnum : &dummy, + NULL, NULL, NULL); if (ret < 0) { return ret; } @@ -2817,7 +2878,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs, int ret; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, base, include_base, false, + ret = bdrv_co_common_block_status_above(bs, base, include_base, + BDRV_WANT_ALLOCATED, offset, bytes, pnum, NULL, NULL, &depth); if (ret < 0) { @@ -3698,8 +3760,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, } int coroutine_fn -bdrv_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -3717,7 +3779,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs, } bdrv_inc_in_flight(bs); - ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, + ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes, pnum, map, file); bdrv_dec_in_flight(bs); |