diff options
48 files changed, 980 insertions, 610 deletions
@@ -418,7 +418,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) CreateCo *cco = opaque; assert(cco->drv); - ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); + ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); error_propagate(&cco->err, local_err); cco->ret = ret; } @@ -437,7 +437,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, .err = NULL, }; - if (!drv->bdrv_create) { + if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); ret = -ENOTSUP; goto out; @@ -4711,7 +4711,12 @@ out: AioContext *bdrv_get_aio_context(BlockDriverState *bs) { - return bs->aio_context; + return bs ? bs->aio_context : qemu_get_aio_context(); +} + +AioWait *bdrv_get_aio_wait(BlockDriverState *bs) +{ + return bs ? &bs->wait : NULL; } void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) diff --git a/block/blkdebug.c b/block/blkdebug.c index d83f23f..5897124 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -627,15 +627,17 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, return bdrv_co_pdiscard(bs->file->bs, offset, bytes); } -static int64_t coroutine_fn blkdebug_co_get_block_status( - BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, - BlockDriverState **file) +static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { - assert(QEMU_IS_ALIGNED(sector_num | nb_sectors, - DIV_ROUND_UP(bs->bl.request_alignment, - BDRV_SECTOR_SIZE))); - return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors, - pnum, file); + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes, + pnum, map, file); } static void blkdebug_close(BlockDriverState *bs) @@ -907,7 +909,7 @@ static BlockDriver bdrv_blkdebug = { .bdrv_co_flush_to_disk = blkdebug_co_flush, .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, .bdrv_co_pdiscard = blkdebug_co_pdiscard, - .bdrv_co_get_block_status = blkdebug_co_get_block_status, + .bdrv_co_block_status = blkdebug_co_block_status, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, diff --git a/block/block-backend.c b/block/block-backend.c index 94ffbb6..b3c790e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -73,6 +73,14 @@ struct BlockBackend { int quiesce_counter; VMChangeStateEntry *vmsh; bool force_allow_inactivate; + + /* Number of in-flight aio requests. BlockDriverState also counts + * in-flight requests but aio requests can exist even when blk->root is + * NULL, so we cannot rely on its counter for that case. + * Accessed with atomic ops. + */ + unsigned int in_flight; + AioWait wait; }; typedef struct BlockBackendAIOCB { @@ -1225,11 +1233,22 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) return bdrv_make_zero(blk->root, flags); } +static void blk_inc_in_flight(BlockBackend *blk) +{ + atomic_inc(&blk->in_flight); +} + +static void blk_dec_in_flight(BlockBackend *blk) +{ + atomic_dec(&blk->in_flight); + aio_wait_kick(&blk->wait); +} + static void error_callback_bh(void *opaque) { struct BlockBackendAIOCB *acb = opaque; - bdrv_dec_in_flight(acb->common.bs); + blk_dec_in_flight(acb->blk); acb->common.cb(acb->common.opaque, acb->ret); qemu_aio_unref(acb); } @@ -1240,7 +1259,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, { struct BlockBackendAIOCB *acb; - bdrv_inc_in_flight(blk_bs(blk)); + blk_inc_in_flight(blk); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); acb->blk = blk; acb->ret = ret; @@ -1263,7 +1282,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { static void blk_aio_complete(BlkAioEmAIOCB *acb) { if (acb->has_returned) { - bdrv_dec_in_flight(acb->common.bs); + blk_dec_in_flight(acb->rwco.blk); acb->common.cb(acb->common.opaque, acb->rwco.ret); qemu_aio_unref(acb); } @@ -1284,7 +1303,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, BlkAioEmAIOCB *acb; Coroutine *co; - bdrv_inc_in_flight(blk_bs(blk)); + blk_inc_in_flight(blk); acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); acb->rwco = (BlkRwCo) { .blk = blk, @@ -1521,14 +1540,41 @@ int blk_flush(BlockBackend *blk) void blk_drain(BlockBackend *blk) { - if (blk_bs(blk)) { - bdrv_drain(blk_bs(blk)); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_drained_begin(bs); + } + + /* We may have -ENOMEDIUM completions in flight */ + AIO_WAIT_WHILE(&blk->wait, + blk_get_aio_context(blk), + atomic_mb_read(&blk->in_flight) > 0); + + if (bs) { + bdrv_drained_end(bs); } } void blk_drain_all(void) { - bdrv_drain_all(); + BlockBackend *blk = NULL; + + bdrv_drain_all_begin(); + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *ctx = blk_get_aio_context(blk); + + aio_context_acquire(ctx); + + /* We may have -ENOMEDIUM completions in flight */ + AIO_WAIT_WHILE(&blk->wait, ctx, + atomic_mb_read(&blk->in_flight) > 0); + + aio_context_release(ctx); + } + + bdrv_drain_all_end(); } void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, @@ -1569,10 +1615,11 @@ static void send_qmp_error_event(BlockBackend *blk, bool is_read, int error) { IoOperationType optype; + BlockDriverState *bs = blk_bs(blk); optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; - qapi_event_send_block_io_error(blk_name(blk), - bdrv_get_node_name(blk_bs(blk)), optype, + qapi_event_send_block_io_error(blk_name(blk), !!bs, + bs ? bdrv_get_node_name(bs) : NULL, optype, action, blk_iostatus_is_enabled(blk), error == ENOSPC, strerror(error), &error_abort); diff --git a/block/commit.c b/block/commit.c index bb6c904..1943c9c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -265,7 +265,7 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, static BlockDriver bdrv_commit_top = { .format_name = "commit_top", .bdrv_co_preadv = bdrv_commit_top_preadv, - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing, + .bdrv_co_block_status = bdrv_co_block_status_from_backing, .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, .bdrv_close = bdrv_commit_top_close, .bdrv_child_perm = bdrv_commit_top_child_perm, diff --git a/block/crypto.c b/block/crypto.c index aeac482..17b5c0a 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -556,9 +556,9 @@ static int block_crypto_open_luks(BlockDriverState *bs, bs, options, flags, errp); } -static int block_crypto_create_luks(const char *filename, - QemuOpts *opts, - Error **errp) +static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, + QemuOpts *opts, + Error **errp) { return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, filename, opts, errp); @@ -617,7 +617,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_create = block_crypto_create_luks, + .bdrv_co_create_opts = block_crypto_co_create_opts_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/file-posix.c b/block/file-posix.c index ca49c1a..7f2cc63 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1982,7 +1982,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) return (int64_t)st.st_blocks * 512; } -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int fd; int result = 0; @@ -2131,25 +2132,24 @@ static int find_allocation(BlockDriverState *bs, off_t start, } /* - * Returns the allocation status of the specified sectors. + * Returns the allocation status of the specified offset. * - * If 'sector_num' is beyond the end of the disk image the return value is 0 - * and 'pnum' is set to 0. + * The block layer guarantees 'offset' and 'bytes' are within bounds. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes - * beyond the end of the disk image it will be clamped. + * 'bytes' is the max value 'pnum' should be set to. */ -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) -{ - off_t start, data = 0, hole = 0; - int64_t total_size; +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + off_t data = 0, hole = 0; int ret; ret = fd_open(bs); @@ -2157,39 +2157,36 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, return ret; } - start = sector_num * BDRV_SECTOR_SIZE; - total_size = bdrv_getlength(bs); - if (total_size < 0) { - return total_size; - } else if (start >= total_size) { - *pnum = 0; - return 0; - } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { - nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + if (!want_zero) { + *pnum = bytes; + *map = offset; + *file = bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } - ret = find_allocation(bs, start, &data, &hole); + ret = find_allocation(bs, offset, &data, &hole); if (ret == -ENXIO) { /* Trailing hole */ - *pnum = nb_sectors; + *pnum = bytes; ret = BDRV_BLOCK_ZERO; } else if (ret < 0) { /* No info available, so pretend there are no holes */ - *pnum = nb_sectors; + *pnum = bytes; ret = BDRV_BLOCK_DATA; - } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent, + } else if (data == offset) { + /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); + *pnum = MIN(bytes, hole - offset); ret = BDRV_BLOCK_DATA; } else { - /* On a hole, compute sectors to the beginning of the next extent. */ - assert(hole == start); - *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); + /* On a hole, compute bytes to the beginning of the next extent. */ + assert(hole == offset); + *pnum = MIN(bytes, data - offset); ret = BDRV_BLOCK_ZERO; } + *map = offset; *file = bs; - return ret | BDRV_BLOCK_OFFSET_VALID | start; + return ret | BDRV_BLOCK_OFFSET_VALID; } static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, @@ -2280,9 +2277,9 @@ BlockDriver bdrv_file = { .bdrv_reopen_commit = raw_reopen_commit, .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, - .bdrv_create = raw_create, + .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_get_block_status = raw_co_get_block_status, + .bdrv_co_block_status = raw_co_block_status, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, .bdrv_co_preadv = raw_co_preadv, @@ -2684,8 +2681,8 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } -static int hdev_create(const char *filename, QemuOpts *opts, - Error **errp) +static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int fd; int ret = 0; @@ -2758,7 +2755,7 @@ static BlockDriver bdrv_host_device = { .bdrv_reopen_prepare = raw_reopen_prepare, .bdrv_reopen_commit = raw_reopen_commit, .bdrv_reopen_abort = raw_reopen_abort, - .bdrv_create = hdev_create, + .bdrv_co_create_opts = hdev_co_create_opts, .create_opts = &raw_create_opts, .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, @@ -2880,7 +2877,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_reopen_prepare = raw_reopen_prepare, .bdrv_reopen_commit = raw_reopen_commit, .bdrv_reopen_abort = raw_reopen_abort, - .bdrv_create = hdev_create, + .bdrv_co_create_opts = hdev_co_create_opts, .create_opts = &raw_create_opts, @@ -3011,7 +3008,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_reopen_prepare = raw_reopen_prepare, .bdrv_reopen_commit = raw_reopen_commit, .bdrv_reopen_abort = raw_reopen_abort, - .bdrv_create = hdev_create, + .bdrv_co_create_opts = hdev_co_create_opts, .create_opts = &raw_create_opts, .bdrv_co_preadv = raw_co_preadv, diff --git a/block/file-win32.c b/block/file-win32.c index f24c7bb..4a430d4 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -553,7 +553,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs) return st.st_size; } -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int fd; int64_t total_size = 0; @@ -599,7 +600,7 @@ BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_refresh_limits = raw_probe_alignment, .bdrv_close = raw_close, - .bdrv_create = raw_create, + .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_aio_readv = raw_aio_readv, diff --git a/block/gluster.c b/block/gluster.c index 3f17b78..79b4cfd 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1021,8 +1021,9 @@ static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset, return 0; } -static int qemu_gluster_create(const char *filename, - QemuOpts *opts, Error **errp) +static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, + QemuOpts *opts, + Error **errp) { BlockdevOptionsGluster *gconf; struct glfs *glfs; @@ -1362,68 +1363,66 @@ exit: } /* - * Returns the allocation status of the specified sectors. + * Returns the allocation status of the specified offset. * - * If 'sector_num' is beyond the end of the disk image the return value is 0 - * and 'pnum' is set to 0. + * The block layer guarantees 'offset' and 'bytes' are within bounds. * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same + * 'pnum' is set to the number of bytes (including and immediately following + * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes - * beyond the end of the disk image it will be clamped. + * 'bytes' is the max value 'pnum' should be set to. * - * (Based on raw_co_get_block_status() from file-posix.c.) + * (Based on raw_co_block_status() from file-posix.c.) */ -static int64_t coroutine_fn qemu_gluster_co_get_block_status( - BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, - BlockDriverState **file) +static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { BDRVGlusterState *s = bs->opaque; - off_t start, data = 0, hole = 0; - int64_t total_size; + off_t data = 0, hole = 0; int ret = -EINVAL; if (!s->fd) { return ret; } - start = sector_num * BDRV_SECTOR_SIZE; - total_size = bdrv_getlength(bs); - if (total_size < 0) { - return total_size; - } else if (start >= total_size) { - *pnum = 0; - return 0; - } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { - nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + if (!want_zero) { + *pnum = bytes; + *map = offset; + *file = bs; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } - ret = find_allocation(bs, start, &data, &hole); + ret = find_allocation(bs, offset, &data, &hole); if (ret == -ENXIO) { /* Trailing hole */ - *pnum = nb_sectors; + *pnum = bytes; ret = BDRV_BLOCK_ZERO; } else if (ret < 0) { /* No info available, so pretend there are no holes */ - *pnum = nb_sectors; + *pnum = bytes; ret = BDRV_BLOCK_DATA; - } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent, + } else if (data == offset) { + /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); + *pnum = MIN(bytes, hole - offset); ret = BDRV_BLOCK_DATA; } else { - /* On a hole, compute sectors to the beginning of the next extent. */ - assert(hole == start); - *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); + /* On a hole, compute bytes to the beginning of the next extent. */ + assert(hole == offset); + *pnum = MIN(bytes, data - offset); ret = BDRV_BLOCK_ZERO; } + *map = offset; *file = bs; - return ret | BDRV_BLOCK_OFFSET_VALID | start; + return ret | BDRV_BLOCK_OFFSET_VALID; } @@ -1437,7 +1436,7 @@ static BlockDriver bdrv_gluster = { .bdrv_reopen_commit = qemu_gluster_reopen_commit, .bdrv_reopen_abort = qemu_gluster_reopen_abort, .bdrv_close = qemu_gluster_close, - .bdrv_create = qemu_gluster_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, @@ -1451,7 +1450,7 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, + .bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -1465,7 +1464,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_reopen_commit = qemu_gluster_reopen_commit, .bdrv_reopen_abort = qemu_gluster_reopen_abort, .bdrv_close = qemu_gluster_close, - .bdrv_create = qemu_gluster_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, @@ -1479,7 +1478,7 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, + .bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -1493,7 +1492,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_reopen_commit = qemu_gluster_reopen_commit, .bdrv_reopen_abort = qemu_gluster_reopen_abort, .bdrv_close = qemu_gluster_close, - .bdrv_create = qemu_gluster_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, @@ -1507,7 +1506,7 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, + .bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -1527,7 +1526,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_reopen_commit = qemu_gluster_reopen_commit, .bdrv_reopen_abort = qemu_gluster_reopen_abort, .bdrv_close = qemu_gluster_close, - .bdrv_create = qemu_gluster_create, + .bdrv_co_create_opts = qemu_gluster_co_create_opts, .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, @@ -1541,7 +1540,7 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif - .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, + .bdrv_co_block_status = qemu_gluster_co_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "trace.h" #include "sysemu/block-backend.h" +#include "block/aio-wait.h" #include "block/blockjob.h" #include "block/blockjob_int.h" #include "block/block_int.h" @@ -587,16 +588,9 @@ void bdrv_inc_in_flight(BlockDriverState *bs) atomic_inc(&bs->in_flight); } -static void dummy_bh_cb(void *opaque) -{ -} - void bdrv_wakeup(BlockDriverState *bs) { - /* The barrier (or an atomic op) is in the caller. */ - if (atomic_read(&bs->wakeup)) { - aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); - } + aio_wait_kick(bdrv_get_aio_wait(bs)); } void bdrv_dec_in_flight(BlockDriverState *bs) @@ -1701,7 +1695,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, */ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); - if (!qiov) { + if (flags & BDRV_REQ_ZERO_WRITE) { ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1868,30 +1862,34 @@ typedef struct BdrvCoBlockStatusData { bool done; } BdrvCoBlockStatusData; -int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file) +int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { assert(bs->file && bs->file->bs); - *pnum = nb_sectors; + *pnum = bytes; + *map = offset; *file = bs->file->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | - (sector_num << BDRV_SECTOR_BITS); + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; } -int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file) +int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { assert(bs->backing && bs->backing->bs); - *pnum = nb_sectors; + *pnum = bytes; + *map = offset; *file = bs->backing->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | - (sector_num << BDRV_SECTOR_BITS); + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; } /* @@ -1899,10 +1897,10 @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'want_zero' is true, the caller is querying for mapping purposes, - * and the result should include BDRV_BLOCK_OFFSET_VALID and - * BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those - * bits particularly if it allows for a larger value in 'pnum'. + * If 'want_zero' is true, the caller is querying for mapping + * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and + * _ZERO where possible; otherwise, the result favors larger 'pnum', + * with a focus on accurate BDRV_BLOCK_ALLOCATED. * * If 'offset' is beyond the end of the disk image the return value is * BDRV_BLOCK_EOF and 'pnum' is set to 0. @@ -1959,7 +1957,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, /* Must be non-NULL or bdrv_getlength() would have failed */ assert(bs->drv); - if (!bs->drv->bdrv_co_get_block_status) { + if (!bs->drv->bdrv_co_block_status) { *pnum = bytes; ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; if (offset + bytes == total_size) { @@ -1976,44 +1974,24 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, bdrv_inc_in_flight(bs); /* Round out to request_alignment boundaries */ - /* TODO: until we have a byte-based driver callback, we also have to - * round out to sectors, even if that is bigger than request_alignment */ - align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE); + align = bs->bl.request_alignment; aligned_offset = QEMU_ALIGN_DOWN(offset, align); aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; - { - int count; /* sectors */ - int64_t longret; - - assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes, - BDRV_SECTOR_SIZE)); - /* - * The contract allows us to return pnum smaller than bytes, even - * if the next query would see the same status; we truncate the - * request to avoid overflowing the driver's 32-bit interface. - */ - longret = bs->drv->bdrv_co_get_block_status( - bs, aligned_offset >> BDRV_SECTOR_BITS, - MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count, - &local_file); - if (longret < 0) { - assert(INT_MIN <= longret); - ret = longret; - goto out; - } - if (longret & BDRV_BLOCK_OFFSET_VALID) { - local_map = longret & BDRV_BLOCK_OFFSET_MASK; - } - ret = longret & ~BDRV_BLOCK_OFFSET_MASK; - *pnum = count * BDRV_SECTOR_SIZE; + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + aligned_bytes, pnum, &local_map, + &local_file); + if (ret < 0) { + *pnum = 0; + goto out; } /* - * The driver's result must be a multiple of request_alignment. + * The driver's result must be a non-zero multiple of request_alignment. * Clamp pnum and adjust map to original request. */ - assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset); + assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) && + align > offset - aligned_offset); *pnum -= offset - aligned_offset; if (*pnum > bytes) { *pnum = bytes; diff --git a/block/iscsi.c b/block/iscsi.c index d2b320e..8bf0e87 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -86,7 +86,7 @@ typedef struct IscsiLun { unsigned long *allocmap; unsigned long *allocmap_valid; long allocmap_size; - int cluster_sectors; + int cluster_size; bool use_16_for_rw; bool write_protected; bool lbpme; @@ -430,9 +430,10 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) { iscsi_allocmap_free(iscsilun); + assert(iscsilun->cluster_size); iscsilun->allocmap_size = - DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun), - iscsilun->cluster_sectors); + DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size, + iscsilun->cluster_size); iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size); if (!iscsilun->allocmap) { @@ -440,7 +441,7 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) } if (open_flags & BDRV_O_NOCACHE) { - /* in case that cache.direct = on all allocmap entries are + /* when cache.direct = on all allocmap entries are * treated as invalid to force a relookup of the block * status on every read request */ return 0; @@ -457,8 +458,8 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags) } static void -iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors, bool allocated, bool valid) +iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset, + int64_t bytes, bool allocated, bool valid) { int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk; @@ -466,13 +467,13 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, return; } /* expand to entirely contain all affected clusters */ - cl_num_expanded = sector_num / iscsilun->cluster_sectors; - nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors, - iscsilun->cluster_sectors) - cl_num_expanded; + assert(iscsilun->cluster_size); + cl_num_expanded = offset / iscsilun->cluster_size; + nb_cls_expanded = DIV_ROUND_UP(offset + bytes, + iscsilun->cluster_size) - cl_num_expanded; /* shrink to touch only completely contained clusters */ - cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors); - nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors - - cl_num_shrunk; + cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size); + nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk; if (allocated) { bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded); } else { @@ -495,26 +496,26 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num, } static void -iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true); + iscsi_allocmap_update(iscsilun, offset, bytes, true, true); } static void -iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update * is ignored, so this will in effect be an iscsi_allocmap_set_invalid. */ - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true); + iscsi_allocmap_update(iscsilun, offset, bytes, false, true); } -static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { - iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false); + iscsi_allocmap_update(iscsilun, offset, bytes, false, false); } static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) @@ -528,28 +529,30 @@ static void iscsi_allocmap_invalidate(IscsiLun *iscsilun) } static inline bool -iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, - int nb_sectors) +iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset, + int64_t bytes) { unsigned long size; if (iscsilun->allocmap == NULL) { return true; } - size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors); + assert(iscsilun->cluster_size); + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); return !(find_next_bit(iscsilun->allocmap, size, - sector_num / iscsilun->cluster_sectors) == size); + offset / iscsilun->cluster_size) == size); } static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun, - int64_t sector_num, int nb_sectors) + int64_t offset, int64_t bytes) { unsigned long size; if (iscsilun->allocmap_valid == NULL) { return false; } - size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors); + assert(iscsilun->cluster_size); + size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size); return (find_next_zero_bit(iscsilun->allocmap_valid, size, - sector_num / iscsilun->cluster_sectors) == size); + offset / iscsilun->cluster_size) == size); } static int coroutine_fn @@ -631,14 +634,16 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); + iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba, iTask.err_str); r = iTask.err_code; goto out_unlock; } - iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors); + iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); out_unlock: qemu_mutex_unlock(&iscsilun->mutex); @@ -648,36 +653,36 @@ out_unlock: -static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) { IscsiLun *iscsilun = bs->opaque; struct scsi_get_lba_status *lbas = NULL; struct scsi_lba_status_descriptor *lbasd = NULL; struct IscsiTask iTask; uint64_t lba; - int64_t ret; + int ret; iscsi_co_init_iscsitask(iscsilun, &iTask); - if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { - ret = -EINVAL; - goto out; - } + assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size)); /* default to all sectors allocated */ - ret = BDRV_BLOCK_DATA; - ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID; - *pnum = nb_sectors; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + if (map) { + *map = offset; + } + *pnum = bytes; /* LUN does not support logical block provisioning */ if (!iscsilun->lbpme) { goto out; } - lba = sector_qemu2lun(sector_num, iscsilun); + lba = offset / iscsilun->block_size; qemu_mutex_lock(&iscsilun->mutex); retry: @@ -722,12 +727,12 @@ retry: lbasd = &lbas->descriptors[0]; - if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { + if (lba != lbasd->lba) { ret = -EIO; goto out_unlock; } - *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); + *pnum = lbasd->num_blocks * iscsilun->block_size; if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { @@ -738,13 +743,13 @@ retry: } if (ret & BDRV_BLOCK_ZERO) { - iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum); + iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum); } else { - iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum); + iscsi_allocmap_set_allocated(iscsilun, offset, *pnum); } - if (*pnum > nb_sectors) { - *pnum = nb_sectors; + if (*pnum > bytes) { + *pnum = bytes; } out_unlock: qemu_mutex_unlock(&iscsilun->mutex); @@ -753,7 +758,7 @@ out: if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); } - if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { *file = bs; } return ret; @@ -780,29 +785,37 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, /* if cache.direct is off and we have a valid entry in our allocation map * we can skip checking the block status and directly return zeroes if * the request falls within an unallocated area */ - if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) && - !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) { + if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE) && + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE)) { qemu_iovec_memset(iov, 0, 0x00, iov->size); return 0; } if (nb_sectors >= ISCSI_CHECKALLOC_THRES && - !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) && - !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) { - int pnum; - BlockDriverState *file; + !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE) && + !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE)) { + int64_t pnum; /* check the block status from the beginning of the cluster * containing the start sector */ - int64_t ret = iscsi_co_get_block_status(bs, - sector_num - sector_num % iscsilun->cluster_sectors, - BDRV_REQUEST_MAX_SECTORS, &pnum, &file); + int64_t head; + int ret; + + assert(iscsilun->cluster_size); + head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size; + ret = iscsi_co_block_status(bs, true, + sector_num * BDRV_SECTOR_SIZE - head, + BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL); if (ret < 0) { return ret; } /* if the whole request falls into an unallocated area we can avoid - * to read and directly return zeroes instead */ + * reading and directly return zeroes instead */ if (ret & BDRV_BLOCK_ZERO && - pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) { + pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) { qemu_iovec_memset(iov, 0, 0x00, iov->size); return 0; } @@ -1146,8 +1159,7 @@ retry: goto retry; } - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { /* the target might fail with a check condition if it @@ -1260,8 +1272,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s", lba, iTask.err_str); r = iTask.err_code; @@ -1269,11 +1280,9 @@ retry: } if (flags & BDRV_REQ_MAY_UNMAP) { - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + iscsi_allocmap_set_invalid(iscsilun, offset, bytes); } else { - iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); + iscsi_allocmap_set_allocated(iscsilun, offset, bytes); } out_unlock: @@ -1953,8 +1962,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, * reasonable size */ if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 && iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { - iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran * - iscsilun->block_size) >> BDRV_SECTOR_BITS; + iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran * + iscsilun->block_size; if (iscsilun->lbprz) { ret = iscsi_allocmap_init(iscsilun, bs->open_flags); } @@ -2108,7 +2117,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset, return 0; } -static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int ret = 0; int64_t total_size = 0; @@ -2163,7 +2173,7 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { IscsiLun *iscsilun = bs->opaque; bdi->unallocated_blocks_are_zero = iscsilun->lbprz; - bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE; + bdi->cluster_size = iscsilun->cluster_size; return 0; } @@ -2195,7 +2205,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_parse_filename = iscsi_parse_filename, .bdrv_file_open = iscsi_open, .bdrv_close = iscsi_close, - .bdrv_create = iscsi_create, + .bdrv_co_create_opts = iscsi_co_create_opts, .create_opts = &iscsi_create_opts, .bdrv_reopen_prepare = iscsi_reopen_prepare, .bdrv_reopen_commit = iscsi_reopen_commit, @@ -2206,7 +2216,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_truncate = iscsi_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, - .bdrv_co_get_block_status = iscsi_co_get_block_status, + .bdrv_co_block_status = iscsi_co_block_status, .bdrv_co_pdiscard = iscsi_co_pdiscard, .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, .bdrv_co_readv = iscsi_co_readv, @@ -2230,7 +2240,7 @@ static BlockDriver bdrv_iser = { .bdrv_parse_filename = iscsi_parse_filename, .bdrv_file_open = iscsi_open, .bdrv_close = iscsi_close, - .bdrv_create = iscsi_create, + .bdrv_co_create_opts = iscsi_co_create_opts, .create_opts = &iscsi_create_opts, .bdrv_reopen_prepare = iscsi_reopen_prepare, .bdrv_reopen_commit = iscsi_reopen_commit, @@ -2241,7 +2251,7 @@ static BlockDriver bdrv_iser = { .bdrv_truncate = iscsi_truncate, .bdrv_refresh_limits = iscsi_refresh_limits, - .bdrv_co_get_block_status = iscsi_co_get_block_status, + .bdrv_co_block_status = iscsi_co_block_status, .bdrv_co_pdiscard = iscsi_co_pdiscard, .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, .bdrv_co_readv = iscsi_co_readv, diff --git a/block/mirror.c b/block/mirror.c index c9badc1..f5bf620 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1094,7 +1094,7 @@ static BlockDriver bdrv_mirror_top = { .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, .bdrv_co_flush = bdrv_mirror_top_flush, - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing, + .bdrv_co_block_status = bdrv_co_block_status_from_backing, .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_close = bdrv_mirror_top_close, .bdrv_child_perm = bdrv_mirror_top_child_perm, diff --git a/block/nfs.c b/block/nfs.c index bbdb4fa..1d82ff5 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -684,7 +684,8 @@ static QemuOptsList nfs_create_opts = { } }; -static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) +static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, + Error **errp) { int64_t ret, total_size; NFSClient *client = g_new0(NFSClient, 1); @@ -897,7 +898,7 @@ static BlockDriver bdrv_nfs = { .bdrv_file_open = nfs_file_open, .bdrv_close = nfs_file_close, - .bdrv_create = nfs_file_create, + .bdrv_co_create_opts = nfs_file_co_create_opts, .bdrv_reopen_prepare = nfs_reopen_prepare, .bdrv_co_preadv = nfs_co_preadv, diff --git a/block/null.c b/block/null.c index 214d394..806a863 100644 --- a/block/null.c +++ b/block/null.c @@ -223,22 +223,23 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, return 0; } -static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) +static int coroutine_fn null_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) { BDRVNullState *s = bs->opaque; - off_t start = sector_num * BDRV_SECTOR_SIZE; + int ret = BDRV_BLOCK_OFFSET_VALID; - *pnum = nb_sectors; + *pnum = bytes; + *map = offset; *file = bs; if (s->read_zeroes) { - return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO; - } else { - return BDRV_BLOCK_OFFSET_VALID | start; + ret |= BDRV_BLOCK_ZERO; } + return ret; } static void null_refresh_filename(BlockDriverState *bs, QDict *opts) @@ -270,7 +271,7 @@ static BlockDriver bdrv_null_co = { .bdrv_co_flush_to_disk = null_co_flush, .bdrv_reopen_prepare = null_reopen_prepare, - .bdrv_co_get_block_status = null_co_get_block_status, + .bdrv_co_block_status = null_co_block_status, .bdrv_refresh_filename = null_refresh_filename, }; @@ -290,7 +291,7 @@ static BlockDriver bdrv_null_aio = { .bdrv_aio_flush = null_aio_flush, .bdrv_reopen_prepare = null_reopen_prepare, - .bdrv_co_get_block_status = null_co_get_block_status, + .bdrv_co_block_status = null_co_block_status, .bdrv_refresh_filename = null_refresh_filename, }; diff --git a/block/nvme.c b/block/nvme.c index 7507802..8bca57a 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -1072,18 +1072,6 @@ static int nvme_reopen_prepare(BDRVReopenState *reopen_state, return 0; } -static int64_t coroutine_fn nvme_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) -{ - *pnum = nb_sectors; - *file = bs; - - return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID | - (sector_num << BDRV_SECTOR_BITS); -} - static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts) { QINCREF(opts); @@ -1183,8 +1171,6 @@ static BlockDriver bdrv_nvme = { .bdrv_co_flush_to_disk = nvme_co_flush, .bdrv_reopen_prepare = nvme_reopen_prepare, - .bdrv_co_get_block_status = nvme_co_get_block_status, - .bdrv_refresh_filename = nvme_refresh_filename, .bdrv_refresh_limits = nvme_refresh_limits, diff --git a/block/parallels.c b/block/parallels.c index e1e3d80..8108579 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -261,23 +261,31 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) } -static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn parallels_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { BDRVParallelsState *s = bs->opaque; - int64_t offset; + int count; + assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); qemu_co_mutex_lock(&s->lock); - offset = block_status(s, sector_num, nb_sectors, pnum); + offset = block_status(s, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, &count); qemu_co_mutex_unlock(&s->lock); + *pnum = count * BDRV_SECTOR_SIZE; if (offset < 0) { return 0; } + *map = offset * BDRV_SECTOR_SIZE; *file = bs->file->bs; - return (offset << BDRV_SECTOR_BITS) | - BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } static coroutine_fn int parallels_co_writev(BlockDriverState *bs, @@ -467,7 +475,9 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, } -static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn parallels_co_create_opts(const char *filename, + QemuOpts *opts, + Error **errp) { int64_t total_size, cl_size; uint8_t tmp[BDRV_SECTOR_SIZE]; @@ -782,13 +792,13 @@ static BlockDriver bdrv_parallels = { .bdrv_open = parallels_open, .bdrv_close = parallels_close, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_co_get_block_status = parallels_co_get_block_status, + .bdrv_co_block_status = parallels_co_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, .bdrv_co_readv = parallels_co_readv, .bdrv_co_writev = parallels_co_writev, .supports_backing = true, - .bdrv_create = parallels_create, + .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_check = parallels_check, .create_opts = ¶llels_create_opts, }; diff --git a/block/qcow.c b/block/qcow.c index 8631155..47a18d9 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -524,23 +524,28 @@ static int get_cluster_offset(BlockDriverState *bs, return 1; } -static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn qcow_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVQcowState *s = bs->opaque; - int index_in_cluster, n, ret; + int index_in_cluster, ret; + int64_t n; uint64_t cluster_offset; qemu_co_mutex_lock(&s->lock); - ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset); + ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { return ret; } - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; + index_in_cluster = offset & (s->cluster_size - 1); + n = s->cluster_size - index_in_cluster; + if (n > bytes) { + n = bytes; + } *pnum = n; if (!cluster_offset) { return 0; @@ -548,9 +553,9 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) { return BDRV_BLOCK_DATA; } - cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + *map = cluster_offset | index_in_cluster; *file = bs->file->bs; - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } static int decompress_buffer(uint8_t *out_buf, int out_buf_size, @@ -805,7 +810,8 @@ static void qcow_close(BlockDriverState *bs) error_free(s->migration_blocker); } -static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn qcow_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int header_size, backing_filename_len, l1_size, shift, i; QCowHeader header; @@ -1122,13 +1128,13 @@ static BlockDriver bdrv_qcow = { .bdrv_close = qcow_close, .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, - .bdrv_create = qcow_create, + .bdrv_co_create_opts = qcow_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .supports_backing = true, .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, - .bdrv_co_get_block_status = qcow_co_get_block_status, + .bdrv_co_block_status = qcow_co_block_status, .bdrv_make_empty = qcow_make_empty, .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 4f6fd86..5127276 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -413,8 +413,8 @@ static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry) static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size) { - return align_offset(sizeof(Qcow2BitmapDirEntry) + - name_size + extra_data_size, 8); + int size = sizeof(Qcow2BitmapDirEntry) + name_size + extra_data_size; + return ROUND_UP(size, 8); } static inline int dir_entry_size(Qcow2BitmapDirEntry *entry) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e406b0f..98908c4 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -126,11 +126,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, new_l1_size2 = sizeof(uint64_t) * new_l1_size; new_l1_table = qemu_try_blockalign(bs->file->bs, - align_offset(new_l1_size2, 512)); + ROUND_UP(new_l1_size2, 512)); if (new_l1_table == NULL) { return -ENOMEM; } - memset(new_l1_table, 0, align_offset(new_l1_size2, 512)); + memset(new_l1_table, 0, ROUND_UP(new_l1_size2, 512)); if (s->l1_size) { memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index d46b69d..126cca3 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1204,7 +1204,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, * l1_table_offset when it is the current s->l1_table_offset! Be careful * when changing this! */ if (l1_table_offset != s->l1_table_offset) { - l1_table = g_try_malloc0(align_offset(l1_size2, 512)); + l1_table = g_try_malloc0(ROUND_UP(l1_size2, 512)); if (l1_size2 && l1_table == NULL) { ret = -ENOMEM; goto fail; @@ -2553,7 +2553,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, } /* align range to test to cluster boundaries */ - size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size); + size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size); offset = start_of_cluster(s, offset); if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 44243e0..cee25f5 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -66,7 +66,7 @@ int qcow2_read_snapshots(BlockDriverState *bs) for(i = 0; i < s->nb_snapshots; i++) { /* Read statically sized part of the snapshot header */ - offset = align_offset(offset, 8); + offset = ROUND_UP(offset, 8); ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); if (ret < 0) { goto fail; @@ -155,7 +155,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) offset = 0; for(i = 0; i < s->nb_snapshots; i++) { sn = s->snapshots + i; - offset = align_offset(offset, 8); + offset = ROUND_UP(offset, 8); offset += sizeof(h); offset += sizeof(extra); offset += strlen(sn->id_str); @@ -215,7 +215,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX); h.id_str_size = cpu_to_be16(id_str_size); h.name_size = cpu_to_be16(name_size); - offset = align_offset(offset, 8); + offset = ROUND_UP(offset, 8); ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); if (ret < 0) { @@ -441,7 +441,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) /* The VM state isn't needed any more in the active L1 table; in fact, it * hurts by causing expensive COW for the next snapshot. */ qcow2_cluster_discard(bs, qcow2_vm_state_offset(s), - align_offset(sn->vm_state_size, s->cluster_size), + ROUND_UP(sn->vm_state_size, s->cluster_size), QCOW2_DISCARD_NEVER, false); #ifdef DEBUG_ALLOC @@ -710,7 +710,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, } new_l1_bytes = sn->l1_size * sizeof(uint64_t); new_l1_table = qemu_try_blockalign(bs->file->bs, - align_offset(new_l1_bytes, 512)); + ROUND_UP(new_l1_bytes, 512)); if (new_l1_table == NULL) { return -ENOMEM; } diff --git a/block/qcow2.c b/block/qcow2.c index 3dd098b..071dc4d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1377,7 +1377,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (s->l1_size > 0) { s->l1_table = qemu_try_blockalign(bs->file->bs, - align_offset(s->l1_size * sizeof(uint64_t), 512)); + ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); if (s->l1_table == NULL) { error_setg(errp, "Could not allocate L1 table"); ret = -ENOMEM; @@ -1668,32 +1668,34 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } } -static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t count, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; int index_in_cluster, ret; unsigned int bytes; - int64_t status = 0; + int status = 0; - bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE); + bytes = MIN(INT_MAX, count); qemu_co_mutex_lock(&s->lock); - ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes, - &cluster_offset); + ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { return ret; } - *pnum = bytes >> BDRV_SECTOR_BITS; + *pnum = bytes; if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && !s->crypto) { - index_in_cluster = sector_num & (s->cluster_sectors - 1); - cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + index_in_cluster = offset & (s->cluster_size - 1); + *map = cluster_offset | index_in_cluster; *file = bs->file->bs; - status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; + status |= BDRV_BLOCK_OFFSET_VALID; } if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { status |= BDRV_BLOCK_ZERO; @@ -2638,19 +2640,19 @@ static int64_t qcow2_calc_prealloc_size(int64_t total_size, { int64_t meta_size = 0; uint64_t nl1e, nl2e; - int64_t aligned_total_size = align_offset(total_size, cluster_size); + int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); /* header: 1 cluster */ meta_size += cluster_size; /* total size of L2 tables */ nl2e = aligned_total_size / cluster_size; - nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); + nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); meta_size += nl2e * sizeof(uint64_t); /* total size of L1 tables */ nl1e = nl2e * sizeof(uint64_t) / cluster_size; - nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); + nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); meta_size += nl1e * sizeof(uint64_t); /* total size of refcount table and blocks */ @@ -2721,11 +2723,12 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, return refcount_bits; } -static int qcow2_create2(const char *filename, int64_t total_size, - const char *backing_file, const char *backing_format, - int flags, size_t cluster_size, PreallocMode prealloc, - QemuOpts *opts, int version, int refcount_order, - const char *encryptfmt, Error **errp) +static int coroutine_fn +qcow2_co_create2(const char *filename, int64_t total_size, + const char *backing_file, const char *backing_format, + int flags, size_t cluster_size, PreallocMode prealloc, + QemuOpts *opts, int version, int refcount_order, + const char *encryptfmt, Error **errp) { QDict *options; @@ -2912,7 +2915,8 @@ out: return ret; } -static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { char *backing_file = NULL; char *backing_fmt = NULL; @@ -2993,9 +2997,9 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) refcount_order = ctz32(refcount_bits); - ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, - cluster_size, prealloc, opts, version, refcount_order, - encryptfmt, &local_err); + ret = qcow2_co_create2(filename, size, backing_file, backing_fmt, flags, + cluster_size, prealloc, opts, version, refcount_order, + encryptfmt, &local_err); error_propagate(errp, local_err); finish: @@ -3704,8 +3708,8 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, has_backing_file = !!optstr; g_free(optstr); - virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), - cluster_size); + virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + virtual_size = ROUND_UP(virtual_size, cluster_size); /* Check that virtual disk size is valid */ l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, @@ -3725,7 +3729,7 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, goto err; } - virtual_size = align_offset(ssize, cluster_size); + virtual_size = ROUND_UP(ssize, cluster_size); if (has_backing_file) { /* We don't how much of the backing chain is shared by the input @@ -4348,9 +4352,9 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_create = qcow2_create, + .bdrv_co_create_opts = qcow2_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_get_block_status = qcow2_co_get_block_status, + .bdrv_co_block_status = qcow2_co_block_status, .bdrv_co_preadv = qcow2_co_preadv, .bdrv_co_pwritev = qcow2_co_pwritev, diff --git a/block/qcow2.h b/block/qcow2.h index 8838022..1a84cc7 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -480,12 +480,6 @@ static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); } -static inline int64_t align_offset(int64_t offset, int n) -{ - offset = (offset + n - 1) & ~(n - 1); - return offset; -} - static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) { return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); diff --git a/block/qed.c b/block/qed.c index c6ff3ab..72cf2f5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -638,7 +638,9 @@ out: return ret; } -static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, + QemuOpts *opts, + Error **errp) { uint64_t image_size = 0; uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; @@ -688,74 +690,46 @@ finish: return ret; } -typedef struct { - BlockDriverState *bs; - Coroutine *co; - uint64_t pos; - int64_t status; - int *pnum; - BlockDriverState **file; -} QEDIsAllocatedCB; - -/* Called with table_lock held. */ -static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) +static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t pos, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { - QEDIsAllocatedCB *cb = opaque; - BDRVQEDState *s = cb->bs->opaque; - *cb->pnum = len / BDRV_SECTOR_SIZE; + BDRVQEDState *s = bs->opaque; + size_t len = MIN(bytes, SIZE_MAX); + int status; + QEDRequest request = { .l2_table = NULL }; + uint64_t offset; + int ret; + + qemu_co_mutex_lock(&s->table_lock); + ret = qed_find_cluster(s, &request, pos, &len, &offset); + + *pnum = len; switch (ret) { case QED_CLUSTER_FOUND: - offset |= qed_offset_into_cluster(s, cb->pos); - cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; - *cb->file = cb->bs->file->bs; + *map = offset | qed_offset_into_cluster(s, pos); + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + *file = bs->file->bs; break; case QED_CLUSTER_ZERO: - cb->status = BDRV_BLOCK_ZERO; + status = BDRV_BLOCK_ZERO; break; case QED_CLUSTER_L2: case QED_CLUSTER_L1: - cb->status = 0; + status = 0; break; default: assert(ret < 0); - cb->status = ret; + status = ret; break; } - if (cb->co) { - aio_co_wake(cb->co); - } -} - -static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, - BlockDriverState **file) -{ - BDRVQEDState *s = bs->opaque; - size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; - QEDIsAllocatedCB cb = { - .bs = bs, - .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE, - .status = BDRV_BLOCK_OFFSET_MASK, - .pnum = pnum, - .file = file, - }; - QEDRequest request = { .l2_table = NULL }; - uint64_t offset; - int ret; - - qemu_co_mutex_lock(&s->table_lock); - ret = qed_find_cluster(s, &request, cb.pos, &len, &offset); - qed_is_allocated_cb(&cb, ret, offset, len); - - /* The callback was invoked immediately */ - assert(cb.status != BDRV_BLOCK_OFFSET_MASK); - qed_unref_l2_cache_entry(request.l2_table); qemu_co_mutex_unlock(&s->table_lock); - return cb.status; + return status; } static BDRVQEDState *acb_to_s(QEDAIOCB *acb) @@ -1592,9 +1566,9 @@ static BlockDriver bdrv_qed = { .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_create = bdrv_qed_create, + .bdrv_co_create_opts = bdrv_qed_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, + .bdrv_co_block_status = bdrv_qed_co_block_status, .bdrv_co_readv = bdrv_qed_co_readv, .bdrv_co_writev = bdrv_qed_co_writev, .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes, diff --git a/block/raw-format.c b/block/raw-format.c index ab552c0..a378547 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -250,17 +250,17 @@ fail: return ret; } -static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum, +static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVRawState *s = bs->opaque; - *pnum = nb_sectors; + *pnum = bytes; *file = bs->file->bs; - sector_num += s->offset / BDRV_SECTOR_SIZE; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | - (sector_num << BDRV_SECTOR_BITS); + *map = offset + s->offset; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; } static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, @@ -396,7 +396,8 @@ static int raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file->bs); } -static int raw_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { return bdrv_create_file(filename, opts, errp); } @@ -491,12 +492,12 @@ BlockDriver bdrv_raw = { .bdrv_open = &raw_open, .bdrv_close = &raw_close, .bdrv_child_perm = bdrv_filter_default_perms, - .bdrv_create = &raw_create, + .bdrv_co_create_opts = &raw_co_create_opts, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes, .bdrv_co_pdiscard = &raw_co_pdiscard, - .bdrv_co_get_block_status = &raw_co_get_block_status, + .bdrv_co_block_status = &raw_co_block_status, .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, diff --git a/block/rbd.c b/block/rbd.c index 8474b0b..c7dd32e 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -351,7 +351,9 @@ static QemuOptsList runtime_opts = { }, }; -static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, + QemuOpts *opts, + Error **errp) { Error *local_err = NULL; int64_t bytes = 0; @@ -1132,7 +1134,7 @@ static BlockDriver bdrv_rbd = { .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, - .bdrv_create = qemu_rbd_create, + .bdrv_co_create_opts = qemu_rbd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, .create_opts = &qemu_rbd_create_opts, diff --git a/block/sheepdog.c b/block/sheepdog.c index 2152230..d8c10b7 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1959,8 +1959,8 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) return 0; } -static int sd_create(const char *filename, QemuOpts *opts, - Error **errp) +static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { Error *err = NULL; int ret = 0; @@ -3004,19 +3004,19 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, return acb.ret; } -static coroutine_fn int64_t -sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum, BlockDriverState **file) +static coroutine_fn int +sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); - uint64_t offset = sector_num * BDRV_SECTOR_SIZE; unsigned long start = offset / object_size, - end = DIV_ROUND_UP((sector_num + nb_sectors) * - BDRV_SECTOR_SIZE, object_size); + end = DIV_ROUND_UP(offset + bytes, object_size); unsigned long idx; - int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; + *map = offset; + int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; for (idx = start; idx < end; idx++) { if (inode->data_vdi_id[idx] == 0) { @@ -3033,9 +3033,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, } } - *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE; - if (*pnum > nb_sectors) { - *pnum = nb_sectors; + *pnum = (idx - start) * object_size; + if (*pnum > bytes) { + *pnum = bytes; } if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { *file = bs; @@ -3103,7 +3103,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_reopen_commit = sd_reopen_commit, .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, - .bdrv_create = sd_create, + .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, @@ -3113,7 +3113,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_get_block_status = sd_co_get_block_status, + .bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -3139,7 +3139,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_reopen_commit = sd_reopen_commit, .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, - .bdrv_create = sd_create, + .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, @@ -3149,7 +3149,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_get_block_status = sd_co_get_block_status, + .bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -3175,7 +3175,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_reopen_commit = sd_reopen_commit, .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, - .bdrv_create = sd_create, + .bdrv_co_create_opts = sd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_get_allocated_file_size = sd_get_allocated_file_size, @@ -3185,7 +3185,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_get_block_status = sd_co_get_block_status, + .bdrv_co_block_status = sd_co_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, diff --git a/block/ssh.c b/block/ssh.c index b11d4c5..ff99294 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -803,6 +803,33 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, return ret; } +/* Note: This is a blocking operation */ +static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp) +{ + ssize_t ret; + char c[1] = { '\0' }; + int was_blocking = libssh2_session_get_blocking(s->session); + + /* offset must be strictly greater than the current size so we do + * not overwrite anything */ + assert(offset > 0 && offset > s->attrs.filesize); + + libssh2_session_set_blocking(s->session, 1); + + libssh2_sftp_seek64(s->sftp_handle, offset - 1); + ret = libssh2_sftp_write(s->sftp_handle, c, 1); + + libssh2_session_set_blocking(s->session, was_blocking); + + if (ret < 0) { + sftp_error_setg(errp, s, "Failed to grow file"); + return -EIO; + } + + s->attrs.filesize = offset; + return 0; +} + static QemuOptsList ssh_create_opts = { .name = "ssh-create-opts", .head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head), @@ -816,14 +843,13 @@ static QemuOptsList ssh_create_opts = { } }; -static int ssh_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int r, ret; int64_t total_size = 0; QDict *uri_options = NULL; BDRVSSHState s; - ssize_t r2; - char c[1] = { '\0' }; ssh_state_init(&s); @@ -849,14 +875,10 @@ static int ssh_create(const char *filename, QemuOpts *opts, Error **errp) } if (total_size > 0) { - libssh2_sftp_seek64(s.sftp_handle, total_size-1); - r2 = libssh2_sftp_write(s.sftp_handle, c, 1); - if (r2 < 0) { - sftp_error_setg(errp, &s, "truncate failed"); - ret = -EINVAL; + ret = ssh_grow_file(&s, total_size, errp); + if (ret < 0) { goto out; } - s.attrs.filesize = total_size; } ret = 0; @@ -1198,18 +1220,42 @@ static int64_t ssh_getlength(BlockDriverState *bs) return length; } +static int ssh_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) +{ + BDRVSSHState *s = bs->opaque; + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_str(prealloc)); + return -ENOTSUP; + } + + if (offset < s->attrs.filesize) { + error_setg(errp, "ssh driver does not support shrinking files"); + return -ENOTSUP; + } + + if (offset == s->attrs.filesize) { + return 0; + } + + return ssh_grow_file(s, offset, errp); +} + static BlockDriver bdrv_ssh = { .format_name = "ssh", .protocol_name = "ssh", .instance_size = sizeof(BDRVSSHState), .bdrv_parse_filename = ssh_parse_filename, .bdrv_file_open = ssh_file_open, - .bdrv_create = ssh_create, + .bdrv_co_create_opts = ssh_co_create_opts, .bdrv_close = ssh_close, .bdrv_has_zero_init = ssh_has_zero_init, .bdrv_co_readv = ssh_co_readv, .bdrv_co_writev = ssh_co_writev, .bdrv_getlength = ssh_getlength, + .bdrv_truncate = ssh_truncate, .bdrv_co_flush_to_disk = ssh_co_flush, .create_opts = &ssh_create_opts, }; diff --git a/block/throttle.c b/block/throttle.c index 495f88c..5f4d43d 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -240,7 +240,7 @@ static BlockDriver bdrv_throttle = { .bdrv_reopen_prepare = throttle_reopen_prepare, .bdrv_reopen_commit = throttle_reopen_commit, .bdrv_reopen_abort = throttle_reopen_abort, - .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file, + .bdrv_co_block_status = bdrv_co_block_status_from_file, .bdrv_co_drain_begin = throttle_co_drain_begin, .bdrv_co_drain_end = throttle_co_drain_end, diff --git a/block/vdi.c b/block/vdi.c index fc1c614..68592cc 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -87,12 +87,18 @@ #define DEFAULT_CLUSTER_SIZE (1 * MiB) #if defined(CONFIG_VDI_DEBUG) -#define logout(fmt, ...) \ - fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__) +#define VDI_DEBUG 1 #else -#define logout(fmt, ...) ((void)0) +#define VDI_DEBUG 0 #endif +#define logout(fmt, ...) \ + do { \ + if (VDI_DEBUG) { \ + fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ + } \ + } while (0) + /* Image signature. */ #define VDI_SIGNATURE 0xbeda107f @@ -166,8 +172,6 @@ typedef struct { uint32_t *bmap; /* Size of block (bytes). */ uint32_t block_size; - /* Size of block (sectors). */ - uint32_t block_sectors; /* First sector of block map. */ uint32_t bmap_sector; /* VDI header (converted to host endianness). */ @@ -457,7 +461,6 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = header.disk_size / SECTOR_SIZE; s->block_size = header.block_size; - s->block_sectors = header.block_size / SECTOR_SIZE; s->bmap_sector = header.offset_bmap / SECTOR_SIZE; s->header = header; @@ -503,33 +506,29 @@ static int vdi_reopen_prepare(BDRVReopenState *state, return 0; } -static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { - /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ BDRVVdiState *s = (BDRVVdiState *)bs->opaque; - size_t bmap_index = sector_num / s->block_sectors; - size_t sector_in_block = sector_num % s->block_sectors; - int n_sectors = s->block_sectors - sector_in_block; + size_t bmap_index = offset / s->block_size; + size_t index_in_block = offset % s->block_size; uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); - uint64_t offset; int result; - logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum); - if (n_sectors > nb_sectors) { - n_sectors = nb_sectors; - } - *pnum = n_sectors; + logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum); + *pnum = MIN(s->block_size - index_in_block, bytes); result = VDI_IS_ALLOCATED(bmap_entry); if (!result) { return 0; } - offset = s->header.offset_data + - (uint64_t)bmap_entry * s->block_size + - sector_in_block * SECTOR_SIZE; + *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + + index_in_block; *file = bs->file->bs; - return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } static int coroutine_fn @@ -717,7 +716,8 @@ nonallocating_write: return ret; } -static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int ret = 0; uint64_t bytes = 0; @@ -895,9 +895,9 @@ static BlockDriver bdrv_vdi = { .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_create = vdi_create, + .bdrv_co_create_opts = vdi_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_get_block_status = vdi_co_get_block_status, + .bdrv_co_block_status = vdi_co_block_status, .bdrv_make_empty = vdi_make_empty, .bdrv_co_preadv = vdi_co_preadv, diff --git a/block/vhdx.c b/block/vhdx.c index c449c5d..3fbff50 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1792,7 +1792,8 @@ exit: * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. * 1MB */ -static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn vhdx_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int ret = 0; uint64_t image_size = (uint64_t) 2 * GiB; @@ -2003,7 +2004,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, - .bdrv_create = vhdx_create, + .bdrv_co_create_opts = vhdx_co_create_opts, .bdrv_get_info = vhdx_get_info, .bdrv_check = vhdx_check, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/vmdk.c b/block/vmdk.c index ef15ddb..67342ed 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1304,33 +1304,27 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, return extent_relative_offset % cluster_size; } -static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, - int64_t sector_num) -{ - uint64_t offset; - offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE); - return offset / BDRV_SECTOR_SIZE; -} - -static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVVmdkState *s = bs->opaque; int64_t index_in_cluster, n, ret; - uint64_t offset; + uint64_t cluster_offset; VmdkExtent *extent; - extent = find_extent(s, sector_num, NULL); + extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL); if (!extent) { - return 0; + return -EIO; } qemu_co_mutex_lock(&s->lock); - ret = get_cluster_offset(bs, extent, NULL, - sector_num * 512, false, &offset, + ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset, 0, 0); qemu_co_mutex_unlock(&s->lock); - index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); + index_in_cluster = vmdk_find_offset_in_cluster(extent, offset); switch (ret) { case VMDK_ERROR: ret = -EIO; @@ -1345,18 +1339,14 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, ret = BDRV_BLOCK_DATA; if (!extent->compressed) { ret |= BDRV_BLOCK_OFFSET_VALID; - ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS)) - & BDRV_BLOCK_OFFSET_MASK; + *map = cluster_offset + index_in_cluster; } *file = extent->file->bs; break; } - n = extent->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; - } - *pnum = n; + n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster; + *pnum = MIN(n, bytes); return ret; } @@ -1892,7 +1882,8 @@ static int filename_decompose(const char *filename, char *path, char *prefix, return VMDK_OK; } -static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { int idx = 0; BlockBackend *new_blk = NULL; @@ -2408,9 +2399,9 @@ static BlockDriver bdrv_vmdk = { .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, .bdrv_close = vmdk_close, - .bdrv_create = vmdk_create, + .bdrv_co_create_opts = vmdk_co_create_opts, .bdrv_co_flush_to_disk = vmdk_co_flush, - .bdrv_co_get_block_status = vmdk_co_get_block_status, + .bdrv_co_block_status = vmdk_co_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_get_specific_info = vmdk_get_specific_info, diff --git a/block/vpc.c b/block/vpc.c index cfa5144..b2e2b9e 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -706,53 +706,54 @@ fail: return ret; } -static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) +static int coroutine_fn vpc_co_block_status(BlockDriverState *bs, + bool want_zero, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) { BDRVVPCState *s = bs->opaque; VHDFooter *footer = (VHDFooter*) s->footer_buf; - int64_t start, offset; + int64_t image_offset; bool allocated; - int64_t ret; - int n; + int ret; + int64_t n; if (be32_to_cpu(footer->type) == VHD_FIXED) { - *pnum = nb_sectors; + *pnum = bytes; + *map = offset; *file = bs->file->bs; - return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | - (sector_num << BDRV_SECTOR_BITS); + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID; } qemu_co_mutex_lock(&s->lock); - offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL); - start = offset; - allocated = (offset != -1); + image_offset = get_image_offset(bs, offset, false, NULL); + allocated = (image_offset != -1); *pnum = 0; ret = 0; do { /* All sectors in a block are contiguous (without using the bitmap) */ - n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) - - sector_num; - n = MIN(n, nb_sectors); + n = ROUND_UP(offset + 1, s->block_size) - offset; + n = MIN(n, bytes); *pnum += n; - sector_num += n; - nb_sectors -= n; + offset += n; + bytes -= n; /* *pnum can't be greater than one block for allocated * sectors since there is always a bitmap in between. */ if (allocated) { *file = bs->file->bs; - ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + *map = image_offset; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; break; } - if (nb_sectors == 0) { + if (bytes == 0) { break; } - offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, - NULL); - } while (offset == -1); + image_offset = get_image_offset(bs, offset, false, NULL); + } while (image_offset == -1); qemu_co_mutex_unlock(&s->lock); return ret; @@ -896,7 +897,8 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, return ret; } -static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) +static int coroutine_fn vpc_co_create_opts(const char *filename, QemuOpts *opts, + Error **errp) { uint8_t buf[1024]; VHDFooter *footer = (VHDFooter *) buf; @@ -1094,11 +1096,11 @@ static BlockDriver bdrv_vpc = { .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, .bdrv_child_perm = bdrv_format_default_perms, - .bdrv_create = vpc_create, + .bdrv_co_create_opts = vpc_co_create_opts, .bdrv_co_preadv = vpc_co_preadv, .bdrv_co_pwritev = vpc_co_pwritev, - .bdrv_co_get_block_status = vpc_co_get_block_status, + .bdrv_co_block_status = vpc_co_block_status, .bdrv_get_info = vpc_get_info, diff --git a/block/vvfat.c b/block/vvfat.c index 7e06eba..4a17a49 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3088,15 +3088,13 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, return ret; } -static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) +static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *n, + int64_t *map, + BlockDriverState **file) { - *n = bs->total_sectors - sector_num; - if (*n > nb_sectors) { - *n = nb_sectors; - } else if (*n < 0) { - return 0; - } + *n = bytes; return BDRV_BLOCK_DATA; } @@ -3257,7 +3255,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, - .bdrv_co_get_block_status = vvfat_co_get_block_status, + .bdrv_co_block_status = vvfat_co_block_status, }; static void bdrv_vvfat_init(void) diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index d7fdb1f..feb711f 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -426,10 +426,20 @@ Standard Cluster Descriptor: Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)): - Bit 0 - x: Host cluster offset. This is usually _not_ aligned to a - cluster boundary! + Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a + cluster or sector boundary! - x+1 - 61: Compressed size of the images in sectors of 512 bytes + x - 61: Number of additional 512-byte sectors used for the + compressed data, beyond the sector containing the offset + in the previous field. Some of these sectors may reside + in the next contiguous host cluster. + + Note that the compressed data does not necessarily occupy + all of the bytes in the final sector; rather, decompression + stops when it has produced a cluster of data. + + Another compressed cluster may map to the tail of the final + sector used by this compressed cluster. If a cluster is unallocated, read requests shall read the data from the backing file (except if bit 0 in the Standard Cluster Descriptor is set). If there is diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index b0571de..170191a 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -1,6 +1,6 @@ qcow2 L2/refcount cache configuration ===================================== -Copyright (C) 2015 Igalia, S.L. +Copyright (C) 2015, 2018 Igalia, S.L. Author: Alberto Garcia <berto@igalia.com> This work is licensed under the terms of the GNU GPL, version 2 or @@ -118,8 +118,8 @@ There are three options available, and all of them take bytes: There are two things that need to be taken into account: - - Both caches must have a size that is a multiple of the cluster - size. + - Both caches must have a size that is a multiple of the cluster size + (or the cache entry size: see "Using smaller cache sizes" below). - If you only set one of the options above, QEMU will automatically adjust the others so that the L2 cache is 4 times bigger than the @@ -143,6 +143,46 @@ much less often than the L2 cache, so it's perfectly reasonable to keep it small. +Using smaller cache entries +--------------------------- +The qcow2 L2 cache stores complete tables by default. This means that +if QEMU needs an entry from an L2 table then the whole table is read +from disk and is kept in the cache. If the cache is full then a +complete table needs to be evicted first. + +This can be inefficient with large cluster sizes since it results in +more disk I/O and wastes more cache memory. + +Since QEMU 2.12 you can change the size of the L2 cache entry and make +it smaller than the cluster size. This can be configured using the +"l2-cache-entry-size" parameter: + + -drive file=hd.qcow2,l2-cache-size=2097152,l2-cache-entry-size=4096 + +Some things to take into account: + + - The L2 cache entry size has the same restrictions as the cluster + size (power of two, at least 512 bytes). + + - Smaller entry sizes generally improve the cache efficiency and make + disk I/O faster. This is particularly true with solid state drives + so it's a good idea to reduce the entry size in those cases. With + rotating hard drives the situation is a bit more complicated so you + should test it first and stay with the default size if unsure. + + - Try different entry sizes to see which one gives faster performance + in your case. The block size of the host filesystem is generally a + good default (usually 4096 bytes in the case of ext4). + + - Only the L2 cache can be configured this way. The refcount cache + always uses the cluster size as the entry size. + + - If the L2 cache is big enough to hold all of the image's L2 tables + (as explained in the "Choosing the right cache sizes" section + earlier in this document) then none of this is necessary and you + can omit the "l2-cache-entry-size" parameter altogether. + + Reducing the memory usage ------------------------- It is possible to clean unused cache entries in order to reduce the diff --git a/hw/ide/core.c b/hw/ide/core.c index 257b429..139c843 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1087,15 +1087,7 @@ static void ide_flush_cache(IDEState *s) s->status |= BUSY_STAT; ide_set_retry(s); block_acct_start(blk_get_stats(s->blk), &s->acct, 0, BLOCK_ACCT_FLUSH); - - if (blk_bs(s->blk)) { - s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s); - } else { - /* XXX blk_aio_flush() crashes when blk_bs(blk) is NULL, remove this - * temporary workaround when blk_aio_*() functions handle NULL blk_bs. - */ - ide_flush_cb(s, 0); - } + s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s); } static void ide_cfata_metadata_inquiry(IDEState *s) diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h new file mode 100644 index 0000000..a48c744 --- /dev/null +++ b/include/block/aio-wait.h @@ -0,0 +1,116 @@ +/* + * AioContext wait support + * + * Copyright (C) 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_AIO_WAIT_H +#define QEMU_AIO_WAIT_H + +#include "block/aio.h" + +/** + * AioWait: + * + * An object that facilitates synchronous waiting on a condition. The main + * loop can wait on an operation running in an IOThread as follows: + * + * AioWait *wait = ...; + * AioContext *ctx = ...; + * MyWork work = { .done = false }; + * schedule_my_work_in_iothread(ctx, &work); + * AIO_WAIT_WHILE(wait, ctx, !work.done); + * + * The IOThread must call aio_wait_kick() to notify the main loop when + * work.done changes: + * + * static void do_work(...) + * { + * ... + * work.done = true; + * aio_wait_kick(wait); + * } + */ +typedef struct { + /* Is the main loop waiting for a kick? Accessed with atomic ops. */ + bool need_kick; +} AioWait; + +/** + * AIO_WAIT_WHILE: + * @wait: the aio wait object + * @ctx: the aio context + * @cond: wait while this conditional expression is true + * + * Wait while a condition is true. Use this to implement synchronous + * operations that require event loop activity. + * + * The caller must be sure that something calls aio_wait_kick() when the value + * of @cond might have changed. + * + * The caller's thread must be the IOThread that owns @ctx or the main loop + * thread (with @ctx acquired exactly once). This function cannot be used to + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. + */ +#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ + bool waited_ = false; \ + bool busy_ = true; \ + AioWait *wait_ = (wait); \ + AioContext *ctx_ = (ctx); \ + if (in_aio_context_home_thread(ctx_)) { \ + while ((cond) || busy_) { \ + busy_ = aio_poll(ctx_, (cond)); \ + waited_ |= !!(cond) | busy_; \ + } \ + } else { \ + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + assert(!wait_->need_kick); \ + /* Set wait_->need_kick before evaluating cond. */ \ + atomic_mb_set(&wait_->need_kick, true); \ + while (busy_) { \ + if ((cond)) { \ + waited_ = busy_ = true; \ + aio_context_release(ctx_); \ + aio_poll(qemu_get_aio_context(), true); \ + aio_context_acquire(ctx_); \ + } else { \ + busy_ = aio_poll(ctx_, false); \ + waited_ |= busy_; \ + } \ + } \ + atomic_set(&wait_->need_kick, false); \ + } \ + waited_; }) + +/** + * aio_wait_kick: + * @wait: the aio wait object that should re-evaluate its condition + * + * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During + * synchronous operations performed in an IOThread, the main thread lets the + * IOThread's event loop run, waiting for the operation to complete. A + * aio_wait_kick() call will wake up the main thread. + */ +void aio_wait_kick(AioWait *wait); + +#endif /* QEMU_AIO_WAIT */ diff --git a/include/block/aio.h b/include/block/aio.h index e9aeeae..a1d6b9e 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); AioContext *qemu_get_current_aio_context(void); /** + * in_aio_context_home_thread: * @ctx: the aio context * - * Return whether we are running in the I/O thread that manages @ctx. + * Return whether we are running in the thread that normally runs @ctx. Note + * that acquiring/releasing ctx does not affect the outcome, each AioContext + * still only has one home thread that is responsible for running it. */ -static inline bool aio_context_in_iothread(AioContext *ctx) +static inline bool in_aio_context_home_thread(AioContext *ctx) { return ctx == qemu_get_current_aio_context(); } diff --git a/include/block/block.h b/include/block/block.h index fac401b..8b6db95 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -3,6 +3,7 @@ #include "block/aio.h" #include "qapi/qapi-types-block-core.h" +#include "block/aio-wait.h" #include "qemu/iov.h" #include "qemu/coroutine.h" #include "block/accounting.h" @@ -115,19 +116,19 @@ typedef struct HDGeometry { * BDRV_BLOCK_ZERO: offset reads as zero * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer (short for DATA || ZERO), set by block layer - * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer * * Internal flag: * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request * that the block layer recompute the answer from the returned * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. * - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of - * the return value (old interface) or the entire map parameter (new - * interface) represent the offset in the returned BDS that is allocated for - * the corresponding raw data. However, whether that offset actually - * contains data also depends on BDRV_BLOCK_DATA, as follows: + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: * * DATA ZERO OFFSET_VALID * t t t sectors read as zero, returned file is zero at offset @@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void); void bdrv_drain_all_end(void); void bdrv_drain_all(void); +/* Returns NULL when bs == NULL */ +AioWait *bdrv_get_aio_wait(BlockDriverState *bs); + #define BDRV_POLL_WHILE(bs, cond) ({ \ - bool waited_ = false; \ - bool busy_ = true; \ BlockDriverState *bs_ = (bs); \ - AioContext *ctx_ = bdrv_get_aio_context(bs_); \ - if (aio_context_in_iothread(ctx_)) { \ - while ((cond) || busy_) { \ - busy_ = aio_poll(ctx_, (cond)); \ - waited_ |= !!(cond) | busy_; \ - } \ - } else { \ - assert(qemu_get_current_aio_context() == \ - qemu_get_aio_context()); \ - /* Ask bdrv_dec_in_flight to wake up the main \ - * QEMU AioContext. Extra I/O threads never take \ - * other I/O threads' AioContexts (see for example \ - * block_job_defer_to_main_loop for how to do it). \ - */ \ - assert(!bs_->wakeup); \ - /* Set bs->wakeup before evaluating cond. */ \ - atomic_mb_set(&bs_->wakeup, true); \ - while (busy_) { \ - if ((cond)) { \ - waited_ = busy_ = true; \ - aio_context_release(ctx_); \ - aio_poll(qemu_get_aio_context(), true); \ - aio_context_acquire(ctx_); \ - } else { \ - busy_ = aio_poll(ctx_, false); \ - waited_ |= busy_; \ - } \ - } \ - atomic_set(&bs_->wakeup, false); \ - } \ - waited_; }) + AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \ + bdrv_get_aio_context(bs_), \ + cond); }) int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); diff --git a/include/block/block_int.h b/include/block/block_int.h index 5ea63f8..64a5700 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -26,6 +26,7 @@ #include "block/accounting.h" #include "block/block.h" +#include "block/aio-wait.h" #include "qemu/queue.h" #include "qemu/coroutine.h" #include "qemu/stats64.h" @@ -128,7 +129,8 @@ struct BlockDriver { int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, Error **errp); void (*bdrv_close)(BlockDriverState *bs); - int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); + int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts, + Error **errp); int (*bdrv_make_empty)(BlockDriverState *bs); void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); @@ -202,15 +204,22 @@ struct BlockDriver { /* * Building block for bdrv_block_status[_above] and * bdrv_is_allocated[_above]. The driver should answer only - * according to the current layer, and should not set - * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h - * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block - * layer guarantees input aligned to request_alignment, as well as - * non-NULL pnum and file. + * according to the current layer, and should only need to set + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See + * block.h for the overall meaning of the bits. As a hint, the + * flag want_zero is true if the caller cares more about precise + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for + * overall allocation (favor larger *pnum, perhaps by reporting + * _DATA instead of _ZERO). The block layer guarantees input + * clamped to bdrv_getlength() and aligned to request_alignment, + * as well as non-NULL pnum, map, and file; in turn, the driver + * must return an error or set pnum to an aligned non-zero value. */ - int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, - BlockDriverState **file); + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); /* * Invalidate any cached meta-data. @@ -709,10 +718,8 @@ struct BlockDriverState { unsigned int in_flight; unsigned int serialising_in_flight; - /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic - * ops. - */ - bool wakeup; + /* Kicked to signal main loop when a request completes. */ + AioWait wait; /* counter for nested bdrv_io_plug. * Accessed with atomic ops. @@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared); /* - * Default implementation for drivers to pass bdrv_co_get_block_status() to + * Default implementation for drivers to pass bdrv_co_block_status() to * their file. */ -int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file); +int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); /* - * Default implementation for drivers to pass bdrv_co_get_block_status() to + * Default implementation for drivers to pass bdrv_co_block_status() to * their backing file. */ -int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file); +int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); const char *bdrv_get_parent_name(const BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); diff --git a/qapi/block-core.json b/qapi/block-core.json index 5c5921b..00475f0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3676,7 +3676,8 @@ # # @node-name: node name. Note that errors may be reported for the root node # that is directly attached to a guest device rather than for the -# node where the error occurred. (Since: 2.8) +# node where the error occurred. The node name is not present if +# the drive is empty. (Since: 2.8) # # @operation: I/O operation # @@ -3707,7 +3708,8 @@ # ## { 'event': 'BLOCK_IO_ERROR', - 'data': { 'device': 'str', 'node-name': 'str', 'operation': 'IoOperationType', + 'data': { 'device': 'str', '*node-name': 'str', + 'operation': 'IoOperationType', 'action': 'BlockErrorAction', '*nospace': 'bool', 'reason': 'str' } } @@ -3469,7 +3469,7 @@ static int img_resize(int argc, char **argv) } } if (optind != argc - 1) { - error_exit("Expecting one image file name"); + error_exit("Expecting image file name and size"); } filename = argv[optind++]; diff --git a/tests/Makefile.include b/tests/Makefile.include index fdca062..ef9b88c 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -92,6 +92,7 @@ gcov-files-test-hbitmap-y = blockjob.c check-unit-y += tests/test-bdrv-drain$(EXESUF) check-unit-y += tests/test-blockjob$(EXESUF) check-unit-y += tests/test-blockjob-txn$(EXESUF) +check-unit-y += tests/test-block-backend$(EXESUF) check-unit-y += tests/test-x86-cpuid$(EXESUF) # all code tested by test-x86-cpuid is inside topology.h gcov-files-test-x86-cpuid-y = @@ -622,6 +623,7 @@ tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) +tests/test-block-backend$(EXESUF): tests/test-block-backend.o $(test-block-obj-y) $(test-util-obj-y) tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y) tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y) diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033 index 2cdfd13..a1d8357 100755 --- a/tests/qemu-iotests/033 +++ b/tests/qemu-iotests/033 @@ -64,6 +64,9 @@ do_test() } | $QEMU_IO $IO_EXTRA_ARGS } +echo +echo "=== Test aligned and misaligned write zeroes operations ===" + for write_zero_cmd in "write -z" "aio_write -z"; do for align in 512 4k; do echo @@ -102,7 +105,33 @@ for align in 512 4k; do done done + +# Trigger truncate that would shrink qcow2 L1 table, which is done by +# clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes() + +echo +echo "=== Test misaligned write zeroes via truncate ===" +echo + +# any size will do, but the smaller the size the smaller the required image +CLUSTER_SIZE=$((4 * 1024)) +L2_COVERAGE=$(($CLUSTER_SIZE * $CLUSTER_SIZE / 8)) +_make_test_img $(($L2_COVERAGE * 2)) + +do_test 512 "write -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io +# next L2 table +do_test 512 "write -P 1 $L2_COVERAGE 0x200" "$TEST_IMG" | _filter_qemu_io + +# only interested in qcow2 here; also other formats might respond with +# "not supported" error message +if [ $IMGFMT = "qcow2" ]; then + do_test 512 "truncate $L2_COVERAGE" "$TEST_IMG" | _filter_qemu_io +fi + +do_test 512 "read -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io + # success, all done +echo echo "*** done" rm -f $seq.full status=0 diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out index 95929ef..9683f6b 100644 --- a/tests/qemu-iotests/033.out +++ b/tests/qemu-iotests/033.out @@ -1,6 +1,8 @@ QA output created by 033 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +=== Test aligned and misaligned write zeroes operations === + == preparing image == wrote 1024/1024 bytes at offset 512 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -164,4 +166,15 @@ read 512/512 bytes at offset 512 read 3072/3072 bytes at offset 1024 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Test misaligned write zeroes via truncate === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2097152 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + *** done diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c new file mode 100644 index 0000000..fd59f02 --- /dev/null +++ b/tests/test-block-backend.c @@ -0,0 +1,82 @@ +/* + * BlockBackend tests + * + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" + +static void test_drain_aio_error_flush_cb(void *opaque, int ret) +{ + bool *completed = opaque; + + g_assert(ret == -ENOMEDIUM); + *completed = true; +} + +static void test_drain_aio_error(void) +{ + BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockAIOCB *acb; + bool completed = false; + + acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed); + g_assert(acb != NULL); + g_assert(completed == false); + + blk_drain(blk); + g_assert(completed == true); + + blk_unref(blk); +} + +static void test_drain_all_aio_error(void) +{ + BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockAIOCB *acb; + bool completed = false; + + acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed); + g_assert(acb != NULL); + g_assert(completed == false); + + blk_drain_all(); + g_assert(completed == true); + + blk_unref(blk); +} + +int main(int argc, char **argv) +{ + bdrv_init(); + qemu_init_main_loop(&error_abort); + + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error); + g_test_add_func("/block-backend/drain_all_aio_error", + test_drain_all_aio_error); + + return g_test_run(); +} diff --git a/util/Makefile.objs b/util/Makefile.objs index 3fb6116..ae90b99 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,7 +1,7 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o util-obj-y += bufferiszero.o util-obj-y += lockcnt.o -util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o +util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o util-obj-y += main-loop.o iohandler.o util-obj-$(CONFIG_POSIX) += aio-posix.o util-obj-$(CONFIG_POSIX) += compatfd.o diff --git a/util/aio-wait.c b/util/aio-wait.c new file mode 100644 index 0000000..a487cdb --- /dev/null +++ b/util/aio-wait.c @@ -0,0 +1,40 @@ +/* + * AioContext wait support + * + * Copyright (C) 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "block/aio-wait.h" + +static void dummy_bh_cb(void *opaque) +{ + /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */ +} + +void aio_wait_kick(AioWait *wait) +{ + /* The barrier (or an atomic op) is in the caller. */ + if (atomic_read(&wait->need_kick)) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); + } +} |