diff options
Diffstat (limited to 'block')
38 files changed, 775 insertions, 308 deletions
diff --git a/block/backup.c b/block/backup.c index 79652bf..d4713fa 100644 --- a/block/backup.c +++ b/block/backup.c @@ -361,6 +361,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BackupPerf *perf, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + OnCbwError on_cbw_error, int creation_flags, BlockCompletionFunc *cb, void *opaque, JobTxn *txn, Error **errp) @@ -458,7 +459,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source, - perf->min_cluster_size, &bcs, errp); + perf->min_cluster_size, &bcs, on_cbw_error, errp); if (!cbw) { goto error; } @@ -497,7 +498,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, block_copy_set_speed(bcs, speed); /* Required permissions are taken by copy-before-write filter target */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); bdrv_graph_wrunlock(); diff --git a/block/blkdebug.c b/block/blkdebug.c index 1c1967f..c54aee0 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -751,9 +751,9 @@ blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK -blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +blkdebug_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { int err; diff --git a/block/blkio.c b/block/blkio.c index 5f4fce2..4142673 100644 --- a/block/blkio.c +++ b/block/blkio.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include <blkio.h> #include "block/block_int.h" -#include "exec/memory.h" +#include "system/memory.h" #include "exec/cpu-common.h" /* for qemu_ram_get_fd() */ #include "qemu/defer-call.h" #include "qapi/error.h" @@ -19,7 +19,7 @@ #include "qobject/qdict.h" #include "qemu/module.h" #include "system/block-backend.h" -#include "exec/memory.h" /* for ram_block_discard_disable() */ +#include "system/memory.h" /* for ram_block_discard_disable() */ #include "block/block-io.h" diff --git a/block/blklogwrites.c b/block/blklogwrites.c index b0f78c4..aa1f888 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -281,7 +281,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; fail_log: if (ret < 0) { - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, s->log_file); bdrv_graph_wrunlock(); s->log_file = NULL; @@ -296,7 +296,7 @@ static void blk_log_writes_close(BlockDriverState *bs) { BDRVBlkLogWritesState *s = bs->opaque; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, s->log_file); s->log_file = NULL; bdrv_graph_wrunlock(); diff --git a/block/blkverify.c b/block/blkverify.c index db79a36..72efcbe 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -151,7 +151,7 @@ static void blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, s->test_file); s->test_file = NULL; bdrv_graph_wrunlock(); diff --git a/block/block-backend.c b/block/block-backend.c index a402db1..f8d6ba6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); -static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); +static bool GRAPH_RDLOCK +blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited, + Transaction *tran, Error **errp); static char *blk_root_get_parent_desc(BdrvChild *child) { @@ -889,7 +889,7 @@ void blk_remove_bs(BlockBackend *blk) root = blk->root; blk->root = NULL; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_root_unref_child(root); bdrv_graph_wrunlock(); } @@ -904,7 +904,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) GLOBAL_STATE_CODE(); bdrv_ref(bs); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { blk->disable_perm = true; diff --git a/block/commit.c b/block/commit.c index 5df3d05..0d9e1a1 100644 --- a/block/commit.c +++ b/block/commit.c @@ -15,6 +15,8 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" #include "trace.h" +#include "block/block-common.h" +#include "block/coroutines.h" #include "block/block_int.h" #include "block/blockjob_int.h" #include "qapi/error.h" @@ -66,7 +68,7 @@ static int commit_prepare(Job *job) s->backing_mask_protocol); } -static void commit_abort(Job *job) +static void GRAPH_UNLOCKED commit_abort(Job *job) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); BlockDriverState *top_bs = blk_bs(s->top); @@ -126,6 +128,84 @@ static void commit_clean(Job *job) blk_unref(s->top); } +static int commit_iteration(CommitBlockJob *s, int64_t offset, + int64_t *requested_bytes, void *buf) +{ + BlockErrorAction action; + int64_t bytes = *requested_bytes; + int ret = 0; + bool error_in_source = true; + + /* Copy if allocated above the base */ + WITH_GRAPH_RDLOCK_GUARD() { + ret = bdrv_co_common_block_status_above(blk_bs(s->top), + s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, + &bytes, NULL, NULL, NULL); + } + + trace_commit_one_iteration(s, offset, bytes, ret); + + if (ret < 0) { + goto fail; + } + + if (ret & BDRV_BLOCK_ALLOCATED) { + if (ret & BDRV_BLOCK_ZERO) { + /* + * If the top (sub)clusters are smaller than the base + * (sub)clusters, this will not unmap unless the underlying device + * does some tracking of these requests. Ideally, we would find + * the maximal extent of the zero clusters. + */ + ret = blk_co_pwrite_zeroes(s->base, offset, bytes, + BDRV_REQ_MAY_UNMAP); + if (ret < 0) { + error_in_source = false; + goto fail; + } + } else { + assert(bytes < SIZE_MAX); + + ret = blk_co_pread(s->top, offset, bytes, buf, 0); + if (ret < 0) { + goto fail; + } + + ret = blk_co_pwrite(s->base, offset, bytes, buf, 0); + if (ret < 0) { + error_in_source = false; + goto fail; + } + } + + /* + * Whether zeroes actually end up on disk depends on the details of + * the underlying driver. Therefore, this might rate limit more than + * is necessary. + */ + block_job_ratelimit_processed_bytes(&s->common, bytes); + } + + /* Publish progress */ + + job_progress_update(&s->common.job, bytes); + + *requested_bytes = bytes; + + return 0; + +fail: + action = block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + return ret; + } + + *requested_bytes = 0; + + return 0; +} + static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); @@ -156,9 +236,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); for (offset = 0; offset < len; offset += n) { - bool copy; - bool error_in_source = true; - /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ @@ -166,38 +243,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (job_is_cancelled(&s->common.job)) { break; } - /* Copy if allocated above the base */ - ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, - offset, COMMIT_BUFFER_SIZE, &n); - copy = (ret > 0); - trace_commit_one_iteration(s, offset, n, ret); - if (copy) { - assert(n < SIZE_MAX); - - ret = blk_co_pread(s->top, offset, n, buf, 0); - if (ret >= 0) { - ret = blk_co_pwrite(s->base, offset, n, buf, 0); - if (ret < 0) { - error_in_source = false; - } - } - } - if (ret < 0) { - BlockErrorAction action = - block_job_error_action(&s->common, s->on_error, - error_in_source, -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - return ret; - } else { - n = 0; - continue; - } - } - /* Publish progress */ - job_progress_update(&s->common.job, n); - if (copy) { - block_job_ratelimit_processed_bytes(&s->common, n); + ret = commit_iteration(s, offset, &n, buf); + + if (ret < 0) { + return ret; } } @@ -342,7 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, * this is the responsibility of the interface (i.e. whoever calls * commit_start()). */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); s->base_overlay = bdrv_find_overlay(top, base); assert(s->base_overlay); @@ -464,28 +514,32 @@ int bdrv_commit(BlockDriverState *bs) Error *local_err = NULL; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!drv) return -ENOMEDIUM; + bdrv_graph_rdlock_main_loop(); + backing_file_bs = bdrv_cow_bs(bs); if (!backing_file_bs) { - return -ENOTSUP; + ret = -ENOTSUP; + goto out; } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { - return -EBUSY; + ret = -EBUSY; + goto out; } ro = bdrv_is_read_only(backing_file_bs); if (ro) { if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { - return -EACCES; + ret = -EACCES; + goto out; } } @@ -509,8 +563,14 @@ int bdrv_commit(BlockDriverState *bs) goto ro_cleanup; } + bdrv_graph_rdunlock_main_loop(); + + bdrv_graph_wrlock_drained(); bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + bdrv_graph_wrunlock(); + + bdrv_graph_rdlock_main_loop(); ret = blk_insert_bs(backing, backing_file_bs, &local_err); if (ret < 0) { @@ -585,9 +645,14 @@ int bdrv_commit(BlockDriverState *bs) ret = 0; ro_cleanup: blk_unref(backing); + + bdrv_graph_rdunlock_main_loop(); + bdrv_graph_wrlock_drained(); if (bdrv_cow_bs(bs) != backing_file_bs) { bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); } + bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); bdrv_unref(commit_top_bs); blk_unref(src); @@ -596,5 +661,8 @@ ro_cleanup: bdrv_reopen_set_read_only(backing_file_bs, true, NULL); } +out: + bdrv_graph_rdunlock_main_loop(); + return ret; } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index fd470f5..36d5d3e 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -291,8 +291,8 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn GRAPH_RDLOCK -cbw_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +cbw_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -551,6 +551,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, bool discard_source, uint64_t min_cluster_size, BlockCopyState **bcs, + OnCbwError on_cbw_error, Error **errp) { BDRVCopyBeforeWriteState *state; @@ -568,6 +569,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, } qdict_put_str(opts, "file", bdrv_get_node_name(source)); qdict_put_str(opts, "target", bdrv_get_node_name(target)); + qdict_put_str(opts, "on-cbw-error", OnCbwError_str(on_cbw_error)); if (min_cluster_size > INT64_MAX) { error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 2a5d4ba..eb93364 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -42,6 +42,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, bool discard_source, uint64_t min_cluster_size, BlockCopyState **bcs, + OnCbwError on_cbw_error, Error **errp); void bdrv_cbw_drop(BlockDriverState *bs); diff --git a/block/coroutines.h b/block/coroutines.h index 79e5efb..892646b 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -47,7 +47,7 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -78,7 +78,7 @@ int co_wrapper_mixed_bdrv_rdlock bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, diff --git a/block/file-posix.c b/block/file-posix.c index 56d1972..8c73867 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -41,6 +41,7 @@ #include "scsi/pr-manager.h" #include "scsi/constants.h" +#include "scsi/utils.h" #if defined(__APPLE__) && (__MACH__) #include <sys/ioctl.h> @@ -72,6 +73,7 @@ #include <linux/blkzoned.h> #endif #include <linux/cdrom.h> +#include <linux/dm-ioctl.h> #include <linux/fd.h> #include <linux/fs.h> #include <linux/hdreg.h> @@ -110,6 +112,10 @@ #include <sys/diskslice.h> #endif +#ifdef EMSCRIPTEN +#include <sys/ioctl.h> +#endif + /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC @@ -134,6 +140,22 @@ #define RAW_LOCK_PERM_BASE 100 #define RAW_LOCK_SHARED_BASE 200 +/* + * Multiple retries are mostly meant for two separate scenarios: + * + * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another + * path goes down. + * + * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have + * to send another SG_IO to switch to another path group to probe the paths in + * it. + * + * Even if each path is in a separate path group (path_grouping_policy set to + * failover), it's rare to have more than eight path groups - and even then + * pretty unlikely that only bad path groups would be chosen in eight retries. + */ +#define SG_IO_MAX_RETRIES 8 + typedef struct BDRVRawState { int fd; bool use_lock; @@ -161,6 +183,7 @@ typedef struct BDRVRawState { bool use_linux_aio:1; bool has_laio_fdsync:1; bool use_linux_io_uring:1; + bool use_mpath:1; int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; @@ -781,17 +804,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif - if (S_ISBLK(st.st_mode)) { -#ifdef __linux__ - /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do - * not rely on the contents of discarded blocks unless using O_DIRECT. - * Same for BLKZEROOUT. - */ - if (!(bs->open_flags & BDRV_O_NOCACHE)) { - s->has_write_zeroes = false; - } -#endif - } #ifdef __FreeBSD__ if (S_ISCHR(st.st_mode)) { /* @@ -1276,10 +1288,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) } #endif /* defined(CONFIG_BLKZONED) */ +#ifdef CONFIG_LINUX /* * Get a sysfs attribute value as a long integer. */ -#ifdef CONFIG_LINUX static long get_sysfs_long_val(struct stat *st, const char *attribute) { g_autofree char *str = NULL; @@ -1299,6 +1311,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) } return ret; } + +/* + * Get a sysfs attribute value as a uint32_t. + */ +static int get_sysfs_u32_val(struct stat *st, const char *attribute, + uint32_t *u32) +{ + g_autofree char *str = NULL; + const char *end; + unsigned int val; + int ret; + + ret = get_sysfs_str_val(st, attribute, &str); + if (ret < 0) { + return ret; + } + + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtoui(str, &end, 10, &val); + if (ret == 0 && end && *end == '\0') { + *u32 = val; + } + return ret; +} #endif static int hdev_get_max_segments(int fd, struct stat *st) @@ -1318,6 +1354,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) #endif } +/* + * Fills in *dalign with the discard alignment and returns 0 on success, + * -errno otherwise. + */ +static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) +{ +#ifdef CONFIG_LINUX + /* + * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux + * "discard_alignment" is something else. + */ + return get_sysfs_u32_val(st, "discard_granularity", dalign); +#else + return -ENOTSUP; +#endif +} + #if defined(CONFIG_BLKZONED) /* * If the reset_all flag is true, then the wps of zone whose state is @@ -1527,6 +1580,30 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } + if (S_ISBLK(st.st_mode)) { + uint32_t dalign = 0; + int ret; + + ret = hdev_get_pdiscard_alignment(&st, &dalign); + if (ret == 0 && dalign != 0) { + uint32_t ralign = bs->bl.request_alignment; + + /* Probably never happens, but handle it just in case */ + if (dalign < ralign && (ralign % dalign == 0)) { + dalign = ralign; + } + + /* The block layer requires a multiple of request_alignment */ + if (dalign % ralign != 0) { + error_setg(errp, "Invalid pdiscard_alignment limit %u is not a " + "multiple of request_alignment %u", dalign, ralign); + return; + } + + bs->bl.pdiscard_alignment = dalign; + } + } + raw_refresh_zoned_limits(bs, &st, errp); } @@ -2011,8 +2088,11 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) } #ifndef HAVE_COPY_FILE_RANGE -static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, - off_t *out_off, size_t len, unsigned int flags) +#ifndef EMSCRIPTEN +static +#endif +ssize_t copy_file_range(int in_fd, off_t *in_off, int out_fd, + off_t *out_off, size_t len, unsigned int flags) { #ifdef __NR_copy_file_range return syscall(__NR_copy_file_range, in_fd, in_off, out_fd, @@ -2484,9 +2564,9 @@ static inline bool raw_check_linux_aio(BDRVRawState *s) } #endif -static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - uint64_t bytes, QEMUIOVector *qiov, int type, - int flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, uint64_t bytes, + QEMUIOVector *qiov, int type, int flags) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2545,7 +2625,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); if (ret == 0 && (flags & BDRV_REQ_FUA)) { /* TODO Use pwritev2() instead if it's available */ - ret = raw_co_flush_to_disk(bs); + ret = bdrv_co_flush(bs); } goto out; /* Avoid the compiler err of unused label */ @@ -2580,16 +2660,16 @@ out: return ret; } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags); } @@ -3201,7 +3281,7 @@ static int find_allocation(BlockDriverState *bs, off_t start, * well exceed it. */ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, @@ -3217,7 +3297,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { + /* There is no backing file - all bytes are allocated in this file. */ *pnum = bytes; *map = offset; *file = bs; @@ -3525,10 +3606,11 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, #endif #if defined(CONFIG_BLKZONED) -static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, - int64_t *offset, - QEMUIOVector *qiov, - BdrvRequestFlags flags) { +static int coroutine_fn GRAPH_RDLOCK +raw_co_zone_append(BlockDriverState *bs, + int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { assert(flags == 0); int64_t zone_size_mask = bs->bl.zone_size - 1; int64_t iov_len = 0; @@ -4191,15 +4273,105 @@ hdev_open_Mac_error: /* Since this does ioctl the device must be already opened */ bs->sg = hdev_is_sg(bs); + /* sg devices aren't even block devices and can't use dm-mpath */ + s->use_mpath = !bs->sg; + return ret; } #if defined(__linux__) +#if defined(DM_MPATH_PROBE_PATHS) +static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr) +{ + if (ret < 0) { + switch (ret) { + case -ENODEV: + return true; + case -EAGAIN: + /* + * The device is probably suspended. This happens while the dm table + * is reloaded, e.g. because a path is added or removed. This is an + * operation that should complete within 1ms, so just wait a bit and + * retry. + * + * If the device was suspended for another reason, we'll wait and + * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before + * we return an error and potentially stop the VM. + */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + return true; + default: + return false; + } + } + + if (io_hdr->host_status != SCSI_HOST_OK) { + return true; + } + + switch (io_hdr->status) { + case GOOD: + case CONDITION_GOOD: + case INTERMEDIATE_GOOD: + case INTERMEDIATE_C_GOOD: + case RESERVATION_CONFLICT: + case COMMAND_TERMINATED: + return false; + case CHECK_CONDITION: + return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp, + io_hdr->mx_sb_len); + default: + return true; + } +} + +static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) +{ + BDRVRawState *s = acb->bs->opaque; + RawPosixAIOData probe_acb; + + if (!s->use_mpath) { + return false; + } + + if (!sgio_path_error(ret, acb->ioctl.buf)) { + return false; + } + + probe_acb = (RawPosixAIOData) { + .bs = acb->bs, + .aio_type = QEMU_AIO_IOCTL, + .aio_fildes = s->fd, + .aio_offset = 0, + .ioctl = { + .buf = NULL, + .cmd = DM_MPATH_PROBE_PATHS, + }, + }; + + ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb); + if (ret == -ENOTTY) { + s->use_mpath = false; + } else if (ret == -EAGAIN) { + /* The device might be suspended for a table reload, worth retrying */ + return true; + } + + return ret == 0; +} +#else +static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) +{ + return false; +} +#endif /* DM_MPATH_PROBE_PATHS */ + static int coroutine_fn hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; + int retries = SG_IO_MAX_RETRIES; int ret; ret = fd_open(bs); @@ -4227,7 +4399,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) }, }; - return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); + do { + ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb); + } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)); + + return ret; } #endif /* linux */ diff --git a/block/gluster.c b/block/gluster.c index c6d25ae..89abd40 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -972,8 +972,6 @@ static void qemu_gluster_reopen_commit(BDRVReopenState *state) g_free(state->opaque); state->opaque = NULL; - - return; } @@ -993,8 +991,6 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) g_free(state->opaque); state->opaque = NULL; - - return; } #ifdef CONFIG_GLUSTERFS_ZEROFILL @@ -1465,7 +1461,7 @@ exit: * (Based on raw_co_block_status() from file-posix.c.) */ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -1482,7 +1478,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, return ret; } - if (!want_zero) { + if (!(mode & BDRV_WANT_ZERO)) { *pnum = bytes; *map = offset; *file = bs; diff --git a/block/graph-lock.c b/block/graph-lock.c index c81162b..b731947 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -34,6 +34,17 @@ static QemuMutex aio_context_list_lock; static int has_writer; /* + * Many write-locked sections are also drained sections. There is a convenience + * wrapper bdrv_graph_wrlock_drained() which begins a drained section before + * acquiring the lock. This variable here is used so bdrv_graph_wrunlock() knows + * if it also needs to end such a drained section. It needs to be a counter, + * because the aio_poll() call in bdrv_graph_wrlock() might re-enter + * bdrv_graph_wrlock_drained(). And note that aio_bh_poll() in + * bdrv_graph_wrunlock() might also re-enter a write-locked section. + */ +static int wrlock_quiesced_counter; + +/* * A reader coroutine could move from an AioContext to another. * If this happens, there is no problem from the point of view of * counters. The problem is that the total count becomes @@ -112,8 +123,14 @@ void no_coroutine_fn bdrv_graph_wrlock(void) assert(!qatomic_read(&has_writer)); assert(!qemu_in_coroutine()); - /* Make sure that constantly arriving new I/O doesn't cause starvation */ - bdrv_drain_all_begin_nopoll(); + bool need_drain = wrlock_quiesced_counter == 0; + + if (need_drain) { + /* + * Make sure that constantly arriving new I/O doesn't cause starvation + */ + bdrv_drain_all_begin_nopoll(); + } /* * reader_count == 0: this means writer will read has_reader as 1 @@ -139,7 +156,18 @@ void no_coroutine_fn bdrv_graph_wrlock(void) smp_mb(); } while (reader_count() >= 1); - bdrv_drain_all_end(); + if (need_drain) { + bdrv_drain_all_end(); + } +} + +void no_coroutine_fn bdrv_graph_wrlock_drained(void) +{ + GLOBAL_STATE_CODE(); + + bdrv_drain_all_begin(); + wrlock_quiesced_counter++; + bdrv_graph_wrlock(); } void no_coroutine_fn bdrv_graph_wrunlock(void) @@ -168,6 +196,12 @@ void no_coroutine_fn bdrv_graph_wrunlock(void) * progress. */ aio_bh_poll(qemu_get_aio_context()); + + if (wrlock_quiesced_counter > 0) { + bdrv_drain_all_end(); + wrlock_quiesced_counter--; + } + } void coroutine_fn bdrv_graph_co_rdlock(void) @@ -38,10 +38,14 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "system/replay.h" +#include "qemu/units.h" /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) +/* Maximum read size for checking if data reads as zero, in bytes */ +#define MAX_ZERO_CHECK_BUFFER (128 * KiB) + static void coroutine_fn GRAPH_RDLOCK bdrv_parent_cb_resize(BlockDriverState *bs); @@ -357,7 +361,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, GLOBAL_STATE_CODE(); /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + if (bs->quiesce_counter++ == 0) { GRAPH_RDLOCK_GUARD_MAINLOOP(); bdrv_parent_drained_begin(bs, parent); if (bs->drv && bs->drv->bdrv_drain_begin) { @@ -397,8 +401,6 @@ bdrv_drained_begin(BlockDriverState *bs) */ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) { - int old_quiesce_counter; - IO_OR_GS_CODE(); if (qemu_in_coroutine()) { @@ -409,11 +411,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) /* At this point, we should be always running in the main loop. */ GLOBAL_STATE_CODE(); assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); /* Re-enable things in child-to-parent order */ - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { + if (--bs->quiesce_counter == 0) { GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bs->drv && bs->drv->bdrv_drain_end) { bs->drv->bdrv_drain_end(bs); @@ -2364,10 +2364,8 @@ int bdrv_flush_all(void) * Drivers not implementing the functionality are assumed to not support * backing files, hence all their sectors are reported as allocated. * - * If 'want_zero' is true, the caller is querying for mapping - * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and - * _ZERO where possible; otherwise, the result favors larger 'pnum', - * with a focus on accurate BDRV_BLOCK_ALLOCATED. + * 'mode' serves as a hint as to which results are favored; see the + * BDRV_WANT_* macros for details. * * If 'offset' is beyond the end of the disk image the return value is * BDRV_BLOCK_EOF and 'pnum' is set to 0. @@ -2387,7 +2385,7 @@ int bdrv_flush_all(void) * set to the host mapping and BDS corresponding to the guest offset. */ static int coroutine_fn GRAPH_RDLOCK -bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, +bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -2476,7 +2474,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, local_file = bs; local_map = aligned_offset; } else { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset, aligned_bytes, pnum, &local_map, &local_file); @@ -2488,10 +2486,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, * the cache requires an RCU update, so double check here to avoid * such an update if possible. * - * Check want_zero, because we only want to update the cache when we + * Check mode, because we only want to update the cache when we * have accurate information about what is zero and what is data. */ - if (want_zero && + if (mode == BDRV_WANT_PRECISE && ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && QLIST_EMPTY(&bs->children)) { @@ -2548,7 +2546,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); - ret = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, pnum, &local_map, &local_file); goto out; } @@ -2560,7 +2558,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, if (!cow_bs) { ret |= BDRV_BLOCK_ZERO; - } else if (want_zero) { + } else if (mode == BDRV_WANT_PRECISE) { int64_t size2 = bdrv_co_getlength(cow_bs); if (size2 >= 0 && offset >= size2) { @@ -2569,14 +2567,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, } } - if (want_zero && ret & BDRV_BLOCK_RECURSE && + if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE && local_file && local_file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { int64_t file_pnum; int ret2; - ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map, + ret2 = bdrv_co_do_block_status(local_file, mode, local_map, *pnum, &file_pnum, NULL, NULL); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it @@ -2627,7 +2625,7 @@ int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, bool include_base, - bool want_zero, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, @@ -2654,7 +2652,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, return 0; } - ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { @@ -2671,7 +2669,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; p = bdrv_filter_or_cow_bs(p)) { - ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum, + ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum, map, file); ++*depth; if (ret < 0) { @@ -2734,7 +2732,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState **file) { IO_CODE(); - return bdrv_co_common_block_status_above(bs, base, false, true, offset, + return bdrv_co_common_block_status_above(bs, base, false, + BDRV_WANT_PRECISE, offset, bytes, pnum, map, file, NULL); } @@ -2752,27 +2751,89 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset, * by @offset and @bytes is known to read as zeroes. * Return 1 if that is the case, 0 otherwise and -errno on error. * This test is meant to be fast rather than accurate so returning 0 - * does not guarantee non-zero data. + * does not guarantee non-zero data; but a return of 1 is reliable. */ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes) { int ret; - int64_t pnum = bytes; + int64_t pnum; IO_CODE(); - if (!bytes) { - return 1; + while (bytes) { + ret = bdrv_co_common_block_status_above(bs, NULL, false, + BDRV_WANT_ZERO, offset, bytes, + &pnum, NULL, NULL, NULL); + + if (ret < 0) { + return ret; + } + if (!(ret & BDRV_BLOCK_ZERO)) { + return 0; + } + offset += pnum; + bytes -= pnum; } - ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, - bytes, &pnum, NULL, NULL, NULL); + return 1; +} + +/* + * Check @bs (and its backing chain) to see if the entire image is known + * to read as zeroes. + * Return 1 if that is the case, 0 otherwise and -errno on error. + * This test is meant to be fast rather than accurate so returning 0 + * does not guarantee non-zero data; however, a return of 1 is reliable, + * and this function can report 1 in more cases than bdrv_co_is_zero_fast. + */ +int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs) +{ + int ret; + int64_t pnum, bytes; + char *buf; + QEMUIOVector local_qiov; + IO_CODE(); + + bytes = bdrv_co_getlength(bs); + if (bytes < 0) { + return bytes; + } + /* First probe - see if the entire image reads as zero */ + ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, + 0, bytes, &pnum, NULL, NULL, + NULL); if (ret < 0) { return ret; } + if (ret & BDRV_BLOCK_ZERO) { + return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + } - return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); + /* + * Because of the way 'blockdev-create' works, raw files tend to + * be created with a non-sparse region at the front to make + * alignment probing easier. If the block starts with only a + * small allocated region, it is still worth the effort to see if + * the rest of the image is still sparse, coupled with manually + * reading the first region to see if it reads zero after all. + */ + if (pnum > MAX_ZERO_CHECK_BUFFER) { + return 0; + } + ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); + if (ret <= 0) { + return ret; + } + /* Only the head of the image is unknown, and it's small. Read it. */ + buf = qemu_blockalign(bs, pnum); + qemu_iovec_init_buf(&local_qiov, buf, pnum); + ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0); + if (ret >= 0) { + ret = buffer_is_zero(buf, pnum); + } + qemu_vfree(buf); + return ret; } int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, @@ -2782,9 +2843,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t dummy; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, - bytes, pnum ? pnum : &dummy, NULL, - NULL, NULL); + ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED, + offset, bytes, pnum ? pnum : &dummy, + NULL, NULL, NULL); if (ret < 0) { return ret; } @@ -2817,7 +2878,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs, int ret; IO_CODE(); - ret = bdrv_co_common_block_status_above(bs, base, include_base, false, + ret = bdrv_co_common_block_status_above(bs, base, include_base, + BDRV_WANT_ALLOCATED, offset, bytes, pnum, NULL, NULL, &depth); if (ret < 0) { @@ -3102,18 +3164,19 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, return 0; } - if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) { + if (!bs->drv->bdrv_co_pdiscard) { return 0; } /* Invalidate the cached block-status data range if this discard overlaps */ bdrv_bsc_invalidate_range(bs, offset, bytes); - /* Discard is advisory, but some devices track and coalesce + /* + * Discard is advisory, but some devices track and coalesce * unaligned requests, so we must pass everything down rather than - * round here. Still, most devices will just silently ignore - * unaligned requests (by returning -ENOTSUP), so we must fragment - * the request accordingly. */ + * round here. Still, most devices reject unaligned requests with + * -EINVAL or -ENOTSUP, so we must fragment the request accordingly. + */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; @@ -3161,27 +3224,15 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, ret = -ENOMEDIUM; goto out; } - if (bs->drv->bdrv_co_pdiscard) { - ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); - } else { - BlockAIOCB *acb; - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - - acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num, - bdrv_co_io_em_complete, &co); - if (acb == NULL) { - ret = -EIO; - goto out; + + ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); + if (ret && ret != -ENOTSUP) { + if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) { + /* Silently skip rejected unaligned head/tail requests */ } else { - qemu_coroutine_yield(); - ret = co.ret; + goto out; /* bail out */ } } - if (ret && ret != -ENOTSUP) { - goto out; - } offset += num; bytes -= num; @@ -3709,8 +3760,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, } int coroutine_fn -bdrv_co_snapshot_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, +bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -3728,7 +3779,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs, } bdrv_inc_in_flight(bs); - ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, + ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes, pnum, map, file); bdrv_dec_in_flight(bs); diff --git a/block/iscsi.c b/block/iscsi.c index 2f0f4da..15b96ee 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -694,9 +694,9 @@ out_unlock: static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { IscsiLun *iscsilun = bs->opaque; diff --git a/block/linux-aio.c b/block/linux-aio.c index 407369f..c200e7a 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -291,7 +291,7 @@ static void ioq_submit(LinuxAioState *s) { int ret, len; struct qemu_laiocb *aiocb; - struct iocb *iocbs[MAX_EVENTS]; + QEMU_UNINITIALIZED struct iocb *iocbs[MAX_EVENTS]; QSIMPLEQ_HEAD(, qemu_laiocb) completed; do { diff --git a/block/mirror.c b/block/mirror.c index a53582f..b344182 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -51,10 +51,10 @@ typedef struct MirrorBlockJob { BlockDriverState *to_replace; /* Used to block operations on the drive-mirror-replace target */ Error *replace_blocker; - bool is_none_mode; + MirrorSyncMode sync_mode; BlockMirrorBackingMode backing_mode; - /* Whether the target image requires explicit zero-initialization */ - bool zero_target; + /* Whether the target should be assumed to be already zero initialized */ + bool target_is_zero; /* * To be accesssed with atomics. Written only under the BQL (required by the * current implementation of mirror_change()). @@ -73,6 +73,7 @@ typedef struct MirrorBlockJob { size_t buf_size; int64_t bdev_length; unsigned long *cow_bitmap; + unsigned long *zero_bitmap; BdrvDirtyBitmap *dirty_bitmap; BdrvDirtyBitmapIter *dbi; uint8_t *buf; @@ -108,9 +109,12 @@ struct MirrorOp { int64_t offset; uint64_t bytes; - /* The pointee is set by mirror_co_read(), mirror_co_zero(), and - * mirror_co_discard() before yielding for the first time */ + /* + * These pointers are set by mirror_co_read(), mirror_co_zero(), and + * mirror_co_discard() before yielding for the first time + */ int64_t *bytes_handled; + bool *io_skipped; bool is_pseudo_op; bool is_active_write; @@ -408,15 +412,34 @@ static void coroutine_fn mirror_co_read(void *opaque) static void coroutine_fn mirror_co_zero(void *opaque) { MirrorOp *op = opaque; - int ret; + bool write_needed = true; + int ret = 0; op->s->in_flight++; op->s->bytes_in_flight += op->bytes; *op->bytes_handled = op->bytes; op->is_in_flight = true; - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + if (op->s->zero_bitmap) { + unsigned long end = DIV_ROUND_UP(op->offset + op->bytes, + op->s->granularity); + assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity)); + assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) || + op->offset + op->bytes == op->s->bdev_length); + if (find_next_zero_bit(op->s->zero_bitmap, end, + op->offset / op->s->granularity) == end) { + write_needed = false; + *op->io_skipped = true; + } + } + if (write_needed) { + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); + } + if (ret >= 0 && op->s->zero_bitmap) { + bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity, + DIV_ROUND_UP(op->bytes, op->s->granularity)); + } mirror_write_complete(op, ret); } @@ -435,29 +458,43 @@ static void coroutine_fn mirror_co_discard(void *opaque) } static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - unsigned bytes, MirrorMethod mirror_method) + unsigned bytes, MirrorMethod mirror_method, + bool *io_skipped) { MirrorOp *op; Coroutine *co; int64_t bytes_handled = -1; + assert(QEMU_IS_ALIGNED(offset, s->granularity)); + assert(QEMU_IS_ALIGNED(bytes, s->granularity) || + offset + bytes == s->bdev_length); op = g_new(MirrorOp, 1); *op = (MirrorOp){ .s = s, .offset = offset, .bytes = bytes, .bytes_handled = &bytes_handled, + .io_skipped = io_skipped, }; qemu_co_queue_init(&op->waiting_requests); switch (mirror_method) { case MIRROR_METHOD_COPY: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_read, op); break; case MIRROR_METHOD_ZERO: + /* s->zero_bitmap handled in mirror_co_zero */ co = qemu_coroutine_create(mirror_co_zero, op); break; case MIRROR_METHOD_DISCARD: + if (s->zero_bitmap) { + bitmap_clear(s->zero_bitmap, offset / s->granularity, + DIV_ROUND_UP(bytes, s->granularity)); + } co = qemu_coroutine_create(mirror_co_discard, op); break; default: @@ -568,6 +605,7 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) int ret = -1; int64_t io_bytes; int64_t io_bytes_acct; + bool io_skipped = false; MirrorMethod mirror_method = MIRROR_METHOD_COPY; assert(!(offset % s->granularity)); @@ -611,8 +649,10 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) } io_bytes = mirror_clip_bytes(s, offset, io_bytes); - io_bytes = mirror_perform(s, offset, io_bytes, mirror_method); - if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { + io_bytes = mirror_perform(s, offset, io_bytes, mirror_method, + &io_skipped); + if (io_skipped || + (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) { io_bytes_acct = 0; } else { io_bytes_acct = io_bytes; @@ -721,11 +761,16 @@ static int mirror_exit_common(Job *job) bdrv_graph_rdlock_main_loop(); bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, &error_abort); + bdrv_graph_rdunlock_main_loop(); if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); + BlockDriverState *backing; + BlockDriverState *unfiltered_target; + bdrv_graph_wrlock_drained(); + unfiltered_target = bdrv_skip_filters(target_bs); + + backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base; if (bdrv_cow_bs(unfiltered_target) != backing) { bdrv_set_backing_hd(unfiltered_target, backing, &local_err); if (local_err) { @@ -734,16 +779,18 @@ static int mirror_exit_common(Job *job) ret = -EPERM; } } + bdrv_graph_wrunlock(); } else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { + bdrv_graph_rdlock_main_loop(); assert(!bdrv_backing_chain_next(target_bs)); ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL, "backing", &local_err); + bdrv_graph_rdunlock_main_loop(); if (ret < 0) { error_report_err(local_err); local_err = NULL; } } - bdrv_graph_rdunlock_main_loop(); if (s->should_complete && !abort) { BlockDriverState *to_replace = s->to_replace ?: src; @@ -841,15 +888,54 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) int64_t offset; BlockDriverState *bs; BlockDriverState *target_bs = blk_bs(s->target); - int ret = -1; + int ret = -EIO; int64_t count; + bool punch_holes = + target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + bdrv_can_write_zeroes_with_unmap(target_bs); + int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity); + /* Determine if the image is already zero, regardless of sync mode. */ + s->zero_bitmap = bitmap_new(bitmap_length); bdrv_graph_co_rdlock(); bs = s->mirror_top_bs->backing->bs; + if (s->target_is_zero) { + ret = 1; + } else { + ret = bdrv_co_is_all_zeroes(target_bs); + } bdrv_graph_co_rdunlock(); - if (s->zero_target) { - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { + /* Determine if a pre-zeroing pass is necessary. */ + if (ret < 0) { + return ret; + } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * In TOP mode, there is no benefit to a pre-zeroing pass, but + * the zero bitmap can be set if the destination already reads + * as zero and we are not punching holes. + */ + if (ret > 0 && !punch_holes) { + bitmap_set(s->zero_bitmap, 0, bitmap_length); + } + } else if (ret == 0 || punch_holes) { + /* + * Here, we are in FULL mode; our goal is to avoid writing + * zeroes if the destination already reads as zero, except + * when we are trying to punch holes. This is possible if + * zeroing happened externally (ret > 0) or if we have a fast + * way to pre-zero the image (the dirty bitmap will be + * populated later by the non-zero portions, the same as for + * TOP mode). If pre-zeroing is not fast, or we need to visit + * the entire image in order to punch holes even in the + * non-allocated regions of the source, then just mark the + * entire image dirty and leave the zero bitmap clear at this + * point in time. Otherwise, it can be faster to pre-zero the + * image now, even if we re-write the allocated portions of + * the disk later, and the pre-zero pass will populate the + * zero bitmap. + */ + if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) { bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); return 0; } @@ -858,6 +944,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) for (offset = 0; offset < s->bdev_length; ) { int bytes = MIN(s->bdev_length - offset, QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); + bool ignored; mirror_throttle(s); @@ -873,12 +960,15 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) continue; } - mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); + mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored); offset += bytes; } mirror_wait_for_all_io(s); s->initial_zeroing_ongoing = false; + } else { + /* In FULL mode, and image already reads as zero. */ + bitmap_set(s->zero_bitmap, 0, bitmap_length); } /* First part, loop on the sectors and initialize the dirty bitmap. */ @@ -1020,7 +1110,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) mirror_free_init(s); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - if (!s->is_none_mode) { + if (s->sync_mode != MIRROR_SYNC_MODE_NONE) { ret = mirror_dirty_init(s); if (ret < 0 || job_is_cancelled(&s->common.job)) { goto immediate_exit; @@ -1163,6 +1253,7 @@ immediate_exit: assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); + g_free(s->zero_bitmap); g_free(s->in_flight_bitmap); bdrv_dirty_iter_free(s->dbi); @@ -1341,7 +1432,8 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, { int ret; size_t qiov_offset = 0; - int64_t bitmap_offset, bitmap_end; + int64_t dirty_bitmap_offset, dirty_bitmap_end; + int64_t zero_bitmap_offset, zero_bitmap_end; if (!QEMU_IS_ALIGNED(offset, job->granularity) && bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) @@ -1385,31 +1477,54 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, } /* - * Tails are either clean or shrunk, so for bitmap resetting - * we safely align the range down. + * Tails are either clean or shrunk, so for dirty bitmap resetting + * we safely align the range narrower. But for zero bitmap, round + * range wider for checking or clearing, and narrower for setting. */ - bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); - bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); - if (bitmap_offset < bitmap_end) { - bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); + if (dirty_bitmap_offset < dirty_bitmap_end) { + bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); } + zero_bitmap_offset = offset / job->granularity; + zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity); job_progress_increase_remaining(&job->common.job, bytes); job->active_write_bytes_in_flight += bytes; switch (method) { case MIRROR_METHOD_COPY: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } ret = blk_co_pwritev_part(job->target, offset, bytes, qiov, qiov_offset, flags); break; case MIRROR_METHOD_ZERO: + if (job->zero_bitmap) { + if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end, + zero_bitmap_offset) == zero_bitmap_end) { + ret = 0; + break; + } + } assert(!qiov); ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); + if (job->zero_bitmap && ret >= 0) { + bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity, + (dirty_bitmap_end - dirty_bitmap_offset) / + job->granularity); + } break; case MIRROR_METHOD_DISCARD: + if (job->zero_bitmap) { + bitmap_clear(job->zero_bitmap, zero_bitmap_offset, + zero_bitmap_end - zero_bitmap_offset); + } assert(!qiov); ret = blk_co_pdiscard(job->target, offset, bytes); break; @@ -1430,10 +1545,10 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, * at function start, and they must be still dirty, as we've locked * the region for in-flight op. */ - bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); - bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); - bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, - bitmap_end - bitmap_offset); + dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); + dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); + bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, + dirty_bitmap_end - dirty_bitmap_offset); qatomic_set(&job->actively_synced, false); action = mirror_error_action(job, false, -ret); @@ -1711,15 +1826,16 @@ static BlockJob *mirror_start_job( int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, uint32_t granularity, int64_t buf_size, + MirrorSyncMode sync_mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, void *opaque, const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base, + BlockDriverState *base, bool auto_complete, const char *filter_node_name, bool is_mirror, MirrorCopyMode copy_mode, bool base_ro, @@ -1878,9 +1994,9 @@ static BlockJob *mirror_start_job( s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; s->on_target_error = on_target_error; - s->is_none_mode = is_none_mode; + s->sync_mode = sync_mode; s->backing_mode = backing_mode; - s->zero_target = zero_target; + s->target_is_zero = target_is_zero; qatomic_set(&s->copy_mode, copy_mode); s->base = base; s->base_overlay = bdrv_find_overlay(bs, base); @@ -1904,7 +2020,7 @@ static BlockJob *mirror_start_job( */ bdrv_disable_dirty_bitmap(s->dirty_bitmap); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); ret = block_job_add_bdrv(&s->common, "source", bs, 0, BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ, @@ -2009,13 +2125,12 @@ void mirror_start(const char *job_id, BlockDriverState *bs, int creation_flags, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, const char *filter_node_name, MirrorCopyMode copy_mode, Error **errp) { - bool is_none_mode; BlockDriverState *base; GLOBAL_STATE_CODE(); @@ -2028,14 +2143,13 @@ void mirror_start(const char *job_id, BlockDriverState *bs, } bdrv_graph_rdlock_main_loop(); - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; bdrv_graph_rdunlock_main_loop(); mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, backing_mode, zero_target, - on_source_error, on_target_error, unmap, NULL, NULL, - &mirror_job_driver, is_none_mode, base, false, + speed, granularity, buf_size, mode, backing_mode, + target_is_zero, on_source_error, on_target_error, unmap, + NULL, NULL, &mirror_job_driver, base, false, filter_node_name, true, copy_mode, false, errp); } @@ -2061,9 +2175,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, job = mirror_start_job( job_id, bs, creation_flags, base, NULL, speed, 0, 0, - MIRROR_LEAVE_BACKING_CHAIN, false, + MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, on_error, on_error, true, cb, opaque, - &commit_active_job_driver, false, base, auto_complete, + &commit_active_job_driver, base, auto_complete, filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, base_read_only, errp); if (!job) { diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index 6919a49..282d1c3 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -144,7 +144,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) Error *local_err = NULL; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); bs = bdrv_find_node(id); if (bs) { @@ -152,29 +152,31 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) if (local_err) { error_report_err(local_err); } - return; + goto unlock; } blk = blk_by_name(id); if (!blk) { error_report("Device '%s' not found", id); - return; + goto unlock; } if (!blk_legacy_dinfo(blk)) { error_report("Deleting device added with blockdev-add" " is not supported"); - return; + goto unlock; } bs = blk_bs(blk); if (bs) { if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { error_report_err(local_err); - return; + goto unlock; } + bdrv_graph_rdunlock_main_loop(); blk_remove_bs(blk); + bdrv_graph_rdlock_main_loop(); } /* Make the BlockBackend and the attached BlockDriverState anonymous */ @@ -191,6 +193,9 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) } else { blk_unref(blk); } + +unlock: + bdrv_graph_rdunlock_main_loop(); } void hmp_commit(Monitor *mon, const QDict *qdict) diff --git a/block/nbd.c b/block/nbd.c index 887841b..d5a2b21 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -1397,8 +1397,8 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) } static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file) + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { int ret, request_ret; NBDExtent64 extent = { 0 }; diff --git a/block/null.c b/block/null.c index dc0b1fd..4e448d5 100644 --- a/block/null.c +++ b/block/null.c @@ -227,9 +227,9 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, } static int coroutine_fn null_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVNullState *s = bs->opaque; diff --git a/block/nvme.c b/block/nvme.c index bbf7c23..8df53ee 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -18,6 +18,7 @@ #include "qobject/qstring.h" #include "qemu/defer-call.h" #include "qemu/error-report.h" +#include "qemu/host-pci-mmio.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/cutils.h" @@ -60,7 +61,7 @@ typedef struct { uint8_t *queue; uint64_t iova; /* Hardware MMIO register */ - volatile uint32_t *doorbell; + uint32_t *doorbell; } NVMeQueue; typedef struct { @@ -100,7 +101,7 @@ struct BDRVNVMeState { QEMUVFIOState *vfio; void *bar0_wo_map; /* Memory mapped registers */ - volatile struct { + struct { uint32_t sq_tail; uint32_t cq_head; } *doorbells; @@ -292,7 +293,7 @@ static void nvme_kick(NVMeQueuePair *q) assert(!(q->sq.tail & 0xFF00)); /* Fence the write to submission queue entry before notifying the device. */ smp_wmb(); - *q->sq.doorbell = cpu_to_le32(q->sq.tail); + host_pci_stl_le_p(q->sq.doorbell, q->sq.tail); q->inflight += q->need_kick; q->need_kick = 0; } @@ -441,7 +442,7 @@ static bool nvme_process_completion(NVMeQueuePair *q) if (progress) { /* Notify the device so it can post more completions. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); } @@ -460,7 +461,7 @@ static void nvme_process_completion_bh(void *opaque) * so notify the device that it has space to fill in more completions now. */ smp_mb_release(); - *q->cq.doorbell = cpu_to_le32(q->cq.head); + host_pci_stl_le_p(q->cq.doorbell, q->cq.head); nvme_wake_free_req_locked(q); nvme_process_completion(q); @@ -749,9 +750,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, int ret; uint64_t cap; uint32_t ver; + uint32_t cc; uint64_t timeout_ms; uint64_t deadline, now; - volatile NvmeBar *regs = NULL; + NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); qemu_co_queue_init(&s->dma_flush_queue); @@ -779,7 +781,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, /* Perform initialize sequence as described in NVMe spec "7.6.1 * Initialization". */ - cap = le64_to_cpu(regs->cap); + cap = host_pci_ldq_le_p(®s->cap); trace_nvme_controller_capability_raw(cap); trace_nvme_controller_capability("Maximum Queue Entries Supported", 1 + NVME_CAP_MQES(cap)); @@ -805,16 +807,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, bs->bl.request_alignment = s->page_size; timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); - ver = le32_to_cpu(regs->vs); + ver = host_pci_ldl_le_p(®s->vs); trace_nvme_controller_spec_version(extract32(ver, 16, 16), extract32(ver, 8, 8), extract32(ver, 0, 8)); /* Reset device to get a clean state. */ - regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); + cc = host_pci_ldl_le_p(®s->cc); + host_pci_stl_le_p(®s->cc, cc & 0xFE); /* Wait for CSTS.RDY = 0. */ deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; - while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to reset (%" PRId64 " ms)", @@ -843,19 +846,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, s->queues[INDEX_ADMIN] = q; s->queue_count = 1; QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000); - regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | - ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); - regs->asq = cpu_to_le64(q->sq.iova); - regs->acq = cpu_to_le64(q->cq.iova); + host_pci_stl_le_p(®s->aqa, + ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) | + ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT)); + host_pci_stq_le_p(®s->asq, q->sq.iova); + host_pci_stq_le_p(®s->acq, q->cq.iova); /* After setting up all control registers we can enable device now. */ - regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | - (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | - CC_EN_MASK); + host_pci_stl_le_p(®s->cc, + (ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); /* Wait for CSTS.RDY = 1. */ now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); deadline = now + timeout_ms * SCALE_MS; - while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { + while (!NVME_CSTS_RDY(host_pci_ldl_le_p(®s->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to start (%" PRId64 " ms)", diff --git a/block/parallels.c b/block/parallels.c index 347ca12..3a375e2 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -416,9 +416,9 @@ parallels_co_flush_to_os(BlockDriverState *bs) } static int coroutine_fn GRAPH_RDLOCK -parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +parallels_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVParallelsState *s = bs->opaque; int count; diff --git a/block/qapi.c b/block/qapi.c index 2c50a6b..12fbf8d 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -51,6 +51,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, ImageInfo *backing_info; BlockDriverState *backing; BlockDeviceInfo *info; + BlockdevChildList **children_list_tail; + BdrvChild *child; if (!bs->drv) { error_setg(errp, "Block device %s is ejected", bs->node_name); @@ -73,8 +75,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, .no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH), }; - if (bs->node_name[0]) { - info->node_name = g_strdup(bs->node_name); + info->node_name = g_strdup(bs->node_name); + + children_list_tail = &info->children; + QLIST_FOREACH(child, &bs->children, next) { + BlockdevChild *child_ref = g_new0(BlockdevChild, 1); + child_ref->child = g_strdup(child->name); + child_ref->node_name = g_strdup(child->bs->node_name); + QAPI_LIST_APPEND(children_list_tail, child_ref); } backing = bdrv_cow_bs(bs); diff --git a/block/qcow.c b/block/qcow.c index da8ad4d..8a3e759 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -530,7 +530,7 @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, } static int coroutine_fn GRAPH_RDLOCK -qcow_co_block_status(BlockDriverState *bs, bool want_zero, +qcow_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/qcow2.c b/block/qcow2.c index dd6bcaf..4aa9f9e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1721,7 +1721,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - } else if (!(flags & BDRV_O_NO_IO)) { + } else { error_setg(errp, "Missing CRYPTO header for crypt method %d", s->crypt_method_header); ret = -EINVAL; @@ -1895,7 +1895,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { bdrv_graph_co_rdunlock(); + bdrv_drain_all_begin(); bdrv_co_unref_child(bs, s->data_file); + bdrv_drain_all_end(); bdrv_graph_co_rdlock(); s->data_file = NULL; } @@ -1976,7 +1978,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVQcow2State *s = bs->opaque; - if (bs->encrypted) { + if (s->crypto) { /* Encryption works on a sector granularity */ bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); } @@ -2141,9 +2143,9 @@ static void qcow2_join_options(QDict *options, QDict *old_options) } static int coroutine_fn GRAPH_RDLOCK -qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t count, int64_t *pnum, int64_t *map, - BlockDriverState **file) +qcow2_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t count, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t host_offset; @@ -2821,7 +2823,7 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) if (close_data_file && has_data_file(bs)) { GLOBAL_STATE_CODE(); bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, s->data_file); bdrv_graph_wrunlock(); s->data_file = NULL; diff --git a/block/qed.c b/block/qed.c index ac24449..4a36fb3 100644 --- a/block/qed.c +++ b/block/qed.c @@ -833,9 +833,9 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) +bdrv_qed_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t pos, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) { BDRVQEDState *s = bs->opaque; size_t len = MIN(bytes, SIZE_MAX); diff --git a/block/quorum.c b/block/quorum.c index 30747a6..76a4feb 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1037,7 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, close_exit: /* cleanup on error */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); for (i = 0; i < s->num_children; i++) { if (!opened[i]) { continue; @@ -1057,7 +1057,7 @@ static void quorum_close(BlockDriverState *bs) BDRVQuorumState *s = bs->opaque; int i; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); for (i = 0; i < s->num_children; i++) { bdrv_unref_child(bs, s->children[i]); } @@ -1226,7 +1226,7 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, * region contains zeroes, and BDRV_BLOCK_DATA otherwise. */ static int coroutine_fn GRAPH_RDLOCK -quorum_co_block_status(BlockDriverState *bs, bool want_zero, +quorum_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t count, int64_t *pnum, int64_t *map, BlockDriverState **file) { @@ -1238,7 +1238,7 @@ quorum_co_block_status(BlockDriverState *bs, bool want_zero, for (i = 0; i < s->num_children; i++) { int64_t bytes; ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, - want_zero, offset, count, + mode, offset, count, &bytes, NULL, NULL, NULL); if (ret < 0) { quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, diff --git a/block/raw-format.c b/block/raw-format.c index e08526e..df16ac1 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -283,8 +283,8 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +raw_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRawState *s = bs->opaque; diff --git a/block/rbd.c b/block/rbd.c index af984fb..951cd63 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -254,7 +254,6 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, done: g_free(buf); qobject_unref(keypairs); - return; } static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts, @@ -1504,9 +1503,9 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, } static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVRBDState *s = bs->opaque; diff --git a/block/replication.c b/block/replication.c index 0020f33..3a431e9 100644 --- a/block/replication.c +++ b/block/replication.c @@ -176,7 +176,6 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, *nshared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED; - return; } static int64_t coroutine_fn GRAPH_RDLOCK @@ -365,14 +364,15 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, BlockReopenQueue *reopen_queue = NULL; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); /* * s->hidden_disk and s->secondary_disk may not be set yet, as they will * only be set after the children are writable. */ hidden_disk = bs->file->bs->backing; secondary_disk = hidden_disk->bs->backing; + bdrv_graph_rdunlock_main_loop(); if (writable) { s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs); @@ -541,7 +541,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_ref(hidden_disk->bs); s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", @@ -584,7 +584,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, - BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, + BLOCKDEV_ON_ERROR_REPORT, + ON_CBW_ERROR_BREAK_GUEST_WRITE, + JOB_INTERNAL, backup_job_completed, bs, NULL, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -650,7 +652,7 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; bdrv_unref_child(bs, s->hidden_disk); diff --git a/block/snapshot-access.c b/block/snapshot-access.c index 71ac83c..17ed240 100644 --- a/block/snapshot-access.c +++ b/block/snapshot-access.c @@ -41,11 +41,11 @@ snapshot_access_co_preadv_part(BlockDriverState *bs, static int coroutine_fn GRAPH_RDLOCK snapshot_access_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, + unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { - return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset, + return bdrv_co_snapshot_block_status(bs->file->bs, mode, offset, bytes, pnum, map, file); } diff --git a/block/snapshot.c b/block/snapshot.c index 22567f1..bd9d759 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -291,7 +291,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, fallback); bdrv_graph_wrunlock(); @@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, /** * Delete an internal snapshot by @snapshot_id and @name. - * @bs: block device used in the operation + * @bs: block device used in the operation, must be drained * @snapshot_id: unique snapshot ID, or NULL * @name: snapshot name, or NULL * @errp: location to store error @@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, GLOBAL_STATE_CODE(); + assert(bs->quiesce_counter > 0); + if (!drv) { error_setg(errp, "Device '%s' has no medium", bdrv_get_device_name(bs)); @@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return -EINVAL; } - /* drain all pending i/o before deleting snapshot */ - bdrv_drained_begin(bs); - if (drv->bdrv_snapshot_delete) { ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); } else if (fallback_bs) { @@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, ret = -ENOTSUP; } - bdrv_drained_end(bs); return ret; } @@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name, ERRP_GUARD(); g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + int ret = 0; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { - return -1; + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp); + if (ret < 0) { + goto out; } iterbdrvs = bdrvs; while (iterbdrvs) { BlockDriverState *bs = iterbdrvs->data; QEMUSnapshotInfo sn1, *snapshot = &sn1; - int ret = 0; if ((devices || bdrv_all_snapshots_includes_bs(bs)) && bdrv_snapshot_find(bs, snapshot, name) >= 0) @@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name, if (ret < 0) { error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", name, bdrv_get_device_or_node_name(bs)); - return -1; + goto out; } iterbdrvs = iterbdrvs->next; } - return 0; +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } diff --git a/block/stream.c b/block/stream.c index 999d9e5..c0616b6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -51,7 +51,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk, return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH); } -static int stream_prepare(Job *job) +static int GRAPH_UNLOCKED stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs; @@ -73,18 +73,16 @@ static int stream_prepare(Job *job) s->cor_filter_bs = NULL; /* - * bdrv_set_backing_hd() requires that the unfiltered_bs and the COW child - * of unfiltered_bs is drained. Drain already here and use - * bdrv_set_backing_hd_drained() instead because the polling during - * drained_begin() might change the graph, and if we do this only later, we - * may end up working with the wrong base node (or it might even have gone - * away by the time we want to use it). + * bdrv_set_backing_hd() requires that all block nodes are drained. Drain + * already here, because the polling during drained_begin() might change the + * graph, and if we do this only later, we may end up working with the wrong + * base node (or it might even have gone away by the time we want to use + * it). */ - bdrv_drained_begin(unfiltered_bs); if (unfiltered_bs_cow) { bdrv_ref(unfiltered_bs_cow); - bdrv_drained_begin(unfiltered_bs_cow); } + bdrv_drain_all_begin(); bdrv_graph_rdlock_main_loop(); base = bdrv_filter_or_cow_bs(s->above_base); @@ -106,7 +104,7 @@ static int stream_prepare(Job *job) } bdrv_graph_wrlock(); - bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); + bdrv_set_backing_hd(unfiltered_bs, base, &local_err); bdrv_graph_wrunlock(); /* @@ -123,11 +121,10 @@ static int stream_prepare(Job *job) } out: + bdrv_drain_all_end(); if (unfiltered_bs_cow) { - bdrv_drained_end(unfiltered_bs_cow); bdrv_unref(unfiltered_bs_cow); } - bdrv_drained_end(unfiltered_bs); return ret; } @@ -373,7 +370,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, * already have our own plans. Also don't allow resize as the image size is * queried only at the job start and then cached. */ - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); if (block_job_add_bdrv(&s->common, "active node", bs, 0, basic_flags | BLK_PERM_WRITE, errp)) { bdrv_graph_wrunlock(); diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 32553b3..66fdce9 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -908,7 +908,6 @@ unlock: qemu_mutex_unlock(&tg->lock); qapi_free_ThrottleLimits(argp); error_propagate(errp, local_err); - return; } static void throttle_group_get_limits(Object *obj, Visitor *v, @@ -934,7 +933,8 @@ static bool throttle_group_can_be_deleted(UserCreatable *uc) return OBJECT(uc)->ref == 1; } -static void throttle_group_obj_class_init(ObjectClass *klass, void *class_data) +static void throttle_group_obj_class_init(ObjectClass *klass, + const void *class_data) { size_t i = 0; UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); @@ -967,7 +967,7 @@ static const TypeInfo throttle_group_info = { .instance_size = sizeof(ThrottleGroup), .instance_init = throttle_group_obj_init, .instance_finalize = throttle_group_obj_finalize, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_USER_CREATABLE }, { } }, diff --git a/block/vdi.c b/block/vdi.c index a2da6ec..3ddc62a 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -523,8 +523,8 @@ static int vdi_reopen_prepare(BDRVReopenState *state, } static int coroutine_fn GRAPH_RDLOCK -vdi_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, +vdi_co_block_status(BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { BDRVVdiState *s = (BDRVVdiState *)bs->opaque; diff --git a/block/vmdk.c b/block/vmdk.c index 2adec49..7b98deb 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -271,7 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; VmdkExtent *e; - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); for (i = 0; i < s->num_extents; i++) { e = &s->extents[i]; g_free(e->l1_table); @@ -1229,9 +1229,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, extent_role |= BDRV_CHILD_METADATA; } + bdrv_graph_rdunlock_main_loop(); extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix, bs, &child_of_bds, extent_role, false, &local_err); + bdrv_graph_rdlock_main_loop(); g_free(extent_path); if (!extent_file) { error_propagate(errp, local_err); @@ -1247,7 +1249,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); bdrv_graph_rdlock_main_loop(); @@ -1266,7 +1268,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, g_free(buf); if (ret) { bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); bdrv_graph_rdlock_main_loop(); @@ -1277,7 +1279,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); if (ret) { bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); bdrv_graph_rdlock_main_loop(); @@ -1287,7 +1289,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, } else { error_setg(errp, "Unsupported extent type '%s'", type); bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); + bdrv_graph_wrlock_drained(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); bdrv_graph_rdlock_main_loop(); @@ -1352,13 +1354,13 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, BDRVVmdkState *s = bs->opaque; uint32_t magic; - GRAPH_RDLOCK_GUARD_MAINLOOP(); - ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; } + GRAPH_RDLOCK_GUARD_MAINLOOP(); + buf = vmdk_read_desc(bs->file, 0, errp); if (!buf) { return -EINVAL; @@ -1777,7 +1779,7 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, } static int coroutine_fn GRAPH_RDLOCK -vmdk_co_block_status(BlockDriverState *bs, bool want_zero, +vmdk_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { diff --git a/block/vpc.c b/block/vpc.c index 0309e31..801ff57 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -726,7 +726,7 @@ fail: } static int coroutine_fn GRAPH_RDLOCK -vpc_co_block_status(BlockDriverState *bs, bool want_zero, +vpc_co_block_status(BlockDriverState *bs, unsigned int mode, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) diff --git a/block/vvfat.c b/block/vvfat.c index 91d69b3..814796d 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3134,9 +3134,9 @@ vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, } static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *n, - int64_t *map, + unsigned int mode, + int64_t offset, int64_t bytes, + int64_t *n, int64_t *map, BlockDriverState **file) { *n = bytes; |