about | summary | refs | log | tree | commit | diff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/backup.c | 5
-rw-r--r-- block/blkdebug.c | 6
-rw-r--r-- block/blkio.c | 4
-rw-r--r-- block/blklogwrites.c | 4
-rw-r--r-- block/blkverify.c | 2
-rw-r--r-- block/block-backend.c | 10
-rw-r--r-- block/commit.c | 148
-rw-r--r-- block/copy-before-write.c | 6
-rw-r--r-- block/copy-before-write.h | 1
-rw-r--r-- block/coroutines.h | 4
-rw-r--r-- block/file-posix.c | 238
-rw-r--r-- block/gluster.c | 8
-rw-r--r-- block/graph-lock.c | 40
-rw-r--r-- block/io.c | 169
-rw-r--r-- block/iscsi.c | 6
-rw-r--r-- block/linux-aio.c | 2
-rw-r--r-- block/mirror.c | 202
-rw-r--r-- block/monitor/block-hmp-cmds.c | 15
-rw-r--r-- block/nbd.c | 4
-rw-r--r-- block/null.c | 6
-rw-r--r-- block/nvme.c | 41
-rw-r--r-- block/parallels.c | 6
-rw-r--r-- block/qapi.c | 12
-rw-r--r-- block/qcow.c | 2
-rw-r--r-- block/qcow2.c | 14
-rw-r--r-- block/qed.c | 6
-rw-r--r-- block/quorum.c | 8
-rw-r--r-- block/raw-format.c | 4
-rw-r--r-- block/rbd.c | 7
-rw-r--r-- block/replication.c | 12
-rw-r--r-- block/snapshot-access.c | 4
-rw-r--r-- block/snapshot.c | 28
-rw-r--r-- block/stream.c | 23
-rw-r--r-- block/throttle-groups.c | 6
-rw-r--r-- block/vdi.c | 4
-rw-r--r-- block/vmdk.c | 18
-rw-r--r-- block/vpc.c | 2
-rw-r--r-- block/vvfat.c | 6
38 files changed, 775 insertions, 308 deletions
diff --git a/block/backup.c b/block/backup.c
index 79652bf..d4713fa 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -361,6 +361,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BackupPerf *perf,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
+ OnCbwError on_cbw_error,
int creation_flags,
BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp)
@@ -458,7 +459,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
}
cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
- perf->min_cluster_size, &bcs, errp);
+ perf->min_cluster_size, &bcs, on_cbw_error, errp);
if (!cbw) {
goto error;
}
@@ -497,7 +498,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_copy_set_speed(bcs, speed);
/* Required permissions are taken by copy-before-write filter target */
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
bdrv_graph_wrunlock();
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 1c1967f..c54aee0 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -751,9 +751,9 @@ blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
}
static int coroutine_fn GRAPH_RDLOCK
-blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+blkdebug_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
int err;
diff --git a/block/blkio.c b/block/blkio.c
index 5f4fce2..4142673 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -11,7 +11,7 @@
#include "qemu/osdep.h"
#include <blkio.h>
#include "block/block_int.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
#include "qemu/defer-call.h"
#include "qapi/error.h"
@@ -19,7 +19,7 @@
#include "qobject/qdict.h"
#include "qemu/module.h"
#include "system/block-backend.h"
-#include "exec/memory.h" /* for ram_block_discard_disable() */
+#include "system/memory.h" /* for ram_block_discard_disable() */
#include "block/block-io.h"
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index b0f78c4..aa1f888 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -281,7 +281,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
ret = 0;
fail_log:
if (ret < 0) {
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->log_file);
bdrv_graph_wrunlock();
s->log_file = NULL;
@@ -296,7 +296,7 @@ static void blk_log_writes_close(BlockDriverState *bs)
{
BDRVBlkLogWritesState *s = bs->opaque;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->log_file);
s->log_file = NULL;
bdrv_graph_wrunlock();
diff --git a/block/blkverify.c b/block/blkverify.c
index db79a36..72efcbe 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -151,7 +151,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
bdrv_graph_wrunlock();
diff --git a/block/block-backend.c b/block/block-backend.c
index a402db1..f8d6ba6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
-static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
+static bool GRAPH_RDLOCK
+blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited,
+ Transaction *tran, Error **errp);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
@@ -889,7 +889,7 @@ void blk_remove_bs(BlockBackend *blk)
root = blk->root;
blk->root = NULL;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_root_unref_child(root);
bdrv_graph_wrunlock();
}
@@ -904,7 +904,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
GLOBAL_STATE_CODE();
bdrv_ref(bs);
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) {
blk->disable_perm = true;
diff --git a/block/commit.c b/block/commit.c
index 5df3d05..0d9e1a1 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -15,6 +15,8 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "trace.h"
+#include "block/block-common.h"
+#include "block/coroutines.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "qapi/error.h"
@@ -66,7 +68,7 @@ static int commit_prepare(Job *job)
s->backing_mask_protocol);
}
-static void commit_abort(Job *job)
+static void GRAPH_UNLOCKED commit_abort(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
BlockDriverState *top_bs = blk_bs(s->top);
@@ -126,6 +128,84 @@ static void commit_clean(Job *job)
blk_unref(s->top);
}
+static int commit_iteration(CommitBlockJob *s, int64_t offset,
+ int64_t *requested_bytes, void *buf)
+{
+ BlockErrorAction action;
+ int64_t bytes = *requested_bytes;
+ int ret = 0;
+ bool error_in_source = true;
+
+ /*
+ * Copy if allocated above the base.
+ *
+ * Query the status of at most COMMIT_BUFFER_SIZE bytes at 'offset', from
+ * the top node down to and including base_overlay (include_base is set).
+ * 'bytes' is passed as the pnum output and becomes the length this
+ * iteration handles.
+ *
+ * NOTE(review): the fourth argument is the block-status 'mode', which
+ * this series turns from 'bool want_zero' into 'unsigned int mode';
+ * passing 'true' here yields 1 - confirm that this is the intended
+ * BDRV_WANT_* value.
+ */
+ WITH_GRAPH_RDLOCK_GUARD() {
+ ret = bdrv_co_common_block_status_above(blk_bs(s->top),
+ s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE,
+ &bytes, NULL, NULL, NULL);
+ }
+
+ trace_commit_one_iteration(s, offset, bytes, ret);
+
+ if (ret < 0) {
+ goto fail;
+ }
+
+ if (ret & BDRV_BLOCK_ALLOCATED) {
+ if (ret & BDRV_BLOCK_ZERO) {
+ /*
+ * If the top (sub)clusters are smaller than the base
+ * (sub)clusters, this will not unmap unless the underlying device
+ * does some tracking of these requests. Ideally, we would find
+ * the maximal extent of the zero clusters.
+ */
+ ret = blk_co_pwrite_zeroes(s->base, offset, bytes,
+ BDRV_REQ_MAY_UNMAP);
+ if (ret < 0) {
+ error_in_source = false; /* the write to s->base failed */
+ goto fail;
+ }
+ } else {
+ assert(bytes < SIZE_MAX);
+
+ ret = blk_co_pread(s->top, offset, bytes, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = blk_co_pwrite(s->base, offset, bytes, buf, 0);
+ if (ret < 0) {
+ error_in_source = false; /* the write to s->base failed */
+ goto fail;
+ }
+ }
+
+ /*
+ * Whether zeroes actually end up on disk depends on the details of
+ * the underlying driver. Therefore, this might rate limit more than
+ * is necessary.
+ */
+ block_job_ratelimit_processed_bytes(&s->common, bytes);
+ }
+
+ /*
+ * Publish progress. This is also reached for ranges that were not
+ * reported as allocated and therefore were not copied.
+ */
+
+ job_progress_update(&s->common.job, bytes);
+
+ *requested_bytes = bytes;
+
+ return 0;
+
+fail:
+ action = block_job_error_action(&s->common, s->on_error,
+ error_in_source, -ret);
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
+ return ret;
+ }
+
+ *requested_bytes = 0; /* no progress: the caller stays at this offset */
+
+ return 0;
+}
+
static int coroutine_fn commit_run(Job *job, Error **errp)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
@@ -156,9 +236,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (offset = 0; offset < len; offset += n) {
- bool copy;
- bool error_in_source = true;
-
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -166,38 +243,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
if (job_is_cancelled(&s->common.job)) {
break;
}
- /* Copy if allocated above the base */
- ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
- offset, COMMIT_BUFFER_SIZE, &n);
- copy = (ret > 0);
- trace_commit_one_iteration(s, offset, n, ret);
- if (copy) {
- assert(n < SIZE_MAX);
-
- ret = blk_co_pread(s->top, offset, n, buf, 0);
- if (ret >= 0) {
- ret = blk_co_pwrite(s->base, offset, n, buf, 0);
- if (ret < 0) {
- error_in_source = false;
- }
- }
- }
- if (ret < 0) {
- BlockErrorAction action =
- block_job_error_action(&s->common, s->on_error,
- error_in_source, -ret);
- if (action == BLOCK_ERROR_ACTION_REPORT) {
- return ret;
- } else {
- n = 0;
- continue;
- }
- }
- /* Publish progress */
- job_progress_update(&s->common.job, n);
- if (copy) {
- block_job_ratelimit_processed_bytes(&s->common, n);
+ ret = commit_iteration(s, offset, &n, buf);
+
+ if (ret < 0) {
+ return ret;
}
}
@@ -342,7 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* this is the responsibility of the interface (i.e. whoever calls
* commit_start()).
*/
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
s->base_overlay = bdrv_find_overlay(top, base);
assert(s->base_overlay);
@@ -464,28 +514,32 @@ int bdrv_commit(BlockDriverState *bs)
Error *local_err = NULL;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!drv)
return -ENOMEDIUM;
+ bdrv_graph_rdlock_main_loop();
+
backing_file_bs = bdrv_cow_bs(bs);
if (!backing_file_bs) {
- return -ENOTSUP;
+ ret = -ENOTSUP;
+ goto out;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL))
{
- return -EBUSY;
+ ret = -EBUSY;
+ goto out;
}
ro = bdrv_is_read_only(backing_file_bs);
if (ro) {
if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
- return -EACCES;
+ ret = -EACCES;
+ goto out;
}
}
@@ -509,8 +563,14 @@ int bdrv_commit(BlockDriverState *bs)
goto ro_cleanup;
}
+ bdrv_graph_rdunlock_main_loop();
+
+ bdrv_graph_wrlock_drained();
bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
+ bdrv_graph_wrunlock();
+
+ bdrv_graph_rdlock_main_loop();
ret = blk_insert_bs(backing, backing_file_bs, &local_err);
if (ret < 0) {
@@ -585,9 +645,14 @@ int bdrv_commit(BlockDriverState *bs)
ret = 0;
ro_cleanup:
blk_unref(backing);
+
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_graph_wrlock_drained();
if (bdrv_cow_bs(bs) != backing_file_bs) {
bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
}
+ bdrv_graph_wrunlock();
+ bdrv_graph_rdlock_main_loop();
bdrv_unref(commit_top_bs);
blk_unref(src);
@@ -596,5 +661,8 @@ ro_cleanup:
bdrv_reopen_set_read_only(backing_file_bs, true, NULL);
}
+out:
+ bdrv_graph_rdunlock_main_loop();
+
return ret;
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index fd470f5..36d5d3e 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -291,8 +291,8 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
}
static int coroutine_fn GRAPH_RDLOCK
-cbw_co_snapshot_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes,
+cbw_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
@@ -551,6 +551,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
bool discard_source,
uint64_t min_cluster_size,
BlockCopyState **bcs,
+ OnCbwError on_cbw_error,
Error **errp)
{
BDRVCopyBeforeWriteState *state;
@@ -568,6 +569,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
}
qdict_put_str(opts, "file", bdrv_get_node_name(source));
qdict_put_str(opts, "target", bdrv_get_node_name(target));
+ qdict_put_str(opts, "on-cbw-error", OnCbwError_str(on_cbw_error));
if (min_cluster_size > INT64_MAX) {
error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64,
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 2a5d4ba..eb93364 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -42,6 +42,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
bool discard_source,
uint64_t min_cluster_size,
BlockCopyState **bcs,
+ OnCbwError on_cbw_error,
Error **errp);
void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/block/coroutines.h b/block/coroutines.h
index 79e5efb..892646b 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -47,7 +47,7 @@ int coroutine_fn GRAPH_RDLOCK
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
- bool want_zero,
+ unsigned int mode,
int64_t offset,
int64_t bytes,
int64_t *pnum,
@@ -78,7 +78,7 @@ int co_wrapper_mixed_bdrv_rdlock
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
- bool want_zero,
+ unsigned int mode,
int64_t offset,
int64_t bytes,
int64_t *pnum,
diff --git a/block/file-posix.c b/block/file-posix.c
index 56d1972..8c73867 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -41,6 +41,7 @@
#include "scsi/pr-manager.h"
#include "scsi/constants.h"
+#include "scsi/utils.h"
#if defined(__APPLE__) && (__MACH__)
#include <sys/ioctl.h>
@@ -72,6 +73,7 @@
#include <linux/blkzoned.h>
#endif
#include <linux/cdrom.h>
+#include <linux/dm-ioctl.h>
#include <linux/fd.h>
#include <linux/fs.h>
#include <linux/hdreg.h>
@@ -110,6 +112,10 @@
#include <sys/diskslice.h>
#endif
+#ifdef EMSCRIPTEN
+#include <sys/ioctl.h>
+#endif
+
/* OS X does not have O_DSYNC */
#ifndef O_DSYNC
#ifdef O_SYNC
@@ -134,6 +140,22 @@
#define RAW_LOCK_PERM_BASE 100
#define RAW_LOCK_SHARED_BASE 200
+/*
+ * Multiple retries are mostly meant for two separate scenarios:
+ *
+ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another
+ * path goes down.
+ *
+ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have
+ * to send another SG_IO to switch to another path group to probe the paths in
+ * it.
+ *
+ * Even if each path is in a separate path group (path_grouping_policy set to
+ * failover), it's rare to have more than eight path groups - and even then
+ * pretty unlikely that only bad path groups would be chosen in eight retries.
+ */
+#define SG_IO_MAX_RETRIES 8
+
typedef struct BDRVRawState {
int fd;
bool use_lock;
@@ -161,6 +183,7 @@ typedef struct BDRVRawState {
bool use_linux_aio:1;
bool has_laio_fdsync:1;
bool use_linux_io_uring:1;
+ bool use_mpath:1;
int page_cache_inconsistent; /* errno from fdatasync failure */
bool has_fallocate;
bool needs_alignment;
@@ -781,17 +804,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
- if (S_ISBLK(st.st_mode)) {
-#ifdef __linux__
- /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
- * not rely on the contents of discarded blocks unless using O_DIRECT.
- * Same for BLKZEROOUT.
- */
- if (!(bs->open_flags & BDRV_O_NOCACHE)) {
- s->has_write_zeroes = false;
- }
-#endif
- }
#ifdef __FreeBSD__
if (S_ISCHR(st.st_mode)) {
/*
@@ -1276,10 +1288,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned)
}
#endif /* defined(CONFIG_BLKZONED) */
+#ifdef CONFIG_LINUX
/*
* Get a sysfs attribute value as a long integer.
*/
-#ifdef CONFIG_LINUX
static long get_sysfs_long_val(struct stat *st, const char *attribute)
{
g_autofree char *str = NULL;
@@ -1299,6 +1311,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute)
}
return ret;
}
+
+/*
+ * Get a sysfs attribute value as a uint32_t.
+ *
+ * Reads the attribute string with get_sysfs_str_val() and parses it as an
+ * unsigned decimal number with qemu_strtoui().
+ *
+ * Returns the negative result of get_sysfs_str_val() if reading fails,
+ * otherwise the result of qemu_strtoui(). *u32 is written only when parsing
+ * returned 0 and nothing follows the number. It is left untouched in every
+ * other case, including a 0 return with trailing characters, so callers
+ * should initialize it before the call.
+ */
+static int get_sysfs_u32_val(struct stat *st, const char *attribute,
+ uint32_t *u32)
+{
+ g_autofree char *str = NULL;
+ const char *end;
+ unsigned int val;
+ int ret;
+
+ ret = get_sysfs_str_val(st, attribute, &str);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* The file ends with '\n'; pass 'end' to accept that. */
+ ret = qemu_strtoui(str, &end, 10, &val);
+ if (ret == 0 && end && *end == '\0') {
+ *u32 = val;
+ }
+ return ret;
+}
#endif
static int hdev_get_max_segments(int fd, struct stat *st)
@@ -1318,6 +1354,23 @@ static int hdev_get_max_segments(int fd, struct stat *st)
#endif
}
+/*
+ * Fills in *dalign with the discard alignment and returns 0 on success,
+ * -errno otherwise.
+ *
+ * With CONFIG_LINUX the value is read from the "discard_granularity" sysfs
+ * attribute via get_sysfs_u32_val(). Without it, this always returns
+ * -ENOTSUP and leaves *dalign untouched.
+ */
+static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign)
+{
+#ifdef CONFIG_LINUX
+ /*
+ * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux
+ * "discard_alignment" is something else.
+ */
+ return get_sysfs_u32_val(st, "discard_granularity", dalign);
+#else
+ return -ENOTSUP;
+#endif
+}
+
#if defined(CONFIG_BLKZONED)
/*
* If the reset_all flag is true, then the wps of zone whose state is
@@ -1527,6 +1580,30 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
+ if (S_ISBLK(st.st_mode)) {
+ uint32_t dalign = 0;
+ int ret;
+
+ ret = hdev_get_pdiscard_alignment(&st, &dalign);
+ if (ret == 0 && dalign != 0) {
+ uint32_t ralign = bs->bl.request_alignment;
+
+ /* Probably never happens, but handle it just in case */
+ if (dalign < ralign && (ralign % dalign == 0)) {
+ dalign = ralign;
+ }
+
+ /* The block layer requires a multiple of request_alignment */
+ if (dalign % ralign != 0) {
+ error_setg(errp, "Invalid pdiscard_alignment limit %u is not a "
+ "multiple of request_alignment %u", dalign, ralign);
+ return;
+ }
+
+ bs->bl.pdiscard_alignment = dalign;
+ }
+ }
+
raw_refresh_zoned_limits(bs, &st, errp);
}
@@ -2011,8 +2088,11 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque)
}
#ifndef HAVE_COPY_FILE_RANGE
-static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
- off_t *out_off, size_t len, unsigned int flags)
+#ifndef EMSCRIPTEN
+static
+#endif
+ssize_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
+ off_t *out_off, size_t len, unsigned int flags)
{
#ifdef __NR_copy_file_range
return syscall(__NR_copy_file_range, in_fd, in_off, out_fd,
@@ -2484,9 +2564,9 @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
}
#endif
-static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
- uint64_t bytes, QEMUIOVector *qiov, int type,
- int flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, uint64_t bytes,
+ QEMUIOVector *qiov, int type, int flags)
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData acb;
@@ -2545,7 +2625,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
if (ret == 0 && (flags & BDRV_REQ_FUA)) {
/* TODO Use pwritev2() instead if it's available */
- ret = raw_co_flush_to_disk(bs);
+ ret = bdrv_co_flush(bs);
}
goto out; /* Avoid the compiler err of unused label */
@@ -2580,16 +2660,16 @@ out:
return ret;
}
-static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
}
-static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
}
@@ -3201,7 +3281,7 @@ static int find_allocation(BlockDriverState *bs, off_t start,
* well exceed it.
*/
static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
- bool want_zero,
+ unsigned int mode,
int64_t offset,
int64_t bytes, int64_t *pnum,
int64_t *map,
@@ -3217,7 +3297,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
return ret;
}
- if (!want_zero) {
+ if (!(mode & BDRV_WANT_ZERO)) {
+ /* There is no backing file - all bytes are allocated in this file. */
*pnum = bytes;
*map = offset;
*file = bs;
@@ -3525,10 +3606,11 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
#endif
#if defined(CONFIG_BLKZONED)
-static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
- int64_t *offset,
- QEMUIOVector *qiov,
- BdrvRequestFlags flags) {
+static int coroutine_fn GRAPH_RDLOCK
+raw_co_zone_append(BlockDriverState *bs,
+ int64_t *offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags) {
assert(flags == 0);
int64_t zone_size_mask = bs->bl.zone_size - 1;
int64_t iov_len = 0;
@@ -4191,15 +4273,105 @@ hdev_open_Mac_error:
/* Since this does ioctl the device must be already opened */
bs->sg = hdev_is_sg(bs);
+ /* sg devices aren't even block devices and can't use dm-mpath */
+ s->use_mpath = !bs->sg;
+
return ret;
}
#if defined(__linux__)
+#if defined(DM_MPATH_PROBE_PATHS)
+static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr)
+{
+ if (ret < 0) {
+ switch (ret) {
+ case -ENODEV:
+ return true;
+ case -EAGAIN:
+ /*
+ * The device is probably suspended. This happens while the dm table
+ * is reloaded, e.g. because a path is added or removed. This is an
+ * operation that should complete within 1ms, so just wait a bit and
+ * retry.
+ *
+ * If the device was suspended for another reason, we'll wait and
+ * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before
+ * we return an error and potentially stop the VM.
+ */
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (io_hdr->host_status != SCSI_HOST_OK) {
+ return true;
+ }
+
+ switch (io_hdr->status) {
+ case GOOD:
+ case CONDITION_GOOD:
+ case INTERMEDIATE_GOOD:
+ case INTERMEDIATE_C_GOOD:
+ case RESERVATION_CONFLICT:
+ case COMMAND_TERMINATED:
+ return false;
+ case CHECK_CONDITION:
+ return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp,
+ io_hdr->mx_sb_len);
+ default:
+ return true;
+ }
+}
+
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+ BDRVRawState *s = acb->bs->opaque;
+ RawPosixAIOData probe_acb;
+
+ if (!s->use_mpath) {
+ return false;
+ }
+
+ if (!sgio_path_error(ret, acb->ioctl.buf)) {
+ return false;
+ }
+
+ probe_acb = (RawPosixAIOData) {
+ .bs = acb->bs,
+ .aio_type = QEMU_AIO_IOCTL,
+ .aio_fildes = s->fd,
+ .aio_offset = 0,
+ .ioctl = {
+ .buf = NULL,
+ .cmd = DM_MPATH_PROBE_PATHS,
+ },
+ };
+
+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb);
+ if (ret == -ENOTTY) {
+ s->use_mpath = false;
+ } else if (ret == -EAGAIN) {
+ /* The device might be suspended for a table reload, worth retrying */
+ return true;
+ }
+
+ return ret == 0;
+}
+#else
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+ return false;
+}
+#endif /* DM_MPATH_PROBE_PATHS */
+
static int coroutine_fn
hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData acb;
+ int retries = SG_IO_MAX_RETRIES;
int ret;
ret = fd_open(bs);
@@ -4227,7 +4399,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
},
};
- return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+ do {
+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+ } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret));
+
+ return ret;
}
#endif /* linux */
diff --git a/block/gluster.c b/block/gluster.c
index c6d25ae..89abd40 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -972,8 +972,6 @@ static void qemu_gluster_reopen_commit(BDRVReopenState *state)
g_free(state->opaque);
state->opaque = NULL;
-
- return;
}
@@ -993,8 +991,6 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
g_free(state->opaque);
state->opaque = NULL;
-
- return;
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
@@ -1465,7 +1461,7 @@ exit:
* (Based on raw_co_block_status() from file-posix.c.)
*/
static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
- bool want_zero,
+ unsigned int mode,
int64_t offset,
int64_t bytes,
int64_t *pnum,
@@ -1482,7 +1478,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
return ret;
}
- if (!want_zero) {
+ if (!(mode & BDRV_WANT_ZERO)) {
*pnum = bytes;
*map = offset;
*file = bs;
diff --git a/block/graph-lock.c b/block/graph-lock.c
index c81162b..b731947 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -34,6 +34,17 @@ static QemuMutex aio_context_list_lock;
static int has_writer;
/*
+ * Many write-locked sections are also drained sections. There is a convenience
+ * wrapper bdrv_graph_wrlock_drained() which begins a drained section before
+ * acquiring the lock. This variable here is used so bdrv_graph_wrunlock() knows
+ * if it also needs to end such a drained section. It needs to be a counter,
+ * because the aio_poll() call in bdrv_graph_wrlock() might re-enter
+ * bdrv_graph_wrlock_drained(). And note that aio_bh_poll() in
+ * bdrv_graph_wrunlock() might also re-enter a write-locked section.
+ */
+static int wrlock_quiesced_counter;
+
+/*
* A reader coroutine could move from an AioContext to another.
* If this happens, there is no problem from the point of view of
* counters. The problem is that the total count becomes
@@ -112,8 +123,14 @@ void no_coroutine_fn bdrv_graph_wrlock(void)
assert(!qatomic_read(&has_writer));
assert(!qemu_in_coroutine());
- /* Make sure that constantly arriving new I/O doesn't cause starvation */
- bdrv_drain_all_begin_nopoll();
+ bool need_drain = wrlock_quiesced_counter == 0;
+
+ if (need_drain) {
+ /*
+ * Make sure that constantly arriving new I/O doesn't cause starvation
+ */
+ bdrv_drain_all_begin_nopoll();
+ }
/*
* reader_count == 0: this means writer will read has_reader as 1
@@ -139,7 +156,18 @@ void no_coroutine_fn bdrv_graph_wrlock(void)
smp_mb();
} while (reader_count() >= 1);
- bdrv_drain_all_end();
+ if (need_drain) {
+ bdrv_drain_all_end();
+ }
+}
+
+void no_coroutine_fn bdrv_graph_wrlock_drained(void)
+{
+ GLOBAL_STATE_CODE();
+
+ bdrv_drain_all_begin();
+ wrlock_quiesced_counter++;
+ bdrv_graph_wrlock();
}
void no_coroutine_fn bdrv_graph_wrunlock(void)
@@ -168,6 +196,12 @@ void no_coroutine_fn bdrv_graph_wrunlock(void)
* progress.
*/
aio_bh_poll(qemu_get_aio_context());
+
+ if (wrlock_quiesced_counter > 0) {
+ bdrv_drain_all_end();
+ wrlock_quiesced_counter--;
+ }
+
}
void coroutine_fn bdrv_graph_co_rdlock(void)
diff --git a/block/io.c b/block/io.c
index 1ba8d1a..9bd8ba8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -38,10 +38,14 @@
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "system/replay.h"
+#include "qemu/units.h"
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+/* Maximum read size for checking if data reads as zero, in bytes */
+#define MAX_ZERO_CHECK_BUFFER (128 * KiB)
+
static void coroutine_fn GRAPH_RDLOCK
bdrv_parent_cb_resize(BlockDriverState *bs);
@@ -357,7 +361,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
GLOBAL_STATE_CODE();
/* Stop things in parent-to-child order */
- if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+ if (bs->quiesce_counter++ == 0) {
GRAPH_RDLOCK_GUARD_MAINLOOP();
bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
@@ -397,8 +401,6 @@ bdrv_drained_begin(BlockDriverState *bs)
*/
static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
- int old_quiesce_counter;
-
IO_OR_GS_CODE();
if (qemu_in_coroutine()) {
@@ -409,11 +411,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
/* At this point, we should be always running in the main loop. */
GLOBAL_STATE_CODE();
assert(bs->quiesce_counter > 0);
- GLOBAL_STATE_CODE();
/* Re-enable things in child-to-parent order */
- old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
- if (old_quiesce_counter == 1) {
+ if (--bs->quiesce_counter == 0) {
GRAPH_RDLOCK_GUARD_MAINLOOP();
if (bs->drv && bs->drv->bdrv_drain_end) {
bs->drv->bdrv_drain_end(bs);
@@ -2364,10 +2364,8 @@ int bdrv_flush_all(void)
* Drivers not implementing the functionality are assumed to not support
* backing files, hence all their sectors are reported as allocated.
*
- * If 'want_zero' is true, the caller is querying for mapping
- * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
- * _ZERO where possible; otherwise, the result favors larger 'pnum',
- * with a focus on accurate BDRV_BLOCK_ALLOCATED.
+ * 'mode' serves as a hint as to which results are favored; see the
+ * BDRV_WANT_* macros for details.
*
* If 'offset' is beyond the end of the disk image the return value is
* BDRV_BLOCK_EOF and 'pnum' is set to 0.
@@ -2387,7 +2385,7 @@ int bdrv_flush_all(void)
* set to the host mapping and BDS corresponding to the guest offset.
*/
static int coroutine_fn GRAPH_RDLOCK
-bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
+bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
@@ -2476,7 +2474,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
local_file = bs;
local_map = aligned_offset;
} else {
- ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
+ ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset,
aligned_bytes, pnum, &local_map,
&local_file);
@@ -2488,10 +2486,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
* the cache requires an RCU update, so double check here to avoid
* such an update if possible.
*
- * Check want_zero, because we only want to update the cache when we
+ * Check mode, because we only want to update the cache when we
* have accurate information about what is zero and what is data.
*/
- if (want_zero &&
+ if (mode == BDRV_WANT_PRECISE &&
ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
QLIST_EMPTY(&bs->children))
{
@@ -2548,7 +2546,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
- ret = bdrv_co_do_block_status(local_file, want_zero, local_map,
+ ret = bdrv_co_do_block_status(local_file, mode, local_map,
*pnum, pnum, &local_map, &local_file);
goto out;
}
@@ -2560,7 +2558,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
if (!cow_bs) {
ret |= BDRV_BLOCK_ZERO;
- } else if (want_zero) {
+ } else if (mode == BDRV_WANT_PRECISE) {
int64_t size2 = bdrv_co_getlength(cow_bs);
if (size2 >= 0 && offset >= size2) {
@@ -2569,14 +2567,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
}
}
- if (want_zero && ret & BDRV_BLOCK_RECURSE &&
+ if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE &&
local_file && local_file != bs &&
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
(ret & BDRV_BLOCK_OFFSET_VALID)) {
int64_t file_pnum;
int ret2;
- ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map,
+ ret2 = bdrv_co_do_block_status(local_file, mode, local_map,
*pnum, &file_pnum, NULL, NULL);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information, it
@@ -2627,7 +2625,7 @@ int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
- bool want_zero,
+ unsigned int mode,
int64_t offset,
int64_t bytes,
int64_t *pnum,
@@ -2654,7 +2652,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
return 0;
}
- ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum,
+ ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum,
map, file);
++*depth;
if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
@@ -2671,7 +2669,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
p = bdrv_filter_or_cow_bs(p))
{
- ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum,
+ ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum,
map, file);
++*depth;
if (ret < 0) {
@@ -2734,7 +2732,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
BlockDriverState **file)
{
IO_CODE();
- return bdrv_co_common_block_status_above(bs, base, false, true, offset,
+ return bdrv_co_common_block_status_above(bs, base, false,
+ BDRV_WANT_PRECISE, offset,
bytes, pnum, map, file, NULL);
}
@@ -2752,27 +2751,89 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset,
* by @offset and @bytes is known to read as zeroes.
* Return 1 if that is the case, 0 otherwise and -errno on error.
* This test is meant to be fast rather than accurate so returning 0
- * does not guarantee non-zero data.
+ * does not guarantee non-zero data; but a return of 1 is reliable.
*/
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
int64_t bytes)
{
int ret;
- int64_t pnum = bytes;
+ int64_t pnum;
IO_CODE();
- if (!bytes) {
- return 1;
+ while (bytes) {
+ ret = bdrv_co_common_block_status_above(bs, NULL, false,
+ BDRV_WANT_ZERO, offset, bytes,
+ &pnum, NULL, NULL, NULL);
+
+ if (ret < 0) {
+ return ret;
+ }
+ if (!(ret & BDRV_BLOCK_ZERO)) {
+ return 0;
+ }
+ offset += pnum;
+ bytes -= pnum;
}
- ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset,
- bytes, &pnum, NULL, NULL, NULL);
+ return 1;
+}
+
+/*
+ * Check @bs (and its backing chain) to see if the entire image is known
+ * to read as zeroes.
+ * Return 1 if that is the case, 0 otherwise and -errno on error.
+ * This test is meant to be fast rather than accurate so returning 0
+ * does not guarantee non-zero data; however, a return of 1 is reliable,
+ * and this function can report 1 in more cases than bdrv_co_is_zero_fast.
+ */
+int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs)
+{
+ int ret;
+ int64_t pnum, bytes;
+ char *buf;
+ QEMUIOVector local_qiov;
+ IO_CODE();
+
+ bytes = bdrv_co_getlength(bs);
+ if (bytes < 0) {
+ return bytes;
+ }
+ /* First probe - see if the entire image reads as zero */
+ ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO,
+ 0, bytes, &pnum, NULL, NULL,
+ NULL);
if (ret < 0) {
return ret;
}
+ if (ret & BDRV_BLOCK_ZERO) {
+ return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum);
+ }
- return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
+ /*
+ * Because of the way 'blockdev-create' works, raw files tend to
+ * be created with a non-sparse region at the front to make
+ * alignment probing easier. If the block starts with only a
+ * small allocated region, it is still worth the effort to see if
+ * the rest of the image is still sparse, coupled with manually
+ * reading the first region to see if it reads zero after all.
+ */
+ if (pnum > MAX_ZERO_CHECK_BUFFER) {
+ return 0;
+ }
+ ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum);
+ if (ret <= 0) {
+ return ret;
+ }
+ /* Only the head of the image is unknown, and it's small. Read it. */
+ buf = qemu_blockalign(bs, pnum);
+ qemu_iovec_init_buf(&local_qiov, buf, pnum);
+ ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0);
+ if (ret >= 0) {
+ ret = buffer_is_zero(buf, pnum);
+ }
+ qemu_vfree(buf);
+ return ret;
}
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
@@ -2782,9 +2843,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t dummy;
IO_CODE();
- ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
- bytes, pnum ? pnum : &dummy, NULL,
- NULL, NULL);
+ ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED,
+ offset, bytes, pnum ? pnum : &dummy,
+ NULL, NULL, NULL);
if (ret < 0) {
return ret;
}
@@ -2817,7 +2878,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs,
int ret;
IO_CODE();
- ret = bdrv_co_common_block_status_above(bs, base, include_base, false,
+ ret = bdrv_co_common_block_status_above(bs, base, include_base,
+ BDRV_WANT_ALLOCATED,
offset, bytes, pnum, NULL, NULL,
&depth);
if (ret < 0) {
@@ -3102,18 +3164,19 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
return 0;
}
- if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
+ if (!bs->drv->bdrv_co_pdiscard) {
return 0;
}
/* Invalidate the cached block-status data range if this discard overlaps */
bdrv_bsc_invalidate_range(bs, offset, bytes);
- /* Discard is advisory, but some devices track and coalesce
+ /*
+ * Discard is advisory, but some devices track and coalesce
* unaligned requests, so we must pass everything down rather than
- * round here. Still, most devices will just silently ignore
- * unaligned requests (by returning -ENOTSUP), so we must fragment
- * the request accordingly. */
+ * round here. Still, most devices reject unaligned requests with
+ * -EINVAL or -ENOTSUP, so we must fragment the request accordingly.
+ */
align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
assert(align % bs->bl.request_alignment == 0);
head = offset % align;
@@ -3161,27 +3224,15 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
ret = -ENOMEDIUM;
goto out;
}
- if (bs->drv->bdrv_co_pdiscard) {
- ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
- } else {
- BlockAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
- bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- ret = -EIO;
- goto out;
+
+ ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
+ if (ret && ret != -ENOTSUP) {
+ if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) {
+ /* Silently skip rejected unaligned head/tail requests */
} else {
- qemu_coroutine_yield();
- ret = co.ret;
+ goto out; /* bail out */
}
}
- if (ret && ret != -ENOTSUP) {
- goto out;
- }
offset += num;
bytes -= num;
@@ -3709,8 +3760,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
}
int coroutine_fn
-bdrv_co_snapshot_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes,
+bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
@@ -3728,7 +3779,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs,
}
bdrv_inc_in_flight(bs);
- ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
+ ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes,
pnum, map, file);
bdrv_dec_in_flight(bs);
diff --git a/block/iscsi.c b/block/iscsi.c
index 2f0f4da..15b96ee 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -694,9 +694,9 @@ out_unlock:
static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
+ unsigned int mode,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
IscsiLun *iscsilun = bs->opaque;
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 407369f..c200e7a 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -291,7 +291,7 @@ static void ioq_submit(LinuxAioState *s)
{
int ret, len;
struct qemu_laiocb *aiocb;
- struct iocb *iocbs[MAX_EVENTS];
+ QEMU_UNINITIALIZED struct iocb *iocbs[MAX_EVENTS];
QSIMPLEQ_HEAD(, qemu_laiocb) completed;
do {
diff --git a/block/mirror.c b/block/mirror.c
index a53582f..b344182 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -51,10 +51,10 @@ typedef struct MirrorBlockJob {
BlockDriverState *to_replace;
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
- bool is_none_mode;
+ MirrorSyncMode sync_mode;
BlockMirrorBackingMode backing_mode;
- /* Whether the target image requires explicit zero-initialization */
- bool zero_target;
+ /* Whether the target should be assumed to be already zero initialized */
+ bool target_is_zero;
/*
* To be accesssed with atomics. Written only under the BQL (required by the
* current implementation of mirror_change()).
@@ -73,6 +73,7 @@ typedef struct MirrorBlockJob {
size_t buf_size;
int64_t bdev_length;
unsigned long *cow_bitmap;
+ unsigned long *zero_bitmap;
BdrvDirtyBitmap *dirty_bitmap;
BdrvDirtyBitmapIter *dbi;
uint8_t *buf;
@@ -108,9 +109,12 @@ struct MirrorOp {
int64_t offset;
uint64_t bytes;
- /* The pointee is set by mirror_co_read(), mirror_co_zero(), and
- * mirror_co_discard() before yielding for the first time */
+ /*
+ * These pointers are set by mirror_co_read(), mirror_co_zero(), and
+ * mirror_co_discard() before yielding for the first time
+ */
int64_t *bytes_handled;
+ bool *io_skipped;
bool is_pseudo_op;
bool is_active_write;
@@ -408,15 +412,34 @@ static void coroutine_fn mirror_co_read(void *opaque)
static void coroutine_fn mirror_co_zero(void *opaque)
{
MirrorOp *op = opaque;
- int ret;
+ bool write_needed = true;
+ int ret = 0;
op->s->in_flight++;
op->s->bytes_in_flight += op->bytes;
*op->bytes_handled = op->bytes;
op->is_in_flight = true;
- ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
- op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
+ if (op->s->zero_bitmap) {
+ unsigned long end = DIV_ROUND_UP(op->offset + op->bytes,
+ op->s->granularity);
+ assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity));
+ assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) ||
+ op->offset + op->bytes == op->s->bdev_length);
+ if (find_next_zero_bit(op->s->zero_bitmap, end,
+ op->offset / op->s->granularity) == end) {
+ write_needed = false;
+ *op->io_skipped = true;
+ }
+ }
+ if (write_needed) {
+ ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes,
+ op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0);
+ }
+ if (ret >= 0 && op->s->zero_bitmap) {
+ bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity,
+ DIV_ROUND_UP(op->bytes, op->s->granularity));
+ }
mirror_write_complete(op, ret);
}
@@ -435,29 +458,43 @@ static void coroutine_fn mirror_co_discard(void *opaque)
}
static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
- unsigned bytes, MirrorMethod mirror_method)
+ unsigned bytes, MirrorMethod mirror_method,
+ bool *io_skipped)
{
MirrorOp *op;
Coroutine *co;
int64_t bytes_handled = -1;
+ assert(QEMU_IS_ALIGNED(offset, s->granularity));
+ assert(QEMU_IS_ALIGNED(bytes, s->granularity) ||
+ offset + bytes == s->bdev_length);
op = g_new(MirrorOp, 1);
*op = (MirrorOp){
.s = s,
.offset = offset,
.bytes = bytes,
.bytes_handled = &bytes_handled,
+ .io_skipped = io_skipped,
};
qemu_co_queue_init(&op->waiting_requests);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
+ if (s->zero_bitmap) {
+ bitmap_clear(s->zero_bitmap, offset / s->granularity,
+ DIV_ROUND_UP(bytes, s->granularity));
+ }
co = qemu_coroutine_create(mirror_co_read, op);
break;
case MIRROR_METHOD_ZERO:
+ /* s->zero_bitmap handled in mirror_co_zero */
co = qemu_coroutine_create(mirror_co_zero, op);
break;
case MIRROR_METHOD_DISCARD:
+ if (s->zero_bitmap) {
+ bitmap_clear(s->zero_bitmap, offset / s->granularity,
+ DIV_ROUND_UP(bytes, s->granularity));
+ }
co = qemu_coroutine_create(mirror_co_discard, op);
break;
default:
@@ -568,6 +605,7 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
int ret = -1;
int64_t io_bytes;
int64_t io_bytes_acct;
+ bool io_skipped = false;
MirrorMethod mirror_method = MIRROR_METHOD_COPY;
assert(!(offset % s->granularity));
@@ -611,8 +649,10 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
}
io_bytes = mirror_clip_bytes(s, offset, io_bytes);
- io_bytes = mirror_perform(s, offset, io_bytes, mirror_method);
- if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) {
+ io_bytes = mirror_perform(s, offset, io_bytes, mirror_method,
+ &io_skipped);
+ if (io_skipped ||
+ (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) {
io_bytes_acct = 0;
} else {
io_bytes_acct = io_bytes;
@@ -721,11 +761,16 @@ static int mirror_exit_common(Job *job)
bdrv_graph_rdlock_main_loop();
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
+ bdrv_graph_rdunlock_main_loop();
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
- BlockDriverState *backing = s->is_none_mode ? src : s->base;
- BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
+ BlockDriverState *backing;
+ BlockDriverState *unfiltered_target;
+ bdrv_graph_wrlock_drained();
+ unfiltered_target = bdrv_skip_filters(target_bs);
+
+ backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base;
if (bdrv_cow_bs(unfiltered_target) != backing) {
bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
if (local_err) {
@@ -734,16 +779,18 @@ static int mirror_exit_common(Job *job)
ret = -EPERM;
}
}
+ bdrv_graph_wrunlock();
} else if (!abort && s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+ bdrv_graph_rdlock_main_loop();
assert(!bdrv_backing_chain_next(target_bs));
ret = bdrv_open_backing_file(bdrv_skip_filters(target_bs), NULL,
"backing", &local_err);
+ bdrv_graph_rdunlock_main_loop();
if (ret < 0) {
error_report_err(local_err);
local_err = NULL;
}
}
- bdrv_graph_rdunlock_main_loop();
if (s->should_complete && !abort) {
BlockDriverState *to_replace = s->to_replace ?: src;
@@ -841,15 +888,54 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s)
int64_t offset;
BlockDriverState *bs;
BlockDriverState *target_bs = blk_bs(s->target);
- int ret = -1;
+ int ret = -EIO;
int64_t count;
+ bool punch_holes =
+ target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+ bdrv_can_write_zeroes_with_unmap(target_bs);
+ int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity);
+ /* Determine if the image is already zero, regardless of sync mode. */
+ s->zero_bitmap = bitmap_new(bitmap_length);
bdrv_graph_co_rdlock();
bs = s->mirror_top_bs->backing->bs;
+ if (s->target_is_zero) {
+ ret = 1;
+ } else {
+ ret = bdrv_co_is_all_zeroes(target_bs);
+ }
bdrv_graph_co_rdunlock();
- if (s->zero_target) {
- if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
+ /* Determine if a pre-zeroing pass is necessary. */
+ if (ret < 0) {
+ return ret;
+ } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
+ /*
+ * In TOP mode, there is no benefit to a pre-zeroing pass, but
+ * the zero bitmap can be set if the destination already reads
+ * as zero and we are not punching holes.
+ */
+ if (ret > 0 && !punch_holes) {
+ bitmap_set(s->zero_bitmap, 0, bitmap_length);
+ }
+ } else if (ret == 0 || punch_holes) {
+ /*
+ * Here, we are in FULL mode; our goal is to avoid writing
+ * zeroes if the destination already reads as zero, except
+ * when we are trying to punch holes. This is possible if
+ * zeroing happened externally (ret > 0) or if we have a fast
+ * way to pre-zero the image (the dirty bitmap will be
+ * populated later by the non-zero portions, the same as for
+ * TOP mode). If pre-zeroing is not fast, or we need to visit
+ * the entire image in order to punch holes even in the
+ * non-allocated regions of the source, then just mark the
+ * entire image dirty and leave the zero bitmap clear at this
+ * point in time. Otherwise, it can be faster to pre-zero the
+ * image now, even if we re-write the allocated portions of
+ * the disk later, and the pre-zero pass will populate the
+ * zero bitmap.
+ */
+ if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
return 0;
}
@@ -858,6 +944,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s)
for (offset = 0; offset < s->bdev_length; ) {
int bytes = MIN(s->bdev_length - offset,
QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
+ bool ignored;
mirror_throttle(s);
@@ -873,12 +960,15 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s)
continue;
}
- mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO);
+ mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored);
offset += bytes;
}
mirror_wait_for_all_io(s);
s->initial_zeroing_ongoing = false;
+ } else {
+ /* In FULL mode, and image already reads as zero. */
+ bitmap_set(s->zero_bitmap, 0, bitmap_length);
}
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -1020,7 +1110,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
mirror_free_init(s);
s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- if (!s->is_none_mode) {
+ if (s->sync_mode != MIRROR_SYNC_MODE_NONE) {
ret = mirror_dirty_init(s);
if (ret < 0 || job_is_cancelled(&s->common.job)) {
goto immediate_exit;
@@ -1163,6 +1253,7 @@ immediate_exit:
assert(s->in_flight == 0);
qemu_vfree(s->buf);
g_free(s->cow_bitmap);
+ g_free(s->zero_bitmap);
g_free(s->in_flight_bitmap);
bdrv_dirty_iter_free(s->dbi);
@@ -1341,7 +1432,8 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
{
int ret;
size_t qiov_offset = 0;
- int64_t bitmap_offset, bitmap_end;
+ int64_t dirty_bitmap_offset, dirty_bitmap_end;
+ int64_t zero_bitmap_offset, zero_bitmap_end;
if (!QEMU_IS_ALIGNED(offset, job->granularity) &&
bdrv_dirty_bitmap_get(job->dirty_bitmap, offset))
@@ -1385,31 +1477,54 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
}
/*
- * Tails are either clean or shrunk, so for bitmap resetting
- * we safely align the range down.
+ * Tails are either clean or shrunk, so for dirty bitmap resetting
+ * we safely align the range narrower. But for zero bitmap, round
+ * range wider for checking or clearing, and narrower for setting.
*/
- bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity);
- bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity);
- if (bitmap_offset < bitmap_end) {
- bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
- bitmap_end - bitmap_offset);
+ dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity);
+ dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity);
+ if (dirty_bitmap_offset < dirty_bitmap_end) {
+ bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset,
+ dirty_bitmap_end - dirty_bitmap_offset);
}
+ zero_bitmap_offset = offset / job->granularity;
+ zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity);
job_progress_increase_remaining(&job->common.job, bytes);
job->active_write_bytes_in_flight += bytes;
switch (method) {
case MIRROR_METHOD_COPY:
+ if (job->zero_bitmap) {
+ bitmap_clear(job->zero_bitmap, zero_bitmap_offset,
+ zero_bitmap_end - zero_bitmap_offset);
+ }
ret = blk_co_pwritev_part(job->target, offset, bytes,
qiov, qiov_offset, flags);
break;
case MIRROR_METHOD_ZERO:
+ if (job->zero_bitmap) {
+ if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end,
+ zero_bitmap_offset) == zero_bitmap_end) {
+ ret = 0;
+ break;
+ }
+ }
assert(!qiov);
ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags);
+ if (job->zero_bitmap && ret >= 0) {
+ bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity,
+ (dirty_bitmap_end - dirty_bitmap_offset) /
+ job->granularity);
+ }
break;
case MIRROR_METHOD_DISCARD:
+ if (job->zero_bitmap) {
+ bitmap_clear(job->zero_bitmap, zero_bitmap_offset,
+ zero_bitmap_end - zero_bitmap_offset);
+ }
assert(!qiov);
ret = blk_co_pdiscard(job->target, offset, bytes);
break;
@@ -1430,10 +1545,10 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
* at function start, and they must be still dirty, as we've locked
* the region for in-flight op.
*/
- bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity);
- bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
- bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset,
- bitmap_end - bitmap_offset);
+ dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity);
+ dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity);
+ bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset,
+ dirty_bitmap_end - dirty_bitmap_offset);
qatomic_set(&job->actively_synced, false);
action = mirror_error_action(job, false, -ret);
@@ -1711,15 +1826,16 @@ static BlockJob *mirror_start_job(
int creation_flags, BlockDriverState *target,
const char *replaces, int64_t speed,
uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode sync_mode,
BlockMirrorBackingMode backing_mode,
- bool zero_target,
+ bool target_is_zero,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
BlockCompletionFunc *cb,
void *opaque,
const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base,
+ BlockDriverState *base,
bool auto_complete, const char *filter_node_name,
bool is_mirror, MirrorCopyMode copy_mode,
bool base_ro,
@@ -1878,9 +1994,9 @@ static BlockJob *mirror_start_job(
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
- s->is_none_mode = is_none_mode;
+ s->sync_mode = sync_mode;
s->backing_mode = backing_mode;
- s->zero_target = zero_target;
+ s->target_is_zero = target_is_zero;
qatomic_set(&s->copy_mode, copy_mode);
s->base = base;
s->base_overlay = bdrv_find_overlay(bs, base);
@@ -1904,7 +2020,7 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
BLK_PERM_CONSISTENT_READ,
@@ -2009,13 +2125,12 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
- bool zero_target,
+ bool target_is_zero,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, const char *filter_node_name,
MirrorCopyMode copy_mode, Error **errp)
{
- bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
@@ -2028,14 +2143,13 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
}
bdrv_graph_rdlock_main_loop();
- is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
bdrv_graph_rdunlock_main_loop();
mirror_start_job(job_id, bs, creation_flags, target, replaces,
- speed, granularity, buf_size, backing_mode, zero_target,
- on_source_error, on_target_error, unmap, NULL, NULL,
- &mirror_job_driver, is_none_mode, base, false,
+ speed, granularity, buf_size, mode, backing_mode,
+ target_is_zero, on_source_error, on_target_error, unmap,
+ NULL, NULL, &mirror_job_driver, base, false,
filter_node_name, true, copy_mode, false, errp);
}
@@ -2061,9 +2175,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
job = mirror_start_job(
job_id, bs, creation_flags, base, NULL, speed, 0, 0,
- MIRROR_LEAVE_BACKING_CHAIN, false,
+ MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false,
on_error, on_error, true, cb, opaque,
- &commit_active_job_driver, false, base, auto_complete,
+ &commit_active_job_driver, base, auto_complete,
filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND,
base_read_only, errp);
if (!job) {
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 6919a49..282d1c3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -144,7 +144,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
Error *local_err = NULL;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_graph_rdlock_main_loop();
bs = bdrv_find_node(id);
if (bs) {
@@ -152,29 +152,31 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
if (local_err) {
error_report_err(local_err);
}
- return;
+ goto unlock;
}
blk = blk_by_name(id);
if (!blk) {
error_report("Device '%s' not found", id);
- return;
+ goto unlock;
}
if (!blk_legacy_dinfo(blk)) {
error_report("Deleting device added with blockdev-add"
" is not supported");
- return;
+ goto unlock;
}
bs = blk_bs(blk);
if (bs) {
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
error_report_err(local_err);
- return;
+ goto unlock;
}
+ bdrv_graph_rdunlock_main_loop();
blk_remove_bs(blk);
+ bdrv_graph_rdlock_main_loop();
}
/* Make the BlockBackend and the attached BlockDriverState anonymous */
@@ -191,6 +193,9 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
} else {
blk_unref(blk);
}
+
+unlock:
+ bdrv_graph_rdunlock_main_loop();
}
void hmp_commit(Monitor *mon, const QDict *qdict)
diff --git a/block/nbd.c b/block/nbd.c
index 887841b..d5a2b21 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1397,8 +1397,8 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
}
static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status(
- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map, BlockDriverState **file)
+ BlockDriverState *bs, unsigned int mode, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int ret, request_ret;
NBDExtent64 extent = { 0 };
diff --git a/block/null.c b/block/null.c
index dc0b1fd..4e448d5 100644
--- a/block/null.c
+++ b/block/null.c
@@ -227,9 +227,9 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
}
static int coroutine_fn null_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
+ unsigned int mode,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
BDRVNullState *s = bs->opaque;
diff --git a/block/nvme.c b/block/nvme.c
index bbf7c23..8df53ee 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -18,6 +18,7 @@
#include "qobject/qstring.h"
#include "qemu/defer-call.h"
#include "qemu/error-report.h"
+#include "qemu/host-pci-mmio.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/cutils.h"
@@ -60,7 +61,7 @@ typedef struct {
uint8_t *queue;
uint64_t iova;
/* Hardware MMIO register */
- volatile uint32_t *doorbell;
+ uint32_t *doorbell;
} NVMeQueue;
typedef struct {
@@ -100,7 +101,7 @@ struct BDRVNVMeState {
QEMUVFIOState *vfio;
void *bar0_wo_map;
/* Memory mapped registers */
- volatile struct {
+ struct {
uint32_t sq_tail;
uint32_t cq_head;
} *doorbells;
@@ -292,7 +293,7 @@ static void nvme_kick(NVMeQueuePair *q)
assert(!(q->sq.tail & 0xFF00));
/* Fence the write to submission queue entry before notifying the device. */
smp_wmb();
- *q->sq.doorbell = cpu_to_le32(q->sq.tail);
+ host_pci_stl_le_p(q->sq.doorbell, q->sq.tail);
q->inflight += q->need_kick;
q->need_kick = 0;
}
@@ -441,7 +442,7 @@ static bool nvme_process_completion(NVMeQueuePair *q)
if (progress) {
/* Notify the device so it can post more completions. */
smp_mb_release();
- *q->cq.doorbell = cpu_to_le32(q->cq.head);
+ host_pci_stl_le_p(q->cq.doorbell, q->cq.head);
nvme_wake_free_req_locked(q);
}
@@ -460,7 +461,7 @@ static void nvme_process_completion_bh(void *opaque)
* so notify the device that it has space to fill in more completions now.
*/
smp_mb_release();
- *q->cq.doorbell = cpu_to_le32(q->cq.head);
+ host_pci_stl_le_p(q->cq.doorbell, q->cq.head);
nvme_wake_free_req_locked(q);
nvme_process_completion(q);
@@ -749,9 +750,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
int ret;
uint64_t cap;
uint32_t ver;
+ uint32_t cc;
uint64_t timeout_ms;
uint64_t deadline, now;
- volatile NvmeBar *regs = NULL;
+ NvmeBar *regs = NULL;
qemu_co_mutex_init(&s->dma_map_lock);
qemu_co_queue_init(&s->dma_flush_queue);
@@ -779,7 +781,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
- cap = le64_to_cpu(regs->cap);
+ cap = host_pci_ldq_le_p(&regs->cap);
trace_nvme_controller_capability_raw(cap);
trace_nvme_controller_capability("Maximum Queue Entries Supported",
1 + NVME_CAP_MQES(cap));
@@ -805,16 +807,17 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
bs->bl.request_alignment = s->page_size;
timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
- ver = le32_to_cpu(regs->vs);
+ ver = host_pci_ldl_le_p(&regs->vs);
trace_nvme_controller_spec_version(extract32(ver, 16, 16),
extract32(ver, 8, 8),
extract32(ver, 0, 8));
/* Reset device to get a clean state. */
- regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
+ cc = host_pci_ldl_le_p(&regs->cc);
+ host_pci_stl_le_p(&regs->cc, cc & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
- while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+ while (NVME_CSTS_RDY(host_pci_ldl_le_p(&regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@@ -843,19 +846,21 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
s->queues[INDEX_ADMIN] = q;
s->queue_count = 1;
QEMU_BUILD_BUG_ON((NVME_QUEUE_SIZE - 1) & 0xF000);
- regs->aqa = cpu_to_le32(((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
- ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
- regs->asq = cpu_to_le64(q->sq.iova);
- regs->acq = cpu_to_le64(q->cq.iova);
+ host_pci_stl_le_p(&regs->aqa,
+ ((NVME_QUEUE_SIZE - 1) << AQA_ACQS_SHIFT) |
+ ((NVME_QUEUE_SIZE - 1) << AQA_ASQS_SHIFT));
+ host_pci_stq_le_p(&regs->asq, q->sq.iova);
+ host_pci_stq_le_p(&regs->acq, q->cq.iova);
/* After setting up all control registers we can enable device now. */
- regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
- (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
- CC_EN_MASK);
+ host_pci_stl_le_p(&regs->cc,
+ (ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+ (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+ CC_EN_MASK);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * SCALE_MS;
- while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
+ while (!NVME_CSTS_RDY(host_pci_ldl_le_p(&regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
diff --git a/block/parallels.c b/block/parallels.c
index 347ca12..3a375e2 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -416,9 +416,9 @@ parallels_co_flush_to_os(BlockDriverState *bs)
}
static int coroutine_fn GRAPH_RDLOCK
-parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+parallels_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
BDRVParallelsState *s = bs->opaque;
int count;
diff --git a/block/qapi.c b/block/qapi.c
index 2c50a6b..12fbf8d 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
ImageInfo *backing_info;
BlockDriverState *backing;
BlockDeviceInfo *info;
+ BlockdevChildList **children_list_tail;
+ BdrvChild *child;
if (!bs->drv) {
error_setg(errp, "Block device %s is ejected", bs->node_name);
@@ -73,8 +75,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
.no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH),
};
- if (bs->node_name[0]) {
- info->node_name = g_strdup(bs->node_name);
+ info->node_name = g_strdup(bs->node_name);
+
+ children_list_tail = &info->children;
+ QLIST_FOREACH(child, &bs->children, next) {
+ BlockdevChild *child_ref = g_new0(BlockdevChild, 1);
+ child_ref->child = g_strdup(child->name);
+ child_ref->node_name = g_strdup(child->bs->node_name);
+ QAPI_LIST_APPEND(children_list_tail, child_ref);
}
backing = bdrv_cow_bs(bs);
diff --git a/block/qcow.c b/block/qcow.c
index da8ad4d..8a3e759 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -530,7 +530,7 @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
}
static int coroutine_fn GRAPH_RDLOCK
-qcow_co_block_status(BlockDriverState *bs, bool want_zero,
+qcow_co_block_status(BlockDriverState *bs, unsigned int mode,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
diff --git a/block/qcow2.c b/block/qcow2.c
index dd6bcaf..4aa9f9e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1721,7 +1721,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- } else if (!(flags & BDRV_O_NO_IO)) {
+ } else {
error_setg(errp, "Missing CRYPTO header for crypt method %d",
s->crypt_method_header);
ret = -EINVAL;
@@ -1895,7 +1895,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
g_free(s->image_data_file);
if (open_data_file && has_data_file(bs)) {
bdrv_graph_co_rdunlock();
+ bdrv_drain_all_begin();
bdrv_co_unref_child(bs, s->data_file);
+ bdrv_drain_all_end();
bdrv_graph_co_rdlock();
s->data_file = NULL;
}
@@ -1976,7 +1978,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
- if (bs->encrypted) {
+ if (s->crypto) {
/* Encryption works on a sector granularity */
bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
}
@@ -2141,9 +2143,9 @@ static void qcow2_join_options(QDict *options, QDict *old_options)
}
static int coroutine_fn GRAPH_RDLOCK
-qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
- int64_t count, int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+qcow2_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t count, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
BDRVQcow2State *s = bs->opaque;
uint64_t host_offset;
@@ -2821,7 +2823,7 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file)
if (close_data_file && has_data_file(bs)) {
GLOBAL_STATE_CODE();
bdrv_graph_rdunlock_main_loop();
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->data_file);
bdrv_graph_wrunlock();
s->data_file = NULL;
diff --git a/block/qed.c b/block/qed.c
index ac24449..4a36fb3 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -833,9 +833,9 @@ fail:
}
static int coroutine_fn GRAPH_RDLOCK
-bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+bdrv_qed_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t pos, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
{
BDRVQEDState *s = bs->opaque;
size_t len = MIN(bytes, SIZE_MAX);
diff --git a/block/quorum.c b/block/quorum.c
index 30747a6..76a4feb 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1037,7 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
close_exit:
/* cleanup on error */
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
continue;
@@ -1057,7 +1057,7 @@ static void quorum_close(BlockDriverState *bs)
BDRVQuorumState *s = bs->opaque;
int i;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_children; i++) {
bdrv_unref_child(bs, s->children[i]);
}
@@ -1226,7 +1226,7 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
* region contains zeroes, and BDRV_BLOCK_DATA otherwise.
*/
static int coroutine_fn GRAPH_RDLOCK
-quorum_co_block_status(BlockDriverState *bs, bool want_zero,
+quorum_co_block_status(BlockDriverState *bs, unsigned int mode,
int64_t offset, int64_t count,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
@@ -1238,7 +1238,7 @@ quorum_co_block_status(BlockDriverState *bs, bool want_zero,
for (i = 0; i < s->num_children; i++) {
int64_t bytes;
ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false,
- want_zero, offset, count,
+ mode, offset, count,
&bytes, NULL, NULL, NULL);
if (ret < 0) {
quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count,
diff --git a/block/raw-format.c b/block/raw-format.c
index e08526e..df16ac1 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -283,8 +283,8 @@ fail:
}
static int coroutine_fn GRAPH_RDLOCK
-raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
+raw_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
BDRVRawState *s = bs->opaque;
diff --git a/block/rbd.c b/block/rbd.c
index af984fb..951cd63 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -254,7 +254,6 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
done:
g_free(buf);
qobject_unref(keypairs);
- return;
}
static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
@@ -1504,9 +1503,9 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
}
static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum,
- int64_t *map,
+ unsigned int mode,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
BDRVRBDState *s = bs->opaque;
diff --git a/block/replication.c b/block/replication.c
index 0020f33..3a431e9 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -176,7 +176,6 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
*nshared = BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_WRITE_UNCHANGED;
- return;
}
static int64_t coroutine_fn GRAPH_RDLOCK
@@ -365,14 +364,15 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
BlockReopenQueue *reopen_queue = NULL;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_graph_rdlock_main_loop();
/*
* s->hidden_disk and s->secondary_disk may not be set yet, as they will
* only be set after the children are writable.
*/
hidden_disk = bs->file->bs->backing;
secondary_disk = hidden_disk->bs->backing;
+ bdrv_graph_rdunlock_main_loop();
if (writable) {
s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs);
@@ -541,7 +541,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
return;
}
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_ref(hidden_disk->bs);
s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
@@ -584,7 +584,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
NULL, &perf,
BLOCKDEV_ON_ERROR_REPORT,
- BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+ BLOCKDEV_ON_ERROR_REPORT,
+ ON_CBW_ERROR_BREAK_GUEST_WRITE,
+ JOB_INTERNAL,
backup_job_completed, bs, NULL, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -650,7 +652,7 @@ static void replication_done(void *opaque, int ret)
if (ret == 0) {
s->stage = BLOCK_REPLICATION_DONE;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, s->secondary_disk);
s->secondary_disk = NULL;
bdrv_unref_child(bs, s->hidden_disk);
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
index 71ac83c..17ed240 100644
--- a/block/snapshot-access.c
+++ b/block/snapshot-access.c
@@ -41,11 +41,11 @@ snapshot_access_co_preadv_part(BlockDriverState *bs,
static int coroutine_fn GRAPH_RDLOCK
snapshot_access_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
+ unsigned int mode, int64_t offset,
int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
- return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset,
+ return bdrv_co_snapshot_block_status(bs->file->bs, mode, offset,
bytes, pnum, map, file);
}
diff --git a/block/snapshot.c b/block/snapshot.c
index 22567f1..bd9d759 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -291,7 +291,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
}
/* .bdrv_open() will re-attach it */
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, fallback);
bdrv_graph_wrunlock();
@@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
/**
* Delete an internal snapshot by @snapshot_id and @name.
- * @bs: block device used in the operation
+ * @bs: block device used in the operation, must be drained
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
@@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
GLOBAL_STATE_CODE();
+ assert(bs->quiesce_counter > 0);
+
if (!drv) {
error_setg(errp, "Device '%s' has no medium",
bdrv_get_device_name(bs));
@@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
return -EINVAL;
}
- /* drain all pending i/o before deleting snapshot */
- bdrv_drained_begin(bs);
-
if (drv->bdrv_snapshot_delete) {
ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
} else if (fallback_bs) {
@@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
ret = -ENOTSUP;
}
- bdrv_drained_end(bs);
return ret;
}
@@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name,
ERRP_GUARD();
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ int ret = 0;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
- if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
- return -1;
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
+
+ ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
+ if (ret < 0) {
+ goto out;
}
iterbdrvs = bdrvs;
while (iterbdrvs) {
BlockDriverState *bs = iterbdrvs->data;
QEMUSnapshotInfo sn1, *snapshot = &sn1;
- int ret = 0;
if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
bdrv_snapshot_find(bs, snapshot, name) >= 0)
@@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name,
if (ret < 0) {
error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
name, bdrv_get_device_or_node_name(bs));
- return -1;
+ goto out;
}
iterbdrvs = iterbdrvs->next;
}
- return 0;
+out:
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
+ return ret;
}
diff --git a/block/stream.c b/block/stream.c
index 999d9e5..c0616b6 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -51,7 +51,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
}
-static int stream_prepare(Job *job)
+static int GRAPH_UNLOCKED stream_prepare(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockDriverState *unfiltered_bs;
@@ -73,18 +73,16 @@ static int stream_prepare(Job *job)
s->cor_filter_bs = NULL;
/*
- * bdrv_set_backing_hd() requires that the unfiltered_bs and the COW child
- * of unfiltered_bs is drained. Drain already here and use
- * bdrv_set_backing_hd_drained() instead because the polling during
- * drained_begin() might change the graph, and if we do this only later, we
- * may end up working with the wrong base node (or it might even have gone
- * away by the time we want to use it).
+ * bdrv_set_backing_hd() requires that all block nodes are drained. Drain
+ * already here, because the polling during drained_begin() might change the
+ * graph, and if we do this only later, we may end up working with the wrong
+ * base node (or it might even have gone away by the time we want to use
+ * it).
*/
- bdrv_drained_begin(unfiltered_bs);
if (unfiltered_bs_cow) {
bdrv_ref(unfiltered_bs_cow);
- bdrv_drained_begin(unfiltered_bs_cow);
}
+ bdrv_drain_all_begin();
bdrv_graph_rdlock_main_loop();
base = bdrv_filter_or_cow_bs(s->above_base);
@@ -106,7 +104,7 @@ static int stream_prepare(Job *job)
}
bdrv_graph_wrlock();
- bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+ bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
bdrv_graph_wrunlock();
/*
@@ -123,11 +121,10 @@ static int stream_prepare(Job *job)
}
out:
+ bdrv_drain_all_end();
if (unfiltered_bs_cow) {
- bdrv_drained_end(unfiltered_bs_cow);
bdrv_unref(unfiltered_bs_cow);
}
- bdrv_drained_end(unfiltered_bs);
return ret;
}
@@ -373,7 +370,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached.
*/
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
if (block_job_add_bdrv(&s->common, "active node", bs, 0,
basic_flags | BLK_PERM_WRITE, errp)) {
bdrv_graph_wrunlock();
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 32553b3..66fdce9 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -908,7 +908,6 @@ unlock:
qemu_mutex_unlock(&tg->lock);
qapi_free_ThrottleLimits(argp);
error_propagate(errp, local_err);
- return;
}
static void throttle_group_get_limits(Object *obj, Visitor *v,
@@ -934,7 +933,8 @@ static bool throttle_group_can_be_deleted(UserCreatable *uc)
return OBJECT(uc)->ref == 1;
}
-static void throttle_group_obj_class_init(ObjectClass *klass, void *class_data)
+static void throttle_group_obj_class_init(ObjectClass *klass,
+ const void *class_data)
{
size_t i = 0;
UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
@@ -967,7 +967,7 @@ static const TypeInfo throttle_group_info = {
.instance_size = sizeof(ThrottleGroup),
.instance_init = throttle_group_obj_init,
.instance_finalize = throttle_group_obj_finalize,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
},
diff --git a/block/vdi.c b/block/vdi.c
index a2da6ec..3ddc62a 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -523,8 +523,8 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
}
static int coroutine_fn GRAPH_RDLOCK
-vdi_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
+vdi_co_block_status(BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file)
{
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
diff --git a/block/vmdk.c b/block/vmdk.c
index 2adec49..7b98deb 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -271,7 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
BDRVVmdkState *s = bs->opaque;
VmdkExtent *e;
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
for (i = 0; i < s->num_extents; i++) {
e = &s->extents[i];
g_free(e->l1_table);
@@ -1229,9 +1229,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
extent_role |= BDRV_CHILD_METADATA;
}
+ bdrv_graph_rdunlock_main_loop();
extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
bs, &child_of_bds, extent_role, false,
&local_err);
+ bdrv_graph_rdlock_main_loop();
g_free(extent_path);
if (!extent_file) {
error_propagate(errp, local_err);
@@ -1247,7 +1249,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
bdrv_graph_rdunlock_main_loop();
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_graph_rdlock_main_loop();
@@ -1266,7 +1268,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
g_free(buf);
if (ret) {
bdrv_graph_rdunlock_main_loop();
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_graph_rdlock_main_loop();
@@ -1277,7 +1279,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
if (ret) {
bdrv_graph_rdunlock_main_loop();
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_graph_rdlock_main_loop();
@@ -1287,7 +1289,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
bdrv_graph_rdunlock_main_loop();
- bdrv_graph_wrlock();
+ bdrv_graph_wrlock_drained();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
bdrv_graph_rdlock_main_loop();
@@ -1352,13 +1354,13 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
- GRAPH_RDLOCK_GUARD_MAINLOOP();
-
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
return ret;
}
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;
@@ -1777,7 +1779,7 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
}
static int coroutine_fn GRAPH_RDLOCK
-vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
+vmdk_co_block_status(BlockDriverState *bs, unsigned int mode,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
diff --git a/block/vpc.c b/block/vpc.c
index 0309e31..801ff57 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -726,7 +726,7 @@ fail:
}
static int coroutine_fn GRAPH_RDLOCK
-vpc_co_block_status(BlockDriverState *bs, bool want_zero,
+vpc_co_block_status(BlockDriverState *bs, unsigned int mode,
int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map,
BlockDriverState **file)
diff --git a/block/vvfat.c b/block/vvfat.c
index 91d69b3..814796d 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3134,9 +3134,9 @@ vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
}
static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs,
- bool want_zero, int64_t offset,
- int64_t bytes, int64_t *n,
- int64_t *map,
+ unsigned int mode,
+ int64_t offset, int64_t bytes,
+ int64_t *n, int64_t *map,
BlockDriverState **file)
{
*n = bytes;