diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-05-22 07:04:50 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-05-22 07:04:50 -0700 |
commit | ad3387396a71416cacc0b394e5e440dd6e9ba19a (patch) | |
tree | f271a1645d501e7ff53560b1e1e8b69298b9262c | |
parent | ffd9492f2a713c614025a87add4517db61d77609 (diff) | |
parent | 95fdd8db61848d31fde1d9b32da7f3f76babfa25 (diff) | |
download | qemu-ad3387396a71416cacc0b394e5e440dd6e9ba19a.zip qemu-ad3387396a71416cacc0b394e5e440dd6e9ba19a.tar.gz qemu-ad3387396a71416cacc0b394e5e440dd6e9ba19a.tar.bz2 |
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches
- qcow2 spec: Rename "zlib" compression to "deflate"
- Honour graph read lock even in the main thread + prerequisite fixes
- aio-posix: do not nest poll handlers (fixes infinite recursion)
- Refactor QMP blockdev transactions
- graph-lock: Disable locking for now
- iotests/245: Check if 'compress' driver is available
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmRnrxURHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9aHyw/9H0xpceVb0kcC5CStOWCcq4PJHzkl/8/m
# c6ABFe0fgEuN2FCiKiCKOt6+V7qaIAw0+YLgPr/LGIsbIBzdxF3Xgd2UyIH6o4dK
# bSaIAaes6ZLTcYGIYEVJtHuwNgvzhjyBlW5qqwTpN0YArKS411eHyQ3wlUkCEVwK
# ZNmDY/MC8jq8r1xfwpPi7CaH6k1I6HhDmyl1PdURW9hmoAKZQZMhEdA5reJrUwZ9
# EhfgbLIaK0kkLLsufJ9YIkd+b/P3mUbH30kekNMOiA0XlnhWm1Djol5pxlnNiflg
# CGh6CAyhJKdXzwV567cSF11NYCsFmiY+c/l0xRIGscujwvO4iD7wFT5xk2geUAKV
# yaox8JA7Le36g7lO2CRadlS24/Ekqnle6q09g2i8s2tZwB4fS286vaZz6QDPmf7W
# VSQp9vuDj6ZcVjMsuo2+LzF3yA2Vqvgd9s032iBAjRDSGLAoOdQZjBJrreypJ0Oi
# pVFwgK+9QNCZBsqVhwVOgElSoK/3Vbl1kqpi30Ikgc0epAn0suM1g2QQPJ2Zt/MJ
# xqMlTv+48OW3vq3ebr8GXqkhvG/u0ku6I1G6ZyCrjOce89osK8QUaovERyi1eOmo
# ouoZ8UJJa6VfEkkmdhq2vF6u/MP4PeZ8MW3pYQy6qEnSOPDKpLnR30Z/s/HZCZcm
# H4QIbfQnzic=
# =edNP
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 19 May 2023 10:17:09 AM PDT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (21 commits)
iotests: Test commit with iothreads and ongoing I/O
nbd/server: Fix drained_poll to wake coroutine in right AioContext
graph-lock: Disable locking for now
tested: add test for nested aio_poll() in poll handlers
aio-posix: do not nest poll handlers
iotests/245: Check if 'compress' driver is available
graph-lock: Honour read locks even in the main thread
blockjob: Adhere to rate limit even when reentered early
test-bdrv-drain: Call bdrv_co_unref() in coroutine context
test-bdrv-drain: Take graph lock more selectively
qemu-img: Take graph lock more selectively
qcow2: Unlock the graph in qcow2_do_open() where necessary
block/export: Fix null pointer dereference in error path
block: Call .bdrv_co_create(_opts) unlocked
docs/interop/qcow2.txt: fix description about "zlib" clusters
blockdev: qmp_transaction: drop extra generic layer
blockdev: use state.bitmap in block-dirty-bitmap-add action
blockdev: transaction: refactor handling transaction properties
blockdev: qmp_transaction: refactor loop to classic for
blockdev: transactions: rename some things
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
36 files changed, 664 insertions, 502 deletions
@@ -533,7 +533,6 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, int ret; GLOBAL_STATE_CODE(); ERRP_GUARD(); - assert_bdrv_graph_readable(); if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", diff --git a/block/commit.c b/block/commit.c index 2b20fd0..aa45beb 100644 --- a/block/commit.c +++ b/block/commit.c @@ -116,7 +116,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); int64_t offset; - uint64_t delay_ns = 0; int ret = 0; int64_t n = 0; /* bytes */ QEMU_AUTO_VFREE void *buf = NULL; @@ -149,7 +148,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - job_sleep_ns(&s->common.job, delay_ns); + block_job_ratelimit_sleep(&s->common); if (job_is_cancelled(&s->common.job)) { break; } @@ -184,9 +183,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) job_progress_update(&s->common.job, n); if (copy) { - delay_ns = block_job_ratelimit_get_delay(&s->common, n); - } else { - delay_ns = 0; + block_job_ratelimit_processed_bytes(&s->common, n); } } diff --git a/block/create.c b/block/create.c index bf67b99..6b23a21 100644 --- a/block/create.c +++ b/block/create.c @@ -43,7 +43,6 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD(); job_progress_set_remaining(&s->common, 1); ret = s->drv->bdrv_co_create(s->opts, errp); diff --git a/block/crypto.c b/block/crypto.c index 30093cf..6ee8d46 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -99,12 +99,10 @@ struct BlockCryptoCreateData { }; -static int block_crypto_create_write_func(QCryptoBlock *block, - size_t offset, - const uint8_t *buf, - size_t buflen, - void *opaque, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +block_crypto_create_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, void *opaque, + Error **errp) { struct BlockCryptoCreateData *data = opaque; ssize_t ret; @@ -117,10 +115,9 @@ static int block_crypto_create_write_func(QCryptoBlock *block, return 0; } -static int block_crypto_create_init_func(QCryptoBlock *block, - size_t headerlen, - void *opaque, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen, + void *opaque, Error **errp) { struct BlockCryptoCreateData *data = opaque; Error *local_error = NULL; @@ -314,7 +311,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, QCryptoBlockCreateOptions *opts, PreallocMode prealloc, Error **errp) @@ -627,7 +624,7 @@ static int block_crypto_open_luks(BlockDriverState *bs, bs, options, flags, errp); } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsLUKS *luks_opts; @@ -665,7 +662,7 @@ fail: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -727,7 +724,9 @@ fail: * beforehand, it has been truncated and corrupted in the process. */ if (ret) { + bdrv_graph_co_rdlock(); bdrv_co_delete_file_noerr(bs); + bdrv_graph_co_rdunlock(); } bdrv_co_unref(bs); diff --git a/block/export/export.c b/block/export/export.c index 62c7c22..a5c8f42 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -192,8 +192,10 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) return exp; fail: - blk_set_dev_ops(exp->blk, NULL, NULL); - blk_unref(blk); + if (blk) { + blk_set_dev_ops(blk, NULL, NULL); + blk_unref(blk); + } aio_context_release(ctx); if (exp) { g_free(exp->id); diff --git a/block/graph-lock.c b/block/graph-lock.c index 377884c..a92c6ae 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; /* Protects the list of aiocontext and orphaned_reader_count */ static QemuMutex aio_context_list_lock; +#if 0 /* Written and read with atomic operations. */ static int has_writer; +#endif /* * A reader coroutine could move from an AioContext to another. @@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) g_free(ctx->bdrv_graph); } +#if 0 static uint32_t reader_count(void) { BdrvGraphRWlock *brdv_graph; @@ -105,10 +108,17 @@ static uint32_t reader_count(void) assert((int32_t)rd >= 0); return rd; } +#endif void bdrv_graph_wrlock(void) { GLOBAL_STATE_CODE(); + /* + * TODO Some callers hold an AioContext lock when this is called, which + * causes deadlocks. Reenable once the AioContext locking is cleaned up (or + * AioContext locks are gone). + */ +#if 0 assert(!qatomic_read(&has_writer)); /* Make sure that constantly arriving new I/O doesn't cause starvation */ @@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) } while (reader_count() >= 1); bdrv_drain_all_end(); +#endif } void bdrv_graph_wrunlock(void) { GLOBAL_STATE_CODE(); +#if 0 QEMU_LOCK_GUARD(&aio_context_list_lock); assert(qatomic_read(&has_writer)); @@ -155,18 +167,16 @@ void bdrv_graph_wrunlock(void) /* Wake up all coroutine that are waiting to read the graph */ qemu_co_enter_all(&reader_queue, &aio_context_list_lock); +#endif } void coroutine_fn bdrv_graph_co_rdlock(void) { + /* TODO Reenable when wrlock is reenabled */ +#if 0 BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - /* Do not lock if in main thread */ - if (qemu_in_main_thread()) { - return; - } - for (;;) { qatomic_set(&bdrv_graph->reader_count, bdrv_graph->reader_count + 1); @@ -223,18 +233,15 @@ void coroutine_fn bdrv_graph_co_rdlock(void) qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); } } +#endif } void coroutine_fn bdrv_graph_co_rdunlock(void) { +#if 0 BdrvGraphRWlock *bdrv_graph; bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - /* Do not lock if in main thread */ - if (qemu_in_main_thread()) { - return; - } - qatomic_store_release(&bdrv_graph->reader_count, bdrv_graph->reader_count - 1); /* make sure writer sees reader_count before we check has_writer */ @@ -249,6 +256,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) if (qatomic_read(&has_writer)) { aio_wait_kick(); } +#endif } void bdrv_graph_rdlock_main_loop(void) @@ -266,13 +274,19 @@ void bdrv_graph_rdunlock_main_loop(void) void assert_bdrv_graph_readable(void) { /* reader_count() is slow due to aio_context_list_lock lock contention */ + /* TODO Reenable when wrlock is reenabled */ +#if 0 #ifdef CONFIG_DEBUG_GRAPH_LOCK assert(qemu_in_main_thread() || reader_count()); #endif +#endif } void assert_bdrv_graph_writable(void) { assert(qemu_in_main_thread()); + /* TODO Reenable when wrlock is reenabled */ +#if 0 assert(qatomic_read(&has_writer)); +#endif } diff --git a/block/mirror.c b/block/mirror.c index 717442c..b7d92d1 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -471,12 +471,11 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, return bytes_handled; } -static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) +static void coroutine_fn mirror_iteration(MirrorBlockJob *s) { BlockDriverState *source = s->mirror_top_bs->backing->bs; MirrorOp *pseudo_op; int64_t offset; - uint64_t delay_ns = 0, ret = 0; /* At least the first dirty chunk is mirrored in one iteration. */ int nb_chunks = 1; bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); @@ -608,16 +607,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) assert(io_bytes); offset += io_bytes; nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity); - delay_ns = block_job_ratelimit_get_delay(&s->common, io_bytes_acct); + block_job_ratelimit_processed_bytes(&s->common, io_bytes_acct); } - ret = delay_ns; fail: QTAILQ_REMOVE(&s->ops_in_flight, pseudo_op, next); qemu_co_queue_restart_all(&pseudo_op->waiting_requests); g_free(pseudo_op); - - return ret; } static void mirror_free_init(MirrorBlockJob *s) @@ -1011,7 +1007,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) assert(!s->dbi); s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap); for (;;) { - uint64_t delay_ns = 0; int64_t cnt, delta; bool should_complete; @@ -1051,7 +1046,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) mirror_wait_for_free_in_flight_slot(s); continue; } else if (cnt != 0) { - delay_ns = mirror_iteration(s); + mirror_iteration(s); } } @@ -1114,12 +1109,14 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) } if (job_is_ready(&s->common.job) && !should_complete) { - delay_ns = (s->in_flight == 0 && - cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); + if (s->in_flight == 0 && cnt == 0) { + trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), + BLOCK_JOB_SLICE_TIME); + job_sleep_ns(&s->common.job, BLOCK_JOB_SLICE_TIME); + } + } else { + block_job_ratelimit_sleep(&s->common); } - trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), - delay_ns); - job_sleep_ns(&s->common.job, delay_ns); s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } diff --git a/block/parallels.c b/block/parallels.c index b49c359..d8a3f13 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -522,8 +522,8 @@ out: } -static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +parallels_co_create(BlockdevCreateOptions* opts, Error **errp) { BlockdevCreateOptionsParallels *parallels_opts; BlockDriverState *bs; @@ -622,7 +622,7 @@ exit: goto out; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED parallels_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/qcow.c b/block/qcow.c index a0c701f..3644bbf 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -800,8 +800,8 @@ static void qcow_close(BlockDriverState *bs) error_free(s->migration_blocker); } -static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +qcow_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsQcow *qcow_opts; int header_size, backing_filename_len, l1_size, shift, i; @@ -921,7 +921,7 @@ exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED qcow_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/qcow2.c b/block/qcow2.c index 5bde3b8..b00b4e7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -118,8 +118,9 @@ static int qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, } -static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, - void *opaque, Error **errp) +static int coroutine_fn GRAPH_RDLOCK +qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, void *opaque, + Error **errp) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; @@ -144,9 +145,7 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, */ clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); - ret = bdrv_pwrite_zeroes(bs->file, - ret, - clusterlen, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, ret, clusterlen, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not zero fill encryption header"); return -1; @@ -156,9 +155,11 @@ static int qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, } -static int qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, - const uint8_t *buf, size_t buflen, - void *opaque, Error **errp) +/* The graph lock must be held when called in coroutine context */ +static int coroutine_mixed_fn +qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; @@ -1618,9 +1619,11 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (open_data_file) { /* Open external data file */ + bdrv_graph_co_rdunlock(); s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, &child_of_bds, BDRV_CHILD_DATA, true, errp); + bdrv_graph_co_rdlock(); if (*errp) { ret = -EINVAL; goto fail; @@ -1628,10 +1631,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { if (!s->data_file && s->image_data_file) { + bdrv_graph_co_rdunlock(); s->data_file = bdrv_co_open_child(s->image_data_file, options, "data-file", bs, &child_of_bds, BDRV_CHILD_DATA, false, errp); + bdrv_graph_co_rdlock(); if (!s->data_file) { ret = -EINVAL; goto fail; @@ -1856,7 +1861,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, fail: g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { + bdrv_graph_co_rdunlock(); bdrv_unref_child(bs, s->data_file); + bdrv_graph_co_rdlock(); s->data_file = NULL; } g_free(s->unknown_header_fields); @@ -3137,9 +3144,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs, return qcow2_update_header(bs); } -static int qcow2_set_up_encryption(BlockDriverState *bs, - QCryptoBlockCreateOptions *cryptoopts, - Error **errp) +static int coroutine_fn GRAPH_RDLOCK +qcow2_set_up_encryption(BlockDriverState *bs, + QCryptoBlockCreateOptions *cryptoopts, + Error **errp) { BDRVQcow2State *s = bs->opaque; QCryptoBlock *crypto = NULL; @@ -3426,7 +3434,7 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, return refcount_bits; } -static int coroutine_fn +static int coroutine_fn GRAPH_UNLOCKED qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsQcow2 *qcow2_opts; @@ -3724,8 +3732,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) goto out; } + bdrv_graph_co_rdlock(); ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { + bdrv_graph_co_rdunlock(); error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); goto out; @@ -3743,6 +3753,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Create a full header (including things like feature table) */ ret = qcow2_update_header(blk_bs(blk)); + bdrv_graph_co_rdunlock(); + if (ret < 0) { error_setg_errno(errp, -ret, "Could not update qcow2 header"); goto out; @@ -3776,7 +3788,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Want encryption? There you go. */ if (qcow2_opts->encrypt) { + bdrv_graph_co_rdlock(); ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); + bdrv_graph_co_rdunlock(); + if (ret < 0) { goto out; } @@ -3813,7 +3828,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -3933,8 +3948,10 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, ret = qcow2_co_create(create_options, errp); finish: if (ret < 0) { + bdrv_graph_co_rdlock(); bdrv_co_delete_file_noerr(bs); bdrv_co_delete_file_noerr(data_bs); + bdrv_graph_co_rdunlock(); } else { ret = 0; } diff --git a/block/qed.c b/block/qed.c index be9ff0f..9a0350b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -630,8 +630,8 @@ static void bdrv_qed_close(BlockDriverState *bs) qemu_vfree(s->l1_table); } -static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +bdrv_qed_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsQed *qed_opts; BlockBackend *blk = NULL; @@ -751,7 +751,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/raw-format.c b/block/raw-format.c index 3a39462..918fe4f 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -457,7 +457,7 @@ static int raw_has_zero_init(BlockDriverState *bs) return bdrv_has_zero_init(bs->file->bs); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED raw_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/stream.c b/block/stream.c index 7f9e1ec..e522bbd 100644 --- a/block/stream.c +++ b/block/stream.c @@ -131,7 +131,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp) BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); int64_t len; int64_t offset = 0; - uint64_t delay_ns = 0; int error = 0; int64_t n = 0; /* bytes */ @@ -155,7 +154,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ - job_sleep_ns(&s->common.job, delay_ns); + block_job_ratelimit_sleep(&s->common); if (job_is_cancelled(&s->common.job)) { break; } @@ -204,9 +203,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) /* Publish progress */ job_progress_update(&s->common.job, n); if (copy) { - delay_ns = block_job_ratelimit_get_delay(&s->common, n); - } else { - delay_ns = 0; + block_job_ratelimit_processed_bytes(&s->common, n); } } diff --git a/block/vdi.c b/block/vdi.c index 08331d2..6c35309 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -734,8 +734,9 @@ nonallocating_write: return ret; } -static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - size_t block_size, Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vdi_co_do_create(BlockdevCreateOptions *create_options, size_t block_size, + Error **errp) { BlockdevCreateOptionsVdi *vdi_opts; int ret = 0; @@ -892,13 +893,13 @@ exit: return ret; } -static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vdi_co_create(BlockdevCreateOptions *create_options, Error **errp) { return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vdi_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/vhdx.c b/block/vhdx.c index b20b1ed..89913cb 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1506,7 +1506,7 @@ exit: * There are 2 headers, and the highest sequence number will represent * the active header */ -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, uint32_t log_size) { @@ -1515,6 +1515,8 @@ vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size, int ret = 0; VHDXHeader *hdr = NULL; + GRAPH_RDLOCK_GUARD(); + hdr = g_new0(VHDXHeader, 1); hdr->signature = VHDX_HEADER_SIGNATURE; @@ -1898,7 +1900,7 @@ exit: * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. * 1MB */ -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsVhdx *vhdx_opts; @@ -2060,7 +2062,7 @@ delete_and_exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vhdx_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { diff --git a/block/vmdk.c b/block/vmdk.c index fddbd1c..e3e8660 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2165,10 +2165,9 @@ vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, return ret; } -static int vmdk_init_extent(BlockBackend *blk, - int64_t filesize, bool flat, - bool compress, bool zeroed_grain, - Error **errp) +static int GRAPH_UNLOCKED +vmdk_init_extent(BlockBackend *blk, int64_t filesize, bool flat, bool compress, + bool zeroed_grain, Error **errp) { int ret, i; VMDK4Header header; @@ -2277,7 +2276,7 @@ exit: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_create_extent(const char *filename, int64_t filesize, bool flat, bool compress, bool zeroed_grain, BlockBackend **pbb, QemuOpts *opts, Error **errp) @@ -2358,7 +2357,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, * non-split format. * idx >= 1: get the n-th extent if in a split subformat */ -typedef BlockBackend * coroutine_fn /* GRAPH_RDLOCK */ +typedef BlockBackend * coroutine_fn GRAPH_UNLOCKED_PTR (*vmdk_create_extent_fn)(int64_t size, int idx, bool flat, bool split, bool compress, bool zeroed_grain, void *opaque, Error **errp); @@ -2374,7 +2373,7 @@ static void vmdk_desc_add_extent(GString *desc, g_free(basename); } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_do_create(int64_t size, BlockdevVmdkSubformat subformat, BlockdevVmdkAdapterType adapter_type, @@ -2605,7 +2604,7 @@ typedef struct { QemuOpts *opts; } VMDKCreateOptsData; -static BlockBackend * coroutine_fn GRAPH_RDLOCK +static BlockBackend * coroutine_fn GRAPH_UNLOCKED vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, bool compress, bool zeroed_grain, void *opaque, Error **errp) @@ -2647,7 +2646,7 @@ exit: return blk; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -2756,11 +2755,9 @@ exit: return ret; } -static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, - bool flat, bool split, - bool compress, - bool zeroed_grain, - void *opaque, Error **errp) +static BlockBackend * coroutine_fn GRAPH_UNLOCKED +vmdk_co_create_cb(int64_t size, int idx, bool flat, bool split, bool compress, + bool zeroed_grain, void *opaque, Error **errp) { int ret; BlockDriverState *bs; @@ -2809,7 +2806,7 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, return blk; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vmdk_co_create(BlockdevCreateOptions *create_options, Error **errp) { BlockdevCreateOptionsVmdk *opts; diff --git a/block/vpc.c b/block/vpc.c index 07ddda5..7ee7c7b 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -967,8 +967,8 @@ static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, return 0; } -static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, - Error **errp) +static int coroutine_fn GRAPH_UNLOCKED +vpc_co_create(BlockdevCreateOptions *opts, Error **errp) { BlockdevCreateOptionsVpc *vpc_opts; BlockBackend *blk = NULL; @@ -1087,7 +1087,7 @@ out: return ret; } -static int coroutine_fn GRAPH_RDLOCK +static int coroutine_fn GRAPH_UNLOCKED vpc_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp) { @@ -1188,79 +1188,22 @@ out_aio_context: return NULL; } -/* New and old BlockDriverState structs for atomic group operations */ - -typedef struct BlkActionState BlkActionState; - -/** - * BlkActionOps: - * Table of operations that define an Action. - * - * @instance_size: Size of state struct, in bytes. - * @prepare: Prepare the work, must NOT be NULL. - * @commit: Commit the changes, can be NULL. - * @abort: Abort the changes on fail, can be NULL. - * @clean: Clean up resources after all transaction actions have called - * commit() or abort(). Can be NULL. - * - * Only prepare() may fail. In a single transaction, only one of commit() or - * abort() will be called. clean() will always be called if it is present. - * - * Always run under BQL. - */ -typedef struct BlkActionOps { - size_t instance_size; - void (*prepare)(BlkActionState *common, Error **errp); - void (*commit)(BlkActionState *common); - void (*abort)(BlkActionState *common); - void (*clean)(BlkActionState *common); -} BlkActionOps; - -/** - * BlkActionState: - * Describes one Action's state within a Transaction. - * - * @action: QAPI-defined enum identifying which Action to perform. - * @ops: Table of ActionOps this Action can perform. - * @block_job_txn: Transaction which this action belongs to. - * @entry: List membership for all Actions in this Transaction. - * - * This structure must be arranged as first member in a subclassed type, - * assuming that the compiler will also arrange it to the same offsets as the - * base class. - */ -struct BlkActionState { - TransactionAction *action; - const BlkActionOps *ops; - JobTxn *block_job_txn; - TransactionProperties *txn_props; - QTAILQ_ENTRY(BlkActionState) entry; -}; - /* internal snapshot private data */ typedef struct InternalSnapshotState { - BlkActionState common; BlockDriverState *bs; QEMUSnapshotInfo sn; bool created; } InternalSnapshotState; +static void internal_snapshot_abort(void *opaque); +static void internal_snapshot_clean(void *opaque); +TransactionActionDrv internal_snapshot_drv = { + .abort = internal_snapshot_abort, + .clean = internal_snapshot_clean, +}; -static int action_check_completion_mode(BlkActionState *s, Error **errp) -{ - if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { - error_setg(errp, - "Action '%s' does not support Transaction property " - "completion-mode = %s", - TransactionActionKind_str(s->action->type), - ActionCompletionMode_str(s->txn_props->completion_mode)); - return -1; - } - return 0; -} - -static void internal_snapshot_prepare(BlkActionState *common, - Error **errp) +static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + Transaction *tran, Error **errp) { Error *local_err = NULL; const char *device; @@ -1269,25 +1212,15 @@ static void internal_snapshot_prepare(BlkActionState *common, QEMUSnapshotInfo old_sn, *sn; bool ret; int64_t rt; - BlockdevSnapshotInternal *internal; - InternalSnapshotState *state; + InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); AioContext *aio_context; int ret1; - g_assert(common->action->type == - TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC); - internal = common->action->u.blockdev_snapshot_internal_sync.data; - state = DO_UPCAST(InternalSnapshotState, common, common); + tran_add(tran, &internal_snapshot_drv, state); - /* 1. parse input */ device = internal->device; name = internal->name; - /* 2. check for validation */ - if (action_check_completion_mode(common, errp) < 0) { - return; - } - bs = qmp_get_root_bs(device, errp); if (!bs) { return; @@ -1363,10 +1296,9 @@ out: aio_context_release(aio_context); } -static void internal_snapshot_abort(BlkActionState *common) +static void internal_snapshot_abort(void *opaque) { - InternalSnapshotState *state = - DO_UPCAST(InternalSnapshotState, common, common); + InternalSnapshotState *state = opaque; BlockDriverState *bs = state->bs; QEMUSnapshotInfo *sn = &state->sn; AioContext *aio_context; @@ -1390,10 +1322,9 @@ static void internal_snapshot_abort(BlkActionState *common) aio_context_release(aio_context); } -static void internal_snapshot_clean(BlkActionState *common) +static void internal_snapshot_clean(void *opaque) { - InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState, - common, common); + g_autofree InternalSnapshotState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1410,14 +1341,22 @@ static void internal_snapshot_clean(BlkActionState *common) /* external snapshot private data */ typedef struct ExternalSnapshotState { - BlkActionState common; BlockDriverState *old_bs; BlockDriverState *new_bs; bool overlay_appended; } ExternalSnapshotState; -static void external_snapshot_prepare(BlkActionState *common, - Error **errp) +static void external_snapshot_commit(void *opaque); +static void external_snapshot_abort(void *opaque); +static void external_snapshot_clean(void *opaque); +TransactionActionDrv external_snapshot_drv = { + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + .clean = external_snapshot_clean, +}; + +static void external_snapshot_action(TransactionAction *action, + Transaction *tran, Error **errp) { int ret; int flags = 0; @@ -1430,12 +1369,12 @@ static void external_snapshot_prepare(BlkActionState *common, const char *snapshot_ref; /* File name of the new image (for 'blockdev-snapshot-sync') */ const char *new_image_file; - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; + ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); AioContext *aio_context; uint64_t perm, shared; + tran_add(tran, &external_snapshot_drv, state); + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar * purpose but a different set of parameters */ switch (action->type) { @@ -1462,9 +1401,6 @@ static void external_snapshot_prepare(BlkActionState *common, } /* start processing */ - if (action_check_completion_mode(common, errp) < 0) { - return; - } state->old_bs = bdrv_lookup_bs(device, node_name, errp); if (!state->old_bs) { @@ -1588,10 +1524,9 @@ out: aio_context_release(aio_context); } -static void external_snapshot_commit(BlkActionState *common) +static void external_snapshot_commit(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + ExternalSnapshotState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->old_bs); @@ -1607,10 +1542,9 @@ static void external_snapshot_commit(BlkActionState *common) aio_context_release(aio_context); } -static void external_snapshot_abort(BlkActionState *common) +static void external_snapshot_abort(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + ExternalSnapshotState *state = opaque; if (state->new_bs) { if (state->overlay_appended) { AioContext *aio_context; @@ -1650,10 +1584,9 @@ static void external_snapshot_abort(BlkActionState *common) } } -static void external_snapshot_clean(BlkActionState *common) +static void external_snapshot_clean(void *opaque) { - ExternalSnapshotState *state = - DO_UPCAST(ExternalSnapshotState, common, common); + g_autofree ExternalSnapshotState *state = opaque; AioContext *aio_context; if (!state->old_bs) { @@ -1670,7 +1603,6 @@ static void external_snapshot_clean(BlkActionState *common) } typedef struct DriveBackupState { - BlkActionState common; BlockDriverState *bs; BlockJob *job; } DriveBackupState; @@ -1681,10 +1613,20 @@ static BlockJob *do_backup_common(BackupCommon *backup, AioContext *aio_context, JobTxn *txn, Error **errp); -static void drive_backup_prepare(BlkActionState *common, Error **errp) +static void drive_backup_commit(void *opaque); +static void drive_backup_abort(void *opaque); +static void drive_backup_clean(void *opaque); +TransactionActionDrv drive_backup_drv = { + .commit = drive_backup_commit, + .abort = drive_backup_abort, + .clean = drive_backup_clean, +}; + +static void drive_backup_action(DriveBackup *backup, + JobTxn *block_job_txn, + Transaction *tran, Error **errp) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); - DriveBackup *backup; + DriveBackupState *state = g_new0(DriveBackupState, 1); BlockDriverState *bs; BlockDriverState *target_bs; BlockDriverState *source = NULL; @@ -1698,8 +1640,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) bool set_backing_hd = false; int ret; - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; + tran_add(tran, &drive_backup_drv, state); if (!backup->has_mode) { backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; @@ -1820,7 +1761,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) state->job = do_backup_common(qapi_DriveBackup_base(backup), bs, target_bs, aio_context, - common->block_job_txn, errp); + block_job_txn, errp); unref: bdrv_unref(target_bs); @@ -1828,9 +1769,9 @@ out: aio_context_release(aio_context); } -static void drive_backup_commit(BlkActionState *common) +static void drive_backup_commit(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackupState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->bs); @@ -1842,18 +1783,18 @@ static void drive_backup_commit(BlkActionState *common) aio_context_release(aio_context); } -static void drive_backup_abort(BlkActionState *common) +static void drive_backup_abort(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackupState *state = opaque; if (state->job) { job_cancel_sync(&state->job->job, true); } } -static void drive_backup_clean(BlkActionState *common) +static void drive_backup_clean(void *opaque) { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + g_autofree DriveBackupState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1869,23 +1810,31 @@ static void drive_backup_clean(BlkActionState *common) } typedef struct BlockdevBackupState { - BlkActionState common; BlockDriverState *bs; BlockJob *job; } BlockdevBackupState; -static void blockdev_backup_prepare(BlkActionState *common, Error **errp) +static void blockdev_backup_commit(void *opaque); +static void blockdev_backup_abort(void *opaque); +static void blockdev_backup_clean(void *opaque); +TransactionActionDrv blockdev_backup_drv = { + .commit = blockdev_backup_commit, + .abort = blockdev_backup_abort, + .clean = blockdev_backup_clean, +}; + +static void blockdev_backup_action(BlockdevBackup *backup, + JobTxn *block_job_txn, + Transaction *tran, Error **errp) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; + BlockdevBackupState *state = g_new0(BlockdevBackupState, 1); BlockDriverState *bs; BlockDriverState *target_bs; AioContext *aio_context; AioContext *old_context; int ret; - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; + tran_add(tran, &blockdev_backup_drv, state); bs = bdrv_lookup_bs(backup->device, backup->device, errp); if (!bs) { @@ -1917,14 +1866,14 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) state->job = do_backup_common(qapi_BlockdevBackup_base(backup), bs, target_bs, aio_context, - common->block_job_txn, errp); + block_job_txn, errp); aio_context_release(aio_context); } -static void blockdev_backup_commit(BlkActionState *common) +static void blockdev_backup_commit(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackupState *state = opaque; AioContext *aio_context; aio_context = bdrv_get_aio_context(state->bs); @@ -1936,18 +1885,18 @@ static void blockdev_backup_commit(BlkActionState *common) aio_context_release(aio_context); } -static void blockdev_backup_abort(BlkActionState *common) +static void blockdev_backup_abort(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackupState *state = opaque; if (state->job) { job_cancel_sync(&state->job->job, true); } } -static void blockdev_backup_clean(BlkActionState *common) +static void blockdev_backup_clean(void *opaque) { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + g_autofree BlockdevBackupState *state = opaque; AioContext *aio_context; if (!state->bs) { @@ -1963,27 +1912,26 @@ static void blockdev_backup_clean(BlkActionState *common) } typedef struct BlockDirtyBitmapState { - BlkActionState common; BdrvDirtyBitmap *bitmap; BlockDriverState *bs; HBitmap *backup; - bool prepared; bool was_enabled; } BlockDirtyBitmapState; -static void block_dirty_bitmap_add_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_add_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_add_drv = { + .abort = block_dirty_bitmap_add_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_add_action(BlockDirtyBitmapAdd *action, + Transaction *tran, Error **errp) { Error *local_err = NULL; - BlockDirtyBitmapAdd *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - if (action_check_completion_mode(common, errp) < 0) { - return; - } + tran_add(tran, &block_dirty_bitmap_add_drv, state); - action = common->action->u.block_dirty_bitmap_add.data; /* AIO context taken and released within qmp_block_dirty_bitmap_add */ qmp_block_dirty_bitmap_add(action->node, action->name, action->has_granularity, action->granularity, @@ -1992,39 +1940,37 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common, &local_err); if (!local_err) { - state->prepared = true; + state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, + NULL, &error_abort); } else { error_propagate(errp, local_err); } } -static void block_dirty_bitmap_add_abort(BlkActionState *common) +static void block_dirty_bitmap_add_abort(void *opaque) { - BlockDirtyBitmapAdd *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; - action = common->action->u.block_dirty_bitmap_add.data; - /* Should not be able to fail: IF the bitmap was added via .prepare(), - * then the node reference and bitmap name must have been valid. - */ - if (state->prepared) { - qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort); + if (state->bitmap) { + bdrv_release_dirty_bitmap(state->bitmap); } } -static void block_dirty_bitmap_clear_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_restore(void *opaque); +static void block_dirty_bitmap_free_backup(void *opaque); +TransactionActionDrv block_dirty_bitmap_clear_drv = { + .abort = block_dirty_bitmap_restore, + .commit = block_dirty_bitmap_free_backup, + .clean = g_free, +}; + +static void block_dirty_bitmap_clear_action(BlockDirtyBitmap *action, + Transaction *tran, Error **errp) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); - BlockDirtyBitmap *action; + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - if (action_check_completion_mode(common, errp) < 0) { - return; - } + tran_add(tran, &block_dirty_bitmap_clear_drv, state); - action = common->action->u.block_dirty_bitmap_clear.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, &state->bs, @@ -2040,36 +1986,35 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common, bdrv_clear_dirty_bitmap(state->bitmap, &state->backup); } -static void block_dirty_bitmap_restore(BlkActionState *common) +static void block_dirty_bitmap_restore(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->backup) { bdrv_restore_dirty_bitmap(state->bitmap, state->backup); } } -static void block_dirty_bitmap_free_backup(BlkActionState *common) +static void block_dirty_bitmap_free_backup(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; hbitmap_free(state->backup); } -static void block_dirty_bitmap_enable_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_enable_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_enable_drv = { + .abort = block_dirty_bitmap_enable_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_enable_action(BlockDirtyBitmap *action, + Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - if (action_check_completion_mode(common, errp) < 0) { - return; - } + tran_add(tran, &block_dirty_bitmap_enable_drv, state); - action = common->action->u.block_dirty_bitmap_enable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, NULL, @@ -2086,28 +2031,28 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common, bdrv_enable_dirty_bitmap(state->bitmap); } -static void block_dirty_bitmap_enable_abort(BlkActionState *common) +static void block_dirty_bitmap_enable_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (!state->was_enabled) { bdrv_disable_dirty_bitmap(state->bitmap); } } -static void block_dirty_bitmap_disable_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_disable_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_disable_drv = { + .abort = block_dirty_bitmap_disable_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_disable_action(BlockDirtyBitmap *action, + Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - if (action_check_completion_mode(common, errp) < 0) { - return; - } + tran_add(tran, &block_dirty_bitmap_disable_drv, state); - action = common->action->u.block_dirty_bitmap_disable.data; state->bitmap = block_dirty_bitmap_lookup(action->node, action->name, NULL, @@ -2124,46 +2069,48 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common, bdrv_disable_dirty_bitmap(state->bitmap); } -static void block_dirty_bitmap_disable_abort(BlkActionState *common) +static void block_dirty_bitmap_disable_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->was_enabled) { bdrv_enable_dirty_bitmap(state->bitmap); } } -static void block_dirty_bitmap_merge_prepare(BlkActionState *common, - Error **errp) -{ - BlockDirtyBitmapMerge *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); +TransactionActionDrv block_dirty_bitmap_merge_drv = { + .commit = block_dirty_bitmap_free_backup, + .abort = block_dirty_bitmap_restore, + .clean = g_free, +}; - if (action_check_completion_mode(common, errp) < 0) { - return; - } +static void block_dirty_bitmap_merge_action(BlockDirtyBitmapMerge *action, + Transaction *tran, Error **errp) +{ + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - action = common->action->u.block_dirty_bitmap_merge.data; + tran_add(tran, &block_dirty_bitmap_merge_drv, state); state->bitmap = block_dirty_bitmap_merge(action->node, action->target, action->bitmaps, &state->backup, errp); } -static void block_dirty_bitmap_remove_prepare(BlkActionState *common, - Error **errp) +static void block_dirty_bitmap_remove_commit(void *opaque); +static void block_dirty_bitmap_remove_abort(void *opaque); +TransactionActionDrv block_dirty_bitmap_remove_drv = { + .commit = block_dirty_bitmap_remove_commit, + .abort = block_dirty_bitmap_remove_abort, + .clean = g_free, +}; + +static void block_dirty_bitmap_remove_action(BlockDirtyBitmap *action, + Transaction *tran, Error **errp) { - BlockDirtyBitmap *action; - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = g_new0(BlockDirtyBitmapState, 1); - if (action_check_completion_mode(common, errp) < 0) { - return; - } + tran_add(tran, &block_dirty_bitmap_remove_drv, state); - action = common->action->u.block_dirty_bitmap_remove.data; state->bitmap = block_dirty_bitmap_remove(action->node, action->name, false, &state->bs, errp); @@ -2173,10 +2120,9 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common, } } -static void block_dirty_bitmap_remove_abort(BlkActionState *common) +static void block_dirty_bitmap_remove_abort(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; if (state->bitmap) { bdrv_dirty_bitmap_skip_store(state->bitmap, false); @@ -2184,210 +2130,156 @@ static void block_dirty_bitmap_remove_abort(BlkActionState *common) } } -static void block_dirty_bitmap_remove_commit(BlkActionState *common) +static void block_dirty_bitmap_remove_commit(void *opaque) { - BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, - common, common); + BlockDirtyBitmapState *state = opaque; bdrv_dirty_bitmap_set_busy(state->bitmap, false); bdrv_release_dirty_bitmap(state->bitmap); } -static void abort_prepare(BlkActionState *common, Error **errp) +static void abort_commit(void *opaque); +TransactionActionDrv abort_drv = { + .commit = abort_commit, +}; + +static void abort_action(Transaction *tran, Error **errp) { + tran_add(tran, &abort_drv, NULL); error_setg(errp, "Transaction aborted using Abort action"); } -static void abort_commit(BlkActionState *common) +static void abort_commit(void *opaque) { g_assert_not_reached(); /* this action never succeeds */ } -static const BlkActionOps actions[] = { - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = { - .instance_size = sizeof(ExternalSnapshotState), - .prepare = external_snapshot_prepare, - .commit = external_snapshot_commit, - .abort = external_snapshot_abort, - .clean = external_snapshot_clean, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { - .instance_size = sizeof(ExternalSnapshotState), - .prepare = external_snapshot_prepare, - .commit = external_snapshot_commit, - .abort = external_snapshot_abort, - .clean = external_snapshot_clean, - }, - [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { - .instance_size = sizeof(DriveBackupState), - .prepare = drive_backup_prepare, - .commit = drive_backup_commit, - .abort = drive_backup_abort, - .clean = drive_backup_clean, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { - .instance_size = sizeof(BlockdevBackupState), - .prepare = blockdev_backup_prepare, - .commit = blockdev_backup_commit, - .abort = blockdev_backup_abort, - .clean = blockdev_backup_clean, - }, - [TRANSACTION_ACTION_KIND_ABORT] = { - .instance_size = sizeof(BlkActionState), - .prepare = abort_prepare, - .commit = abort_commit, - }, - [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = { - .instance_size = sizeof(InternalSnapshotState), - .prepare = internal_snapshot_prepare, - .abort = internal_snapshot_abort, - .clean = internal_snapshot_clean, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_add_prepare, - .abort = block_dirty_bitmap_add_abort, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_clear_prepare, - .commit = block_dirty_bitmap_free_backup, - .abort = block_dirty_bitmap_restore, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_enable_prepare, - .abort = block_dirty_bitmap_enable_abort, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_disable_prepare, - .abort = block_dirty_bitmap_disable_abort, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_merge_prepare, - .commit = block_dirty_bitmap_free_backup, - .abort = block_dirty_bitmap_restore, - }, - [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_remove_prepare, - .commit = block_dirty_bitmap_remove_commit, - .abort = block_dirty_bitmap_remove_abort, - }, - /* Where are transactions for MIRROR, COMMIT and STREAM? +static void transaction_action(TransactionAction *act, JobTxn *block_job_txn, + Transaction *tran, Error **errp) +{ + switch (act->type) { + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT: + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: + external_snapshot_action(act, tran, errp); + return; + case TRANSACTION_ACTION_KIND_DRIVE_BACKUP: + drive_backup_action(act->u.drive_backup.data, + block_job_txn, tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP: + blockdev_backup_action(act->u.blockdev_backup.data, + block_job_txn, tran, errp); + return; + case TRANSACTION_ACTION_KIND_ABORT: + abort_action(tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC: + internal_snapshot_action(act->u.blockdev_snapshot_internal_sync.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD: + block_dirty_bitmap_add_action(act->u.block_dirty_bitmap_add.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR: + block_dirty_bitmap_clear_action(act->u.block_dirty_bitmap_clear.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE: + block_dirty_bitmap_enable_action(act->u.block_dirty_bitmap_enable.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE: + block_dirty_bitmap_disable_action( + act->u.block_dirty_bitmap_disable.data, tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE: + block_dirty_bitmap_merge_action(act->u.block_dirty_bitmap_merge.data, + tran, errp); + return; + case TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE: + block_dirty_bitmap_remove_action(act->u.block_dirty_bitmap_remove.data, + tran, errp); + return; + /* + * Where are transactions for MIRROR, COMMIT and STREAM? * Although these blockjobs use transaction callbacks like the backup job, * these jobs do not necessarily adhere to transaction semantics. * These jobs may not fully undo all of their actions on abort, nor do they * necessarily work in transactions with more than one job in them. */ -}; - -/** - * Allocate a TransactionProperties structure if necessary, and fill - * that structure with desired defaults if they are unset. - */ -static TransactionProperties *get_transaction_properties( - TransactionProperties *props) -{ - if (!props) { - props = g_new0(TransactionProperties, 1); - } - - if (!props->has_completion_mode) { - props->has_completion_mode = true; - props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL; - } - - return props; + case TRANSACTION_ACTION_KIND__MAX: + default: + g_assert_not_reached(); + }; } + /* * 'Atomic' group operations. The operations are performed as a set, and if * any fail then we roll back all operations in the group. * * Always run under BQL. */ -void qmp_transaction(TransactionActionList *dev_list, - struct TransactionProperties *props, +void qmp_transaction(TransactionActionList *actions, + struct TransactionProperties *properties, Error **errp) { - TransactionActionList *dev_entry = dev_list; - bool has_props = !!props; + TransactionActionList *act; JobTxn *block_job_txn = NULL; - BlkActionState *state, *next; Error *local_err = NULL; + Transaction *tran; + ActionCompletionMode comp_mode = + properties ? properties->completion_mode : + ACTION_COMPLETION_MODE_INDIVIDUAL; GLOBAL_STATE_CODE(); - QTAILQ_HEAD(, BlkActionState) snap_bdrv_states; - QTAILQ_INIT(&snap_bdrv_states); - /* Does this transaction get canceled as a group on failure? * If not, we don't really need to make a JobTxn. */ - props = get_transaction_properties(props); - if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + if (comp_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) { + for (act = actions; act; act = act->next) { + TransactionActionKind type = act->value->type; + + if (type != TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP && + type != TRANSACTION_ACTION_KIND_DRIVE_BACKUP) + { + error_setg(errp, + "Action '%s' does not support transaction property " + "completion-mode = %s", + TransactionActionKind_str(type), + ActionCompletionMode_str(comp_mode)); + return; + } + } + block_job_txn = job_txn_new(); } /* drain all i/o before any operations */ bdrv_drain_all(); - /* We don't do anything in this loop that commits us to the operations */ - while (NULL != dev_entry) { - TransactionAction *dev_info = NULL; - const BlkActionOps *ops; - - dev_info = dev_entry->value; - dev_entry = dev_entry->next; + tran = tran_new(); - assert(dev_info->type < ARRAY_SIZE(actions)); - - ops = &actions[dev_info->type]; - assert(ops->instance_size > 0); - - state = g_malloc0(ops->instance_size); - state->ops = ops; - state->action = dev_info; - state->block_job_txn = block_job_txn; - state->txn_props = props; - QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry); - - state->ops->prepare(state, &local_err); + /* We don't do anything in this loop that commits us to the operations */ + for (act = actions; act; act = act->next) { + transaction_action(act->value, block_job_txn, tran, &local_err); if (local_err) { error_propagate(errp, local_err); goto delete_and_fail; } } - QTAILQ_FOREACH(state, &snap_bdrv_states, entry) { - if (state->ops->commit) { - state->ops->commit(state); - } - } + tran_commit(tran); /* success */ goto exit; delete_and_fail: /* failure, and it is all-or-none; roll back all operations */ - QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) { - if (state->ops->abort) { - state->ops->abort(state); - } - } + tran_abort(tran); exit: - QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) { - if (state->ops->clean) { - state->ops->clean(state); - } - g_free(state); - } - if (!has_props) { - qapi_free_TransactionProperties(props); - } job_txn_unref(block_job_txn); } @@ -319,10 +319,28 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) return block_job_set_speed_locked(job, speed, errp); } -int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) +void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n) { IO_CODE(); - return ratelimit_calculate_delay(&job->limit, n); + ratelimit_calculate_delay(&job->limit, n); +} + +void block_job_ratelimit_sleep(BlockJob *job) +{ + uint64_t delay_ns; + + /* + * Sleep at least once. If the job is reentered early, keep waiting until + * we've waited for the full time that is necessary to keep the job at the + * right speed. + * + * Make sure to recalculate the delay after each (possibly interrupted) + * sleep because the speed can change while the job has yielded. + */ + do { + delay_ns = ratelimit_calculate_delay(&job->limit, 0); + job_sleep_ns(&job->job, delay_ns); + } while (delay_ns && !job_is_cancelled(&job->job)); } BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp) diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index f7dc304..e7f036c 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -214,14 +214,18 @@ version 2. type. If the incompatible bit "Compression type" is set: the field - must be present and non-zero (which means non-zlib + must be present and non-zero (which means non-deflate compression type). Otherwise, this field must not be present - or must be zero (which means zlib). + or must be zero (which means deflate). Available compression type values: - 0: zlib <https://www.zlib.net/> + 0: deflate <https://www.ietf.org/rfc/rfc1951.txt> 1: zstd <http://github.com/facebook/zstd> + The deflate compression type is called "zlib" + <https://www.zlib.net/> in QEMU. However, clusters with the + deflate compression type do not have zlib headers. + === Header padding === diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 2d93423..f347199 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -58,14 +58,14 @@ BlockDriver *bdrv_find_protocol(const char *filename, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -int coroutine_fn GRAPH_RDLOCK +int coroutine_fn GRAPH_UNLOCKED bdrv_co_create(BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); -int co_wrapper_bdrv_rdlock bdrv_create(BlockDriver *drv, const char *filename, - QemuOpts *opts, Error **errp); +int co_wrapper bdrv_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); -int coroutine_fn GRAPH_RDLOCK +int coroutine_fn GRAPH_UNLOCKED bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index dbec0e3..6492a1e 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -250,10 +250,10 @@ struct BlockDriver { BlockDriverState *bs, QDict *options, int flags, Error **errp); void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create)( + int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)( BlockdevCreateOptions *opts, Error **errp); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_create_opts)( + int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)( BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); int (*bdrv_amend_options)(BlockDriverState *bs, diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index f008446..1048240 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -126,12 +126,18 @@ void block_job_user_resume(Job *job); */ /** - * block_job_ratelimit_get_delay: + * block_job_ratelimit_processed_bytes: * - * Calculate and return delay for the next request in ns. See the documentation - * of ratelimit_calculate_delay() for details. + * To be called after some work has been done. Adjusts the delay for the next + * request. See the documentation of ratelimit_calculate_delay() for details. */ -int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n); +void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n); + +/** + * Put the job to sleep (assuming that it wasn't canceled) to throttle it to the + * right speed according to its rate limiting. + */ +void block_job_ratelimit_sleep(BlockJob *job); /** * block_job_error_action: diff --git a/include/io/channel.h b/include/io/channel.h index 446a566..229bf36 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -758,6 +758,16 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, GIOCondition condition); /** + * qio_channel_wake_read: + * @ioc: the channel object + * + * If qio_channel_yield() is currently waiting for the channel to become + * readable, interrupt it and reenter immediately. This function is safe to call + * from any thread. + */ +void qio_channel_wake_read(QIOChannel *ioc); + +/** * qio_channel_wait: * @ioc: the channel object * @condition: the I/O condition to wait for diff --git a/io/channel.c b/io/channel.c index 375a130..72f0066 100644 --- a/io/channel.c +++ b/io/channel.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include "block/aio-wait.h" #include "io/channel.h" #include "qapi/error.h" #include "qemu/main-loop.h" @@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, static void qio_channel_restart_read(void *opaque) { QIOChannel *ioc = opaque; - Coroutine *co = ioc->read_coroutine; + Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); + + if (!co) { + return; + } /* Assert that aio_co_wake() reenters the coroutine directly */ assert(qemu_get_current_aio_context() == @@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) static void qio_channel_restart_write(void *opaque) { QIOChannel *ioc = opaque; - Coroutine *co = ioc->write_coroutine; + Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); + + if (!co) { + return; + } /* Assert that aio_co_wake() reenters the coroutine directly */ assert(qemu_get_current_aio_context() == @@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) void coroutine_fn qio_channel_yield(QIOChannel *ioc, GIOCondition condition) { + AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); + assert(qemu_in_coroutine()); + assert(in_aio_context_home_thread(ioc_ctx)); + if (condition == G_IO_IN) { assert(!ioc->read_coroutine); ioc->read_coroutine = qemu_coroutine_self(); @@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, } qio_channel_set_aio_fd_handlers(ioc); qemu_coroutine_yield(); + assert(in_aio_context_home_thread(ioc_ctx)); /* Allow interrupting the operation by reentering the coroutine other than * through the aio_fd_handlers. */ - if (condition == G_IO_IN && ioc->read_coroutine) { - ioc->read_coroutine = NULL; + if (condition == G_IO_IN) { + assert(ioc->read_coroutine == NULL); qio_channel_set_aio_fd_handlers(ioc); - } else if (condition == G_IO_OUT && ioc->write_coroutine) { - ioc->write_coroutine = NULL; + } else if (condition == G_IO_OUT) { + assert(ioc->write_coroutine == NULL); qio_channel_set_aio_fd_handlers(ioc); } } +void qio_channel_wake_read(QIOChannel *ioc) +{ + Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); + if (co) { + aio_co_wake(co); + } +} static gboolean qio_channel_wait_complete(QIOChannel *ioc, GIOCondition condition, diff --git a/nbd/server.c b/nbd/server.c index e239c28..2664d43 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) * enter it here so we don't depend on the client to wake it up. */ if (client->recv_coroutine != NULL && client->read_yielding) { - qemu_aio_coroutine_enter(exp->common.ctx, - client->recv_coroutine); + qio_channel_wake_read(client->ioc); } return true; @@ -2938,8 +2938,6 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts, } bs = blk_bs(blk); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - /* * Note that the returned BlockGraphInfo object will not have * information about this image's backing node, because we have opened @@ -2947,7 +2945,10 @@ static BlockGraphInfoList *collect_image_info_list(bool image_opts, * duplicate the backing chain information that we obtain by walking * the chain manually here. */ + bdrv_graph_rdlock_main_loop(); bdrv_query_block_graph_info(bs, &info, &err); + bdrv_graph_rdunlock_main_loop(); + if (err) { error_report_err(err); blk_unref(blk); diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index edaf290..92b28c7 100755 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -611,6 +611,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): self.reopen(hd0_opts, {'file': 'hd0-file'}) # Insert (and remove) a compress filter + @iotests.skip_if_unsupported(['compress']) def test_insert_compress_filter(self): # Add an image to the VM: hd (raw) -> hd0 (qcow2) -> hd0-file (file) opts = {'driver': 'raw', 'node-name': 'hd', 'file': hd_opts(0)} @@ -650,9 +651,9 @@ class TestBlockdevReopen(iotests.QMPTestCase): # Check the first byte of the first three L2 entries and verify that # the second one is compressed (0x40) while the others are not (0x80) - iotests.qemu_io_log('-f', 'raw', '-c', 'read -P 0x80 0x40000 1', - '-c', 'read -P 0x40 0x40008 1', - '-c', 'read -P 0x80 0x40010 1', hd_path[0]) + iotests.qemu_io('-f', 'raw', '-c', 'read -P 0x80 0x40000 1', + '-c', 'read -P 0x40 0x40008 1', + '-c', 'read -P 0x80 0x40010 1', hd_path[0]) # Swap the disk images of two active block devices def test_swap_files(self): diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out index a4e04a3..0970ced 100644 --- a/tests/qemu-iotests/245.out +++ b/tests/qemu-iotests/245.out @@ -10,14 +10,7 @@ {"return": {}} {"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, "type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -....read 1/1 bytes at offset 262144 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 1/1 bytes at offset 262152 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 1/1 bytes at offset 262160 -1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - -................ +.................... ---------------------------------------------------------------------- Ran 26 tests diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 3e82c63..7073579 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -462,6 +462,10 @@ class QemuStorageDaemon: assert self._qmp is not None return self._qmp.cmd(cmd, args) + def get_qmp(self) -> QEMUMonitorProtocol: + assert self._qmp is not None + return self._qmp + def stop(self, kill_signal=15): self._p.send_signal(kill_signal) self._p.wait() diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io index 7664f33..750e7d4 100755 --- a/tests/qemu-iotests/tests/graph-changes-while-io +++ b/tests/qemu-iotests/tests/graph-changes-while-io @@ -22,19 +22,19 @@ import os from threading import Thread import iotests -from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ - QemuStorageDaemon +from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ + QMPTestCase, QemuStorageDaemon top = os.path.join(iotests.test_dir, 'top.img') nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -def do_qemu_img_bench() -> None: +def do_qemu_img_bench(count: int = 2000000) -> None: """ Do some I/O requests on `nbd_sock`. """ - qemu_img('bench', '-f', 'raw', '-c', '2000000', + qemu_img('bench', '-f', 'raw', '-c', str(count), f'nbd+unix:///node0?socket={nbd_sock}') @@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): bench_thr.join() + def test_commit_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) + bench_thr.start() + + qemu_io('-c', 'write 0 64k', top) + qemu_io('-c', 'write 128k 64k', top) + + result = self.qsd.qmp('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'overlay', + 'backing': None, + 'file': { + 'driver': 'file', + 'filename': top + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('blockdev-snapshot', { + 'node': 'node0', + 'overlay': 'overlay', + }) + self.assert_qmp(result, 'return', {}) + + # While qemu-img bench is running, repeatedly commit overlay to node0 + while bench_thr.is_alive(): + result = self.qsd.qmp('block-commit', { + 'job-id': 'job0', + 'device': 'overlay', + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('block-job-cancel', { + 'device': 'job0', + }) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.qsd.get_qmp().get_events(wait=10.0): + if event['event'] != 'JOB_STATUS_CHANGE': + continue + if event['data']['status'] == 'null': + cancelled = True + + bench_thr.join() + if __name__ == '__main__': # Format must support raw backing files iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out index ae1213e..fbc63e6 100644 --- a/tests/qemu-iotests/tests/graph-changes-while-io.out +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -1,5 +1,5 @@ -. +.. ---------------------------------------------------------------------- -Ran 1 tests +Ran 2 tests OK diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 48ae660..3a63142 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -114,7 +114,10 @@ if have_block tests += {'test-crypto-xts': [crypto, io]} endif if 'CONFIG_POSIX' in config_host - tests += {'test-image-locking': [testblock]} + tests += { + 'test-image-locking': [testblock], + 'test-nested-aio-poll': [testblock], + } endif if config_host_data.get('CONFIG_REPLICATION') tests += {'test-replication': [testblock]} diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 9a4c5e5..08bb0f9 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1004,8 +1004,6 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) void *buffer = g_malloc(65536); QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); - GRAPH_RDLOCK_GUARD(); - /* Pretend some internal write operation from parent to child. * Important: We have to read from the child, not from the parent! * Draining works by first propagating it all up the tree to the @@ -1014,12 +1012,14 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) * everything will be drained before we go back down the tree, but * we do not want that. We want to be in the middle of draining * when this following requests returns. */ + bdrv_graph_co_rdlock(); bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0); + bdrv_graph_co_rdunlock(); g_assert_cmpint(bs->refcnt, ==, 1); if (!dbdd->detach_instead_of_delete) { - blk_unref(blk); + blk_co_unref(blk); } else { BdrvChild *c, *next_c; QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c new file mode 100644 index 0000000..9bbe18b --- /dev/null +++ b/tests/unit/test-nested-aio-poll.c @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Test that poll handlers are not re-entrant in nested aio_poll() + * + * Copyright Red Hat + * + * Poll handlers are usually level-triggered. That means they continue firing + * until the condition is reset (e.g. a virtqueue becomes empty). If a poll + * handler calls nested aio_poll() before the condition is reset, then infinite + * recursion occurs. + * + * aio_poll() is supposed to prevent this by disabling poll handlers in nested + * aio_poll() calls. This test case checks that this is indeed what happens. + */ +#include "qemu/osdep.h" +#include "block/aio.h" +#include "qapi/error.h" + +typedef struct { + AioContext *ctx; + + /* This is the EventNotifier that drives the test */ + EventNotifier poll_notifier; + + /* This EventNotifier is only used to wake aio_poll() */ + EventNotifier dummy_notifier; + + bool nested; +} TestData; + +static void io_read(EventNotifier *notifier) +{ + fprintf(stderr, "%s %p\n", __func__, notifier); + event_notifier_test_and_clear(notifier); +} + +static bool io_poll_true(void *opaque) +{ + fprintf(stderr, "%s %p\n", __func__, opaque); + return true; +} + +static bool io_poll_false(void *opaque) +{ + fprintf(stderr, "%s %p\n", __func__, opaque); + return false; +} + +static void io_poll_ready(EventNotifier *notifier) +{ + TestData *td = container_of(notifier, TestData, poll_notifier); + + fprintf(stderr, "> %s\n", __func__); + + g_assert(!td->nested); + td->nested = true; + + /* Wake the following nested aio_poll() call */ + event_notifier_set(&td->dummy_notifier); + + /* This nested event loop must not call io_poll()/io_poll_ready() */ + g_assert(aio_poll(td->ctx, true)); + + td->nested = false; + + fprintf(stderr, "< %s\n", __func__); +} + +/* dummy_notifier never triggers */ +static void io_poll_never_ready(EventNotifier *notifier) +{ + g_assert_not_reached(); +} + +static void test(void) +{ + TestData td = { + .ctx = aio_context_new(&error_abort), + }; + + qemu_set_current_aio_context(td.ctx); + + /* Enable polling */ + aio_context_set_poll_params(td.ctx, 1000000, 2, 2, &error_abort); + + /* + * The GSource is unused but this has the side-effect of changing the fdmon + * that AioContext uses. + */ + aio_get_g_source(td.ctx); + + /* Make the event notifier active (set) right away */ + event_notifier_init(&td.poll_notifier, 1); + aio_set_event_notifier(td.ctx, &td.poll_notifier, false, + io_read, io_poll_true, io_poll_ready); + + /* This event notifier will be used later */ + event_notifier_init(&td.dummy_notifier, 0); + aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, + io_read, io_poll_false, io_poll_never_ready); + + /* Consume aio_notify() */ + g_assert(!aio_poll(td.ctx, false)); + + /* + * Run the io_read() handler. This has the side-effect of activating + * polling in future aio_poll() calls. + */ + g_assert(aio_poll(td.ctx, true)); + + /* The second time around the io_poll()/io_poll_ready() handler runs */ + g_assert(aio_poll(td.ctx, true)); + + /* Run io_poll()/io_poll_ready() one more time to show it keeps working */ + g_assert(aio_poll(td.ctx, true)); + + aio_set_event_notifier(td.ctx, &td.dummy_notifier, false, + NULL, NULL, NULL); + aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL); + event_notifier_cleanup(&td.dummy_notifier); + event_notifier_cleanup(&td.poll_notifier); + aio_context_unref(td.ctx); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/nested-aio-poll", test); + return g_test_run(); +} diff --git a/util/aio-posix.c b/util/aio-posix.c index a8be940..34bc2a6 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -353,8 +353,19 @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node) poll_ready && revents == 0 && aio_node_check(ctx, node->is_external) && node->io_poll_ready) { + /* + * Remove temporarily to avoid infinite loops when ->io_poll_ready() + * calls aio_poll() before clearing the condition that made the poll + * handler become ready. + */ + QLIST_SAFE_REMOVE(node, node_poll); + node->io_poll_ready(node->opaque); + if (!QLIST_IS_INSERTED(node, node_poll)) { + QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll); + } + /* * Return early since revents was zero. aio_notify() does not count as * progress. |