diff options
81 files changed, 1823 insertions, 454 deletions
diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml index 118371e..fea4e8d 100644 --- a/.gitlab-ci.d/buildtest-template.yml +++ b/.gitlab-ci.d/buildtest-template.yml @@ -76,7 +76,8 @@ fi - section_end buildenv - section_start test "Running tests" - - $MAKE NINJA=":" $MAKE_CHECK_ARGS + # doctests need all the compilation artifacts + - $MAKE NINJA=":" MTESTARGS="--no-suite doc" $MAKE_CHECK_ARGS - section_end test .native_test_job_template: diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index ca1a9c6..d888a60 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -41,7 +41,7 @@ build-system-ubuntu: IMAGE: ubuntu2204 CONFIGURE_ARGS: --enable-docs --enable-rust TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build + MAKE_CHECK_ARGS: check-build check-doc check-system-ubuntu: extends: .native_test_job_template @@ -115,7 +115,7 @@ build-system-fedora: CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs --enable-crypto-afalg --enable-rust TARGETS: microblaze-softmmu mips-softmmu xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu - MAKE_CHECK_ARGS: check-build + MAKE_CHECK_ARGS: check-build check-doc build-system-fedora-rust-nightly: extends: @@ -127,12 +127,7 @@ build-system-fedora-rust-nightly: IMAGE: fedora-rust-nightly CONFIGURE_ARGS: --disable-docs --enable-rust --enable-strict-rust-lints TARGETS: aarch64-softmmu - MAKE_CHECK_ARGS: check-build - after_script: - - source scripts/ci/gitlab-ci-section - - section_start test "Running Rust doctests" - - cd build - - pyvenv/bin/meson devenv -w ../rust ${CARGO-cargo} test --doc -p qemu_api + MAKE_CHECK_ARGS: check-build check-doc allow_failure: true diff --git a/MAINTAINERS b/MAINTAINERS index 16af379..aa67630 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3032,6 +3032,16 @@ F: include/qemu/co-shared-resource.h T: git https://gitlab.com/jsnow/qemu.git jobs T: git 
https://gitlab.com/vsementsov/qemu.git block +CheckPoint and Restart (CPR) +R: Steve Sistare <steven.sistare@oracle.com> +S: Supported +F: hw/vfio/cpr* +F: include/hw/vfio/vfio-cpr.h +F: include/migration/cpr.h +F: migration/cpr* +F: tests/qtest/migration/cpr* +F: docs/devel/migration/CPR.rst + Compute Express Link M: Jonathan Cameron <jonathan.cameron@huawei.com> R: Fan Ni <fan.ni@samsung.com> diff --git a/backends/iommufd.c b/backends/iommufd.c index b73f75c..c2c47ab 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -311,6 +311,62 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, return true; } +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp) +{ + int ret, fd = be->fd; + uint32_t total_entries = *entry_num; + struct iommu_hwpt_invalidate cache = { + .size = sizeof(cache), + .hwpt_id = id, + .data_type = data_type, + .entry_len = entry_len, + .entry_num = total_entries, + .data_uptr = (uintptr_t)data, + }; + + ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache); + trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len, + total_entries, cache.entry_num, + (uintptr_t)data, ret ? errno : 0); + *entry_num = cache.entry_num; + + if (ret) { + error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:" + " total %d entries, processed %d entries", + total_entries, cache.entry_num); + } else if (total_entries != cache.entry_num) { + error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed" + " entries: total %d entries, processed %d entries." 
+ " Kernel BUG?!", total_entries, cache.entry_num); + return false; + } + + return !ret; +} + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + HostIOMMUDeviceIOMMUFDClass *idevc = + HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); + + g_assert(idevc->attach_hwpt); + return idevc->attach_hwpt(idev, hwpt_id, errp); +} + +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + HostIOMMUDeviceIOMMUFDClass *idevc = + HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); + + g_assert(idevc->detach_hwpt); + return idevc->detach_hwpt(idev, errp); +} + static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) { HostIOMMUDeviceCaps *caps = &hiod->caps; @@ -349,6 +405,8 @@ static const TypeInfo types[] = { }, { .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, .parent = TYPE_HOST_IOMMU_DEVICE, + .instance_size = sizeof(HostIOMMUDeviceIOMMUFD), + .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass), .class_init = hiod_iommufd_class_init, .abstract = true, } diff --git a/backends/trace-events b/backends/trace-events index 40811a3..7278214 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_ iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" +iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" @@ -106,9 +106,9 @@ static void 
bdrv_reopen_abort(BDRVReopenState *reopen_state); static bool bdrv_backing_overridden(BlockDriverState *bs); -static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); +static bool GRAPH_RDLOCK +bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, Error **errp); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -1226,9 +1226,10 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } -static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockDriverState *bs = child->opaque; return bdrv_change_aio_context(bs, ctx, visited, tran, errp); @@ -1720,12 +1721,14 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, open_failed: bs->drv = NULL; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); assert(!bs->file); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(bs->opaque); bs->opaque = NULL; @@ -3027,7 +3030,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { - bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); + bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL, + &error_abort); } if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { @@ -3069,6 +3073,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { * * Both @parent_bs and @child_bs can move to a different AioContext in this * function. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. 
*/ static BdrvChild * GRAPH_WRLOCK bdrv_attach_child_common(BlockDriverState *child_bs, @@ -3112,8 +3119,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, parent_ctx = bdrv_child_get_parent_aio_context(new_child); if (child_ctx != parent_ctx) { Error *local_err = NULL; - int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, - &local_err); + int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL, + &local_err); if (ret < 0 && child_class->change_aio_ctx) { Transaction *aio_ctx_tran = tran_new(); @@ -3179,6 +3186,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs, * * After calling this function, the transaction @tran may only be completed * while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static BdrvChild * GRAPH_WRLOCK bdrv_attach_child_noperm(BlockDriverState *parent_bs, @@ -3220,6 +3230,8 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * All block nodes must be drained. */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, @@ -3259,6 +3271,8 @@ out: * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * All block nodes must be drained. */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -3293,7 +3307,11 @@ out: return ret < 0 ? NULL : child; } -/* Callers must ensure that child->frozen is false. */ +/* + * Callers must ensure that child->frozen is false. + * + * All block nodes must be drained. 
+ */ void bdrv_root_unref_child(BdrvChild *child) { BlockDriverState *child_bs = child->bs; @@ -3314,8 +3332,8 @@ void bdrv_root_unref_child(BdrvChild *child) * When the parent requiring a non-default AioContext is removed, the * node moves back to the main AioContext */ - bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, - NULL); + bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(), + NULL, NULL); } bdrv_schedule_unref(child_bs); @@ -3388,7 +3406,11 @@ bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, } } -/* Callers must ensure that child->frozen is false. */ +/* + * Callers must ensure that child->frozen is false. + * + * All block nodes must be drained. + */ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) { GLOBAL_STATE_CODE(); @@ -3453,6 +3475,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) * * After calling this function, the transaction @tran may only be completed * while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static int GRAPH_WRLOCK bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, @@ -3545,8 +3570,7 @@ out: * Both @bs and @backing_hd can move to a different AioContext in this * function. * - * If a backing child is already present (i.e. we're detaching a node), that - * child node must be drained. + * All block nodes must be drained. */ int bdrv_set_backing_hd_drained(BlockDriverState *bs, BlockDriverState *backing_hd, @@ -3575,21 +3599,14 @@ out: int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp) { - BlockDriverState *drain_bs; int ret; GLOBAL_STATE_CODE(); - bdrv_graph_rdlock_main_loop(); - drain_bs = bs->backing ? 
bs->backing->bs : bs; - bdrv_graph_rdunlock_main_loop(); - - bdrv_ref(drain_bs); - bdrv_drained_begin(drain_bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); bdrv_graph_wrunlock(); - bdrv_drained_end(drain_bs); - bdrv_unref(drain_bs); + bdrv_drain_all_end(); return ret; } @@ -3780,10 +3797,12 @@ static BdrvChild *bdrv_open_child_common(const char *filename, return NULL; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return child; } @@ -4358,9 +4377,7 @@ bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child) * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * - * bs is drained here and undrained by bdrv_reopen_queue_free(). - * - * To be called with bs->aio_context locked. + * bs must be drained. */ static BlockReopenQueue * GRAPH_RDLOCK bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -4379,12 +4396,7 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, GLOBAL_STATE_CODE(); - /* - * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that - * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok - * in practice. - */ - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); @@ -4519,12 +4531,17 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, return bs_queue; } -/* To be called with bs->aio_context locked */ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) { GLOBAL_STATE_CODE(); + + if (bs_queue == NULL) { + /* Paired with bdrv_drain_all_end() in bdrv_reopen_queue_free(). 
*/ + bdrv_drain_all_begin(); + } + GRAPH_RDLOCK_GUARD_MAINLOOP(); return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, @@ -4537,12 +4554,14 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - bdrv_drained_end(bs_entry->state.bs); qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); g_free(bs_entry); } g_free(bs_queue); + + /* Paired with bdrv_drain_all_begin() in bdrv_reopen_queue(). */ + bdrv_drain_all_end(); } } @@ -4709,6 +4728,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, * Return 0 on success, otherwise return < 0 and set @errp. * * @reopen_state->bs can move to a different AioContext in this function. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static int GRAPH_UNLOCKED bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, @@ -4802,7 +4824,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, if (old_child_bs) { bdrv_ref(old_child_bs); - bdrv_drained_begin(old_child_bs); + assert(old_child_bs->quiesce_counter > 0); } bdrv_graph_rdunlock_main_loop(); @@ -4814,7 +4836,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, bdrv_graph_wrunlock(); if (old_child_bs) { - bdrv_drained_end(old_child_bs); bdrv_unref(old_child_bs); } @@ -4843,6 +4864,9 @@ out_rdlock: * * After calling this function, the transaction @change_child_tran may only be * completed while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. 
*/ static int GRAPH_UNLOCKED bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5156,6 +5180,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); QLIST_FOREACH_SAFE(child, &bs->children, next, next) { bdrv_unref_child(bs, child); @@ -5164,6 +5189,7 @@ static void bdrv_close(BlockDriverState *bs) assert(!bs->backing); assert(!bs->file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(bs->opaque); bs->opaque = NULL; @@ -5489,9 +5515,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, assert(!bs_new->backing); bdrv_graph_rdunlock_main_loop(); - bdrv_drained_begin(bs_top); - bdrv_drained_begin(bs_new); - + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", @@ -5513,9 +5537,7 @@ out: bdrv_refresh_limits(bs_top, NULL, NULL); bdrv_graph_wrunlock(); - - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); + bdrv_drain_all_end(); return ret; } @@ -6989,6 +7011,8 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) GLOBAL_STATE_CODE(); + assert(bs->quiesce_counter > 0); + if (!bs->drv) { return -ENOMEDIUM; } @@ -7032,9 +7056,7 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) return -EPERM; } - bdrv_drained_begin(bs); bs->open_flags |= BDRV_O_INACTIVE; - bdrv_drained_end(bs); /* * Update permissions, they may differ for inactive nodes. 
@@ -7059,20 +7081,26 @@ int bdrv_inactivate(BlockDriverState *bs, Error **errp) int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); if (bdrv_has_bds_parent(bs, true)) { error_setg(errp, "Node has active parent node"); - return -EPERM; + ret = -EPERM; + goto out; } ret = bdrv_inactivate_recurse(bs, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to inactivate node"); - return ret; + goto out; } - return 0; +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } int bdrv_inactivate_all(void) @@ -7082,7 +7110,9 @@ int bdrv_inactivate_all(void) int ret = 0; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { /* Nodes with BDS parents are covered by recursion from the last @@ -7098,6 +7128,9 @@ int bdrv_inactivate_all(void) } } + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } @@ -7278,10 +7311,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } -/* - * Must not be called while holding the lock of an AioContext other than the - * current one. - */ void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, bool quiet, @@ -7568,10 +7597,21 @@ typedef struct BdrvStateSetAioContext { BlockDriverState *bs; } BdrvStateSetAioContext; -static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, - Transaction *tran, - Error **errp) +/* + * Changes the AioContext of @child to @ctx and recursively for the associated + * block nodes and all their children and parents. Returns true if the change is + * possible and the transaction @tran can be continued. Returns false and sets + * @errp if not and the transaction must be aborted. 
+ * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ +static bool GRAPH_RDLOCK +bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { GLOBAL_STATE_CODE(); if (g_hash_table_contains(visited, c)) { @@ -7596,6 +7636,17 @@ static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, return true; } +/* + * Changes the AioContext of @c->bs to @ctx and recursively for all its children + * and parents. Returns true if the change is possible and the transaction @tran + * can be continued. Returns false and sets @errp if not and the transaction + * must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, GHashTable *visited, Transaction *tran, Error **errp) @@ -7611,10 +7662,6 @@ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, static void bdrv_set_aio_context_clean(void *opaque) { BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; - BlockDriverState *bs = (BlockDriverState *) state->bs; - - /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */ - bdrv_drained_end(bs); g_free(state); } @@ -7642,10 +7689,12 @@ static TransactionActionDrv set_aio_context = { * * @visited will accumulate all visited BdrvChild objects. The caller is * responsible for freeing the list afterwards. + * + * @bs must be drained. 
*/ -static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, Error **errp) { BdrvChild *c; BdrvStateSetAioContext *state; @@ -7656,21 +7705,17 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, return true; } - bdrv_graph_rdlock_main_loop(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { - bdrv_graph_rdunlock_main_loop(); return false; } } QLIST_FOREACH(c, &bs->children, next) { if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { - bdrv_graph_rdunlock_main_loop(); return false; } } - bdrv_graph_rdunlock_main_loop(); state = g_new(BdrvStateSetAioContext, 1); *state = (BdrvStateSetAioContext) { @@ -7678,8 +7723,7 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, .bs = bs, }; - /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */ - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); tran_add(tran, &set_aio_context, state); @@ -7692,9 +7736,13 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, * * If ignore_child is not NULL, that child (and its subgraph) will not * be touched. + * + * Called with the graph lock held. + * + * Called while all bs are drained. */ -int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp) +int bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) { Transaction *tran; GHashTable *visited; @@ -7703,9 +7751,9 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, /* * Recursion phase: go through all nodes of the graph. 
- * Take care of checking that all nodes support changing AioContext - * and drain them, building a linear list of callbacks to run if everything - * is successful (the transaction itself). + * Take care of checking that all nodes support changing AioContext, + * building a linear list of callbacks to run if everything is successful + * (the transaction itself). */ tran = tran_new(); visited = g_hash_table_new(NULL, NULL); @@ -7732,6 +7780,29 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, return 0; } +/* + * Change bs's and recursively all of its parents' and children's AioContext + * to the given new context, returning an error if that isn't possible. + * + * If ignore_child is not NULL, that child (and its subgraph) will not + * be touched. + */ +int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + int ret; + + GLOBAL_STATE_CODE(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + ret = bdrv_try_change_aio_context_locked(bs, ctx, ignore_child, errp); + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + + return ret; +} + void bdrv_add_aio_context_notifier(BlockDriverState *bs, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) @@ -8159,8 +8230,10 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) } /* - * Hot add/remove a BDS's child. So the user can take a child offline when - * it is broken and take a new child online + * Hot add a BDS's child. Used in combination with bdrv_del_child, so the user + * can take a child offline when it is broken and take a new child online. + * + * All block nodes must be drained. 
*/ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, Error **errp) @@ -8200,6 +8273,12 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); } +/* + * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the + * user can take a child offline when it is broken and take a new child online. + * + * All block nodes must be drained. + */ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) { BdrvChild *tmp; diff --git a/block/backup.c b/block/backup.c index 0151e84..909027c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -498,10 +498,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, block_copy_set_speed(bcs, speed); /* Required permissions are taken by copy-before-write filter target */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return &job->common; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index b0f78c4..70ac76f 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -281,9 +281,11 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; fail_log: if (ret < 0) { + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->log_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->log_file = NULL; qemu_mutex_destroy(&s->mutex); } @@ -296,10 +298,12 @@ static void blk_log_writes_close(BlockDriverState *bs) { BDRVBlkLogWritesState *s = bs->opaque; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->log_file); s->log_file = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); qemu_mutex_destroy(&s->mutex); } diff --git a/block/blkverify.c b/block/blkverify.c index db79a36..3a71f74 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -151,10 +151,12 @@ static void 
blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->test_file); s->test_file = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/block-backend.c b/block/block-backend.c index a402db1..68209bb 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); -static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); +static bool GRAPH_RDLOCK +blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited, + Transaction *tran, Error **errp); static char *blk_root_get_parent_desc(BdrvChild *child) { @@ -889,9 +889,11 @@ void blk_remove_bs(BlockBackend *blk) root = blk->root; blk->root = NULL; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_root_unref_child(root); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } /* @@ -904,6 +906,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) GLOBAL_STATE_CODE(); bdrv_ref(bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { @@ -919,6 +922,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, perm, shared_perm, blk, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); if (blk->root == NULL) { return -EPERM; } diff --git a/block/commit.c b/block/commit.c index 7cc8c0f..6c4b736 100644 --- a/block/commit.c +++ b/block/commit.c @@ -392,6 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, * this is the responsibility of the interface (i.e. whoever calls * commit_start()). 
*/ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); s->base_overlay = bdrv_find_overlay(top, base); assert(s->base_overlay); @@ -424,18 +425,21 @@ void commit_start(const char *job_id, BlockDriverState *bs, iter_shared_perms, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } s->chain_frozen = true; ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); if (ret < 0) { goto fail; @@ -413,7 +413,6 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) /* At this point, we should be always running in the main loop. */ GLOBAL_STATE_CODE(); assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); /* Re-enable things in child-to-parent order */ old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); diff --git a/block/mirror.c b/block/mirror.c index c2c5099..6e8caf4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -2014,6 +2014,7 @@ static BlockJob *mirror_start_job( */ bdrv_disable_dirty_bitmap(s->dirty_bitmap); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); ret = block_job_add_bdrv(&s->common, "source", bs, 0, BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | @@ -2021,6 +2022,7 @@ static BlockJob *mirror_start_job( errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } @@ -2066,16 +2068,19 @@ static BlockJob *mirror_start_job( iter_shared_perms, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); QTAILQ_INIT(&s->ops_in_flight); diff --git a/block/qcow2.c b/block/qcow2.c index 66fba89..45451a7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1895,7 +1895,9 @@ qcow2_do_open(BlockDriverState *bs, 
QDict *options, int flags, g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { bdrv_graph_co_rdunlock(); + bdrv_drain_all_begin(); bdrv_co_unref_child(bs, s->data_file); + bdrv_drain_all_end(); bdrv_graph_co_rdlock(); s->data_file = NULL; } @@ -2821,9 +2823,11 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) if (close_data_file && has_data_file(bs)) { GLOBAL_STATE_CODE(); bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->data_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->data_file = NULL; bdrv_graph_rdlock_main_loop(); } diff --git a/block/quorum.c b/block/quorum.c index ed8ce80..cc3bc5f 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1037,6 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, close_exit: /* cleanup on error */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { if (!opened[i]) { @@ -1045,6 +1046,7 @@ close_exit: bdrv_unref_child(bs, s->children[i]); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->children); g_free(opened); exit: @@ -1057,11 +1059,13 @@ static void quorum_close(BlockDriverState *bs) BDRVQuorumState *s = bs->opaque; int i; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { bdrv_unref_child(bs, s->children[i]); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->children); } diff --git a/block/replication.c b/block/replication.c index 07f274d..0879718 100644 --- a/block/replication.c +++ b/block/replication.c @@ -540,6 +540,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_ref(hidden_disk->bs); @@ -549,6 +550,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, if (local_err) { error_propagate(errp, local_err); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return; } @@ -559,6 +561,7 @@ static void 
replication_start(ReplicationState *rs, ReplicationMode mode, if (local_err) { error_propagate(errp, local_err); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return; } @@ -571,12 +574,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, !check_top_bs(top_bs, bs)) { error_setg(errp, "No top_bs or it is invalid"); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); reopen_backing_file(bs, false, NULL); return; } bdrv_op_block_all(top_bs, s->blocker); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->backup_job = backup_job_create( NULL, s->secondary_disk->bs, s->hidden_disk->bs, @@ -651,12 +656,14 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->error = 0; } else { diff --git a/block/snapshot.c b/block/snapshot.c index 22567f1..28c9c43 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -291,9 +291,11 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, fallback); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); memset(bs->opaque, 0, drv->instance_size); @@ -327,7 +329,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, /** * Delete an internal snapshot by @snapshot_id and @name. 
- * @bs: block device used in the operation + * @bs: block device used in the operation, must be drained * @snapshot_id: unique snapshot ID, or NULL * @name: snapshot name, or NULL * @errp: location to store error @@ -358,6 +360,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, GLOBAL_STATE_CODE(); + assert(bs->quiesce_counter > 0); + if (!drv) { error_setg(errp, "Device '%s' has no medium", bdrv_get_device_name(bs)); @@ -368,9 +372,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return -EINVAL; } - /* drain all pending i/o before deleting snapshot */ - bdrv_drained_begin(bs); - if (drv->bdrv_snapshot_delete) { ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); } else if (fallback_bs) { @@ -382,7 +383,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, ret = -ENOTSUP; } - bdrv_drained_end(bs); return ret; } @@ -571,19 +571,22 @@ int bdrv_all_delete_snapshot(const char *name, ERRP_GUARD(); g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + int ret = 0; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { - return -1; + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp); + if (ret < 0) { + goto out; } iterbdrvs = bdrvs; while (iterbdrvs) { BlockDriverState *bs = iterbdrvs->data; QEMUSnapshotInfo sn1, *snapshot = &sn1; - int ret = 0; if ((devices || bdrv_all_snapshots_includes_bs(bs)) && bdrv_snapshot_find(bs, snapshot, name) >= 0) @@ -594,13 +597,16 @@ int bdrv_all_delete_snapshot(const char *name, if (ret < 0) { error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", name, bdrv_get_device_or_node_name(bs)); - return -1; + goto out; } iterbdrvs = iterbdrvs->next; } - return 0; +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } diff --git a/block/stream.c b/block/stream.c index 999d9e5..f5441f2 100644 --- a/block/stream.c +++ b/block/stream.c @@ -80,11 
+80,10 @@ static int stream_prepare(Job *job) * may end up working with the wrong base node (or it might even have gone * away by the time we want to use it). */ - bdrv_drained_begin(unfiltered_bs); if (unfiltered_bs_cow) { bdrv_ref(unfiltered_bs_cow); - bdrv_drained_begin(unfiltered_bs_cow); } + bdrv_drain_all_begin(); bdrv_graph_rdlock_main_loop(); base = bdrv_filter_or_cow_bs(s->above_base); @@ -123,11 +122,10 @@ static int stream_prepare(Job *job) } out: + bdrv_drain_all_end(); if (unfiltered_bs_cow) { - bdrv_drained_end(unfiltered_bs_cow); bdrv_unref(unfiltered_bs_cow); } - bdrv_drained_end(unfiltered_bs); return ret; } @@ -373,10 +371,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, * already have our own plans. Also don't allow resize as the image size is * queried only at the job start and then cached. */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (block_job_add_bdrv(&s->common, "active node", bs, 0, basic_flags | BLK_PERM_WRITE, errp)) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } @@ -397,10 +397,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, basic_flags, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->base_overlay = base_overlay; s->above_base = above_base; diff --git a/block/vmdk.c b/block/vmdk.c index 9c7ab03..89a7250 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -271,6 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; VmdkExtent *e; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_extents; i++) { e = &s->extents[i]; @@ -283,6 +284,7 @@ static void vmdk_free_extents(BlockDriverState *bs) } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->extents); } @@ -1247,9 +1249,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { bdrv_graph_rdunlock_main_loop(); + 
bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1266,9 +1270,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, g_free(buf); if (ret) { bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1277,9 +1283,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); if (ret) { bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1287,9 +1295,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, } else { error_setg(errp, "Unsupported extent type '%s'", type); bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); ret = -ENOTSUP; goto out; @@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); bs = qmp_get_root_bs(device, errp); if (!bs) { - return NULL; + goto error; } if (!id && !name) { error_setg(errp, "Name or id must be provided"); - return NULL; + goto error; } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { - return NULL; + goto error; } ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); if (local_err) { error_propagate(errp, local_err); - return NULL; + goto error; } if (!ret) { error_setg(errp, "Snapshot with id '%s' and name '%s' does not 
exist on " "device '%s'", STR_OR_NULL(id), STR_OR_NULL(name), device); - return NULL; + goto error; } bdrv_snapshot_delete(bs, id, name, &local_err); if (local_err) { error_propagate(errp, local_err); - return NULL; + goto error; } info = g_new0(SnapshotInfo, 1); @@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, info->has_icount = true; } +error: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); return info; } @@ -1203,7 +1208,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, Error *local_err = NULL; const char *device; const char *name; - BlockDriverState *bs; + BlockDriverState *bs, *check_bs; QEMUSnapshotInfo old_sn, *sn; bool ret; int64_t rt; @@ -1211,7 +1216,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, int ret1; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); tran_add(tran, &internal_snapshot_drv, state); @@ -1220,14 +1225,29 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, bs = qmp_get_root_bs(device, errp); if (!bs) { + bdrv_graph_rdunlock_main_loop(); return; } state->bs = bs; + /* Need to drain while unlocked. */ + bdrv_graph_rdunlock_main_loop(); /* Paired with .clean() */ bdrv_drained_begin(bs); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + /* Make sure the root bs did not change with the drain. 
*/ + check_bs = qmp_get_root_bs(device, errp); + if (bs != check_bs) { + if (check_bs) { + error_setg(errp, "Block node of device '%s' unexpectedly changed", + device); + } /* else errp is already set */ + return; + } + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { return; } @@ -1295,12 +1315,14 @@ static void internal_snapshot_abort(void *opaque) Error *local_error = NULL; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!state->created) { return; } + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { error_reportf_err(local_error, "Failed to delete snapshot with id '%s' and " @@ -1308,6 +1330,8 @@ static void internal_snapshot_abort(void *opaque) sn->id_str, sn->name, bdrv_get_device_name(bs)); } + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); } static void internal_snapshot_clean(void *opaque) @@ -1353,9 +1377,10 @@ static void external_snapshot_action(TransactionAction *action, const char *new_image_file; ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); uint64_t perm, shared; + BlockDriverState *check_bs; /* TODO We'll eventually have to take a writer lock in this function */ - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); tran_add(tran, &external_snapshot_drv, state); @@ -1388,11 +1413,25 @@ static void external_snapshot_action(TransactionAction *action, state->old_bs = bdrv_lookup_bs(device, node_name, errp); if (!state->old_bs) { + bdrv_graph_rdunlock_main_loop(); return; } + /* Need to drain while unlocked. */ + bdrv_graph_rdunlock_main_loop(); /* Paired with .clean() */ bdrv_drained_begin(state->old_bs); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + /* Make sure the associated bs did not change with the drain. 
*/ + check_bs = bdrv_lookup_bs(device, node_name, errp); + if (state->old_bs != check_bs) { + if (check_bs) { + error_setg(errp, "Block node of device '%s' unexpectedly changed", + device); + } /* else errp is already set */ + return; + } if (!bdrv_is_inserted(state->old_bs)) { error_setg(errp, "Device '%s' has no medium", @@ -3522,6 +3561,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, BlockDriverState *parent_bs, *new_bs = NULL; BdrvChild *p_child; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); parent_bs = bdrv_lookup_bs(parent, parent, errp); @@ -3559,6 +3599,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, out: bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } BlockJobInfoList *qmp_query_block_jobs(Error **errp) @@ -3592,12 +3633,13 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, AioContext *new_context; BlockDriverState *bs; - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); - return; + goto out; } /* Protects against accidents. 
*/ @@ -3605,14 +3647,14 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, error_setg(errp, "Node %s is associated with a BlockBackend and could " "be in use (use force=true to override this check)", node_name); - return; + goto out; } if (iothread->type == QTYPE_QSTRING) { IOThread *obj = iothread_by_id(iothread->u.s); if (!obj) { error_setg(errp, "Cannot find iothread %s", iothread->u.s); - return; + goto out; } new_context = iothread_get_aio_context(obj); @@ -3620,7 +3662,11 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, new_context = qemu_get_aio_context(); } - bdrv_try_change_aio_context(bs, new_context, NULL, errp); + bdrv_try_change_aio_context_locked(bs, new_context, NULL, errp); + +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); } QemuOptsList qemu_common_drive_opts = { @@ -144,9 +144,9 @@ static TransactionActionDrv change_child_job_context = { .clean = g_free, }; -static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, GHashTable *visited, + Transaction *tran, Error **errp) { BlockJob *job = c->opaque; BdrvStateChildJobContext *s; @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) * one to make sure that such a concurrent access does not attempt * to process an already freed BdrvChild. 
*/ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); while (job->nodes) { GSList *l = job->nodes; @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) g_slist_free_1(l); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) @@ -496,6 +498,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, int ret; GLOBAL_STATE_CODE(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (job_id == NULL && !(flags & JOB_INTERNAL)) { @@ -506,6 +509,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, flags, cb, opaque, errp); if (job == NULL) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return NULL; } @@ -544,10 +548,12 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return job; fail: bdrv_graph_wrunlock(); + bdrv_drain_all_end(); job_early_fail(&job->job); return NULL; } diff --git a/rust/clippy.toml b/clippy.toml index 58a62c0..9016172 100644 --- a/rust/clippy.toml +++ b/clippy.toml @@ -1,3 +1,3 @@ -doc-valid-idents = ["PrimeCell", ".."] +doc-valid-idents = ["IrDA", "PrimeCell", ".."] allow-mixed-uninlined-format-args = false msrv = "1.77.0" @@ -209,6 +209,8 @@ for opt do ;; --rustc=*) RUSTC="$optarg" ;; + --rustdoc=*) RUSTDOC="$optarg" + ;; --cpu=*) cpu="$optarg" ;; --extra-cflags=*) @@ -323,6 +325,7 @@ pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}" sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" rustc="${RUSTC-rustc}" +rustdoc="${RUSTDOC-rustdoc}" check_define() { cat > $TMPC <<EOF @@ -660,6 +663,8 @@ for opt do ;; --rustc=*) ;; + --rustdoc=*) + ;; --make=*) ;; --install=*) @@ -890,6 +895,7 @@ Advanced options (experts only): --cxx=CXX use C++ compiler CXX [$cxx] --objcc=OBJCC use Objective-C compiler OBJCC [$objcc] --rustc=RUSTC use Rust compiler RUSTC [$rustc] + --rustdoc=RUSTDOC use rustdoc binary RUSTDOC [$rustdoc] --extra-cflags=CFLAGS append extra C compiler flags 
CFLAGS --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS --extra-objcflags=OBJCFLAGS append extra Objective C compiler flags OBJCFLAGS @@ -1178,6 +1184,14 @@ fi ########################################## # detect rust triple +meson_version=$($meson --version) +if test "$rust" != disabled && ! version_ge "$meson_version" 1.8.1; then + if test "$rust" = enabled; then + error_exit "Rust support needs Meson 1.8.1 or newer" + fi + echo "Rust needs Meson 1.8.1, disabling" 2>&1 + rust=disabled +fi if test "$rust" != disabled && has "$rustc" && $rustc -vV > "${TMPDIR1}/${TMPB}.out"; then rust_host_triple=$(sed -n 's/^host: //p' "${TMPDIR1}/${TMPB}.out") else @@ -1893,8 +1907,10 @@ if test "$skip_meson" = no; then if test "$rust" != disabled; then if test "$rust_host_triple" != "$rust_target_triple"; then echo "rust = [$(meson_quote $rustc --target "$rust_target_triple")]" >> $cross + echo "rustdoc = [$(meson_quote $rustdoc --target "$rust_target_triple")]" >> $cross else echo "rust = [$(meson_quote $rustc)]" >> $cross + echo "rustdoc = [$(meson_quote $rustdoc)]" >> $cross fi fi echo "ar = [$(meson_quote $ar)]" >> $cross diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst index 171d908..34d9c79 100644 --- a/docs/devel/rust.rst +++ b/docs/devel/rust.rst @@ -37,12 +37,16 @@ output directory (typically ``rust/target/``). 
A vanilla invocation of Cargo will complain that it cannot find the generated sources, which can be fixed in different ways: -* by using special shorthand targets in the QEMU build directory:: +* by using Makefile targets, provided by Meson, that run ``clippy`` or + ``rustdoc``: make clippy - make rustfmt make rustdoc +A target for ``rustfmt`` is also declared in ``rust/meson.build``: + + make rustfmt + * by invoking ``cargo`` through the Meson `development environment`__ feature:: @@ -50,7 +54,7 @@ which can be fixed in different ways: pyvenv/bin/meson devenv -w ../rust cargo fmt If you are going to use ``cargo`` repeatedly, ``pyvenv/bin/meson devenv`` - will enter a shell where commands like ``cargo clippy`` just work. + will enter a shell where commands like ``cargo fmt`` just work. __ https://mesonbuild.com/Commands.html#devenv @@ -66,7 +70,7 @@ be run via ``meson test`` or ``make``:: make check-rust -Building Rust code with ``--enable-modules`` is not supported yet. +Note that doctests require all ``.o`` files from the build to be available. 
Supported tools ''''''''''''''' diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 8e11e63..24e145d 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, if (ctx != bdrv_get_aio_context(bs)) { error_setg(errp, "Different aio context is not supported for new " "node"); + return; } blk_replace_bs(blk, bs, errp); diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c index bd993ea..645d9d1 100644 --- a/hw/i386/tdvf.c +++ b/hw/i386/tdvf.c @@ -101,16 +101,16 @@ static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src, /* sanity check */ if (entry->size < entry->data_len) { - error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx", + error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64, entry->data_len, entry->size); return -1; } if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) { - error_report("MemoryAddress 0x%lx not page aligned", entry->address); + error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address); return -1; } if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) { - error_report("MemoryDataSize 0x%lx not page aligned", entry->size); + error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size); return -1; } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 1c6ca94..d834bd4 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly) + void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->dma_map); - return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } int 
vfio_container_dma_unmap(VFIOContainerBase *bcontainer, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index a9f0dba..0f948d0 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -33,8 +33,8 @@ #include "qapi/error.h" #include "pci.h" #include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-cpr.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" @@ -207,7 +207,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, } static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index 3214184..0210e76 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -8,9 +8,9 @@ #include "qemu/osdep.h" #include "hw/vfio/vfio-device.h" #include "migration/misc.h" +#include "hw/vfio/vfio-cpr.h" #include "qapi/error.h" #include "system/runstate.h" -#include "vfio-cpr.h" static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, Error **errp) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index e7952d1..e7a9d1f 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, } static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, - struct vfio_region_info **opregion, - Error **errp) + struct vfio_region_info **opregion) { int ret; - /* Hotplugging is not supported for opregion access */ - if (vdev->pdev.qdev.hotplugged) { - error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); - return false; - } - ret = vfio_device_get_region_info_type(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion); if (ret) { - 
error_setg_errno(errp, -ret, - "Device does not supports IGD OpRegion feature"); + return false; + } + + /* Hotplugging is not supported for opregion access */ + if (vdev->pdev.qdev.hotplugged) { + warn_report("IGD device detected, but OpRegion is not supported " + "on hotplugged device."); return false; } @@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) } /* IGD device always comes with OpRegion */ - if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { return true; } info_report("OpRegion detected on Intel display %x.", vdev->device_id); @@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) return true; } - if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { /* Should never reach here, KVMGT always emulates OpRegion */ return false; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index af1c7ab..d3efef7 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -21,20 +21,21 @@ #include "qapi/error.h" #include "system/iommufd.h" #include "hw/qdev-core.h" +#include "hw/vfio/vfio-cpr.h" #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -592,6 +593,10 @@ found_container: goto err_listener_register; } + /* + * Do not move this code before attachment! 
The nested IOMMU support + * needs device and hwpt id which are generated only after attachment. + */ if (!vfio_device_hiod_create_and_realize(vbasedev, TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) { goto err_listener_register; @@ -810,21 +815,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; +static bool +host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); +} + +static bool +host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); +} + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, Error **errp) { VFIODevice *vdev = opaque; + HostIOMMUDeviceIOMMUFD *idev; HostIOMMUDeviceCaps *caps = &hiod->caps; + VendorCaps *vendor_caps = &caps->vendor_caps; enum iommu_hw_info_type type; - union { - struct iommu_hw_info_vtd vtd; - } data; uint64_t hw_caps; hiod->agent = opaque; - if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, + vendor_caps, sizeof(*vendor_caps), &hw_caps, errp)) { return false; } @@ -833,6 +855,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, caps->type = type; caps->hw_caps = hw_caps; + idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + idev->iommufd = vdev->iommufd; + idev->devid = vdev->devid; + idev->hwpt_id = vdev->hwpt->hwpt_id; + return true; } @@ -858,10 +885,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod) static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data) { HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + 
HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask; + + idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; + idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; }; static const TypeInfo types[] = { diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index bfacb3d..203ed03 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) section->offset_within_address_space & (1ULL << 63); } -/* Called with rcu_read_lock held. */ -static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - Error **errp) +/* + * Called with rcu_read_lock held. + * The returned MemoryRegion must not be accessed after calling rcu_read_unlock. + */ +static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { - bool ret, mr_has_discard_manager; + MemoryRegion *mr; - ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only, - &mr_has_discard_manager, errp); - if (ret && mr_has_discard_manager) { + mr = memory_translate_iotlb(iotlb, xlat_p, errp); + if (mr && memory_region_has_ram_discard_manager(mr)) { /* * Malicious VMs might trigger discarding of IOMMU-mapped memory. The * pages will remain pinned inside vfio until unmapped, resulting in a @@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, " intended via an IOMMU. 
It's possible to mitigate " " by setting/adjusting RLIMIT_MEMLOCK."); } - return ret; + return mr; } static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); VFIOContainerBase *bcontainer = giommu->bcontainer; hwaddr iova = iotlb->iova + giommu->iommu_offset; + MemoryRegion *mr; + hwaddr xlat; void *vaddr; int ret; Error *local_err = NULL; @@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); goto out; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + /* * vaddr is only valid until rcu_read_unlock(). 
But after * vfio_dma_map has set up the mapping the pages will be @@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) */ ret = vfio_container_dma_map(bcontainer, iova, iotlb->addr_mask + 1, vaddr, - read_only); + read_only, mr); if (ret) { error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, vaddr = memory_region_get_ram_ptr(section->mr) + start; ret = vfio_container_dma_map(bcontainer, iova, next - start, - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { /* Rollback */ vfio_ram_discard_notify_discard(rdl, section); @@ -449,6 +456,26 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) } } +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to sync missing RAM discard listener"); + /* does not return */ + } + return vrdl; +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -557,7 +584,7 @@ static void vfio_listener_region_add(MemoryListener *listener, } ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -1010,6 +1037,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ram_addr_t translated_addr; Error *local_err = NULL; int ret = -EINVAL; + MemoryRegion *mr; + hwaddr xlat; trace_vfio_iommu_map_dirty_notify(iova, iova + 
iotlb->addr_mask); @@ -1021,9 +1050,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } rcu_read_lock(); - if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { goto out_unlock; } + translated_addr = memory_region_get_ram_addr(mr) + xlat; ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, translated_addr, &local_err); @@ -1075,19 +1106,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - - QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { - break; - } - } - - if (!vrdl) { - hw_error("vfio: Trying to sync missing RAM discard listener"); - } + VFIORamDiscardListener *vrdl = + vfio_find_ram_discard_listener(bcontainer, section); /* * We only want/can synchronize the bitmap for actually mapped parts - diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a1bfdfe..b1250d8 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -511,6 +511,25 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, kvm_irqchip_commit_routes(kvm_state); } +static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector, + unsigned int nr) +{ + Error *err = NULL; + int32_t fd; + + if (vector->virq >= 0) { + fd = event_notifier_get_fd(&vector->kvm_interrupt); + } else { + fd = event_notifier_get_fd(&vector->interrupt); + } + + if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, + VFIO_IRQ_SET_ACTION_TRIGGER, + fd, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); + } +} + static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { @@ -583,21 
+602,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, strerror(-ret)); } } else { - Error *err = NULL; - int32_t fd; - - if (vector->virq >= 0) { - fd = event_notifier_get_fd(&vector->kvm_interrupt); - } else { - fd = event_notifier_get_fd(&vector->interrupt); - } - - if (!vfio_device_irq_set_signaling(&vdev->vbasedev, - VFIO_PCI_MSIX_IRQ_INDEX, nr, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, - &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - } + set_irq_signalling(&vdev->vbasedev, vector, nr); } } @@ -2854,6 +2859,18 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) static void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* + * XXX Leaking igd_opregion is not an oversight, we can't remove the + * fw_cfg entry therefore leaking this allocation seems like the safest + * option. + * + * g_free(vdev->igd_opregion); + */ + vfio_device_detach(&vdev->vbasedev); g_free(vdev->vbasedev.name); @@ -3005,6 +3022,19 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; VFIODevice *vbasedev = &vdev->vbasedev; + uint32_t config_space_size; + int ret; + + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + + /* Get a copy of config space */ + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? 
-ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); + return false; + } /* vfio emulates a lot for us, but some bits need extra love */ vdev->emulated_config_bits = g_malloc0(vdev->config_size); @@ -3126,15 +3156,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_realize(PCIDevice *pdev, Error **errp) +static void vfio_pci_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; + int i; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; - uint32_t config_space_size; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -3189,17 +3218,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); - - /* Get a copy of config space */ - ret = vfio_pci_config_space_read(vdev, 0, config_space_size, - vdev->pdev.config); - if (ret < (int)config_space_size) { - ret = ret < 0 ? -ret : EFAULT; - error_setg_errno(errp, ret, "failed to read device config space"); - goto error; - } - if (!vfio_pci_config_setup(vdev, errp)) { goto error; } @@ -3302,17 +3320,6 @@ static void vfio_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* - * XXX Leaking igd_opregion is not an oversight, we can't remove the - * fw_cfg entry therefore leaking this allocation seems like the safest - * option. 
- * - * g_free(vdev->igd_opregion); - */ vfio_pci_put_device(vdev); } @@ -3514,7 +3521,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif dc->desc = "VFIO-based PCI device assignment"; - pdc->realize = vfio_realize; + pdc->realize = vfio_pci_realize; object_class_property_set_description(klass, /* 1.3 */ "host", diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h deleted file mode 100644 index 134b83a..0000000 --- a/hw/vfio/vfio-cpr.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * VFIO CPR - * - * Copyright (c) 2025 Oracle and/or its affiliates. - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -#ifndef HW_VFIO_CPR_H -#define HW_VFIO_CPR_H - -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp); -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer); - -#endif /* HW_VFIO_CPR_H */ diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index e20da95..7061b6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) int ret; Int128 llend; Error *local_err = NULL; + MemoryRegion *mr; + hwaddr xlat; if (iotlb->target_as != &address_space_memory) { error_report("Wrong target AS \"%s\", only system memory is allowed", @@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL, - &local_err)) { + mr = memory_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); return; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { diff --git 
a/include/block/block-global-state.h b/include/block/block-global-state.h index 9be34b3..84a2a4e 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -192,10 +192,10 @@ int bdrv_inactivate_all(void); int bdrv_flush_all(void); void bdrv_close_all(void); -void bdrv_drain_all_begin(void); +void GRAPH_UNLOCKED bdrv_drain_all_begin(void); void bdrv_drain_all_begin_nopoll(void); void bdrv_drain_all_end(void); -void bdrv_drain_all(void); +void GRAPH_UNLOCKED bdrv_drain_all(void); void bdrv_aio_cancel(BlockAIOCB *acb); @@ -274,11 +274,16 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); int bdrv_debug_resume(BlockDriverState *bs, const char *tag); bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); -bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); -int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); +bool GRAPH_RDLOCK +bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +int GRAPH_UNLOCKED +bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +int GRAPH_RDLOCK +bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); diff --git a/include/block/block-io.h b/include/block/block-io.h index b99cc98..4cf83fb 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -431,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, * * This function can be recursive. 
*/ -void bdrv_drained_begin(BlockDriverState *bs); +void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs); /** * bdrv_do_drained_begin_quiesce: diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2982dd3..925a3e7 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -396,9 +396,23 @@ struct BlockDriver { int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)( BlockDriverState *bs, HDGeometry *geo); + /** + * Hot add a BDS's child. Used in combination with bdrv_del_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_add_child)( BlockDriverState *parent, BlockDriverState *child, Error **errp); + /** + * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_del_child)( BlockDriverState *parent, BdrvChild *child, Error **errp); @@ -983,9 +997,21 @@ struct BdrvChildClass { bool backing_mask_protocol, Error **errp); - bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); + /* + * Notifies the parent that the child is trying to change its AioContext. + * The parent may in turn change the AioContext of other nodes in the same + * transaction. Returns true if the change is possible and the transaction + * can be continued. Returns false and sets @errp if not and the transaction + * must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ + bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, + GHashTable *visited, + Transaction *tran, Error **errp); /* * I/O API functions. 
These functions are thread-safe. diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 7061ab7..990f3e1 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id); * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. + * + * All block nodes must be drained. */ int GRAPH_WRLOCK block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3d392b0..9d37f86 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); @@ -115,13 +115,57 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { ObjectClass parent_class; - /* basic feature */ + /** + * @setup + * + * Perform basic setup of the container, including configuring IOMMU + * capabilities, IOVA ranges, supported page sizes, etc. + * + * @bcontainer: #VFIOContainerBase + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + + /** + * @listener_begin + * + * Called at the beginning of an address space update transaction. + * See #MemoryListener. 
+ * + * @bcontainer: #VFIOContainerBase + */ void (*listener_begin)(VFIOContainerBase *bcontainer); + + /** + * @listener_commit + * + * Called at the end of an address space update transaction, + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_commit)(VFIOContainerBase *bcontainer); + + /** + * @dma_map + * + * Map an address range into the container. Note that the memory region is + * referenced within an RCU read lock region across this call. + * + * @bcontainer: #VFIOContainerBase to use + * @iova: start address to map + * @size: size of the range to map + * @vaddr: process virtual address of mapping + * @readonly: true if mapping should be readonly + * @mr: the memory region for this mapping + * + * Returns 0 to indicate success and -errno otherwise. + */ int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); /** * @dma_unmap * @@ -132,12 +176,38 @@ struct VFIOIOMMUClass { * @size: size of the range to unmap * @iotlb: The IOMMU TLB mapping entry (or NULL) * @unmap_all: if set, unmap the entire address space + * + * Returns 0 to indicate success and -errno otherwise. */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); + + + /** + * @attach_device + * + * Associate the given device with a container and do some related + * initialization of the device context. + * + * @name: name of the device + * @vbasedev: the device + * @as: address space to use + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); + + /* + * @detach_device + * + * Detach the given device from its container and clean up any necessary + * state. 
+ * + * @vbasedev: the device to disassociate + */ void (*detach_device)(VFIODevice *vbasedev); /* migration feature */ @@ -152,7 +222,7 @@ struct VFIOIOMMUClass { * @start: indicates whether to start or stop dirty pages tracking * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, bool start, Error **errp); @@ -167,7 +237,7 @@ struct VFIOIOMMUClass { * @size: size of iova range * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); @@ -183,4 +253,7 @@ struct VFIOIOMMUClass { void (*release)(VFIOContainerBase *bcontainer); }; +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section); + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h new file mode 100644 index 0000000..750ea5b --- /dev/null +++ b/include/hw/vfio/vfio-cpr.h @@ -0,0 +1,18 @@ +/* + * VFIO CPR + * + * Copyright (c) 2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_VFIO_CPR_H +#define HW_VFIO_VFIO_CPR_H + +struct VFIOContainerBase; + +bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, + Error **errp); +void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); + +#endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h index 809cced..ab849a4 100644 --- a/include/system/host_iommu_device.h +++ b/include/system/host_iommu_device.h @@ -14,6 +14,13 @@ #include "qom/object.h" #include "qapi/error.h" +#ifdef CONFIG_LINUX +#include "linux/iommufd.h" + +typedef union VendorCaps { + struct iommu_hw_info_vtd vtd; + struct iommu_hw_info_arm_smmuv3 smmuv3; +} VendorCaps; /** * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. @@ -22,11 +29,17 @@ * * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + * + * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on + * IOMMUFD this represents a user-space buffer filled by kernel + * with host IOMMU @type specific hardware information data) */ typedef struct HostIOMMUDeviceCaps { uint32_t type; uint64_t hw_caps; + VendorCaps vendor_caps; } HostIOMMUDeviceCaps; +#endif #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) @@ -38,7 +51,9 @@ struct HostIOMMUDevice { void *agent; /* pointer to agent device, ie. 
VFIO or VDPA device */ PCIBus *aliased_bus; int aliased_devfn; +#ifdef CONFIG_LINUX HostIOMMUDeviceCaps caps; +#endif }; /** diff --git a/include/system/iommufd.h b/include/system/iommufd.h index cbab75b..283861b 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -61,6 +61,60 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, uint64_t iova, ram_addr_t size, uint64_t page_size, uint64_t *data, Error **errp); +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp); #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, + HOST_IOMMU_DEVICE_IOMMUFD) + +/* Overload of the host IOMMU device for the iommufd backend */ +struct HostIOMMUDeviceIOMMUFD { + HostIOMMUDevice parent_obj; + + IOMMUFDBackend *iommufd; + uint32_t devid; + uint32_t hwpt_id; +}; + +struct HostIOMMUDeviceIOMMUFDClass { + HostIOMMUDeviceClass parent_class; + + /** + * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @hwpt_id: ID of IOMMUFD hardware page table. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id, + Error **errp); + /** + * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. 
+ */ + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp); +}; + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp); +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp); #endif diff --git a/include/system/memory.h b/include/system/memory.h index fc35a0d..0848690 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -739,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, RamDiscardListener *rdl); /** - * memory_get_xlat_addr: Extract addresses from a TLB entry + * memory_translate_iotlb: Extract addresses from a TLB entry. + * Called with rcu_read_lock held. * * @iotlb: pointer to an #IOMMUTLBEntry - * @vaddr: virtual address - * @ram_addr: RAM address - * @read_only: indicates if writes are allowed - * @mr_has_discard_manager: indicates memory is controlled by a - * RamDiscardManager + * @xlat_p: return the offset of the entry from the start of the returned + * MemoryRegion. * @errp: pointer to Error*, to store an error if it happens. * - * Return: true on success, else false setting @errp with error. + * Return: On success, return the MemoryRegion containing the @iotlb translated + * addr. The MemoryRegion must not be accessed after rcu_read_unlock. + * On failure, return NULL, setting @errp with error. 
*/ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp); +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp); typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; diff --git a/meson.build b/meson.build index ef99467..967a10e 100644 --- a/meson.build +++ b/meson.build @@ -106,6 +106,7 @@ if have_rust endif if have_rust + rustdoc = find_program('rustdoc', required: get_option('rust')) bindgen = find_program('bindgen', required: get_option('rust')) if not bindgen.found() or bindgen.version().version_compare('<0.60.0') if get_option('rust').enabled() @@ -4134,13 +4135,12 @@ common_all = static_library('common', target_common_arch_libs = {} target_common_system_arch_libs = {} foreach target_base_arch, config_base_arch : config_base_arch_mak - config_target = config_target_mak[target] target_inc = [include_directories('target' / target_base_arch)] inc = [common_user_inc + target_inc] - target_common = common_ss.apply(config_target, strict: false) - target_system = system_ss.apply(config_target, strict: false) - target_user = user_ss.apply(config_target, strict: false) + target_common = common_ss.apply(config_base_arch, strict: false) + target_system = system_ss.apply(config_base_arch, strict: false) + target_user = user_ss.apply(config_base_arch, strict: false) common_deps = [] system_deps = [] user_deps = [] @@ -4403,7 +4403,7 @@ foreach target : target_dirs build_by_default: true, build_always_stale: true) rlib = static_library('rust_' + target.underscorify(), - rlib_rs, + structured_sources([], {'.': rlib_rs}), dependencies: target_rust.dependencies(), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'c') @@ -4757,6 +4757,7 @@ if have_rust summary_info += {'Rust target': config_host['RUST_TARGET_TRIPLE']} summary_info += {'rustc': ' 
'.join(rustc.cmd_array())} summary_info += {'rustc version': rustc.version()} + summary_info += {'rustdoc': rustdoc} summary_info += {'bindgen': bindgen.full_path()} summary_info += {'bindgen version': bindgen.version()} endif diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py index 0405e91..b47db00 100755 --- a/python/scripts/vendor.py +++ b/python/scripts/vendor.py @@ -41,8 +41,8 @@ def main() -> int: parser.parse_args() packages = { - "meson==1.5.0": - "52b34f4903b882df52ad0d533146d4b992c018ea77399f825579737672ae7b20", + "meson==1.8.1": + "374bbf71247e629475fc10b0bd2ef66fc418c2d8f4890572f74de0f97d0d42da", } vendor_dir = Path(__file__, "..", "..", "wheels").resolve() diff --git a/python/wheels/meson-1.5.0-py3-none-any.whl b/python/wheels/meson-1.5.0-py3-none-any.whl Binary files differdeleted file mode 100644 index c7edeb3..0000000 --- a/python/wheels/meson-1.5.0-py3-none-any.whl +++ /dev/null diff --git a/python/wheels/meson-1.8.1-py3-none-any.whl b/python/wheels/meson-1.8.1-py3-none-any.whl Binary files differnew file mode 100644 index 0000000..a885f0e --- /dev/null +++ b/python/wheels/meson-1.8.1-py3-none-any.whl diff --git a/pythondeps.toml b/pythondeps.toml index 7eaaa0f..7884ab5 100644 --- a/pythondeps.toml +++ b/pythondeps.toml @@ -19,7 +19,7 @@ [meson] # The install key should match the version in python/wheels/ -meson = { accepted = ">=1.5.0", installed = "1.5.0", canary = "meson" } +meson = { accepted = ">=1.5.0", installed = "1.8.1", canary = "meson" } pycotap = { accepted = ">=1.1.0", installed = "1.3.1" } [docs] @@ -3505,6 +3505,7 @@ static int img_snapshot(int argc, char **argv) break; case SNAPSHOT_DELETE: + bdrv_drain_all_begin(); bdrv_graph_rdlock_main_loop(); ret = bdrv_snapshot_find(bs, &sn, snapshot_name); if (ret < 0) { @@ -3520,6 +3521,7 @@ static int img_snapshot(int argc, char **argv) } } bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); break; } diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 13d580c..bccfe85 
100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -32,6 +32,13 @@ dependencies = [ ] [[package]] +name = "bits" +version = "0.1.0" +dependencies = [ + "qemu_api_macros", +] + +[[package]] name = "either" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -66,6 +73,7 @@ version = "0.1.0" dependencies = [ "bilge", "bilge-impl", + "bits", "qemu_api", "qemu_api_macros", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index d9faeec..fd4c2fb 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "bits", "qemu-api-macros", "qemu-api", "hw/char/pl011", @@ -63,7 +64,6 @@ ignored_unit_patterns = "deny" implicit_clone = "deny" macro_use_imports = "deny" missing_safety_doc = "deny" -multiple_crate_versions = "deny" mut_mut = "deny" needless_bitwise_bool = "deny" needless_pass_by_ref_mut = "deny" diff --git a/rust/bits/Cargo.toml b/rust/bits/Cargo.toml new file mode 100644 index 0000000..1ff38a4 --- /dev/null +++ b/rust/bits/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "bits" +version = "0.1.0" +authors = ["Paolo Bonzini <pbonzini@redhat.com>"] +description = "const-friendly bit flags" +resolver = "2" +publish = false + +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +qemu_api_macros = { path = "../qemu-api-macros" } + +[lints] +workspace = true diff --git a/rust/bits/meson.build b/rust/bits/meson.build new file mode 100644 index 0000000..2a41e13 --- /dev/null +++ b/rust/bits/meson.build @@ -0,0 +1,16 @@ +_bits_rs = static_library( + 'bits', + 'src/lib.rs', + override_options: ['rust_std=2021', 'build.rust_std=2021'], + rust_abi: 'rust', + dependencies: [qemu_api_macros], +) + +bits_rs = declare_dependency(link_with: _bits_rs) + +rust.test('rust-bits-tests', _bits_rs, + suite: ['unit', 'rust']) + +rust.doctest('rust-bits-doctests', _bits_rs, + dependencies: bits_rs, + suite: 
['doc', 'rust']) diff --git a/rust/bits/src/lib.rs b/rust/bits/src/lib.rs new file mode 100644 index 0000000..d485d6b --- /dev/null +++ b/rust/bits/src/lib.rs @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later + +/// # Definition entry point +/// +/// Define a struct with a single field of type $type. Include public constants +/// for each element listed in braces. +/// +/// The unnamed element at the end, if present, can be used to enlarge the set +/// of valid bits. Bits that are valid but not listed are treated normally for +/// the purpose of arithmetic operations, and are printed with their hexadecimal +/// value. +/// +/// The struct implements the following traits: [`BitAnd`](std::ops::BitAnd), +/// [`BitOr`](std::ops::BitOr), [`BitXor`](std::ops::BitXor), +/// [`Not`](std::ops::Not), [`Sub`](std::ops::Sub); [`Debug`](std::fmt::Debug), +/// [`Display`](std::fmt::Display), [`Binary`](std::fmt::Binary), +/// [`Octal`](std::fmt::Octal), [`LowerHex`](std::fmt::LowerHex), +/// [`UpperHex`](std::fmt::UpperHex); [`From`]`<type>`/[`Into`]`<type>` where +/// type is the type specified in the definition. +/// +/// ## Example +/// +/// ``` +/// # use bits::bits; +/// bits! { +/// pub struct Colors(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// WHITE = (1 << 0) | (1 << 1) | (1 << 2), +/// } +/// } +/// ``` +/// +/// ``` +/// # use bits::bits; +/// # bits! { pub struct Colors(u8) { BLACK = 0, RED = 1, GREEN = 1 << 1, BLUE = 1 << 2, } } +/// +/// bits! { +/// pub struct Colors8(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// WHITE = (1 << 0) | (1 << 1) | (1 << 2), +/// +/// _ = 255, +/// } +/// } +/// +/// // The previously defined struct ignores bits not explicitly defined. +/// assert_eq!( +/// Colors::from(255).into_bits(), +/// (Colors::RED | Colors::GREEN | Colors::BLUE).into_bits() +/// ); +/// +/// // Adding "_ = 255" makes it retain other bits as well. 
+/// assert_eq!(Colors8::from(255).into_bits(), 255); +/// +/// // all() does not include the additional bits, valid_bits() does +/// assert_eq!(Colors8::all().into_bits(), Colors::all().into_bits()); +/// assert_eq!(Colors8::valid_bits().into_bits(), 255); +/// ``` +/// +/// # Evaluation entry point +/// +/// Return a constant corresponding to the boolean expression `$expr`. +/// Identifiers in the expression correspond to values defined for the +/// type `$type`. Supported operators are `!` (unary), `-`, `&`, `^`, `|`. +/// +/// ## Examples +/// +/// ``` +/// # use bits::bits; +/// bits! { +/// pub struct Colors(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// // same as "WHITE = 7", +/// WHITE = bits!(Self as u8: RED | GREEN | BLUE), +/// } +/// } +/// +/// let rgb = bits! { Colors: RED | GREEN | BLUE }; +/// assert_eq!(rgb, Colors::WHITE); +/// ``` +#[macro_export] +macro_rules! bits { + { + $(#[$struct_meta:meta])* + $struct_vis:vis struct $struct_name:ident($field_vis:vis $type:ty) { + $($(#[$const_meta:meta])* $const:ident = $val:expr),+ + $(,_ = $mask:expr)? + $(,)? 
+ } + } => { + $(#[$struct_meta])* + #[derive(Clone, Copy, PartialEq, Eq)] + #[repr(transparent)] + $struct_vis struct $struct_name($field_vis $type); + + impl $struct_name { + $( #[allow(dead_code)] $(#[$const_meta])* + pub const $const: $struct_name = $struct_name($val); )+ + + #[doc(hidden)] + const VALID__: $type = $( Self::$const.0 )|+ $(|$mask)?; + + #[allow(dead_code)] + #[inline(always)] + pub const fn empty() -> Self { + Self(0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn all() -> Self { + Self($( Self::$const.0 )|+) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn valid_bits() -> Self { + Self(Self::VALID__) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn valid(val: $type) -> bool { + (val & !Self::VALID__) == 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn any_set(self, mask: Self) -> bool { + (self.0 & mask.0) != 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn all_set(self, mask: Self) -> bool { + (self.0 & mask.0) == mask.0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn none_set(self, mask: Self) -> bool { + (self.0 & mask.0) == 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn from_bits(value: $type) -> Self { + $struct_name(value) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn into_bits(self) -> $type { + self.0 + } + + #[allow(dead_code)] + #[inline(always)] + pub fn set(&mut self, rhs: Self) { + self.0 |= rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub fn clear(&mut self, rhs: Self) { + self.0 &= !rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub fn toggle(&mut self, rhs: Self) { + self.0 ^= rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn intersection(self, rhs: Self) -> Self { + $struct_name(self.0 & rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn difference(self, rhs: Self) -> Self { + $struct_name(self.0 & !rhs.0) + } + + #[allow(dead_code)] + 
#[inline(always)] + pub const fn symmetric_difference(self, rhs: Self) -> Self { + $struct_name(self.0 ^ rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn union(self, rhs: Self) -> Self { + $struct_name(self.0 | rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn invert(self) -> Self { + $struct_name(self.0 ^ Self::VALID__) + } + } + + impl ::std::fmt::Binary for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + // If no width, use the highest valid bit + let width = f.width().unwrap_or((Self::VALID__.ilog2() + 1) as usize); + write!(f, "{:0>width$.precision$b}", self.0, + width = width, + precision = f.precision().unwrap_or(width)) + } + } + + impl ::std::fmt::LowerHex for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::LowerHex>::fmt(&self.0, f) + } + } + + impl ::std::fmt::Octal for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::Octal>::fmt(&self.0, f) + } + } + + impl ::std::fmt::UpperHex for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::UpperHex>::fmt(&self.0, f) + } + } + + impl ::std::fmt::Debug for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + write!(f, "{}({})", stringify!($struct_name), self) + } + } + + impl ::std::fmt::Display for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + use ::std::fmt::Display; + let mut first = true; + let mut left = self.0; + $(if Self::$const.0.is_power_of_two() && (self & Self::$const).0 != 0 { + if first { first = false } else { Display::fmt(&'|', f)?; } + Display::fmt(stringify!($const), f)?; + left -= Self::$const.0; + })+ + if first { + Display::fmt(&'0', f) + } else if left != 0 { + write!(f, "|{left:#x}") + } else { + Ok(()) + } + } + } + + impl 
::std::cmp::PartialEq<$type> for $struct_name { + fn eq(&self, rhs: &$type) -> bool { + self.0 == *rhs + } + } + + impl ::std::ops::BitAnd<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitand(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 & rhs.0) + } + } + + impl ::std::ops::BitAndAssign<$struct_name> for $struct_name { + fn bitand_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 & rhs.0 + } + } + + impl ::std::ops::BitXor<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitxor(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 ^ rhs.0) + } + } + + impl ::std::ops::BitXorAssign<$struct_name> for $struct_name { + fn bitxor_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 ^ rhs.0 + } + } + + impl ::std::ops::BitOr<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitor(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 | rhs.0) + } + } + + impl ::std::ops::BitOrAssign<$struct_name> for $struct_name { + fn bitor_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 | rhs.0 + } + } + + impl ::std::ops::Sub<$struct_name> for &$struct_name { + type Output = $struct_name; + fn sub(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 & !rhs.0) + } + } + + impl ::std::ops::SubAssign<$struct_name> for $struct_name { + fn sub_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 - rhs.0 + } + } + + impl ::std::ops::Not for &$struct_name { + type Output = $struct_name; + fn not(self) -> Self::Output { + $struct_name(self.0 ^ $struct_name::VALID__) + } + } + + impl ::std::ops::BitAnd<$struct_name> for $struct_name { + type Output = Self; + fn bitand(self, rhs: Self) -> Self::Output { + $struct_name(self.0 & rhs.0) + } + } + + impl ::std::ops::BitXor<$struct_name> for $struct_name { + type Output = Self; + fn bitxor(self, rhs: Self) -> Self::Output { + $struct_name(self.0 ^ rhs.0) + } + } + + impl ::std::ops::BitOr<$struct_name> for 
$struct_name { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + $struct_name(self.0 | rhs.0) + } + } + + impl ::std::ops::Sub<$struct_name> for $struct_name { + type Output = Self; + fn sub(self, rhs: Self) -> Self::Output { + $struct_name(self.0 & !rhs.0) + } + } + + impl ::std::ops::Not for $struct_name { + type Output = Self; + fn not(self) -> Self::Output { + $struct_name(self.0 ^ Self::VALID__) + } + } + + impl From<$struct_name> for $type { + fn from(x: $struct_name) -> $type { + x.0 + } + } + + impl From<$type> for $struct_name { + fn from(x: $type) -> Self { + $struct_name(x & Self::VALID__) + } + } + }; + + { $type:ty: $expr:expr } => { + ::qemu_api_macros::bits_const_internal! { $type @ ($expr) } + }; + + { $type:ty as $int_type:ty: $expr:expr } => { + (::qemu_api_macros::bits_const_internal! { $type @ ($expr) }.into_bits()) as $int_type + }; +} + +#[cfg(test)] +mod test { + bits! { + pub struct InterruptMask(u32) { + OE = 1 << 10, + BE = 1 << 9, + PE = 1 << 8, + FE = 1 << 7, + RT = 1 << 6, + TX = 1 << 5, + RX = 1 << 4, + DSR = 1 << 3, + DCD = 1 << 2, + CTS = 1 << 1, + RI = 1 << 0, + + E = bits!(Self as u32: OE | BE | PE | FE), + MS = bits!(Self as u32: RI | DSR | DCD | CTS), + } + } + + #[test] + pub fn test_not() { + assert_eq!( + !InterruptMask::from(InterruptMask::RT.0), + InterruptMask::E | InterruptMask::MS | InterruptMask::TX | InterruptMask::RX + ); + } + + #[test] + pub fn test_and() { + assert_eq!( + InterruptMask::from(0), + InterruptMask::MS & InterruptMask::OE + ) + } + + #[test] + pub fn test_or() { + assert_eq!( + InterruptMask::E, + InterruptMask::OE | InterruptMask::BE | InterruptMask::PE | InterruptMask::FE + ); + } + + #[test] + pub fn test_xor() { + assert_eq!( + InterruptMask::E ^ InterruptMask::BE, + InterruptMask::OE | InterruptMask::PE | InterruptMask::FE + ); + } +} diff --git a/rust/hw/char/pl011/Cargo.toml b/rust/hw/char/pl011/Cargo.toml index a1f431a..003ef96 100644 --- a/rust/hw/char/pl011/Cargo.toml +++ 
b/rust/hw/char/pl011/Cargo.toml @@ -18,6 +18,7 @@ crate-type = ["staticlib"] [dependencies] bilge = { version = "0.2.0" } bilge-impl = { version = "0.2.0" } +bits = { path = "../../../bits" } qemu_api = { path = "../../../qemu-api" } qemu_api_macros = { path = "../../../qemu-api-macros" } diff --git a/rust/hw/char/pl011/meson.build b/rust/hw/char/pl011/meson.build index 494b6c1..2a1be32 100644 --- a/rust/hw/char/pl011/meson.build +++ b/rust/hw/char/pl011/meson.build @@ -6,6 +6,7 @@ _libpl011_rs = static_library( dependencies: [ bilge_rs, bilge_impl_rs, + bits_rs, qemu_api, qemu_api_macros, ], diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index bd5cee0..0501fa5 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -85,8 +85,8 @@ pub struct PL011Registers { #[doc(alias = "cr")] pub control: registers::Control, pub dmacr: u32, - pub int_enabled: u32, - pub int_level: u32, + pub int_enabled: Interrupt, + pub int_level: Interrupt, pub read_fifo: Fifo, pub ilpr: u32, pub ibrd: u32, @@ -199,9 +199,9 @@ impl PL011Registers { LCR_H => u32::from(self.line_control), CR => u32::from(self.control), FLS => self.ifl, - IMSC => self.int_enabled, - RIS => self.int_level, - MIS => self.int_level & self.int_enabled, + IMSC => u32::from(self.int_enabled), + RIS => u32::from(self.int_level), + MIS => u32::from(self.int_level & self.int_enabled), ICR => { // "The UARTICR Register is the interrupt clear register and is write-only" // Source: ARM DDI 0183G 3.3.13 Interrupt Clear Register, UARTICR @@ -263,13 +263,13 @@ impl PL011Registers { self.set_read_trigger(); } IMSC => { - self.int_enabled = value; + self.int_enabled = Interrupt::from(value); return true; } RIS => {} MIS => {} ICR => { - self.int_level &= !value; + self.int_level &= !Interrupt::from(value); return true; } DMACR => { @@ -295,7 +295,7 @@ impl PL011Registers { self.flags.set_receive_fifo_empty(true); } if self.read_count + 1 == self.read_trigger { - 
self.int_level &= !Interrupt::RX.0; + self.int_level &= !Interrupt::RX; } self.receive_status_error_clear.set_from_data(c); *update = true; @@ -305,7 +305,7 @@ impl PL011Registers { fn write_data_register(&mut self, value: u32) -> bool { // interrupts always checked let _ = self.loopback_tx(value.into()); - self.int_level |= Interrupt::TX.0; + self.int_level |= Interrupt::TX; true } @@ -361,19 +361,19 @@ impl PL011Registers { // Change interrupts based on updated FR let mut il = self.int_level; - il &= !Interrupt::MS.0; + il &= !Interrupt::MS; if self.flags.data_set_ready() { - il |= Interrupt::DSR.0; + il |= Interrupt::DSR; } if self.flags.data_carrier_detect() { - il |= Interrupt::DCD.0; + il |= Interrupt::DCD; } if self.flags.clear_to_send() { - il |= Interrupt::CTS.0; + il |= Interrupt::CTS; } if self.flags.ring_indicator() { - il |= Interrupt::RI.0; + il |= Interrupt::RI; } self.int_level = il; true @@ -391,8 +391,8 @@ impl PL011Registers { self.line_control.reset(); self.receive_status_error_clear.reset(); self.dmacr = 0; - self.int_enabled = 0; - self.int_level = 0; + self.int_enabled = 0.into(); + self.int_level = 0.into(); self.ilpr = 0; self.ibrd = 0; self.fbrd = 0; @@ -451,7 +451,7 @@ impl PL011Registers { } if self.read_count == self.read_trigger { - self.int_level |= Interrupt::RX.0; + self.int_level |= Interrupt::RX; return true; } false @@ -632,7 +632,7 @@ impl PL011State { let regs = self.regs.borrow(); let flags = regs.int_level & regs.int_enabled; for (irq, i) in self.interrupts.iter().zip(IRQMASK) { - irq.set(flags & i != 0); + irq.set(flags.any_set(i)); } } @@ -642,14 +642,13 @@ impl PL011State { } /// Which bits in the interrupt status matter for each outbound IRQ line ? 
-const IRQMASK: [u32; 6] = [ - /* combined IRQ */ - Interrupt::E.0 | Interrupt::MS.0 | Interrupt::RT.0 | Interrupt::TX.0 | Interrupt::RX.0, - Interrupt::RX.0, - Interrupt::TX.0, - Interrupt::RT.0, - Interrupt::MS.0, - Interrupt::E.0, +const IRQMASK: [Interrupt; 6] = [ + Interrupt::all(), + Interrupt::RX, + Interrupt::TX, + Interrupt::RT, + Interrupt::MS, + Interrupt::E, ]; /// # Safety diff --git a/rust/hw/char/pl011/src/registers.rs b/rust/hw/char/pl011/src/registers.rs index 690feb6..7ececd3 100644 --- a/rust/hw/char/pl011/src/registers.rs +++ b/rust/hw/char/pl011/src/registers.rs @@ -9,7 +9,8 @@ // https://developer.arm.com/documentation/ddi0183/latest/ use bilge::prelude::*; -use qemu_api::impl_vmstate_bitsized; +use bits::bits; +use qemu_api::{impl_vmstate_bitsized, impl_vmstate_forward}; /// Offset of each register from the base memory address of the device. #[doc(alias = "offset")] @@ -326,22 +327,24 @@ impl Default for Control { } } -/// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC -pub struct Interrupt(pub u32); +bits! 
{ + /// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC + #[derive(Default)] + pub struct Interrupt(u32) { + OE = 1 << 10, + BE = 1 << 9, + PE = 1 << 8, + FE = 1 << 7, + RT = 1 << 6, + TX = 1 << 5, + RX = 1 << 4, + DSR = 1 << 3, + DCD = 1 << 2, + CTS = 1 << 1, + RI = 1 << 0, -impl Interrupt { - pub const OE: Self = Self(1 << 10); - pub const BE: Self = Self(1 << 9); - pub const PE: Self = Self(1 << 8); - pub const FE: Self = Self(1 << 7); - pub const RT: Self = Self(1 << 6); - pub const TX: Self = Self(1 << 5); - pub const RX: Self = Self(1 << 4); - pub const DSR: Self = Self(1 << 3); - pub const DCD: Self = Self(1 << 2); - pub const CTS: Self = Self(1 << 1); - pub const RI: Self = Self(1 << 0); - - pub const E: Self = Self(Self::OE.0 | Self::BE.0 | Self::PE.0 | Self::FE.0); - pub const MS: Self = Self(Self::RI.0 | Self::DSR.0 | Self::DCD.0 | Self::CTS.0); + E = bits!(Self as u32: OE | BE | PE | FE), + MS = bits!(Self as u32: RI | DSR | DCD | CTS), + } } +impl_vmstate_forward!(Interrupt); diff --git a/rust/meson.build b/rust/meson.build index 1f0dcce..b1b3315 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -14,7 +14,10 @@ quote_rs_native = dependency('quote-1-rs', native: true) syn_rs_native = dependency('syn-2-rs', native: true) proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true) +qemuutil_rs = qemuutil.partial_dependency(link_args: true, links: true) + subdir('qemu-api-macros') +subdir('bits') subdir('qemu-api') subdir('hw') @@ -22,21 +25,9 @@ subdir('hw') cargo = find_program('cargo', required: false) if cargo.found() - run_target('clippy', - command: [config_host['MESON'], 'devenv', - '--workdir', '@CURRENT_SOURCE_DIR@', - cargo, 'clippy', '--tests'], - depends: bindings_rs) - run_target('rustfmt', command: [config_host['MESON'], 'devenv', '--workdir', '@CURRENT_SOURCE_DIR@', cargo, 'fmt'], depends: bindings_rs) - - run_target('rustdoc', - command: [config_host['MESON'], 'devenv', - '--workdir', '@CURRENT_SOURCE_DIR@', - cargo, 
'doc', '--no-deps', '--document-private-items'], - depends: bindings_rs) endif diff --git a/rust/qemu-api-macros/src/bits.rs b/rust/qemu-api-macros/src/bits.rs new file mode 100644 index 0000000..5ba8475 --- /dev/null +++ b/rust/qemu-api-macros/src/bits.rs @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later + +// shadowing is useful together with "if let" +#![allow(clippy::shadow_unrelated)] + +use proc_macro2::{ + Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree, TokenTree as TT, +}; + +use crate::utils::MacroError; + +pub struct BitsConstInternal { + typ: TokenTree, +} + +fn paren(ts: TokenStream) -> TokenTree { + TT::Group(Group::new(Delimiter::Parenthesis, ts)) +} + +fn ident(s: &'static str) -> TokenTree { + TT::Ident(Ident::new(s, Span::call_site())) +} + +fn punct(ch: char) -> TokenTree { + TT::Punct(Punct::new(ch, Spacing::Alone)) +} + +/// Implements a recursive-descent parser that translates Boolean expressions on +/// bitmasks to invocations of `const` functions defined by the `bits!` macro. +impl BitsConstInternal { + // primary ::= '(' or ')' + // | ident + // | '!' 
ident + fn parse_primary( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + let next = match tok { + TT::Group(ref g) => { + if g.delimiter() != Delimiter::Parenthesis && g.delimiter() != Delimiter::None { + return Err(MacroError::Message("expected parenthesis".into(), g.span())); + } + let mut stream = g.stream().into_iter(); + let Some(first_tok) = stream.next() else { + return Err(MacroError::Message( + "expected operand, found ')'".into(), + g.span(), + )); + }; + let mut output = TokenStream::new(); + // start from the lowest precedence + let next = self.parse_or(first_tok, &mut stream, &mut output)?; + if let Some(tok) = next { + return Err(MacroError::Message( + format!("unexpected token {tok}"), + tok.span(), + )); + } + out.extend(Some(paren(output))); + it.next() + } + TT::Ident(_) => { + let mut output = TokenStream::new(); + output.extend([ + self.typ.clone(), + TT::Punct(Punct::new(':', Spacing::Joint)), + TT::Punct(Punct::new(':', Spacing::Joint)), + tok, + ]); + out.extend(Some(paren(output))); + it.next() + } + TT::Punct(ref p) => { + if p.as_char() != '!' 
{ + return Err(MacroError::Message("expected operand".into(), p.span())); + } + let Some(rhs_tok) = it.next() else { + return Err(MacroError::Message( + "expected operand at end of input".into(), + p.span(), + )); + }; + let next = self.parse_primary(rhs_tok, it, out)?; + out.extend([punct('.'), ident("invert"), paren(TokenStream::new())]); + next + } + _ => { + return Err(MacroError::Message("unexpected literal".into(), tok.span())); + } + }; + Ok(next) + } + + fn parse_binop< + F: Fn( + &Self, + TokenTree, + &mut dyn Iterator<Item = TokenTree>, + &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError>, + >( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ch: char, + f: F, + method: &'static str, + ) -> Result<Option<TokenTree>, MacroError> { + let mut next = f(self, tok, it, out)?; + while next.is_some() { + let op = next.as_ref().unwrap(); + let TT::Punct(ref p) = op else { break }; + if p.as_char() != ch { + break; + } + + let Some(rhs_tok) = it.next() else { + return Err(MacroError::Message( + "expected operand at end of input".into(), + p.span(), + )); + }; + let mut rhs = TokenStream::new(); + next = f(self, rhs_tok, it, &mut rhs)?; + out.extend([punct('.'), ident(method), paren(rhs)]); + } + Ok(next) + } + + // sub ::= primary ('-' primary)* + pub fn parse_sub( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '-', Self::parse_primary, "difference") + } + + // and ::= sub ('&' sub)* + fn parse_and( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '&', Self::parse_sub, "intersection") + } + + // xor ::= and ('^' and)* + fn parse_xor( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) ->
Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '^', Self::parse_and, "symmetric_difference") + } + + // or ::= xor ('|' xor)* + pub fn parse_or( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '|', Self::parse_xor, "union") + } + + pub fn parse( + it: &mut dyn Iterator<Item = TokenTree>, + ) -> Result<proc_macro2::TokenStream, MacroError> { + let mut pos = Span::call_site(); + let mut typ = proc_macro2::TokenStream::new(); + + // Gobble everything up to an `@` sign, which is followed by a + // parenthesized expression; that is, all token trees except the + // last two form the type. + let next = loop { + let tok = it.next(); + if let Some(ref t) = tok { + pos = t.span(); + } + match tok { + None => break None, + Some(TT::Punct(ref p)) if p.as_char() == '@' => { + let tok = it.next(); + if let Some(ref t) = tok { + pos = t.span(); + } + break tok; + } + Some(x) => typ.extend(Some(x)), + } + }; + + let Some(tok) = next else { + return Err(MacroError::Message( + "expected expression, do not call this macro directly".into(), + pos, + )); + }; + let TT::Group(ref _group) = tok else { + return Err(MacroError::Message( + "expected parenthesis, do not call this macro directly".into(), + tok.span(), + )); + }; + let mut out = TokenStream::new(); + let state = Self { + typ: TT::Group(Group::new(Delimiter::None, typ)), + }; + + let next = state.parse_primary(tok, it, &mut out)?; + + // A parenthesized expression is a single production of the grammar, + // so the input must have reached the last token. 
+ if let Some(tok) = next { + return Err(MacroError::Message( + format!("unexpected token {tok}"), + tok.span(), + )); + } + Ok(out) + } +} diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs index f97449b..1034707 100644 --- a/rust/qemu-api-macros/src/lib.rs +++ b/rust/qemu-api-macros/src/lib.rs @@ -12,6 +12,9 @@ use syn::{ mod utils; use utils::MacroError; +mod bits; +use bits::BitsConstInternal; + fn get_fields<'a>( input: &'a DeriveInput, msg: &str, @@ -190,23 +193,51 @@ fn get_variants(input: &DeriveInput) -> Result<&Punctuated<Variant, Comma>, Macr } #[rustfmt::skip::macros(quote)] +fn derive_tryinto_body( + name: &Ident, + variants: &Punctuated<Variant, Comma>, + repr: &Path, +) -> Result<proc_macro2::TokenStream, MacroError> { + let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect(); + + Ok(quote! { + #(const #discriminants: #repr = #name::#discriminants as #repr;)*; + match value { + #(#discriminants => Ok(#name::#discriminants),)* + _ => Err(value), + } + }) +} + +#[rustfmt::skip::macros(quote)] fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> { let repr = get_repr_uN(&input, "#[derive(TryInto)]")?; - let name = &input.ident; - let variants = get_variants(&input)?; - let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect(); + let body = derive_tryinto_body(name, get_variants(&input)?, &repr)?; + let errmsg = format!("invalid value for {name}"); Ok(quote! 
{ + impl #name { + #[allow(dead_code)] + pub const fn into_bits(self) -> #repr { + self as #repr + } + + #[allow(dead_code)] + pub const fn from_bits(value: #repr) -> Self { + match ({ + #body + }) { + Ok(x) => x, + Err(_) => panic!(#errmsg) + } + } + } impl core::convert::TryFrom<#repr> for #name { type Error = #repr; fn try_from(value: #repr) -> Result<Self, Self::Error> { - #(const #discriminants: #repr = #name::#discriminants as #repr;)*; - match value { - #(#discriminants => Ok(Self::#discriminants),)* - _ => Err(value), - } + #body } } }) @@ -219,3 +250,12 @@ pub fn derive_tryinto(input: TokenStream) -> TokenStream { TokenStream::from(expanded) } + +#[proc_macro] +pub fn bits_const_internal(ts: TokenStream) -> TokenStream { + let ts = proc_macro2::TokenStream::from(ts); + let mut it = ts.into_iter(); + + let expanded = BitsConstInternal::parse(&mut it).unwrap_or_else(Into::into); + TokenStream::from(expanded) +} diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 1ea86b8..b532281 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -35,32 +35,24 @@ _qemu_api_rs = static_library( override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', rust_args: _qemu_api_cfg, - dependencies: [libc_rs, qemu_api_macros], + dependencies: [libc_rs, qemu_api_macros, qemuutil_rs, + qom, hwcore, chardev, migration], ) rust.test('rust-qemu-api-tests', _qemu_api_rs, suite: ['unit', 'rust']) -qemu_api = declare_dependency(link_with: _qemu_api_rs) +qemu_api = declare_dependency(link_with: [_qemu_api_rs], + dependencies: [qemu_api_macros, qom, hwcore, chardev, migration]) -# Rust executables do not support objects, so add an intermediate step. 
-rust_qemu_api_objs = static_library( - 'rust_qemu_api_objs', - objects: [libqom.extract_all_objects(recursive: false), - libhwcore.extract_all_objects(recursive: false), - libchardev.extract_all_objects(recursive: false), - libcrypto.extract_all_objects(recursive: false), - libauthz.extract_all_objects(recursive: false), - libio.extract_all_objects(recursive: false), - libmigration.extract_all_objects(recursive: false)]) -rust_qemu_api_deps = declare_dependency( - dependencies: [ - qom_ss.dependencies(), - chardev_ss.dependencies(), - crypto_ss.dependencies(), - authz_ss.dependencies(), - io_ss.dependencies()], - link_whole: [rust_qemu_api_objs, libqemuutil]) +# Doctests are essentially integration tests, so they need the same dependencies. +# Note that running them requires the object files for C code, so place them +# in a separate suite that is run by the "build" CI jobs rather than "check". +rust.doctest('rust-qemu-api-doctests', + _qemu_api_rs, + protocol: 'rust', + dependencies: qemu_api, + suite: ['doc', 'rust']) test('rust-qemu-api-integration', executable( @@ -69,7 +61,7 @@ test('rust-qemu-api-integration', override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_args: ['--test'], install: false, - dependencies: [qemu_api, qemu_api_macros, rust_qemu_api_deps]), + dependencies: [qemu_api]), args: [ '--test', '--test-threads', '1', '--format', 'pretty', diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs index 3c1d297..057de4b 100644 --- a/rust/qemu-api/src/bindings.rs +++ b/rust/qemu-api/src/bindings.rs @@ -11,6 +11,7 @@ clippy::restriction, clippy::style, clippy::missing_const_for_fn, + clippy::ptr_offset_with_cast, clippy::useless_transmute, clippy::missing_safety_doc )] diff --git a/rust/qemu-api/src/cell.rs b/rust/qemu-api/src/cell.rs index 05ce09f..27063b0 100644 --- a/rust/qemu-api/src/cell.rs +++ b/rust/qemu-api/src/cell.rs @@ -225,27 +225,23 @@ use crate::bindings; /// An internal function that is used by doctests. 
pub fn bql_start_test() { - if cfg!(MESON) { - // SAFETY: integration tests are run with --test-threads=1, while - // unit tests and doctests are not multithreaded and do not have - // any BQL-protected data. Just set bql_locked to true. - unsafe { - bindings::rust_bql_mock_lock(); - } + // SAFETY: integration tests are run with --test-threads=1, while + // unit tests and doctests are not multithreaded and do not have + // any BQL-protected data. Just set bql_locked to true. + unsafe { + bindings::rust_bql_mock_lock(); } } pub fn bql_locked() -> bool { // SAFETY: the function does nothing but return a thread-local bool - !cfg!(MESON) || unsafe { bindings::bql_locked() } + unsafe { bindings::bql_locked() } } fn bql_block_unlock(increase: bool) { - if cfg!(MESON) { - // SAFETY: this only adjusts a counter - unsafe { - bindings::bql_block_unlock(increase); - } + // SAFETY: this only adjusts a counter + unsafe { + bindings::bql_block_unlock(increase); } } diff --git a/scripts/rust/rustc_args.py b/scripts/rust/rustc_args.py index 2633157..63b0748 100644 --- a/scripts/rust/rustc_args.py +++ b/scripts/rust/rustc_args.py @@ -104,10 +104,7 @@ def generate_lint_flags(cargo_toml: CargoTOML, strict_lints: bool) -> Iterable[s else: raise Exception(f"invalid level {level} for {prefix}{lint}") - # This may change if QEMU ever invokes clippy-driver or rustdoc by - # hand. For now, check the syntax but do not add non-rustc lints to - # the command line. 
- if k == "rust" and not (strict_lints and lint in STRICT_LINTS): + if not (strict_lints and lint in STRICT_LINTS): lint_list.append(LintFlag(flags=[flag, prefix + lint], priority=priority)) if strict_lints: diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index a74d61f..2688d4b 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -36,8 +36,17 @@ def generate_h_begin(events, group): def generate_h(event, group): - out(' _simple_%(api)s(%(args)s);', + event_id = 'TRACE_' + event.name.upper() + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % event_id + out(' if (%(cond)s) {', + ' _simple_%(api)s(%(args)s);', + ' }', api=event.api(), + cond=cond, args=", ".join(event.args.names())) @@ -72,22 +81,10 @@ def generate_c(event, group): if len(event.args) == 0: sizestr = '0' - event_id = 'TRACE_' + event.name.upper() - if "vcpu" in event.properties: - # already checked on the generic format code - cond = "true" - else: - cond = "trace_event_get_state(%s)" % event_id - out('', - ' if (!%(cond)s) {', - ' return;', - ' }', - '', ' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {', ' return; /* Trace Buffer Full, Event Dropped ! */', ' }', - cond=cond, event_obj=event.api(event.QEMU_EVENT), size_str=sizestr) diff --git a/system/memory.c b/system/memory.c index 63b983e..306e9ff 100644 --- a/system/memory.c +++ b/system/memory.c @@ -2174,18 +2174,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, } /* Called with rcu_read_lock held. 
*/ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp) +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { MemoryRegion *mr; hwaddr xlat; hwaddr len = iotlb->addr_mask + 1; bool writable = iotlb->perm & IOMMU_WO; - if (mr_has_discard_manager) { - *mr_has_discard_manager = false; - } /* * The IOMMU TLB entry we have just covers translation through * this IOMMU to its immediate target. We need to translate @@ -2195,7 +2191,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED); if (!memory_region_is_ram(mr)) { error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat); - return false; + return NULL; } else if (memory_region_has_ram_discard_manager(mr)) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); MemoryRegionSection tmp = { @@ -2203,9 +2199,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, .offset_within_region = xlat, .size = int128_make64(len), }; - if (mr_has_discard_manager) { - *mr_has_discard_manager = true; - } /* * Malicious VMs can map memory into the IOMMU, which is expected * to remain discarded. vfio will pin all pages, populating memory. 
@@ -2216,7 +2209,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_setg(errp, "iommu map to discarded memory (e.g., unplugged" " via virtio-mem): %" HWADDR_PRIx "", iotlb->translated_addr); - return false; + return NULL; } } @@ -2226,22 +2219,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, */ if (len & iotlb->addr_mask) { error_setg(errp, "iommu has granularity incompatible with target AS"); - return false; - } - - if (vaddr) { - *vaddr = memory_region_get_ram_ptr(mr) + xlat; - } - - if (ram_addr) { - *ram_addr = memory_region_get_ram_addr(mr) + xlat; - } - - if (read_only) { - *read_only = !writable || mr->readonly; + return NULL; } - return true; + *xlat_p = xlat; + return mr; } void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c9bd344..40aefb3 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -900,6 +900,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_ECX_FEATURES 0 #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 @@ -1150,6 +1151,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "msr-imm", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_ECX, + }, + .tcg_features = TCG_7_1_ECX_FEATURES, + }, [FEAT_7_1_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1804,10 +1824,6 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, { - .from = { FEAT_7_1_EAX, 
CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, - { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, }, @@ -7446,9 +7462,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *ecx = env->features[FEAT_7_1_ECX]; *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; - *ecx = 0; } else if (count == 2) { *edx = env->features[FEAT_7_2_EDX]; *eax = 0; @@ -8353,6 +8369,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1146465..545851c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -668,6 +668,7 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ @@ -1000,6 +1001,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Linear Address Masking */ #define CPUID_7_1_EAX_LAM (1U << 26) +/* The immediate form of MSR access instructions */ +#define CPUID_7_1_ECX_MSR_IMM (1U << 5) + /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ @@ -1023,6 +1027,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_2_EDX_DDPD_U (1U << 3) /* 
Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ #define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c index 0a21ae5..820ca36 100644 --- a/target/i386/kvm/tdx.c +++ b/target/i386/kvm/tdx.c @@ -284,7 +284,7 @@ static void tdx_post_init_vcpus(void) hob = tdx_get_hob_entry(tdx_guest); CPU_FOREACH(cpu) { - tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address, + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, &error_fatal); } } @@ -339,7 +339,7 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused) uint32_t flags; region = (struct kvm_tdx_init_mem_region) { - .source_addr = (uint64_t)entry->mem_ptr, + .source_addr = (uintptr_t)entry->mem_ptr, .gpa = entry->address, .nr_pages = entry->size >> 12, }; @@ -893,16 +893,16 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) { if ((tdx->attributes & ~tdx_caps->supported_attrs)) { - error_setg(errp, "Invalid attributes 0x%lx for TDX VM " - "(KVM supported: 0x%llx)", tdx->attributes, - tdx_caps->supported_attrs); + error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM " + "(KVM supported: 0x%"PRIx64")", tdx->attributes, + (uint64_t)tdx_caps->supported_attrs); return -1; } if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { error_setg(errp, "Some QEMU unsupported TD attribute bits being " - "requested: 0x%lx (QEMU supported: 0x%llx)", - tdx->attributes, TDX_SUPPORTED_TD_ATTRS); + "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")", + tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS); return -1; } @@ -931,8 +931,8 @@ static int setup_td_xfam(X86CPU *x86cpu, Error **errp) env->features[FEAT_XSAVE_XSS_HI]; if (xfam & ~tdx_caps->supported_xfam) { - error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", - xfam, 
tdx_caps->supported_xfam); + error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))", + xfam, (uint64_t)tdx_caps->supported_xfam); return -1; } @@ -999,14 +999,14 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { - error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency " + error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency " "between [%d, %d] kHz", env->tsc_khz, TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); return -EINVAL; } if (env->tsc_khz % (25 * 1000)) { - error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz", + error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz", env->tsc_khz); return -EINVAL; } @@ -1014,7 +1014,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); if (r < 0) { - error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz", + error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz", env->tsc_khz); return r; } @@ -1139,7 +1139,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) uint64_t gpa = -1ull; if (error_code & 0xffff) { - error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx", + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, error_code); return -1; } diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker index fe4a6ed..4a03330 100644 --- a/tests/docker/dockerfiles/fedora-rust-nightly.docker +++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker @@ -156,6 +156,7 @@ ENV PYTHON "/usr/bin/python3" RUN dnf install -y wget ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo ENV 
RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc +ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo RUN set -eux && \ rustArch='x86_64-unknown-linux-gnu' && \ @@ -170,6 +171,7 @@ RUN set -eux && \ /usr/local/cargo/bin/rustup run nightly cargo --version && \ /usr/local/cargo/bin/rustup run nightly rustc --version && \ test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \ + test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \ test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)" ENV PATH=$CARGO_HOME/bin:$PATH RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker index 4a1cf2b..28a6f93 100644 --- a/tests/docker/dockerfiles/ubuntu2204.docker +++ b/tests/docker/dockerfiles/ubuntu2204.docker @@ -151,6 +151,7 @@ ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" ENV RUSTC=/usr/bin/rustc-1.77 +ENV RUSTDOC=/usr/bin/rustdoc-1.77 ENV CARGO_HOME=/usr/local/cargo ENV PATH=$CARGO_HOME/bin:$PATH RUN DEBIAN_FRONTEND=noninteractive eatmydata \ diff --git a/tests/lcitool/mappings.yml b/tests/lcitool/mappings.yml index 673baf3..8f0e95e 100644 --- a/tests/lcitool/mappings.yml +++ b/tests/lcitool/mappings.yml @@ -8,6 +8,10 @@ mappings: meson: OpenSUSELeap15: + # Use Meson from PyPI wherever Rust is enabled + Debian: + Fedora: + Ubuntu: python3: OpenSUSELeap15: python311-base @@ -72,7 +76,7 @@ mappings: pypi_mappings: # Request more recent version meson: - default: meson==1.5.0 + default: meson==1.8.1 # Drop packages that need devel headers python3-numpy: diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh index 8474ea8..d3488b2 100755 --- a/tests/lcitool/refresh +++ b/tests/lcitool/refresh @@ -121,6 +121,7 @@ 
fedora_rustup_nightly_extras = [ "RUN dnf install -y wget\n", "ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo\n", "ENV RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc\n", + "ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc\n", "ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo\n", "RUN set -eux && \\\n", " rustArch='x86_64-unknown-linux-gnu' && \\\n", @@ -135,6 +136,7 @@ fedora_rustup_nightly_extras = [ " /usr/local/cargo/bin/rustup run nightly cargo --version && \\\n", " /usr/local/cargo/bin/rustup run nightly rustc --version && \\\n", ' test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \\\n', + ' test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \\\n', ' test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)"\n', 'ENV PATH=$CARGO_HOME/bin:$PATH\n', 'RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli\n', @@ -143,6 +145,7 @@ fedora_rustup_nightly_extras = [ ubuntu2204_rust_extras = [ "ENV RUSTC=/usr/bin/rustc-1.77\n", + "ENV RUSTDOC=/usr/bin/rustdoc-1.77\n", "ENV CARGO_HOME=/usr/local/cargo\n", 'ENV PATH=$CARGO_HOME/bin:$PATH\n', "RUN DEBIAN_FRONTEND=noninteractive eatmydata \\\n", diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 index 9b281e1..f8af9ff 100755 --- a/tests/qemu-iotests/240 +++ b/tests/qemu-iotests/240 @@ -81,8 +81,6 @@ class TestCase(iotests.QMPTestCase): self.vm.qmp_log('device_del', id='scsi-hd0') self.vm.event_wait('DEVICE_DELETED') - self.vm.qmp_log('device_add', id='scsi-hd1', driver='scsi-hd', drive='hd0', bus="scsi1.0") - self.vm.qmp_log('device_del', id='scsi-hd1') self.vm.event_wait('DEVICE_DELETED') self.vm.qmp_log('blockdev-del', node_name='hd0') diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out index 89ed25e..10dcc42 100644 --- a/tests/qemu-iotests/240.out +++ b/tests/qemu-iotests/240.out @@ -46,10 +46,8 @@ 
{"execute": "device_add", "arguments": {"bus": "scsi0.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd0"}} {"return": {}} {"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} -{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} -{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} -{"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} +{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} {"execute": "device_del", "arguments": {"id": "scsi-hd1"}} {"return": {}} diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io index 194fda5..dca1167 100755 --- a/tests/qemu-iotests/tests/graph-changes-while-io +++ b/tests/qemu-iotests/tests/graph-changes-while-io @@ -27,6 +27,7 @@ from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ top = os.path.join(iotests.test_dir, 'top.img') +mid = os.path.join(iotests.test_dir, 'mid.img') nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') @@ -57,6 +58,16 @@ class TestGraphChangesWhileIO(QMPTestCase): def tearDown(self) -> None: self.qsd.stop() + os.remove(top) + + def _wait_for_blockjob(self, status: str) -> None: + done = False + while not done: + for event in self.qsd.get_qmp().get_events(wait=10.0): + if event['event'] != 'JOB_STATUS_CHANGE': + continue + if event['data']['status'] == status: + done = True def test_blockdev_add_while_io(self) -> None: # Run qemu-img bench in the background @@ -116,15 +127,92 @@ class TestGraphChangesWhileIO(QMPTestCase): 'device': 'job0', }) - cancelled = False - while not cancelled: - for event in self.qsd.get_qmp().get_events(wait=10.0): - if event['event'] != 'JOB_STATUS_CHANGE': - continue - if event['data']['status'] == 'null': - cancelled = True + self._wait_for_blockjob('null') + + bench_thr.join() + + def 
test_remove_lower_snapshot_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench, args=(100000, )) + bench_thr.start() + + # While I/O is performed on 'node0' node, consequently add 2 snapshots + # on top of it, then remove (commit) them starting from lower one. + while bench_thr.is_alive(): + # Recreate snapshot images on every iteration + qemu_img_create('-f', imgfmt, mid, '1G') + qemu_img_create('-f', imgfmt, top, '1G') + + self.qsd.cmd('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'mid', + 'file': { + 'driver': 'file', + 'filename': mid + } + }) + + self.qsd.cmd('blockdev-snapshot', { + 'node': 'node0', + 'overlay': 'mid', + }) + + self.qsd.cmd('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'top', + 'file': { + 'driver': 'file', + 'filename': top + } + }) + + self.qsd.cmd('blockdev-snapshot', { + 'node': 'mid', + 'overlay': 'top', + }) + + self.qsd.cmd('block-commit', { + 'job-id': 'commit-mid', + 'device': 'top', + 'top-node': 'mid', + 'base-node': 'node0', + 'auto-finalize': True, + 'auto-dismiss': False, + }) + + self._wait_for_blockjob('concluded') + self.qsd.cmd('job-dismiss', { + 'id': 'commit-mid', + }) + + self.qsd.cmd('block-commit', { + 'job-id': 'commit-top', + 'device': 'top', + 'top-node': 'top', + 'base-node': 'node0', + 'auto-finalize': True, + 'auto-dismiss': False, + }) + + self._wait_for_blockjob('ready') + self.qsd.cmd('job-complete', { + 'id': 'commit-top', + }) + + self._wait_for_blockjob('concluded') + self.qsd.cmd('job-dismiss', { + 'id': 'commit-top', + }) + + self.qsd.cmd('blockdev-del', { + 'node-name': 'mid' + }) + self.qsd.cmd('blockdev-del', { + 'node-name': 'top' + }) bench_thr.join() + os.remove(mid) if __name__ == '__main__': # Format must support raw backing files diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out index fbc63e6..8d7e9967 100644 --- 
a/tests/qemu-iotests/tests/graph-changes-while-io.out +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -1,5 +1,5 @@ -.. +... ---------------------------------------------------------------------- -Ran 2 tests +Ran 3 tests OK diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 290cd2a..59c2793 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -772,9 +772,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, tjob->bs = src; job = &tjob->common; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); switch (result) { case TEST_JOB_SUCCESS: @@ -953,11 +955,13 @@ static void bdrv_test_top_close(BlockDriverState *bs) { BdrvChild *c, *next_c; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { bdrv_unref_child(bs, c); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static int coroutine_fn GRAPH_RDLOCK @@ -1014,7 +1018,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) bdrv_graph_co_rdlock(); QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { bdrv_graph_co_rdunlock(); + bdrv_drain_all_begin(); bdrv_co_unref_child(bs, c); + bdrv_drain_all_end(); bdrv_graph_co_rdlock(); } bdrv_graph_co_rdunlock(); @@ -1047,10 +1053,12 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* This child will be the one to pass to requests through to, and * it will stall until a drain occurs */ @@ -1058,21 +1066,25 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); child_bs->total_sectors = 65536 >> 
BDRV_SECTOR_BITS; /* Takes our reference to child_bs */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_of_bds, BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* This child is just there to be deleted * (for detach_instead_of_delete == true) */ null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); @@ -1155,6 +1167,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) bdrv_dec_in_flight(data->child_b->bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(data->parent_b, data->child_b); @@ -1163,6 +1176,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) @@ -1260,6 +1274,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) /* Set child relationships */ bdrv_ref(b); bdrv_ref(a); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, BDRV_CHILD_DATA, &error_abort); @@ -1271,6 +1286,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) by_parent_cb ? 
&child_of_bds : &detach_by_driver_cb_class, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_assert_cmpint(parent_a->refcnt, ==, 1); g_assert_cmpint(parent_b->refcnt, ==, 1); @@ -1396,14 +1412,10 @@ static void test_set_aio_context(void) bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); - bdrv_drained_begin(bs); bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); - bdrv_drained_end(bs); - bdrv_drained_begin(bs); bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); - bdrv_drained_end(bs); bdrv_unref(bs); iothread_join(a); @@ -1687,6 +1699,7 @@ static void test_drop_intermediate_poll(void) * Establish the chain last, so the chain links are the first * elements in the BDS.parents lists */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < 3; i++) { if (i) { @@ -1696,6 +1709,7 @@ static void test_drop_intermediate_poll(void) } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); job = block_job_create("job", &test_simple_job_driver, NULL, job_node, 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); @@ -1942,10 +1956,12 @@ static void do_test_replace_child_mid_drain(int old_drain_count, new_child_bs->total_sectors = 1; bdrv_ref(old_child_bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, BDRV_CHILD_COW, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); parent_s->setup_completed = true; for (i = 0; i < old_drain_count; i++) { diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c index d743abb..7b03ebe 100644 --- a/tests/unit/test-bdrv-graph-mod.c +++ b/tests/unit/test-bdrv-graph-mod.c @@ -137,10 +137,12 @@ static void test_update_perm_tree(void) blk_insert_bs(root, bs, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(filter, bs, "child", &child_of_bds, BDRV_CHILD_DATA, 
&error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); ret = bdrv_append(filter, bs, NULL); g_assert_cmpint(ret, <, 0); @@ -204,11 +206,13 @@ static void test_should_update_child(void) bdrv_set_backing_hd(target, bs, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); g_assert(target->backing->bs == bs); bdrv_attach_child(filter, target, "target", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_append(filter, bs, &error_abort); bdrv_graph_rdlock_main_loop(); @@ -244,6 +248,7 @@ static void test_parallel_exclusive_write(void) bdrv_ref(base); bdrv_ref(fl1); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -257,6 +262,7 @@ static void test_parallel_exclusive_write(void) bdrv_replace_node(fl1, fl2, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_drained_end(fl2); bdrv_drained_end(fl1); @@ -363,6 +369,7 @@ static void test_parallel_perm_update(void) */ bdrv_ref(base); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, &error_abort); @@ -377,6 +384,7 @@ static void test_parallel_perm_update(void) BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* Select fl1 as first child to be active */ s->selected = c_fl1; @@ -430,11 +438,13 @@ static void test_append_greedy_filter(void) BlockDriverState *base = no_perm_node("base"); BlockDriverState *fl = exclusive_writer_node("fl1"); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_append(fl, base, &error_abort); bdrv_unref(fl); |