diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2024-01-26 13:10:30 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2024-01-26 13:10:31 +0000 |
commit | b9c4a2018aa9c89233b8fc68ce26faf8e4ce1c78 (patch) | |
tree | 5dd331773caafb3dc376816c5408c765d85b2965 /hw | |
parent | e029fe22caad9b75c7ab69bd4e84853c11fb71e0 (diff) | |
parent | acf2b9fce9f402b070a65bea584582df0378da9e (diff) | |
download | qemu-b9c4a2018aa9c89233b8fc68ce26faf8e4ce1c78.zip qemu-b9c4a2018aa9c89233b8fc68ce26faf8e4ce1c78.tar.gz qemu-b9c4a2018aa9c89233b8fc68ce26faf8e4ce1c78.tar.bz2 |
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches
- virtio-blk: Multiqueue fixes and cleanups
- blklogwrites: Fixes for write_zeroes and superblock update races
- commit/stream: Allow users to request only format driver names in
backing file format
- monitor: only run coroutine commands in qemu_aio_context
- Some iotest fixes
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmWzpOwRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9ZNzg//W1+C7HxLft4Jc4O1BcOoOLlGCg4Esupt
# z0/XLZ9+xVQUtjQ82pFzf9XaWQs8CuNT3FBUKi+ngdwZ0JBThIv0aGiMZBcAeQjD
# qshPFgDM1lGL4ICIaT73/qfUzQgO3oruZj9F+ShBBzoasNWVoRzqqVDR3pinLwTp
# D4TU+3A6LkdhlYGT60SYfRq/UKNmCA1s2wysdjqXxS6KOEURNF2VBnz0Nu76qrVb
# 3P/a55GPiJIn+VVsdQ0J4vyyzn23m7I7WZOJ7Sjm1EfSJ6SvcDbhWsZTUonaV2rU
# qZ3WI/jggqxXRV8F2AaA4suS/Cc8RkX2KfcN8fB6wDC2eI5USSatjh6xfw5xH9Ll
# NRKUO4vFFR3Lf8wN9apg0Bwxqi0GOm9kvBJT5QqjQ16R1dvqBLqbZqcx6ZXqWFXe
# /Iy243Tz19mWTFVUj0EgCKQpNz9F4SyXxV83HtSR1lJ5mhthnLxkvUOe7jsFPE4d
# 1Z3uBNWnx2mKFkhlwocMTKayYqxPuKQ+YjqrRoplLW1GZoBeoalKRGf8/RHa6kQx
# gh4cguihlb71AH1AO1QuYpiZt9G4RJR2RZlIoCPJY5TaKJedcxMVn8H+8/F0PnQd
# gPysZf7hTU1xCUV6TClDd+f2fuvqZYwXdwHJ9iiohNkbFq4HFQUp4nk4/eEPGSe/
# uv8oE813E30=
# =KQJl
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 26 Jan 2024 12:26:20 GMT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
iotests/277: Use iotests.sock_dir for socket creation
iotests/iothreads-stream: Use the right TimeoutError
tests/unit: Bump test-replication timeout to 60 seconds
iotests/264: Use iotests.sock_dir for socket creation
block/blklogwrites: Protect mutable driver state with a mutex.
virtio-blk: always set ioeventfd during startup
virtio-blk: tolerate failure to set BlockBackend AioContext
virtio-blk: restart s->rq reqs in vq AioContexts
virtio-blk: rename dataplane to ioeventfd
virtio-blk: rename dataplane create/destroy functions
virtio-blk: move dataplane code into virtio-blk.c
monitor: only run coroutine commands in qemu_aio_context
iotests: port 141 to Python for reliable QMP testing
iotests: add filter_qmp_generated_node_ids()
stream: Allow users to request only format driver names in backing file format
commit: Allow users to request only format driver names in backing file format
string-output-visitor: Fix (pseudo) struct handling
block/blklogwrites: Fix a bug when logging "write zeroes" operations.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/block/dataplane/meson.build | 1 | ||||
-rw-r--r-- | hw/block/dataplane/trace-events | 5 | ||||
-rw-r--r-- | hw/block/dataplane/trace.h | 1 | ||||
-rw-r--r-- | hw/block/dataplane/virtio-blk.c | 404 | ||||
-rw-r--r-- | hw/block/dataplane/virtio-blk.h | 34 | ||||
-rw-r--r-- | hw/block/virtio-blk.c | 412 |
6 files changed, 380 insertions, 477 deletions
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 025b3b0..11a5eba 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1 @@ -system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events deleted file mode 100644 index 38fc3e7..0000000 --- a/hw/block/dataplane/trace-events +++ /dev/null @@ -1,5 +0,0 @@ -# See docs/devel/tracing.rst for syntax documentation. - -# virtio-blk.c -virtio_blk_data_plane_start(void *s) "dataplane %p" -virtio_blk_data_plane_stop(void *s) "dataplane %p" diff --git a/hw/block/dataplane/trace.h b/hw/block/dataplane/trace.h deleted file mode 100644 index 240cc59..0000000 --- a/hw/block/dataplane/trace.h +++ /dev/null @@ -1 +0,0 @@ -#include "trace/trace-hw_block_dataplane.h" diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c deleted file mode 100644 index ba22732..0000000 --- a/hw/block/dataplane/virtio-blk.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Dedicated thread for virtio-blk I/O processing - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi <stefanha@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "trace.h" -#include "qemu/iov.h" -#include "qemu/main-loop.h" -#include "qemu/thread.h" -#include "qemu/error-report.h" -#include "hw/virtio/virtio-blk.h" -#include "virtio-blk.h" -#include "block/aio.h" -#include "hw/virtio/virtio-bus.h" -#include "qom/object_interfaces.h" - -struct VirtIOBlockDataPlane { - bool starting; - bool stopping; - - VirtIOBlkConf *conf; - VirtIODevice *vdev; - - /* - * The AioContext for each virtqueue. The BlockDriverState will use the - * first element as its AioContext. - */ - AioContext **vq_aio_context; -}; - -/* Raise an interrupt to signal guest, if necessary */ -void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) -{ - virtio_notify_irqfd(s->vdev, vq); -} - -/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -static void -apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - AioContext **vq_aio_context, uint16_t num_queues) -{ - IOThreadVirtQueueMappingList *node; - size_t num_iothreads = 0; - size_t cur_iothread = 0; - - for (node = iothread_vq_mapping_list; node; node = node->next) { - num_iothreads++; - } - - for (node = iothread_vq_mapping_list; node; node = node->next) { - IOThread *iothread = iothread_by_id(node->value->iothread); - AioContext *ctx = iothread_get_aio_context(iothread); - - /* Released in virtio_blk_data_plane_destroy() */ - object_ref(OBJECT(iothread)); - - if (node->value->vqs) { - uint16List *vq; - - /* Explicit vq:IOThread assignment */ - for (vq = node->value->vqs; vq; vq = vq->next) { - vq_aio_context[vq->value] = ctx; - } - } else { - /* Round-robin vq:IOThread assignment */ - for (unsigned i = cur_iothread; i < num_queues; - i += num_iothreads) { - vq_aio_context[i] = ctx; - } - } - - cur_iothread++; - } -} - -/* Context: BQL held */ -bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - VirtIOBlockDataPlane **dataplane, - Error **errp) -{ - VirtIOBlockDataPlane *s; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - - *dataplane = NULL; - - if (conf->iothread || conf->iothread_vq_mapping_list) { - if (!k->set_guest_notifiers || !k->ioeventfd_assign) { - error_setg(errp, - "device is incompatible with iothread " - "(transport does not support notifiers)"); - return false; - } - if (!virtio_device_ioeventfd_enabled(vdev)) { - error_setg(errp, "ioeventfd is required for iothread"); - return false; - } - - /* If dataplane is (re-)enabled while the guest is running there could - * be block jobs that can conflict. - */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - error_prepend(errp, "cannot start virtio-blk dataplane: "); - return false; - } - } - /* Don't try if transport does not support notifiers. */ - if (!virtio_device_ioeventfd_enabled(vdev)) { - return false; - } - - s = g_new0(VirtIOBlockDataPlane, 1); - s->vdev = vdev; - s->conf = conf; - s->vq_aio_context = g_new(AioContext *, conf->num_queues); - - if (conf->iothread_vq_mapping_list) { - apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, - conf->num_queues); - } else if (conf->iothread) { - AioContext *ctx = iothread_get_aio_context(conf->iothread); - for (unsigned i = 0; i < conf->num_queues; i++) { - s->vq_aio_context[i] = ctx; - } - - /* Released in virtio_blk_data_plane_destroy() */ - object_ref(OBJECT(conf->iothread)); - } else { - AioContext *ctx = qemu_get_aio_context(); - for (unsigned i = 0; i < conf->num_queues; i++) { - s->vq_aio_context[i] = ctx; - } - } - - *dataplane = s; - - return true; -} - -/* Context: BQL held */ -void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) -{ - VirtIOBlock *vblk; - VirtIOBlkConf *conf; - - if (!s) { - return; - } - - vblk = VIRTIO_BLK(s->vdev); - assert(!vblk->dataplane_started); - conf = s->conf; - - if (conf->iothread_vq_mapping_list) { - IOThreadVirtQueueMappingList *node; - - for (node = conf->iothread_vq_mapping_list; node; node = node->next) { - IOThread *iothread = iothread_by_id(node->value->iothread); - object_unref(OBJECT(iothread)); - } - } - - if (conf->iothread) { - object_unref(OBJECT(conf->iothread)); - } - - g_free(s->vq_aio_context); - g_free(s); -} - -/* Context: BQL held */ -int virtio_blk_data_plane_start(VirtIODevice *vdev) -{ - VirtIOBlock *vblk = VIRTIO_BLK(vdev); - VirtIOBlockDataPlane *s = vblk->dataplane; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - unsigned i; - unsigned nvqs = s->conf->num_queues; - Error *local_err = NULL; - int r; - - if (vblk->dataplane_started || s->starting) { - return 0; - } - - s->starting = true; - - /* Set up guest notifier (irq) */ - r = k->set_guest_notifiers(qbus->parent, nvqs, true); - if (r != 0) { - error_report("virtio-blk failed to set guest notifier (%d), " - "ensure -accel kvm is set.", r); - goto fail_guest_notifiers; - } - - /* - * Batch all the host notifiers in a single transaction to avoid - * quadratic time complexity in address_space_update_ioeventfds(). - */ - memory_region_transaction_begin(); - - /* Set up virtqueue notify */ - for (i = 0; i < nvqs; i++) { - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); - if (r != 0) { - int j = i; - - fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); - while (i--) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - } - - /* - * The transaction expects the ioeventfds to be open when it - * commits. Do it now, before the cleanup loop. - */ - memory_region_transaction_commit(); - - while (j--) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); - } - goto fail_host_notifiers; - } - } - - memory_region_transaction_commit(); - - trace_virtio_blk_data_plane_start(s); - - r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], - &local_err); - if (r < 0) { - error_report_err(local_err); - goto fail_aio_context; - } - - /* - * These fields must be visible to the IOThread when it processes the - * virtqueue, otherwise it will think dataplane has not started yet. - * - * Make sure ->dataplane_started is false when blk_set_aio_context() is - * called above so that draining does not cause the host notifier to be - * detached/attached prematurely. - */ - s->starting = false; - vblk->dataplane_started = true; - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - - /* Get this show started by hooking up our callbacks */ - if (!blk_in_drain(s->conf->conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - - /* Kick right away to begin processing requests already in vring */ - event_notifier_set(virtio_queue_get_host_notifier(vq)); - - virtio_queue_aio_attach_host_notifier(vq, ctx); - } - } - return 0; - - fail_aio_context: - memory_region_transaction_begin(); - - for (i = 0; i < nvqs; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - } - - memory_region_transaction_commit(); - - for (i = 0; i < nvqs; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - fail_host_notifiers: - k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: - vblk->dataplane_disabled = true; - s->starting = false; - return -ENOSYS; -} - -/* Stop notifications for new requests from guest. - * - * Context: BH in IOThread - */ -static void virtio_blk_data_plane_stop_vq_bh(void *opaque) -{ - VirtQueue *vq = opaque; - EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); - - virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); - - /* - * Test and clear notifier after disabling event, in case poll callback - * didn't have time to run. - */ - virtio_queue_host_notifier_read(host_notifier); -} - -/* Context: BQL held */ -void virtio_blk_data_plane_stop(VirtIODevice *vdev) -{ - VirtIOBlock *vblk = VIRTIO_BLK(vdev); - VirtIOBlockDataPlane *s = vblk->dataplane; - BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - unsigned i; - unsigned nvqs = s->conf->num_queues; - - if (!vblk->dataplane_started || s->stopping) { - return; - } - - /* Better luck next time. */ - if (vblk->dataplane_disabled) { - vblk->dataplane_disabled = false; - vblk->dataplane_started = false; - return; - } - s->stopping = true; - trace_virtio_blk_data_plane_stop(s); - - if (!blk_in_drain(s->conf->conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - - aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); - } - } - - /* - * Batch all the host notifiers in a single transaction to avoid - * quadratic time complexity in address_space_update_ioeventfds(). - */ - memory_region_transaction_begin(); - - for (i = 0; i < nvqs; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - } - - /* - * The transaction expects the ioeventfds to be open when it - * commits. Do it now, before the cleanup loop. - */ - memory_region_transaction_commit(); - - for (i = 0; i < nvqs; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - - /* - * Set ->dataplane_started to false before draining so that host notifiers - * are not detached/attached anymore. - */ - vblk->dataplane_started = false; - - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf->conf.blk); - - /* - * Try to switch bs back to the QEMU main loop. If other users keep the - * BlockBackend in the iothread, that's ok - */ - blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); - - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - - s->stopping = false; -} - -void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); - - for (uint16_t i = 0; i < s->conf->num_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); - } -} - -void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); - - for (uint16_t i = 0; i < s->conf->num_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); - } -} diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h deleted file mode 100644 index 1a806fe..0000000 --- a/hw/block/dataplane/virtio-blk.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Dedicated thread for virtio-blk I/O processing - * - * Copyright 2012 IBM, Corp. - * Copyright 2012 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Stefan Hajnoczi <stefanha@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef HW_DATAPLANE_VIRTIO_BLK_H -#define HW_DATAPLANE_VIRTIO_BLK_H - -#include "hw/virtio/virtio.h" - -typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; - -bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - VirtIOBlockDataPlane **dataplane, - Error **errp); -void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); - -int virtio_blk_data_plane_start(VirtIODevice *vdev); -void virtio_blk_data_plane_stop(VirtIODevice *vdev); - -void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); - -#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b7a344c..227d835 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -27,7 +27,6 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" #include "hw/virtio/virtio-blk.h" -#include "dataplane/virtio-blk.h" #include "scsi/constants.h" #ifdef __linux__ # include <scsi/sg.h> @@ -65,8 +64,8 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) iov_discard_undo(&req->inhdr_undo); iov_discard_undo(&req->outhdr_undo); virtqueue_push(req->vq, &req->elem, req->in_len); - if (s->dataplane_started && !s->dataplane_disabled) { - virtio_blk_data_plane_notify(s->dataplane, req->vq); + if (s->ioeventfd_started && !s->ioeventfd_disabled) { + virtio_notify_irqfd(vdev, req->vq); } else { virtio_notify(vdev, req->vq); } @@ -1142,12 +1141,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBlock *s = (VirtIOBlock *)vdev; - if (s->dataplane && !s->dataplane_started) { + if (!s->ioeventfd_disabled && !s->ioeventfd_started) { /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start - * dataplane here instead of waiting for .set_status(). + * ioeventfd here instead of waiting for .set_status(). */ virtio_device_start_ioeventfd(vdev); - if (!s->dataplane_disabled) { + if (!s->ioeventfd_disabled) { return; } } @@ -1157,16 +1156,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) static void virtio_blk_dma_restart_bh(void *opaque) { - VirtIOBlock *s = opaque; + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; /* we're called with at least one request */ - VirtIOBlockReq *req; MultiReqBuffer mrb = {}; - WITH_QEMU_LOCK_GUARD(&s->rq_lock) { - req = s->rq; - s->rq = NULL; - } - while (req) { VirtIOBlockReq *next = req->next; if (virtio_blk_handle_request(req, &mrb)) { @@ -1196,16 +1190,43 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, RunState state) { VirtIOBlock *s = opaque; + uint16_t num_queues = s->conf.num_queues; if (!running) { return; } - /* Paired with dec in virtio_blk_dma_restart_bh() */ - blk_inc_in_flight(s->conf.conf.blk); + /* Split the device-wide s->rq request list into per-vq request lists */ + g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); + VirtIOBlockReq *rq; + + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + rq = s->rq; + s->rq = NULL; + } + + while (rq) { + VirtIOBlockReq *next = rq->next; + uint16_t idx = virtio_get_queue_index(rq->vq); + + rq->next = vq_rq[idx]; + vq_rq[idx] = rq; + rq = next; + } + + /* Schedule a BH to submit the requests in each vq's AioContext */ + for (uint16_t i = 0; i < num_queues; i++) { + if (!vq_rq[i]) { + continue; + } + + /* Paired with dec in virtio_blk_dma_restart_bh() */ + blk_inc_in_flight(s->conf.conf.blk); - aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), - virtio_blk_dma_restart_bh, s); + aio_bh_schedule_oneshot(s->vq_aio_context[i], + virtio_blk_dma_restart_bh, + vq_rq[i]); + } } static void virtio_blk_reset(VirtIODevice *vdev) @@ -1214,7 +1235,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) VirtIOBlockReq *req; /* Dataplane has stopped... */ - assert(!s->dataplane_started); + assert(!s->ioeventfd_started); /* ...but requests may still be in flight. */ blk_drain(s->blk); @@ -1381,7 +1402,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) VirtIOBlock *s = VIRTIO_BLK(vdev); if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { - assert(!s->dataplane_started); + assert(!s->ioeventfd_started); } if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { @@ -1546,16 +1567,34 @@ static void virtio_blk_resize(void *opaque) aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); } +static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + for (uint16_t i = 0; i < s->conf.num_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); + } +} + +static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + + for (uint16_t i = 0; i < s->conf.num_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); + } +} + /* Suspend virtqueue ioeventfd processing during drain */ static void virtio_blk_drained_begin(void *opaque) { VirtIOBlock *s = opaque; - if (!s->dataplane || !s->dataplane_started) { - return; + if (s->ioeventfd_started) { + virtio_blk_ioeventfd_detach(s); } - - virtio_blk_data_plane_detach(s->dataplane); } /* Resume virtqueue ioeventfd processing after drain */ @@ -1563,11 +1602,9 @@ static void virtio_blk_drained_end(void *opaque) { VirtIOBlock *s = opaque; - if (!s->dataplane || !s->dataplane_started) { - return; + if (s->ioeventfd_started) { + virtio_blk_ioeventfd_attach(s); } - - virtio_blk_data_plane_attach(s->dataplane); } static const BlockDevOps virtio_block_ops = { @@ -1576,6 +1613,312 @@ static const BlockDevOps virtio_block_ops = { .drained_end = virtio_blk_drained_end, }; +/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ +static void +apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + AioContext **vq_aio_context, uint16_t num_queues) +{ + IOThreadVirtQueueMappingList *node; + size_t num_iothreads = 0; + size_t cur_iothread = 0; + + for (node = iothread_vq_mapping_list; node; node = node->next) { + num_iothreads++; + } + + for (node = iothread_vq_mapping_list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + AioContext *ctx = iothread_get_aio_context(iothread); + + /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(iothread)); + + if (node->value->vqs) { + uint16List *vq; + + /* Explicit vq:IOThread assignment */ + for (vq = node->value->vqs; vq; vq = vq->next) { + vq_aio_context[vq->value] = ctx; + } + } else { + /* Round-robin vq:IOThread assignment */ + for (unsigned i = cur_iothread; i < num_queues; + i += num_iothreads) { + vq_aio_context[i] = ctx; + } + } + + cur_iothread++; + } +} + +/* Context: BQL held */ +static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + VirtIOBlkConf *conf = &s->conf; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + + if (conf->iothread || conf->iothread_vq_mapping_list) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " + "(transport does not support notifiers)"); + return false; + } + if (!virtio_device_ioeventfd_enabled(vdev)) { + error_setg(errp, "ioeventfd is required for iothread"); + return false; + } + + /* + * If ioeventfd is (re-)enabled while the guest is running there could + * be block jobs that can conflict. + */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + error_prepend(errp, "cannot start virtio-blk ioeventfd: "); + return false; + } + } + + s->vq_aio_context = g_new(AioContext *, conf->num_queues); + + if (conf->iothread_vq_mapping_list) { + apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, + conf->num_queues); + } else if (conf->iothread) { + AioContext *ctx = iothread_get_aio_context(conf->iothread); + for (unsigned i = 0; i < conf->num_queues; i++) { + s->vq_aio_context[i] = ctx; + } + + /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(conf->iothread)); + } else { + AioContext *ctx = qemu_get_aio_context(); + for (unsigned i = 0; i < conf->num_queues; i++) { + s->vq_aio_context[i] = ctx; + } + } + + return true; +} + +/* Context: BQL held */ +static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) +{ + VirtIOBlkConf *conf = &s->conf; + + assert(!s->ioeventfd_started); + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; + + for (node = conf->iothread_vq_mapping_list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + object_unref(OBJECT(iothread)); + } + } + + if (conf->iothread) { + object_unref(OBJECT(conf->iothread)); + } + + g_free(s->vq_aio_context); + s->vq_aio_context = NULL; +} + +/* Context: BQL held */ +static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + unsigned i; + unsigned nvqs = s->conf.num_queues; + Error *local_err = NULL; + int r; + + if (s->ioeventfd_started || s->ioeventfd_starting) { + return 0; + } + + s->ioeventfd_starting = true; + + /* Set up guest notifier (irq) */ + r = k->set_guest_notifiers(qbus->parent, nvqs, true); + if (r != 0) { + error_report("virtio-blk failed to set guest notifier (%d), " + "ensure -accel kvm is set.", r); + goto fail_guest_notifiers; + } + + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + /* Set up virtqueue notify */ + for (i = 0; i < nvqs; i++) { + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); + if (r != 0) { + int j = i; + + fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); + while (i--) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + while (j--) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); + } + goto fail_host_notifiers; + } + } + + memory_region_transaction_commit(); + + /* + * Try to change the AioContext so that block jobs and other operations can + * co-locate their activity in the same AioContext. If it fails, nevermind. + */ + r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], + &local_err); + if (r < 0) { + warn_report_err(local_err); + } + + /* + * These fields must be visible to the IOThread when it processes the + * virtqueue, otherwise it will think ioeventfd has not started yet. + * + * Make sure ->ioeventfd_started is false when blk_set_aio_context() is + * called above so that draining does not cause the host notifier to be + * detached/attached prematurely. + */ + s->ioeventfd_starting = false; + s->ioeventfd_started = true; + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + + /* Get this show started by hooking up our callbacks */ + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + + /* Kick right away to begin processing requests already in vring */ + event_notifier_set(virtio_queue_get_host_notifier(vq)); + + if (!blk_in_drain(s->conf.conf.blk)) { + virtio_queue_aio_attach_host_notifier(vq, ctx); + } + } + return 0; + + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: + s->ioeventfd_disabled = true; + s->ioeventfd_starting = false; + return -ENOSYS; +} + +/* Stop notifications for new requests from guest. + * + * Context: BH in IOThread + */ +static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) +{ + VirtQueue *vq = opaque; + EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); + + virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); + + /* + * Test and clear notifier after disabling event, in case poll callback + * didn't have time to run. + */ + virtio_queue_host_notifier_read(host_notifier); +} + +/* Context: BQL held */ +static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) +{ + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = qdev_get_parent_bus(DEVICE(s)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + unsigned i; + unsigned nvqs = s->conf.num_queues; + + if (!s->ioeventfd_started || s->ioeventfd_stopping) { + return; + } + + /* Better luck next time. */ + if (s->ioeventfd_disabled) { + s->ioeventfd_disabled = false; + s->ioeventfd_started = false; + return; + } + s->ioeventfd_stopping = true; + + if (!blk_in_drain(s->conf.conf.blk)) { + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + + aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); + } + } + + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + + /* + * Set ->ioeventfd_started to false before draining so that host notifiers + * are not detached/attached anymore. + */ + s->ioeventfd_started = false; + + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf.conf.blk); + + /* + * Try to switch bs back to the QEMU main loop. If other users keep the + * BlockBackend in the iothread, that's ok + */ + blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); + + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + + s->ioeventfd_stopping = false; +} + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -1680,7 +2023,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); } qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); - virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + + /* Don't start ioeventfd if transport does not support notifiers. */ + if (!virtio_device_ioeventfd_enabled(vdev)) { + s->ioeventfd_disabled = true; + } + + virtio_blk_vq_aio_context_init(s, &err); if (err != NULL) { error_propagate(errp, err); for (i = 0; i < conf->num_queues; i++) { @@ -1717,8 +2066,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) blk_drain(s->blk); del_boot_device_lchs(dev, "/disk@0,0"); - virtio_blk_data_plane_destroy(s->dataplane); - s->dataplane = NULL; + virtio_blk_vq_aio_context_cleanup(s); for (i = 0; i < conf->num_queues; i++) { virtio_del_queue(vdev, i); } @@ -1802,8 +2150,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_blk_reset; vdc->save = virtio_blk_save_device; vdc->load = virtio_blk_load_device; - vdc->start_ioeventfd = virtio_blk_data_plane_start; - vdc->stop_ioeventfd = virtio_blk_data_plane_stop; + vdc->start_ioeventfd = virtio_blk_start_ioeventfd; + vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; } static const TypeInfo virtio_blk_info = { |