From 9c67f33fcab654d6b7f9f4236c5a21ea946e0931 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 14 Sep 2023 10:00:59 -0400 Subject: virtio-blk: add lock to protect s->rq s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock to protect s->rq and eliminate reliance on the AioContext lock. Signed-off-by: Stefan Hajnoczi Message-ID: <20230914140101.1065008-3-stefanha@redhat.com> Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Michael S. Tsirkin Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 67 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'hw') diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index a1f8e15..ee38e08 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, /* Break the link as the next request is going to be parsed from the * ring again. Otherwise we may end up doing a double completion! */ req->mr_next = NULL; - req->next = s->rq; - s->rq = req; + + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + req->next = s->rq; + s->rq = req; + } } else if (action == BLOCK_ERROR_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); if (acct_failed) { @@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque) { VirtIOBlock *s = opaque; - VirtIOBlockReq *req = s->rq; + VirtIOBlockReq *req; MultiReqBuffer mrb = {}; - s->rq = NULL; + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + req = s->rq; + s->rq = NULL; + } aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); while (req) { @@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev) AioContext *ctx; VirtIOBlockReq *req; + /* Dataplane has stopped... */ + assert(!s->dataplane_started); + + /* ...but requests may still be in flight. */ ctx = blk_get_aio_context(s->blk); aio_context_acquire(ctx); blk_drain(s->blk); + aio_context_release(ctx); /* We drop queued requests after blk_drain() because blk_drain() itself can * produce them. */ - while (s->rq) { - req = s->rq; - s->rq = req->next; - virtqueue_detach_element(req->vq, &req->elem, 0); - virtio_blk_free_request(req); - } + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + while (s->rq) { + req = s->rq; + s->rq = req->next; - aio_context_release(ctx); + /* No other threads can access req->vq here */ + virtqueue_detach_element(req->vq, &req->elem, 0); + + virtio_blk_free_request(req); + } + } - assert(!s->dataplane_started); blk_set_enable_write_cache(s->blk, s->original_wce); } @@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOBlock *s = VIRTIO_BLK(vdev); - VirtIOBlockReq *req = s->rq; - while (req) { - qemu_put_sbyte(f, 1); + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + VirtIOBlockReq *req = s->rq; - if (s->conf.num_queues > 1) { - qemu_put_be32(f, virtio_get_queue_index(req->vq)); - } + while (req) { + qemu_put_sbyte(f, 1); - qemu_put_virtqueue_element(vdev, f, &req->elem); - req = req->next; + if (s->conf.num_queues > 1) { + qemu_put_be32(f, virtio_get_queue_index(req->vq)); + } + + qemu_put_virtqueue_element(vdev, f, &req->elem); + req = req->next; + } } + qemu_put_sbyte(f, 0); } @@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); - req->next = s->rq; - s->rq = req; + + WITH_QEMU_LOCK_GUARD(&s->rq_lock) { + req->next = s->rq; + s->rq = req; + } } return 0; @@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->host_features); virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); + qemu_mutex_init(&s->rq_lock); + s->blk = conf->conf.blk; s->rq = NULL; s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; @@ -1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, i); } qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + qemu_mutex_destroy(&s->rq_lock); blk_ram_registrar_destroy(&s->blk_ram_registrar); qemu_del_vm_change_state_handler(s->change); blockdev_mark_auto_del(s->blk); -- cgit v1.1 From c1135913c655ecfac2f3074671663f4251c8177f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 14 Sep 2023 10:01:00 -0400 Subject: virtio-blk: don't lock AioContext in the completion code path Nothing in the completion code path relies on the AioContext lock anymore. Virtqueues are only accessed from one thread at any moment and the s->rq global state is protected by its own lock now. Signed-off-by: Stefan Hajnoczi Message-ID: <20230914140101.1065008-4-stefanha@redhat.com> Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Michael S. Tsirkin Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'hw') diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index ee38e08..f5315df 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) VirtIOBlock *s = next->dev; VirtIODevice *vdev = VIRTIO_DEVICE(s); - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); while (next) { VirtIOBlockReq *req = next; next = req->mr_next; @@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); } - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } static void virtio_blk_flush_complete(void *opaque, int ret) @@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) VirtIOBlockReq *req = opaque; VirtIOBlock *s = req->dev; - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - if (ret) { - if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { - goto out; - } + if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) { + return; } virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); - -out: - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) @@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - if (ret) { - if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { - goto out; - } + if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { + return; } virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); @@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) block_acct_done(blk_get_stats(s->blk), &req->acct); } virtio_blk_free_request(req); - -out: - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } #ifdef __linux__ @@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status) virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); out: - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); virtio_blk_req_complete(req, status); virtio_blk_free_request(req); - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); g_free(ioctl_req); } @@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) { ZoneCmdData *data = opaque; VirtIOBlockReq *req = data->req; - VirtIOBlock *s = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); struct iovec *in_iov = data->in_iov; unsigned in_num = data->in_num; @@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) } out: - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); virtio_blk_req_complete(req, err_status); virtio_blk_free_request(req); - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); g_free(data->zone_report_data.zones); g_free(data); } @@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; } - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); virtio_blk_req_complete(req, err_status); virtio_blk_free_request(req); - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) @@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) { ZoneCmdData *data = opaque; VirtIOBlockReq *req = data->req; - VirtIOBlock *s = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); int64_t append_sector, n; uint8_t err_status = VIRTIO_BLK_S_OK; @@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); out: - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); virtio_blk_req_complete(req, err_status); virtio_blk_free_request(req); - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); g_free(data); } @@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, return 0; out: - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); virtio_blk_req_complete(req, err_status); virtio_blk_free_request(req); - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); return err_status; } -- cgit v1.1 From 6b5aa3a0fdfc74d42f9f7705fa6ced73253b06b1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 14 Sep 2023 10:01:01 -0400 Subject: virtio-blk: don't lock AioContext in the submission code path There is no need to acquire the AioContext lock around blk_aio_*() or blk_get_geometry() anymore. I/O plugging (defer_call()) also does not require the AioContext lock anymore. Signed-off-by: Stefan Hajnoczi Message-ID: <20230914140101.1065008-5-stefanha@redhat.com> Reviewed-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Michael S. Tsirkin Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'hw') diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f5315df..e110f97 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) MultiReqBuffer mrb = {}; bool suppress_notifications = virtio_queue_get_notification(vq); - aio_context_acquire(blk_get_aio_context(s->blk)); defer_call_begin(); do { @@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) } defer_call_end(); - aio_context_release(blk_get_aio_context(s->blk)); } static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) @@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) s->rq = NULL; } - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); while (req) { VirtIOBlockReq *next = req->next; if (virtio_blk_handle_request(req, &mrb)) { @@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) /* Paired with inc in virtio_blk_dma_restart_cb() */ blk_dec_in_flight(s->conf.conf.blk); - - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } static void virtio_blk_dma_restart_cb(void *opaque, bool running, -- cgit v1.1 From eaad0fe26050c227dc5dad63205835bac4912a51 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 4 Dec 2023 11:42:56 -0500 Subject: scsi: only access SCSIDevice->requests from one thread Stop depending on the AioContext lock and instead access SCSIDevice->requests from only one thread at a time: - When the VM is running only the BlockBackend's AioContext may access the requests list. - When the VM is stopped only the main loop may access the requests list. These constraints protect the requests list without the need for locking in the I/O code path. Note that multiple IOThreads are not supported yet because the code assumes all SCSIRequests are executed from a single AioContext. Leave that as future work. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Message-ID: <20231204164259.1515217-2-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 125 insertions(+), 56 deletions(-) (limited to 'hw') diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index fc4b77f..b649cdf 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun) return d; } +/* + * Invoke @fn() for each enqueued request in device @s. Must be called from the + * main loop thread while the guest is stopped. This is only suitable for + * vmstate ->put(), use scsi_device_for_each_req_async() for other cases. + */ +static void scsi_device_for_each_req_sync(SCSIDevice *s, + void (*fn)(SCSIRequest *, void *), + void *opaque) +{ + SCSIRequest *req; + SCSIRequest *next_req; + + assert(!runstate_is_running()); + assert(qemu_in_main_thread()); + + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) { + fn(req, opaque); + } +} + +typedef struct { + SCSIDevice *s; + void (*fn)(SCSIRequest *, void *); + void *fn_opaque; +} SCSIDeviceForEachReqAsyncData; + +static void scsi_device_for_each_req_async_bh(void *opaque) +{ + g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; + SCSIDevice *s = data->s; + AioContext *ctx; + SCSIRequest *req; + SCSIRequest *next; + + /* + * If the AioContext changed before this BH was called then reschedule into + * the new AioContext before accessing ->requests. This can happen when + * scsi_device_for_each_req_async() is called and then the AioContext is + * changed before BHs are run. + */ + ctx = blk_get_aio_context(s->conf.blk); + if (ctx != qemu_get_current_aio_context()) { + aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, + g_steal_pointer(&data)); + return; + } + + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + data->fn(req, data->fn_opaque); + } + + /* Drop the reference taken by scsi_device_for_each_req_async() */ + object_unref(OBJECT(s)); +} + +/* + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() + * runs in the AioContext that is executing the request. + */ +static void scsi_device_for_each_req_async(SCSIDevice *s, + void (*fn)(SCSIRequest *, void *), + void *opaque) +{ + assert(qemu_in_main_thread()); + + SCSIDeviceForEachReqAsyncData *data = + g_new(SCSIDeviceForEachReqAsyncData, 1); + + data->s = s; + data->fn = fn; + data->fn_opaque = opaque; + + /* + * Hold a reference to the SCSIDevice until + * scsi_device_for_each_req_async_bh() finishes. + */ + object_ref(OBJECT(s)); + + aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), + scsi_device_for_each_req_async_bh, + data); +} + static void scsi_device_realize(SCSIDevice *s, Error **errp) { SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); @@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, qbus_set_bus_hotplug_handler(BUS(bus)); } -static void scsi_dma_restart_bh(void *opaque) +void scsi_req_retry(SCSIRequest *req) { - SCSIDevice *s = opaque; - SCSIRequest *req, *next; - - qemu_bh_delete(s->bh); - s->bh = NULL; + req->retry = true; +} - aio_context_acquire(blk_get_aio_context(s->conf.blk)); - QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { - scsi_req_ref(req); - if (req->retry) { - req->retry = false; - switch (req->cmd.mode) { +/* Called in the AioContext that is executing the request */ +static void scsi_dma_restart_req(SCSIRequest *req, void *opaque) +{ + scsi_req_ref(req); + if (req->retry) { + req->retry = false; + switch (req->cmd.mode) { case SCSI_XFER_FROM_DEV: case SCSI_XFER_TO_DEV: scsi_req_continue(req); @@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque) scsi_req_dequeue(req); scsi_req_enqueue(req); break; - } } - scsi_req_unref(req); } - aio_context_release(blk_get_aio_context(s->conf.blk)); - /* Drop the reference that was acquired in scsi_dma_restart_cb */ - object_unref(OBJECT(s)); -} - -void scsi_req_retry(SCSIRequest *req) -{ - /* No need to save a reference, because scsi_dma_restart_bh just - * looks at the request list. */ - req->retry = true; + scsi_req_unref(req); } static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) { SCSIDevice *s = opaque; + assert(qemu_in_main_thread()); + if (!running) { return; } - if (!s->bh) { - AioContext *ctx = blk_get_aio_context(s->conf.blk); - /* The reference is dropped in scsi_dma_restart_bh.*/ - object_ref(OBJECT(s)); - s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, - &DEVICE(s)->mem_reentrancy_guard); - qemu_bh_schedule(s->bh); - } + + scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL); } static bool scsi_bus_is_address_free(SCSIBus *bus, @@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) } } +static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) +{ + scsi_req_cancel_async(req, NULL); +} + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) { - SCSIRequest *req; + scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); - while (!QTAILQ_EMPTY(&sdev->requests)) { - req = QTAILQ_FIRST(&sdev->requests); - scsi_req_cancel_async(req, NULL); - } blk_drain(sdev->conf.blk); aio_context_release(blk_get_aio_context(sdev->conf.blk)); scsi_device_set_ua(sdev, sense); @@ -1737,31 +1804,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev) /* SCSI request list. For simplicity, pv points to the whole device */ +static void put_scsi_req(SCSIRequest *req, void *opaque) +{ + QEMUFile *f = opaque; + + assert(!req->io_canceled); + assert(req->status == -1 && req->host_status == -1); + assert(req->enqueued); + + qemu_put_sbyte(f, req->retry ? 1 : 2); + qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); + qemu_put_be32s(f, &req->tag); + qemu_put_be32s(f, &req->lun); + if (req->bus->info->save_request) { + req->bus->info->save_request(f, req); + } + if (req->ops->save_request) { + req->ops->save_request(f, req); + } +} + static int put_scsi_requests(QEMUFile *f, void *pv, size_t size, const VMStateField *field, JSONWriter *vmdesc) { SCSIDevice *s = pv; - SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); - SCSIRequest *req; - QTAILQ_FOREACH(req, &s->requests, next) { - assert(!req->io_canceled); - assert(req->status == -1 && req->host_status == -1); - assert(req->enqueued); - - qemu_put_sbyte(f, req->retry ? 1 : 2); - qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); - qemu_put_be32s(f, &req->tag); - qemu_put_be32s(f, &req->lun); - if (bus->info->save_request) { - bus->info->save_request(f, req); - } - if (req->ops->save_request) { - req->ops->save_request(f, req); - } - } + scsi_device_for_each_req_sync(s, put_scsi_req, f); qemu_put_sbyte(f, 0); - return 0; } -- cgit v1.1 From 765ca516069a40e2ea7081997a508892db084c8e Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 4 Dec 2023 11:42:57 -0500 Subject: virtio-scsi: don't lock AioContext around virtio_queue_aio_attach_host_notifier() virtio_queue_aio_attach_host_notifier() does not require the AioContext lock. Stop taking the lock and add an explicit smp_wmb() because we were relying on the implicit barrier in the AioContext lock before. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-ID: <20231204164259.1515217-3-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/scsi/virtio-scsi-dataplane.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 1e684be..135e23f 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) memory_region_transaction_commit(); - /* - * These fields are visible to the IOThread so we rely on implicit barriers - * in aio_context_acquire() on the write side and aio_notify_accept() on - * the read side. - */ s->dataplane_starting = false; s->dataplane_started = true; + smp_wmb(); /* paired with aio_notify_accept() */ if (s->bus.drain_count == 0) { - aio_context_acquire(s->ctx); virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); for (i = 0; i < vs->conf.num_queues; i++) { virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); } - aio_context_release(s->ctx); } return 0; -- cgit v1.1 From 14042268045596a89f3fae3606389536d4784113 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 4 Dec 2023 11:42:58 -0500 Subject: scsi: don't lock AioContext in I/O code path blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's internal state also does not anymore. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Acked-by: Kevin Wolf Message-ID: <20231204164259.1515217-4-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/scsi/scsi-disk.c | 23 ----------------------- hw/scsi/scsi-generic.c | 20 +++----------------- 2 files changed, 3 insertions(+), 40 deletions(-) (limited to 'hw') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 6691f5e..2c1bbb3 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret) SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret) scsi_req_complete(&r->req, GOOD); done: - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); scsi_req_unref(&r->req); } @@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret) SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret) trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); } scsi_read_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); } /* Actually issue a read to the block device. */ @@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret) SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert (r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret) block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); } scsi_do_read(opaque, ret); - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); } /* Read more data from scsi device into buffer. */ @@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret) SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert (r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret) block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); } scsi_write_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); } static void scsi_write_data(SCSIRequest *req) @@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret) SCSIDiskReq *r = data->r; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret) block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); scsi_unmap_complete_noio(data, ret); } - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); } static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) @@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret) SCSIDiskReq *r = data->r; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret) data->sector << BDRV_SECTOR_BITS, &data->qiov, 0, scsi_write_same_complete, data); - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); return; } @@ -1857,7 +1839,6 @@ done: scsi_req_unref(&r->req); qemu_vfree(data->iov.iov_base); g_free(data); - aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); } static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) @@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) { SCSIBlockReq *req = (SCSIBlockReq *)opaque; SCSIDiskReq *r = &req->req; - SCSIDevice *s = r->req.dev; sg_io_hdr_t *io_hdr = &req->io_header; if (ret == 0) { @@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret) } if (ret > 0) { - aio_context_acquire(blk_get_aio_context(s->conf.blk)); if (scsi_handle_rw_error(r, ret, true)) { - aio_context_release(blk_get_aio_context(s->conf.blk)); scsi_req_unref(&r->req); return; } - aio_context_release(blk_get_aio_context(s->conf.blk)); /* Ignore error. */ ret = 0; diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 2417f0a..b7b04e1 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -109,15 +109,11 @@ done: static void scsi_command_complete(void *opaque, int ret) { SCSIGenericReq *r = (SCSIGenericReq *)opaque; - SCSIDevice *s = r->req.dev; - - aio_context_acquire(blk_get_aio_context(s->conf.blk)); assert(r->req.aiocb != NULL); r->req.aiocb = NULL; scsi_command_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->conf.blk)); } static int execute_command(BlockBackend *blk, @@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret) SCSIDevice *s = r->req.dev; int len; - aio_context_acquire(blk_get_aio_context(s->conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; if (ret || r->req.io_canceled) { scsi_command_complete_noio(r, ret); - goto done; + return; } len = r->io_header.dxfer_len - r->io_header.resid; @@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret) r->io_header.status != GOOD || len == 0) { scsi_command_complete_noio(r, 0); - goto done; + return; } /* Snoop READ CAPACITY output to set the blocksize. */ @@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret) req_complete: scsi_req_data(&r->req, len); scsi_req_unref(&r->req); - -done: - aio_context_release(blk_get_aio_context(s->conf.blk)); } /* Read more data from scsi device into buffer. */ @@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret) trace_scsi_generic_write_complete(ret); - aio_context_acquire(blk_get_aio_context(s->conf.blk)); - assert(r->req.aiocb != NULL); r->req.aiocb = NULL; if (ret || r->req.io_canceled) { scsi_command_complete_noio(r, ret); - goto done; + return; } if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && @@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret) } scsi_command_complete_noio(r, ret); - -done: - aio_context_release(blk_get_aio_context(s->conf.blk)); } /* Write data to a scsi device. Returns nonzero on failure. -- cgit v1.1 From ed18b1ed4f34888872b5fbc2f217c65d62c95cfd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2023 13:19:58 -0500 Subject: virtio-scsi: replace AioContext lock with tmf_bh_lock Protect the Task Management Function BH state with a lock. The TMF BH runs in the main loop thread. An IOThread might process a TMF at the same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh must be protected by a lock. Run TMF request completion in the IOThread using aio_wait_bh_oneshot(). This avoids more locking to protect the virtqueue and SCSI layer state. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-ID: <20231205182011.1976568-2-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'hw') diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 9c751bf..4f8d35f 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) virtio_scsi_free_req(req); } +static void virtio_scsi_complete_req_bh(void *opaque) +{ + VirtIOSCSIReq *req = opaque; + + virtio_scsi_complete_req(req); +} + +/* + * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop + * thread cannot touch the virtqueue since that could race with an IOThread. + */ +static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req) +{ + VirtIOSCSI *s = req->dev; + + if (!s->ctx || s->ctx == qemu_get_aio_context()) { + /* No need to schedule a BH when there is no IOThread */ + virtio_scsi_complete_req(req); + } else { + /* Run request completion in the IOThread */ + aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req); + } +} + static void virtio_scsi_bad_req(VirtIOSCSIReq *req) { virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); @@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) out: object_unref(OBJECT(d)); - - virtio_scsi_acquire(s); - virtio_scsi_complete_req(req); - virtio_scsi_release(s); + virtio_scsi_complete_req_from_main_loop(req); } /* Some TMFs must be processed from the main loop thread */ @@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque) GLOBAL_STATE_CODE(); - virtio_scsi_acquire(s); + WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { + QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { + QTAILQ_REMOVE(&s->tmf_bh_list, req, next); + QTAILQ_INSERT_TAIL(&reqs, req, next); + } - QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { - QTAILQ_REMOVE(&s->tmf_bh_list, req, next); - QTAILQ_INSERT_TAIL(&reqs, req, next); + qemu_bh_delete(s->tmf_bh); + s->tmf_bh = NULL; } - qemu_bh_delete(s->tmf_bh); - s->tmf_bh = NULL; - - virtio_scsi_release(s); - QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { QTAILQ_REMOVE(&reqs, req, next); virtio_scsi_do_one_tmf_bh(req); @@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) GLOBAL_STATE_CODE(); - virtio_scsi_acquire(s); - + /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */ if (s->tmf_bh) { qemu_bh_delete(s->tmf_bh); s->tmf_bh = NULL; @@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; virtio_scsi_complete_req(req); } - - virtio_scsi_release(s); } static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) { VirtIOSCSI *s = req->dev; - QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); + WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { + QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); - if (!s->tmf_bh) { - s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); - qemu_bh_schedule(s->tmf_bh); + if (!s->tmf_bh) { + s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); + qemu_bh_schedule(s->tmf_bh); + } } } @@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) Error *err = NULL; QTAILQ_INIT(&s->tmf_bh_list); + qemu_mutex_init(&s->tmf_bh_lock); virtio_scsi_common_realize(dev, virtio_scsi_handle_ctrl, @@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) qbus_set_hotplug_handler(BUS(&s->bus), NULL); virtio_scsi_common_unrealize(dev); + qemu_mutex_destroy(&s->tmf_bh_lock); } static Property virtio_scsi_properties[] = { -- cgit v1.1 From 10bcb0d996634aec642b000f05a72c93b652b2e6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2023 13:19:59 -0500 Subject: scsi: assert that callbacks run in the correct AioContext Since the removal of AioContext locking, the correctness of the code relies on running requests from a single AioContext at any given time. Add assertions that verify that callbacks are invoked in the correct AioContext. Signed-off-by: Stefan Hajnoczi Message-ID: <20231205182011.1976568-3-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/scsi/scsi-disk.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'hw') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2c1bbb3..a5048e0 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret) SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + /* The request must only run in the BlockBackend's AioContext */ + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + assert(r->req.aiocb != NULL); r->req.aiocb = NULL; @@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret) static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); uint32_t n; + /* The request must only run in the BlockBackend's AioContext */ + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + assert(r->req.aiocb == NULL); if (scsi_disk_req_check_error(r, ret, false)) { goto done; @@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req) static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); uint32_t n; + /* The request must only run in the BlockBackend's AioContext */ + assert(blk_get_aio_context(s->qdev.conf.blk) == + qemu_get_current_aio_context()); + assert (r->req.aiocb == NULL); if (scsi_disk_req_check_error(r, ret, false)) { goto done; -- cgit v1.1 From b49f4755c7fa35ea6e17e5b52c1cdaef6b4aa21c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2023 13:20:03 -0500 Subject: block: remove AioContext locking This is the big patch that removes aio_context_acquire()/aio_context_release() from the block layer and affected block layer users. There isn't a clean way to split this patch and the reviewers are likely the same group of people, so I decided to do it in one patch. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Reviewed-by: Paul Durrant Message-ID: <20231205182011.1976568-7-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 10 ---------- hw/block/dataplane/xen-block.c | 17 ++--------------- hw/block/virtio-blk.c | 13 ------------- hw/core/qdev-properties-system.c | 9 --------- 4 files changed, 2 insertions(+), 47 deletions(-) (limited to 'hw') diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index f83bb0f..7bbbd98 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -124,7 +124,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) VirtIOBlockDataPlane *s = vblk->dataplane; BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - AioContext *old_context; unsigned i; unsigned nvqs = s->conf->num_queues; Error *local_err = NULL; @@ -178,10 +177,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) trace_virtio_blk_data_plane_start(s); - old_context = blk_get_aio_context(s->conf->conf.blk); - aio_context_acquire(old_context); r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); - aio_context_release(old_context); if (r < 0) { error_report_err(local_err); goto fail_aio_context; @@ -208,13 +204,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) /* Get this show started by hooking up our callbacks */ if (!blk_in_drain(s->conf->conf.blk)) { - aio_context_acquire(s->ctx); for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); virtio_queue_aio_attach_host_notifier(vq, s->ctx); } - aio_context_release(s->ctx); } return 0; @@ -314,8 +308,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) */ vblk->dataplane_started = false; - aio_context_acquire(s->ctx); - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ blk_drain(s->conf->conf.blk); @@ -325,8 +317,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) */ blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); - aio_context_release(s->ctx); - /* Clean up guest notifier (irq) */ k->set_guest_notifiers(qbus->parent, nvqs, false); diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index c4bb28c..98501e6 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -260,8 +260,6 @@ static void xen_block_complete_aio(void *opaque, int ret) XenBlockRequest *request = opaque; XenBlockDataPlane *dataplane = request->dataplane; - aio_context_acquire(dataplane->ctx); - if (ret != 0) { error_report("%s I/O error", request->req.operation == BLKIF_OP_READ ? @@ -273,10 +271,10 @@ static void xen_block_complete_aio(void *opaque, int ret) if (request->presync) { request->presync = 0; xen_block_do_aio(request); - goto done; + return; } if (request->aio_inflight > 0) { - goto done; + return; } switch (request->req.operation) { @@ -318,9 +316,6 @@ static void xen_block_complete_aio(void *opaque, int ret) if (dataplane->more_work) { qemu_bh_schedule(dataplane->bh); } - -done: - aio_context_release(dataplane->ctx); } static bool xen_block_split_discard(XenBlockRequest *request, @@ -601,9 +596,7 @@ static void xen_block_dataplane_bh(void *opaque) { XenBlockDataPlane *dataplane = opaque; - aio_context_acquire(dataplane->ctx); xen_block_handle_requests(dataplane); - aio_context_release(dataplane->ctx); } static bool xen_block_dataplane_event(void *opaque) @@ -703,10 +696,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) xen_block_dataplane_detach(dataplane); } - aio_context_acquire(dataplane->ctx); /* Xen doesn't have multiple users for nodes, so this can't fail */ blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); - aio_context_release(dataplane->ctx); /* * Now that the context has been moved onto the main thread, cancel @@ -752,7 +743,6 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, { ERRP_GUARD(); XenDevice *xendev = dataplane->xendev; - AioContext *old_context; unsigned int ring_size; unsigned int i; @@ -836,11 +826,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, goto stop; } - old_context = blk_get_aio_context(dataplane->blk); - aio_context_acquire(old_context); /* If other users keep the BlockBackend in the iothread, that's ok */ blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); - aio_context_release(old_context); if (!blk_in_drain(dataplane->blk)) { xen_block_dataplane_attach(dataplane); diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e110f97..ec9ed09 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1210,17 +1210,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, static void virtio_blk_reset(VirtIODevice *vdev) { VirtIOBlock *s = VIRTIO_BLK(vdev); - AioContext *ctx; VirtIOBlockReq *req; /* Dataplane has stopped... */ assert(!s->dataplane_started); /* ...but requests may still be in flight. */ - ctx = blk_get_aio_context(s->blk); - aio_context_acquire(ctx); blk_drain(s->blk); - aio_context_release(ctx); /* We drop queued requests after blk_drain() because blk_drain() itself can * produce them. */ @@ -1250,10 +1246,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) uint64_t capacity; int64_t length; int blk_size = conf->logical_block_size; - AioContext *ctx; - - ctx = blk_get_aio_context(s->blk); - aio_context_acquire(ctx); blk_get_geometry(s->blk, &capacity); memset(&blkcfg, 0, sizeof(blkcfg)); @@ -1277,7 +1269,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) * per track (cylinder). */ length = blk_getlength(s->blk); - aio_context_release(ctx); if (length > 0 && length / conf->heads / conf->secs % blk_size) { blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; } else { @@ -1344,9 +1335,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) memcpy(&blkcfg, config, s->config_size); - aio_context_acquire(blk_get_aio_context(s->blk)); blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); - aio_context_release(blk_get_aio_context(s->blk)); } static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, @@ -1414,11 +1403,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) * s->blk would erroneously be placed in writethrough mode. */ if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { - aio_context_acquire(blk_get_aio_context(s->blk)); blk_set_enable_write_cache(s->blk, virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_WCE)); - aio_context_release(blk_get_aio_context(s->blk)); } } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 1473ab3..73cced4 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -120,9 +120,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, "node"); } - aio_context_acquire(ctx); blk_replace_bs(blk, bs, errp); - aio_context_release(ctx); return; } @@ -148,10 +146,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, 0, BLK_PERM_ALL); blk_created = true; - aio_context_acquire(ctx); ret = blk_insert_bs(blk, bs, errp); - aio_context_release(ctx); - if (ret < 0) { goto fail; } @@ -207,12 +202,8 @@ static void release_drive(Object *obj, const char *name, void *opaque) BlockBackend **ptr = object_field_prop_ptr(obj, prop); if (*ptr) { - AioContext *ctx = blk_get_aio_context(*ptr); - - aio_context_acquire(ctx); blockdev_auto_del(*ptr); blk_detach_dev(*ptr, dev); - aio_context_release(ctx); } } -- cgit v1.1 From 4f36b1384756914a6c4ffe04080a96da149c4c03 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2023 13:20:05 -0500 Subject: scsi: remove AioContext locking The AioContext lock no longer has any effect. Remove it. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Message-ID: <20231205182011.1976568-9-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/scsi/scsi-bus.c | 2 -- hw/scsi/scsi-disk.c | 31 +++++-------------------------- hw/scsi/virtio-scsi.c | 18 ------------------ 3 files changed, 5 insertions(+), 46 deletions(-) (limited to 'hw') diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index b649cdf..5b08cbf 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) { scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); blk_drain(sdev->conf.blk); - aio_context_release(blk_get_aio_context(sdev->conf.blk)); scsi_device_set_ua(sdev, sense); } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index a5048e0..61be3d3 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); uint64_t nb_sectors; - AioContext *ctx; scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); - ctx = blk_get_aio_context(s->qdev.conf.blk); - aio_context_acquire(ctx); blk_get_geometry(s->qdev.conf.blk, &nb_sectors); - aio_context_release(ctx); nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE; if (nb_sectors) { @@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev) static void scsi_hd_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); - AioContext *ctx = NULL; + /* can happen for devices without drive. The error message for missing * backend will be issued in scsi_realize */ if (s->qdev.conf.blk) { - ctx = blk_get_aio_context(s->qdev.conf.blk); - aio_context_acquire(ctx); if (!blkconf_blocksizes(&s->qdev.conf, errp)) { - goto out; + return; } } s->qdev.blocksize = s->qdev.conf.logical_block_size; @@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) s->product = g_strdup("QEMU HARDDISK"); } scsi_realize(&s->qdev, errp); -out: - if (ctx) { - aio_context_release(ctx); - } } static void scsi_cd_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); - AioContext *ctx; int ret; uint32_t blocksize = 2048; @@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) blocksize = dev->conf.physical_block_size; } - ctx = blk_get_aio_context(dev->conf.blk); - aio_context_acquire(ctx); s->qdev.blocksize = blocksize; s->qdev.type = TYPE_ROM; s->features |= 1 << SCSI_DISK_F_REMOVABLE; @@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) s->product = g_strdup("QEMU CD-ROM"); } scsi_realize(&s->qdev, errp); - aio_context_release(ctx); } @@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s) static void scsi_block_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); - AioContext *ctx; int sg_version; int rc; @@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) "be removed in a future version"); } - ctx = blk_get_aio_context(s->qdev.conf.blk); - aio_context_acquire(ctx); - /* check we are using a driver managing SG_IO (version 3 and after) */ rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); if (rc < 0) { @@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) if (rc != -EPERM) { error_append_hint(errp, "Is this a SCSI device?\n"); } - goto out; + return; } if (sg_version < 30000) { error_setg(errp, "scsi generic interface too old"); - goto out; + return; } /* get device type from INQUIRY data */ rc = get_device_type(s); if (rc < 0) { error_setg(errp, "INQUIRY failed"); - goto out; + return; } /* Make a guess for the block size, we'll fix it when the guest sends. @@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) scsi_realize(&s->qdev, errp); scsi_generic_read_device_inquiry(&s->qdev); - -out: - aio_context_release(ctx); } typedef struct SCSIBlockReq { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 4f8d35f..ca365a7 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) return; } - virtio_scsi_acquire(s); virtio_scsi_handle_ctrl_vq(s, vq); - virtio_scsi_release(s); } static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) @@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) return; } - virtio_scsi_acquire(s); virtio_scsi_handle_cmd_vq(s, vq); - virtio_scsi_release(s); } static void virtio_scsi_get_config(VirtIODevice *vdev, @@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) return; } - virtio_scsi_acquire(s); virtio_scsi_handle_event_vq(s, vq); - virtio_scsi_release(s); } static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) @@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) }, }; - virtio_scsi_acquire(s); virtio_scsi_push_event(s, &info); - virtio_scsi_release(s); } } @@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); VirtIOSCSI *s = VIRTIO_SCSI(vdev); SCSIDevice *sd = SCSI_DEVICE(dev); - AioContext *old_context; int ret; if (s->ctx && !s->dataplane_fenced) { if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; } - old_context = blk_get_aio_context(sd->conf.blk); - aio_context_acquire(old_context); ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); - aio_context_release(old_context); if (ret < 0) { return; } @@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, }, }; - virtio_scsi_acquire(s); virtio_scsi_push_event(s, &info); scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); - virtio_scsi_release(s); } } @@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); if (s->ctx) { - virtio_scsi_acquire(s); /* If other users keep the BlockBackend in the iothread, that's ok */ blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); - virtio_scsi_release(s); } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { - virtio_scsi_acquire(s); virtio_scsi_push_event(s, &info); scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); - virtio_scsi_release(s); } } -- cgit v1.1 From e7fc3c4a8cc3e729d178b95721d51ca95573ba91 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 5 Dec 2023 13:20:09 -0500 Subject: scsi: remove outdated AioContext lock comment The SCSI subsystem no longer uses the AioContext lock. Request processing runs exclusively in the BlockBackend's AioContext since "scsi: only access SCSIDevice->requests from one thread" and hence the lock is unnecessary. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Message-ID: <20231205182011.1976568-13-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/scsi/scsi-disk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'hw') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 61be3d3..2e7e1e9 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -355,7 +355,6 @@ done: scsi_req_unref(&r->req); } -/* Called with AioContext lock held */ static void scsi_dma_complete(void *opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; -- cgit v1.1 From 350147a871a545ab56b4a1062c8485635d9ffc24 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 20 Dec 2023 08:47:52 -0500 Subject: qdev-properties: alias all object class properties qdev_alias_all_properties() aliases a DeviceState's qdev properties onto an Object. This is used for VirtioPCIProxy types so that --device virtio-blk-pci has properties of its embedded --device virtio-blk-device object. Currently this function is implemented using qdev properties. Change the function to use QOM object class properties instead. This works because qdev properties create QOM object class properties, but it also catches any QOM object class-only properties that have no qdev properties. This change ensures that properties of devices are shown with --device foo,\? even if they are QOM object class properties. Signed-off-by: Stefan Hajnoczi Message-ID: <20231220134755.814917-2-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'hw') diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 840006e..7d6fa72 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1076,16 +1076,18 @@ void device_class_set_props(DeviceClass *dc, Property *props) void qdev_alias_all_properties(DeviceState *target, Object *source) { ObjectClass *class; - Property *prop; + ObjectPropertyIterator iter; + ObjectProperty *prop; class = object_get_class(OBJECT(target)); - do { - DeviceClass *dc = DEVICE_CLASS(class); - for (prop = dc->props_; prop && prop->name; prop++) { - object_property_add_alias(source, prop->name, - OBJECT(target), prop->name); + object_class_property_iter_init(&iter, class); + while ((prop = object_property_iter_next(&iter))) { + if (object_property_find(source, prop->name)) { + continue; /* skip duplicate properties */ } - class = object_class_get_parent(class); - } while (class != object_class_by_name(TYPE_DEVICE)); + + object_property_add_alias(source, prop->name, + OBJECT(target), prop->name); + } } -- cgit v1.1 From cf03a152c5d749fd0083bfe540df9524f1d2ff1d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 20 Dec 2023 08:47:54 -0500 Subject: qdev: add IOThreadVirtQueueMappingList property type virtio-blk and virtio-scsi devices will need a way to specify the mapping between IOThreads and virtqueues. At the moment all virtqueues are assigned to a single IOThread or the main loop. This single thread can be a CPU bottleneck, so it is necessary to allow finer-grained assignment to spread the load. Introduce DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST() so devices can take a parameter that maps virtqueues to IOThreads. The command-line syntax for this new property is as follows: --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1,2]},...]}' IOThreads are specified by name and virtqueues are specified by 0-based index. It will be common to simply assign virtqueues round-robin across a set of IOThreads. A convenient syntax that does not require specifying individual virtqueue indices is available: --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"},...]}' Signed-off-by: Stefan Hajnoczi Message-ID: <20231220134755.814917-4-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/core/qdev-properties-system.c | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'hw') diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 73cced4..1a39652 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -18,6 +18,7 @@ #include "qapi/qapi-types-block.h" #include "qapi/qapi-types-machine.h" #include "qapi/qapi-types-migration.h" +#include "qapi/qapi-visit-virtio.h" #include "qapi/qmp/qerror.h" #include "qemu/ctype.h" #include "qemu/cutils.h" @@ -1160,3 +1161,48 @@ const PropertyInfo qdev_prop_cpus390entitlement = { .set = qdev_propinfo_set_enum, .set_default_value = qdev_propinfo_set_default_value_enum, }; + +/* --- IOThreadVirtQueueMappingList --- */ + +static void get_iothread_vq_mapping_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadVirtQueueMappingList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + visit_type_IOThreadVirtQueueMappingList(v, name, prop_ptr, errp); +} + +static void set_iothread_vq_mapping_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadVirtQueueMappingList **prop_ptr = + object_field_prop_ptr(obj, opaque); + IOThreadVirtQueueMappingList *list; + + if (!visit_type_IOThreadVirtQueueMappingList(v, name, &list, errp)) { + return; + } + + qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); + *prop_ptr = list; +} + +static void release_iothread_vq_mapping_list(Object *obj, + const char *name, void *opaque) +{ + IOThreadVirtQueueMappingList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); + *prop_ptr = NULL; +} + +const PropertyInfo qdev_prop_iothread_vq_mapping_list = { + .name = "IOThreadVirtQueueMappingList", + .description = "IOThread virtqueue mapping list [{\"iothread\":\"\", " + "\"vqs\":[1,2,3,...]},...]", + .get = get_iothread_vq_mapping_list, + .set = set_iothread_vq_mapping_list, + .release = release_iothread_vq_mapping_list, +}; -- cgit v1.1 From b6948ab01df068bef591868c22d1f873d2d05cde Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 20 Dec 2023 08:47:55 -0500 Subject: virtio-blk: add iothread-vq-mapping parameter Add the iothread-vq-mapping parameter to assign virtqueues to IOThreads. Store the vq:AioContext mapping in the new struct VirtIOBlockDataPlane->vq_aio_context[] field and refactor the code to use the per-vq AioContext instead of the BlockDriverState's AioContext. Reimplement --device virtio-blk-pci,iothread= and non-IOThread mode by assigning all virtqueues to the IOThread and main loop's AioContext in vq_aio_context[], respectively. The comment in struct VirtIOBlockDataPlane about EventNotifiers is stale. Remove it. Signed-off-by: Stefan Hajnoczi Message-ID: <20231220134755.814917-5-stefanha@redhat.com> Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 155 ++++++++++++++++++++++++++++++---------- hw/block/dataplane/virtio-blk.h | 3 + hw/block/virtio-blk.c | 92 ++++++++++++++++++++---- 3 files changed, 200 insertions(+), 50 deletions(-) (limited to 'hw') diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 7bbbd98..6debd44 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -32,13 +32,11 @@ struct VirtIOBlockDataPlane { VirtIOBlkConf *conf; VirtIODevice *vdev; - /* Note that these EventNotifiers are assigned by value. This is - * fine as long as you do not call event_notifier_cleanup on them - * (because you don't own the file descriptor or handle; you just - * use it). + /* + * The AioContext for each virtqueue. The BlockDriverState will use the + * first element as its AioContext. */ - IOThread *iothread; - AioContext *ctx; + AioContext **vq_aio_context; }; /* Raise an interrupt to signal guest, if necessary */ @@ -47,6 +45,45 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) virtio_notify_irqfd(s->vdev, vq); } +/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ +static void +apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + AioContext **vq_aio_context, uint16_t num_queues) +{ + IOThreadVirtQueueMappingList *node; + size_t num_iothreads = 0; + size_t cur_iothread = 0; + + for (node = iothread_vq_mapping_list; node; node = node->next) { + num_iothreads++; + } + + for (node = iothread_vq_mapping_list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + AioContext *ctx = iothread_get_aio_context(iothread); + + /* Released in virtio_blk_data_plane_destroy() */ + object_ref(OBJECT(iothread)); + + if (node->value->vqs) { + uint16List *vq; + + /* Explicit vq:IOThread assignment */ + for (vq = node->value->vqs; vq; vq = vq->next) { + vq_aio_context[vq->value] = ctx; + } + } else { + /* Round-robin vq:IOThread assignment */ + for (unsigned i = cur_iothread; i < num_queues; + i += num_iothreads) { + vq_aio_context[i] = ctx; + } + } + + cur_iothread++; + } +} + /* Context: QEMU global mutex held */ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, VirtIOBlockDataPlane **dataplane, @@ -58,7 +95,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, *dataplane = NULL; - if (conf->iothread) { + if (conf->iothread || conf->iothread_vq_mapping_list) { if (!k->set_guest_notifiers || !k->ioeventfd_assign) { error_setg(errp, "device is incompatible with iothread " @@ -86,13 +123,24 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, s = g_new0(VirtIOBlockDataPlane, 1); s->vdev = vdev; s->conf = conf; + s->vq_aio_context = g_new(AioContext *, conf->num_queues); + + if (conf->iothread_vq_mapping_list) { + apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, + conf->num_queues); + } else if (conf->iothread) { + AioContext *ctx = iothread_get_aio_context(conf->iothread); + for (unsigned i = 0; i < conf->num_queues; i++) { + s->vq_aio_context[i] = ctx; + } - if (conf->iothread) { - s->iothread = conf->iothread; - object_ref(OBJECT(s->iothread)); - s->ctx = iothread_get_aio_context(s->iothread); + /* Released in virtio_blk_data_plane_destroy() */ + object_ref(OBJECT(conf->iothread)); } else { - s->ctx = qemu_get_aio_context(); + AioContext *ctx = qemu_get_aio_context(); + for (unsigned i = 0; i < conf->num_queues; i++) { + s->vq_aio_context[i] = ctx; + } } *dataplane = s; @@ -104,6 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) { VirtIOBlock *vblk; + VirtIOBlkConf *conf = s->conf; if (!s) { return; @@ -111,9 +160,21 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) vblk = VIRTIO_BLK(s->vdev); assert(!vblk->dataplane_started); - if (s->iothread) { - object_unref(OBJECT(s->iothread)); + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; + + for (node = conf->iothread_vq_mapping_list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + object_unref(OBJECT(iothread)); + } + } + + if (conf->iothread) { + object_unref(OBJECT(conf->iothread)); } + + g_free(s->vq_aio_context); g_free(s); } @@ -177,19 +238,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) trace_virtio_blk_data_plane_start(s); - r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); + r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], + &local_err); if (r < 0) { error_report_err(local_err); goto fail_aio_context; } - /* Kick right away to begin processing requests already in vring */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - - event_notifier_set(virtio_queue_get_host_notifier(vq)); - } - /* * These fields must be visible to the IOThread when it processes the * virtqueue, otherwise it will think dataplane has not started yet. @@ -206,8 +261,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) if (!blk_in_drain(s->conf->conf.blk)) { for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); + AioContext *ctx = s->vq_aio_context[i]; - virtio_queue_aio_attach_host_notifier(vq, s->ctx); + /* Kick right away to begin processing requests already in vring */ + event_notifier_set(virtio_queue_get_host_notifier(vq)); + + virtio_queue_aio_attach_host_notifier(vq, ctx); } } return 0; @@ -236,23 +295,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) * * Context: BH in IOThread */ -static void virtio_blk_data_plane_stop_bh(void *opaque) +static void virtio_blk_data_plane_stop_vq_bh(void *opaque) { - VirtIOBlockDataPlane *s = opaque; - unsigned i; - - for (i = 0; i < s->conf->num_queues; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); + VirtQueue *vq = opaque; + EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); - virtio_queue_aio_detach_host_notifier(vq, s->ctx); + virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); - /* - * Test and clear notifier after disabling event, in case poll callback - * didn't have time to run. - */ - virtio_queue_host_notifier_read(host_notifier); - } + /* + * Test and clear notifier after disabling event, in case poll callback + * didn't have time to run. + */ + virtio_queue_host_notifier_read(host_notifier); } /* Context: QEMU global mutex held */ @@ -279,7 +333,12 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) trace_virtio_blk_data_plane_stop(s); if (!blk_in_drain(s->conf->conf.blk)) { - aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + + aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); + } } /* @@ -322,3 +381,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) s->stopping = false; } + +void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); + + for (uint16_t i = 0; i < s->conf->num_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); + } +} + +void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); + + for (uint16_t i = 0; i < s->conf->num_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); + } +} diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h index 5e18bb9..1a806fe 100644 --- a/hw/block/dataplane/virtio-blk.h +++ b/hw/block/dataplane/virtio-blk.h @@ -28,4 +28,7 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); int virtio_blk_data_plane_start(VirtIODevice *vdev); void virtio_blk_data_plane_stop(VirtIODevice *vdev); +void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); + #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index ec9ed09..46e73b2 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1151,6 +1151,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) return; } } + virtio_blk_handle_vq(s, vq); } @@ -1463,6 +1464,68 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static bool +validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, + uint16_t num_queues, Error **errp) +{ + g_autofree unsigned long *vqs = bitmap_new(num_queues); + g_autoptr(GHashTable) iothreads = + g_hash_table_new(g_str_hash, g_str_equal); + + for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { + const char *name = node->value->iothread; + uint16List *vq; + + if (!iothread_by_id(name)) { + error_setg(errp, "IOThread \"%s\" object does not exist", name); + return false; + } + + if (!g_hash_table_add(iothreads, (gpointer)name)) { + error_setg(errp, + "duplicate IOThread name \"%s\" in iothread-vq-mapping", + name); + return false; + } + + if (node != list) { + if (!!node->value->vqs != !!list->value->vqs) { + error_setg(errp, "either all items in iothread-vq-mapping " + "must have vqs or none of them must have it"); + return false; + } + } + + for (vq = node->value->vqs; vq; vq = vq->next) { + if (vq->value >= num_queues) { + error_setg(errp, "vq index %u for IOThread \"%s\" must be " + "less than num_queues %u in iothread-vq-mapping", + vq->value, name, num_queues); + return false; + } + + if (test_and_set_bit(vq->value, vqs)) { + error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " + "because it is already assigned", vq->value, name); + return false; + } + } + } + + if (list->value->vqs) { + for (uint16_t i = 0; i < num_queues; i++) { + if (!test_bit(i, vqs)) { + error_setg(errp, + "missing vq %u IOThread assignment in iothread-vq-mapping", + i); + return false; + } + } + } + + return true; +} + static void virtio_resize_cb(void *opaque) { VirtIODevice *vdev = opaque; @@ -1487,34 +1550,24 @@ static void virtio_blk_resize(void *opaque) static void virtio_blk_drained_begin(void *opaque) { VirtIOBlock *s = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); if (!s->dataplane || !s->dataplane_started) { return; } - for (uint16_t i = 0; i < s->conf.num_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_detach_host_notifier(vq, ctx); - } + virtio_blk_data_plane_detach(s->dataplane); } /* Resume virtqueue ioeventfd processing after drain */ static void virtio_blk_drained_end(void *opaque) { VirtIOBlock *s = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); if (!s->dataplane || !s->dataplane_started) { return; } - for (uint16_t i = 0; i < s->conf.num_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_attach_host_notifier(vq, ctx); - } + virtio_blk_data_plane_attach(s->dataplane); } static const BlockDevOps virtio_block_ops = { @@ -1600,6 +1653,19 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } + if (conf->iothread_vq_mapping_list) { + if (conf->iothread) { + error_setg(errp, "iothread and iothread-vq-mapping properties " + "cannot be set at the same time"); + return; + } + + if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, + conf->num_queues, errp)) { + return; + } + } + s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, s->host_features); virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); @@ -1702,6 +1768,8 @@ static Property virtio_blk_properties[] = { DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, IOThread *), + DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, + conf.iothread_vq_mapping_list), DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, VIRTIO_BLK_F_DISCARD, true), DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, -- cgit v1.1