diff options
author | Fam Zheng <famz@redhat.com> | 2017-04-10 20:20:17 +0800 |
---|---|---|
committer | Fam Zheng <famz@redhat.com> | 2017-04-11 20:07:15 +0800 |
commit | e92f0e1910f0655a0edd8d87c5a7262d36517a89 (patch) | |
tree | 6a50c54236c0c103ed5b82c2fc362860630822ae /block | |
parent | 324ec3e4f26e0a2a422478db4b9992f8aad3bde4 (diff) | |
download | qemu-e92f0e1910f0655a0edd8d87c5a7262d36517a89.zip qemu-e92f0e1910f0655a0edd8d87c5a7262d36517a89.tar.gz qemu-e92f0e1910f0655a0edd8d87c5a7262d36517a89.tar.bz2 |
block: Use bdrv_coroutine_enter to start I/O coroutines
BDRV_POLL_WHILE waits for the started I/O by releasing bs's ctx then polling
the main context, which relies on the yielded coroutine continuing on bs->ctx
before notifying qemu_aio_context with bdrv_wakeup().
Thus, using qemu_coroutine_enter to start I/O is wrong because if the coroutine
is entered from main loop, co->ctx will be qemu_aio_context, as a result of the
"release, poll, acquire" loop of BDRV_POLL_WHILE, race conditions happen when
both main thread and the iothread access the same BDS:
main loop iothread
-----------------------------------------------------------------------
blockdev_snapshot
aio_context_acquire(bs->ctx)
virtio_scsi_data_plane_handle_cmd
bdrv_drained_begin(bs->ctx)
bdrv_flush(bs)
bdrv_co_flush(bs) aio_context_acquire(bs->ctx).enter
...
qemu_coroutine_yield(co)
BDRV_POLL_WHILE()
aio_context_release(bs->ctx)
aio_context_acquire(bs->ctx).return
...
aio_co_wake(co)
aio_poll(qemu_aio_context) ...
co_schedule_bh_cb() ...
qemu_coroutine_enter(co) ...
/* (A) bdrv_co_flush(bs) /* (B) I/O on bs */
continues... */
aio_context_release(bs->ctx)
aio_context_acquire(bs->ctx)
Note that in above case, bdrv_drained_begin() doesn't do the "release,
poll, acquire" in BDRV_POLL_WHILE, because bs->in_flight == 0.
Fix this by using bdrv_coroutine_enter and enter coroutine in the right
context.
iotests 109 output is updated because the coroutine reenter flow during
mirror job complete is different (now through co_queue_wakeup, instead
of the unconditional qemu_coroutine_switch before), making the end job
len different.
Signed-off-by: Fam Zheng <famz@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/block-backend.c | 4 | ||||
-rw-r--r-- | block/io.c | 14 |
2 files changed, 9 insertions, 9 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index 18ece99..a8f2b34 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1045,7 +1045,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, co_entry(&rwco); } else { Coroutine *co = qemu_coroutine_create(co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(blk_bs(blk), co); BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); } @@ -1152,7 +1152,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->has_returned = false; co = qemu_coroutine_create(co_entry, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(blk_bs(blk), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { @@ -616,7 +616,7 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, bdrv_rw_co_entry(&rwco); } else { co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(child->bs, co); BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); } return rwco.ret; @@ -1880,7 +1880,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, } else { co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry, &data); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !data.done); } return data.ret; @@ -2006,7 +2006,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, }; Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); while (data.ret == -EINPROGRESS) { aio_poll(bdrv_get_aio_context(bs), true); } @@ -2223,7 +2223,7 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, acb->is_write = is_write; co = qemu_coroutine_create(bdrv_co_do_rw, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(child->bs, co); bdrv_co_maybe_schedule_bh(acb); return &acb->common; @@ -2254,7 +2254,7 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, acb->req.error = -EINPROGRESS; co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); bdrv_co_maybe_schedule_bh(acb); return &acb->common; @@ -2387,7 +2387,7 @@ int bdrv_flush(BlockDriverState *bs) bdrv_flush_co_entry(&flush_co); } else { co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE); } @@ -2534,7 +2534,7 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) bdrv_pdiscard_co_entry(&rwco); } else { co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE); } |