aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorKevin Wolf <kwolf@redhat.com>2019-01-07 13:02:48 +0100
committerKevin Wolf <kwolf@redhat.com>2019-02-01 13:46:44 +0100
commit4720cbeea1f42fd905fc69338fd42b191e58b412 (patch)
tree07200afa9838a91eec01071b8495a9534a3be262 /block
parent4e20c1becba3fd2e8e71a2663cefb9627fd2a6e0 (diff)
downloadqemu-4720cbeea1f42fd905fc69338fd42b191e58b412.zip
qemu-4720cbeea1f42fd905fc69338fd42b191e58b412.tar.gz
qemu-4720cbeea1f42fd905fc69338fd42b191e58b412.tar.bz2
block: Fix hangs in synchronous APIs with iothreads
In the block layer, synchronous APIs are often implemented by creating a coroutine that calls the asynchronous coroutine-based implementation and then waiting for completion with BDRV_POLL_WHILE(). For this to work with iothreads (more specifically, when the synchronous API is called in a thread that is not the home thread of the block device, so that the coroutine will run in a different thread), we must make sure to call aio_wait_kick() at the end of the operation. Many places are missing this, so that BDRV_POLL_WHILE() keeps hanging even if the condition has long become false. Note that bdrv_dec_in_flight() involves an aio_wait_kick() call. This corresponds to the BDRV_POLL_WHILE() in the drain functions, but it is generally not enough for most other operations because they haven't set the return value in the coroutine entry stub yet. To avoid race conditions there, we need to kick after setting the return value. The race window is small enough that the problem doesn't usually surface in the common path. However, it does surface and causes easily reproducible hangs if the operation can return early before even calling bdrv_inc/dec_in_flight, which many of them do (trivial error or no-op success paths). The bug in bdrv_truncate(), bdrv_check() and bdrv_invalidate_cache() is slightly different: These functions even neglected to schedule the coroutine in the home thread of the node. This avoids the hang, but is obviously wrong, too. Fix those to schedule the coroutine in the right AioContext in addition to adding aio_wait_kick() calls. Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/block-backend.c5
-rw-r--r--block/io.c8
-rw-r--r--block/nbd-client.c1
-rw-r--r--block/nvme.c1
-rw-r--r--block/qcow2.c1
-rw-r--r--block/qed.c1
6 files changed, 16 insertions, 1 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index cf05abd..f0be0d9 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1220,6 +1220,7 @@ static void blk_read_entry(void *opaque)
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
+ aio_wait_kick();
}
static void blk_write_entry(void *opaque)
@@ -1229,6 +1230,7 @@ static void blk_write_entry(void *opaque)
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
+ aio_wait_kick();
}
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -1540,6 +1542,7 @@ static void blk_ioctl_entry(void *opaque)
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
qiov->iov[0].iov_base);
+ aio_wait_kick();
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -1586,6 +1589,7 @@ static void blk_flush_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
rwco->ret = blk_co_flush(rwco->blk);
+ aio_wait_kick();
}
int blk_flush(BlockBackend *blk)
@@ -2018,6 +2022,7 @@ static void blk_pdiscard_entry(void *opaque)
QEMUIOVector *qiov = rwco->iobuf;
rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
+ aio_wait_kick();
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
diff --git a/block/io.c b/block/io.c
index bd9d688..213ca03 100644
--- a/block/io.c
+++ b/block/io.c
@@ -806,6 +806,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
rwco->qiov->size, rwco->qiov,
rwco->flags);
}
+ aio_wait_kick();
}
/*
@@ -2279,6 +2280,7 @@ static void coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
data->offset, data->bytes,
data->pnum, data->map, data->file);
data->done = true;
+ aio_wait_kick();
}
/*
@@ -2438,6 +2440,7 @@ static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
BdrvVmstateCo *co = opaque;
co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
+ aio_wait_kick();
}
static inline int
@@ -2559,6 +2562,7 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
FlushCo *rwco = opaque;
rwco->ret = bdrv_co_flush(rwco->bs);
+ aio_wait_kick();
}
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
@@ -2704,6 +2708,7 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
DiscardCo *rwco = opaque;
rwco->ret = bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
+ aio_wait_kick();
}
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes)
@@ -3217,6 +3222,7 @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
TruncateCo *tco = opaque;
tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc,
tco->errp);
+ aio_wait_kick();
}
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
@@ -3236,7 +3242,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
bdrv_truncate_co_entry(&tco);
} else {
co = qemu_coroutine_create(bdrv_truncate_co_entry, &tco);
- qemu_coroutine_enter(co);
+ bdrv_coroutine_enter(child->bs, co);
BDRV_POLL_WHILE(child->bs, tco.ret == NOT_DONE);
}
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 8135396..50a8dad 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -119,6 +119,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
s->quit = true;
nbd_recv_coroutines_wake_all(s);
s->read_reply_co = NULL;
+ aio_wait_kick();
}
static int nbd_co_send_request(BlockDriverState *bs,
diff --git a/block/nvme.c b/block/nvme.c
index 982097b..b5952c9 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -390,6 +390,7 @@ static void nvme_cmd_sync_cb(void *opaque, int ret)
{
int *pret = opaque;
*pret = ret;
+ aio_wait_kick();
}
static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
diff --git a/block/qcow2.c b/block/qcow2.c
index 4897aba..8c91b92 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1671,6 +1671,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
/* From bdrv_co_create. */
qcow2_open_entry(&qoc);
} else {
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
}
diff --git a/block/qed.c b/block/qed.c
index 9377c0b..1280870 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -559,6 +559,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
if (qemu_in_coroutine()) {
bdrv_qed_open_entry(&qoc);
} else {
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, &qoc));
BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
}