| author | Kevin Wolf <kwolf@redhat.com> | 2019-07-22 17:46:23 +0200 |
|---|---|---|
| committer | Kevin Wolf <kwolf@redhat.com> | 2019-08-16 10:25:16 +0200 |
| commit | cf3129323f900ef5ddbccbe86e4fa801e88c566e (patch) | |
| tree | ba516ad030d6a983ba70a16264c8c5a9dfea15a8 /block/block-backend.c | |
| parent | d2da5e288a2e71e82866c8fdefd41b5727300124 (diff) | |
block-backend: Queue requests while drained
This fixes devices like IDE that can still start new requests from I/O
handlers in the CPU thread while the block backend is drained.
The basic assumption is that in a drain section, no new requests should
be allowed through a BlockBackend (blk_drained_begin/end don't exist,
we get drain sections only on the node level). However, there are two
special cases where requests should not be queued:
1. Block jobs: We already make sure that block jobs are paused in a
drain section, so they won't start new requests. However, if the
drain_begin is called on the job's BlockBackend first, it can happen
that we deadlock because the job stays busy until it reaches a pause
point - which it can't if its requests aren't processed any more.
The proper solution here would be to issue all requests through the
job's filter node instead of using a BlockBackend. For now, just
disabling request queuing on the job's BlockBackend is simpler.
2. In test cases where making requests through bdrv_* would be
cumbersome because we'd need a BdrvChild. Since we already have the
functionality to disable request queuing from 1., use it in tests,
too, for convenience (a sketch of such a setup follows below).
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
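As a rough illustration of the opt-out described in 1. and 2.: the sketch below is hypothetical and not part of this patch (the real job and test changes live outside this file-limited diff); it only shows the call pattern for the new helper, with the surrounding setup assumed.

```c
#include "qemu/osdep.h"
#include "block/block.h"
#include "sysemu/block-backend.h"

/*
 * Hypothetical sketch: create a scratch BlockBackend (e.g. for a test or
 * a block job) and opt it out of request queuing, so that its requests
 * are not parked on blk->queued_requests while the node is drained.
 */
static BlockBackend *example_unqueued_blk(void)
{
    BlockBackend *blk = blk_new(qemu_get_aio_context(),
                                BLK_PERM_ALL, BLK_PERM_ALL);

    blk_set_disable_request_queuing(blk, true);   /* new API in this patch */
    return blk;
}
```

A job would make the equivalent call on its own BlockBackend right after creating it; that is what keeps it from getting stuck before reaching a pause point when its side of the graph is drained first.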
Diffstat (limited to 'block/block-backend.c')
-rw-r--r-- | block/block-backend.c | 53 |
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index fdd6b01..c13c5c8 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -79,6 +79,9 @@ struct BlockBackend {
     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
     int quiesce_counter;
+    CoQueue queued_requests;
+    bool disable_request_queuing;
+
     VMChangeStateEntry *vmsh;
     bool force_allow_inactivate;
 
@@ -339,6 +342,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 
     block_acct_init(&blk->stats);
 
+    qemu_co_queue_init(&blk->queued_requests);
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
     QLIST_INIT(&blk->aio_notifiers);
@@ -1096,6 +1100,11 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
     blk->allow_aio_context_change = allow;
 }
 
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+{
+    blk->disable_request_queuing = disable;
+}
+
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                   size_t size)
 {
@@ -1127,13 +1136,24 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
     return 0;
 }
 
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+{
+    if (blk->quiesce_counter && !blk->disable_request_queuing) {
+        qemu_co_queue_wait(&blk->queued_requests, NULL);
+    }
+}
+
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
 {
     int ret;
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
 
+    blk_wait_while_drained(blk);
+
+    /* Call blk_bs() only after waiting, the graph may have changed */
+    bs = blk_bs(blk);
     trace_blk_co_preadv(blk, bs, offset, bytes, flags);
 
     ret = blk_check_byte_request(blk, offset, bytes);
@@ -1159,8 +1179,12 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                 BdrvRequestFlags flags)
 {
     int ret;
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
 
+    blk_wait_while_drained(blk);
+
+    /* Call blk_bs() only after waiting, the graph may have changed */
+    bs = blk_bs(blk);
     trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
 
     ret = blk_check_byte_request(blk, offset, bytes);
@@ -1349,6 +1373,12 @@ static void blk_aio_read_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
+    if (rwco->blk->quiesce_counter) {
+        blk_dec_in_flight(rwco->blk);
+        blk_wait_while_drained(rwco->blk);
+        blk_inc_in_flight(rwco->blk);
+    }
+
     assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                               qiov, rwco->flags);
@@ -1361,6 +1391,12 @@ static void blk_aio_write_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
+    if (rwco->blk->quiesce_counter) {
+        blk_dec_in_flight(rwco->blk);
+        blk_wait_while_drained(rwco->blk);
+        blk_inc_in_flight(rwco->blk);
+    }
+
     assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                                qiov, rwco->flags);
@@ -1482,6 +1518,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
 
 int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    blk_wait_while_drained(blk);
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -1522,7 +1560,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret;
+
+    blk_wait_while_drained(blk);
+
+    ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
     }
@@ -1532,6 +1574,8 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 
 int blk_co_flush(BlockBackend *blk)
 {
+    blk_wait_while_drained(blk);
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -2232,6 +2276,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
         if (blk->dev_ops && blk->dev_ops->drained_end) {
            blk->dev_ops->drained_end(blk->dev_opaque);
         }
+        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+            /* Resume all queued requests */
+        }
     }
 }
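For context, the queuing only kicks in inside a drain section opened at the node level. Below is a minimal, hypothetical caller sketch (not part of this patch; the function name and body are assumptions) of how such a section now interacts with attached BlockBackends:

```c
#include "qemu/osdep.h"
#include "block/block.h"

/*
 * Hypothetical caller: while the section is open, new requests arriving
 * through blk_co_preadv()/blk_co_pwritev() and friends wait on
 * blk->queued_requests (unless queuing is disabled for that BlockBackend);
 * blk_root_drained_end() wakes them up when the last section ends.
 */
static void example_drained_section(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);    /* quiesces the BlockBackends attached to bs */

    /* ... reconfigure the graph, take a snapshot, etc. ... */

    bdrv_drained_end(bs);      /* resumes any requests queued in the meantime */
}
```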