From 8f90b5e91df59fde0dfecc6738ff39f3edf14be5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 29 Nov 2016 12:33:34 +0100
Subject: block: get rid of bdrv_io_unplugged_begin/end

bdrv_io_plug and bdrv_io_unplug are only called (via their BlockBackend
equivalents) after starting asynchronous I/O.  bdrv_drain is not going to
be called while they are running, because---even if a coroutine runs for
some reason---it will only drain in the next iteration of the event loop
through bdrv_co_yield_to_drain.

So this mechanism is unnecessary, get rid of it.

Signed-off-by: Paolo Bonzini
Message-id: 20161129113334.605-1-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/block/block.h     | 2 --
 include/block/block_int.h | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/block/block.h b/include/block/block.h
index 49bb0b2..8b0dcda 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -526,8 +526,6 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
 
 void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
-void bdrv_io_unplugged_begin(BlockDriverState *bs);
-void bdrv_io_unplugged_end(BlockDriverState *bs);
 
 /**
  * bdrv_drained_begin:
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4e4562d..2d92d7e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -526,9 +526,8 @@ struct BlockDriverState {
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
 
-    /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
+    /* counter for nested bdrv_io_plug */
    unsigned io_plugged;
-    unsigned io_plug_disabled;
 
     int quiesce_counter;
 };
--
cgit v1.1

From cf2c02c8ea0f2d7888f3b45b20e7eb24b47a93f1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 12 Jan 2017 19:07:51 +0100
Subject: aio: rename bh_lock to list_lock

This will be used for AioHandlers too.  There is going to be little or
no contention, so it is better to reuse the same lock.

Reviewed-by: Stefan Hajnoczi
Signed-off-by: Paolo Bonzini
Reviewed-by: Fam Zheng
Message-id: 20170112180800.21085-2-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/block/aio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/block/aio.h b/include/block/aio.h
index 4dca54d..013d400 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -91,7 +91,7 @@ struct AioContext {
     uint32_t notify_me;
 
     /* lock to protect between bh's adders and deleter */
-    QemuMutex bh_lock;
+    QemuMutex list_lock;
 
     /* Anchor of the list of Bottom Halves belonging to the context */
     struct QEMUBH *first_bh;
--
cgit v1.1

From 51dee5e465e1b3454a886792ca3e14b851e8e67d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 12 Jan 2017 19:07:52 +0100
Subject: qemu-thread: introduce QemuLockCnt

A QemuLockCnt comprises a counter and a mutex, with primitives to
increment and decrement the counter, and to take and release the mutex.
It can be used to do lock-free visits to a data structure whenever
mutexes would be too heavy-weight and the critical section is too long
for RCU.

This could be implemented simply by protecting the counter with the
mutex, but QemuLockCnt is harder to misuse and more efficient.
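As a rough illustration of the intended pattern (this sketch is not part of
the patch; the Node list, the deleted flag and the reclamation policy are
hypothetical, and only the qemu_lockcnt_* calls come from the new API), a
visitor raises the counter around a lock-free walk, while nodes marked for
deletion are freed only by whoever drops the count back to zero:

    /* Illustrative only: a hypothetical list whose nodes can be visited
     * without taking a mutex, and reclaimed only when no visitor is left.
     * Real code would use atomic accesses for the deleted flag.
     */
    #include "qemu/osdep.h"
    #include "qemu/thread.h"

    typedef struct Node {
        struct Node *next;
        bool deleted;
        void (*cb)(struct Node *n);
    } Node;

    static Node *list_head;
    static QemuLockCnt visitors;   /* qemu_lockcnt_init(&visitors) at setup */

    /* Writers only mark nodes while holding the mutex; they never free them
     * here, because a concurrent visitor might still be walking the list.
     */
    static void remove_node(Node *n)
    {
        qemu_lockcnt_lock(&visitors);
        n->deleted = true;
        qemu_lockcnt_unlock(&visitors);
    }

    static void visit_all(void)
    {
        qemu_lockcnt_inc(&visitors);            /* blocks reclamation only */
        for (Node *n = list_head; n; n = n->next) {
            if (!n->deleted) {
                n->cb(n);
            }
        }
        /* If we were the last visitor, the count is now zero and we hold
         * the mutex, so it is safe to free the nodes marked as deleted.
         */
        if (qemu_lockcnt_dec_and_lock(&visitors)) {
            Node **p = &list_head;
            while (*p) {
                Node *cur = *p;
                if (cur->deleted) {
                    *p = cur->next;
                    g_free(cur);
                } else {
                    p = &cur->next;
                }
            }
            qemu_lockcnt_unlock(&visitors);
        }
    }

The AioContext patches later in this series apply the same shape to the
bottom-half list (and, in follow-up patches, the AIO handler list).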
Signed-off-by: Paolo Bonzini
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Message-id: 20170112180800.21085-3-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/qemu/thread.h | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

(limited to 'include')

diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index e8e665f..5f7de7b 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -8,6 +8,7 @@ typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
 typedef struct QemuEvent QemuEvent;
+typedef struct QemuLockCnt QemuLockCnt;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -98,4 +99,113 @@ static inline void qemu_spin_unlock(QemuSpin *spin)
     __sync_lock_release(&spin->value);
 }
 
+struct QemuLockCnt {
+    QemuMutex mutex;
+    unsigned count;
+};
+
+/**
+ * qemu_lockcnt_init: initialize a QemuLockcnt
+ * @lockcnt: the lockcnt to initialize
+ *
+ * Initialize lockcnt's counter to zero and prepare its mutex
+ * for usage.
+ */
+void qemu_lockcnt_init(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_destroy: destroy a QemuLockcnt
+ * @lockcnt: the lockcnt to destruct
+ *
+ * Destroy lockcnt's mutex.
+ */
+void qemu_lockcnt_destroy(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_inc: increment a QemuLockCnt's counter
+ * @lockcnt: the lockcnt to operate on
+ *
+ * If the lockcnt's count is zero, wait for critical sections
+ * to finish and increment lockcnt's count to 1.  If the count
+ * is not zero, just increment it.
+ *
+ * Because this function can wait on the mutex, it must not be
+ * called while the lockcnt's mutex is held by the current thread.
+ * For the same reason, qemu_lockcnt_inc can also contribute to
+ * AB-BA deadlocks.  This is a sample deadlock scenario:
+ *
+ *            thread 1                      thread 2
+ *            -------------------------------------------------------
+ *            qemu_lockcnt_lock(&lc1);
+ *                                          qemu_lockcnt_lock(&lc2);
+ *            qemu_lockcnt_inc(&lc2);
+ *                                          qemu_lockcnt_inc(&lc1);
+ */
+void qemu_lockcnt_inc(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec: decrement a QemuLockCnt's counter
+ * @lockcnt: the lockcnt to operate on
+ */
+void qemu_lockcnt_dec(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec_and_lock: decrement a QemuLockCnt's counter and
+ * possibly lock it.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * Decrement lockcnt's count.  If the new count is zero, lock
+ * the mutex and return true.  Otherwise, return false.
+ */
+bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec_if_lock: possibly decrement a QemuLockCnt's counter and
+ * lock it.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * If the count is 1, decrement the count to zero, lock
+ * the mutex and return true.  Otherwise, return false.
+ */
+bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_lock: lock a QemuLockCnt's mutex.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * Remember that concurrent visits are not blocked unless the count is
+ * also zero.  You can use qemu_lockcnt_count to check for this inside a
+ * critical section.
+ */
+void qemu_lockcnt_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_unlock: release a QemuLockCnt's mutex.
+ * @lockcnt: the lockcnt to operate on.
+ */
+void qemu_lockcnt_unlock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_inc_and_unlock: combined unlock/increment on a QemuLockCnt.
+ * @lockcnt: the lockcnt to operate on.
+ *
+ * This is the same as
+ *
+ *     qemu_lockcnt_unlock(lockcnt);
+ *     qemu_lockcnt_inc(lockcnt);
+ *
+ * but more efficient.
+ */
+void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_count: query a LockCnt's count.
+ * @lockcnt: the lockcnt to query.
+ *
+ * Note that the count can change at any time.  Still, while the
+ * lockcnt is locked, one can usefully check whether the count
+ * is non-zero.
+ */
+unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt);
+
 #endif
--
cgit v1.1

From d7c99a1282ca2de1c344b8aa91be5364e9c6aa8f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 12 Jan 2017 19:07:53 +0100
Subject: aio: make ctx->list_lock a QemuLockCnt, subsuming ctx->walking_bh

This will make it possible to walk the list of bottom halves without
holding the AioContext lock---and in turn to call bottom half handlers
without holding the lock.

Reviewed-by: Stefan Hajnoczi
Signed-off-by: Paolo Bonzini
Reviewed-by: Fam Zheng
Message-id: 20170112180800.21085-4-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/block/aio.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/block/aio.h b/include/block/aio.h
index 013d400..be3adfe 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -90,17 +90,15 @@ struct AioContext {
      */
     uint32_t notify_me;
 
-    /* lock to protect between bh's adders and deleter */
-    QemuMutex list_lock;
+    /* A lock to protect between bh's adders and deleter, and to ensure
+     * that no callbacks are removed while we're walking and dispatching
+     * them.
+     */
+    QemuLockCnt list_lock;
 
     /* Anchor of the list of Bottom Halves belonging to the context */
     struct QEMUBH *first_bh;
 
-    /* A simple lock used to protect the first_bh list, and ensure that
-     * no callbacks are removed while we're walking and dispatching callbacks.
-     */
-    int walking_bh;
-
     /* Used by aio_notify.
      *
      * "notified" is used to avoid expensive event_notifier_test_and_clear
--
cgit v1.1

From fbcc3e5004f01653b2885965c59cade25e286c18 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 12 Jan 2017 19:07:54 +0100
Subject: qemu-thread: optimize QemuLockCnt with futexes on Linux

This is complex, but I think it is reasonably documented in the source.

Signed-off-by: Paolo Bonzini
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Message-id: 20170112180800.21085-5-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/qemu/futex.h  | 36 ++++++++++++++++++++++++++++++++++++
 include/qemu/thread.h |  2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 include/qemu/futex.h

(limited to 'include')

diff --git a/include/qemu/futex.h b/include/qemu/futex.h
new file mode 100644
index 0000000..bb7dc9e
--- /dev/null
+++ b/include/qemu/futex.h
@@ -0,0 +1,36 @@
+/*
+ * Wrappers around Linux futex syscall
+ *
+ * Copyright Red Hat, Inc. 2017
+ *
+ * Author:
+ *  Paolo Bonzini
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/syscall.h>
+#include <linux/futex.h>
+
+#define qemu_futex(...)              syscall(__NR_futex, __VA_ARGS__)
+
+static inline void qemu_futex_wake(void *f, int n)
+{
+    qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void qemu_futex_wait(void *f, unsigned val)
+{
+    while (qemu_futex(f, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+        switch (errno) {
+        case EWOULDBLOCK:
+            return;
+        case EINTR:
+            break; /* get out of switch and retry */
+        default:
+            abort();
+        }
+    }
+}
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 5f7de7b..9910f49 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -100,7 +100,9 @@ static inline void qemu_spin_unlock(QemuSpin *spin)
 }
 
 struct QemuLockCnt {
+#ifndef CONFIG_LINUX
     QemuMutex mutex;
+#endif
     unsigned count;
 };
 
--
cgit v1.1

From 7c690fd1931f0908b18f7034b5d71d7b27ca59ef Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 12 Jan 2017 19:07:59 +0100
Subject: aio: document locking

Reviewed-by: Stefan Hajnoczi
Signed-off-by: Paolo Bonzini
Reviewed-by: Fam Zheng
Message-id: 20170112180800.21085-10-pbonzini@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 include/block/aio.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/block/aio.h b/include/block/aio.h
index be3adfe..7df271d 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -53,18 +53,12 @@ struct LinuxAioState;
 struct AioContext {
     GSource source;
 
-    /* Protects all fields from multi-threaded access */
+    /* Used by AioContext users to protect from multi-threaded access. */
     QemuRecMutex lock;
 
-    /* The list of registered AIO handlers */
+    /* The list of registered AIO handlers.  Protected by ctx->list_lock. */
     QLIST_HEAD(, AioHandler) aio_handlers;
 
-    /* This is a simple lock used to protect the aio_handlers list.
-     * Specifically, it's used to ensure that no callbacks are removed while
-     * we're walking and dispatching callbacks.
-     */
-    int walking_handlers;
-
     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
      * accessed with atomic primitives.  If this field is 0, everything
      * (file descriptors, bottom halves, timers) will be re-evaluated
@@ -90,9 +84,9 @@ struct AioContext {
      */
     uint32_t notify_me;
 
-    /* A lock to protect between bh's adders and deleter, and to ensure
-     * that no callbacks are removed while we're walking and dispatching
-     * them.
+    /* A lock to protect between QEMUBH and AioHandler adders and deleter,
+     * and to ensure that no callbacks are removed while we're walking and
+     * dispatching them.
      */
     QemuLockCnt list_lock;
 
@@ -114,7 +108,9 @@ struct AioContext {
     bool notified;
     EventNotifier notifier;
 
-    /* Thread pool for performing work and receiving completion callbacks */
+    /* Thread pool for performing work and receiving completion callbacks.
+     * Has its own locking.
+     */
     struct ThreadPool *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
@@ -124,7 +120,9 @@ struct AioContext {
     struct LinuxAioState *linux_aio;
 #endif
 
-    /* TimerLists for calling timers - one per clock type */
+    /* TimerLists for calling timers - one per clock type.  Has its own
+     * locking.
+     */
     QEMUTimerListGroup tlg;
 
     int external_disable_cnt;
@@ -178,9 +176,11 @@ void aio_context_unref(AioContext *ctx);
  * automatically takes care of calling aio_context_acquire and
  * aio_context_release.
  *
- * Access to timers and BHs from a thread that has not acquired AioContext
- * is possible.  Access to callbacks for now must be done while the AioContext
- * is owned by the thread (FIXME).
+ * Note that this is separate from bdrv_drained_begin/bdrv_drained_end.  A
+ * thread still has to call those to avoid being interrupted by the guest.
+ *
+ * Bottom halves, timers and callbacks can be created or removed without
+ * acquiring the AioContext.
  */
 void aio_context_acquire(AioContext *ctx);
 
--
cgit v1.1
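To close, a minimal hypothetical sketch of how the qemu_futex_wait()/
qemu_futex_wake() wrappers from include/qemu/futex.h fit together.  This is
not the actual QemuLockCnt fast path (which packs the counter and a
lock/waiter state into a single word); it only shows the basic wait/wake
protocol on a one-shot flag.  The OneShotEvent type is invented for the
sketch, and atomic_read/atomic_set are assumed to come from qemu/atomic.h:

    #include "qemu/osdep.h"
    #include "qemu/atomic.h"
    #include "qemu/futex.h"

    /* Hypothetical one-shot event; "value" goes from 0 to 1 exactly once. */
    typedef struct {
        int value;
    } OneShotEvent;

    static void event_wait(OneShotEvent *ev)
    {
        /* Loop because the wakeup may be spurious or may race with the
         * flag being set; FUTEX_WAIT returns immediately (EWOULDBLOCK)
         * if ev->value is no longer 0 by the time the kernel checks it.
         */
        while (atomic_read(&ev->value) == 0) {
            qemu_futex_wait(&ev->value, 0);
        }
    }

    static void event_set(OneShotEvent *ev)
    {
        atomic_set(&ev->value, 1);
        qemu_futex_wake(&ev->value, INT_MAX);   /* wake every waiter */
    }

On hosts without Linux futexes, QemuLockCnt keeps the explicit QemuMutex
shown earlier, which is why the thread.h hunk above leaves the mutex field
in place only under #ifndef CONFIG_LINUX.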