Diffstat (limited to 'util')
-rw-r--r--  util/Makefile.objs  |   1
-rw-r--r--  util/aio-posix.c    | 187
-rw-r--r--  util/async.c        | 237
-rw-r--r--  util/log.c          |   2
-rw-r--r--  util/module.c       |   7
-rw-r--r--  util/nvdimm-utils.c |  29
-rw-r--r--  util/oslib-posix.c  |  32
-rw-r--r--  util/vfio-helpers.c |   6
8 files changed, 334 insertions(+), 167 deletions(-)
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 11262aa..6b38b67 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -20,6 +20,7 @@ util-obj-y += envlist.o path.o module.o
util-obj-y += host-utils.o
util-obj-y += bitmap.o bitops.o hbitmap.o
util-obj-y += fifo8.o
+util-obj-y += nvdimm-utils.o
util-obj-y += cacheinfo.o
util-obj-y += error.o qemu-error.o
util-obj-y += qemu-print.o
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a4977f5..9e1befc 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include "block/block.h"
+#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
@@ -31,12 +32,23 @@ struct AioHandler
AioPollFn *io_poll;
IOHandler *io_poll_begin;
IOHandler *io_poll_end;
- int deleted;
void *opaque;
bool is_external;
QLIST_ENTRY(AioHandler) node;
+ QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
+ QLIST_ENTRY(AioHandler) node_deleted;
};
+/* Add a handler to a ready list */
+static void add_ready_handler(AioHandlerList *ready_list,
+ AioHandler *node,
+ int revents)
+{
+ QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+ node->pfd.revents = revents;
+ QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
#ifdef CONFIG_EPOLL_CREATE1
/* The fd number threshold to switch to epoll */
@@ -67,7 +79,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
int r;
- if (node->deleted || !node->pfd.events) {
+ if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
continue;
}
event.events = epoll_events_from_pfd(node->pfd.events);
@@ -104,17 +116,22 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
}
}
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
- unsigned npfd, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+ int64_t timeout)
{
+ GPollFD pfd = {
+ .fd = ctx->epollfd,
+ .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
+ };
AioHandler *node;
int i, ret = 0;
struct epoll_event events[128];
- assert(npfd == 1);
- assert(pfds[0].fd == ctx->epollfd);
if (timeout > 0) {
- ret = qemu_poll_ns(pfds, npfd, timeout);
+ ret = qemu_poll_ns(&pfd, 1, timeout);
+ if (ret > 0) {
+ timeout = 0;
+ }
}
if (timeout <= 0 || ret > 0) {
ret = epoll_wait(ctx->epollfd, events,
@@ -125,11 +142,13 @@ static int aio_epoll(AioContext *ctx, GPollFD *pfds,
}
for (i = 0; i < ret; i++) {
int ev = events[i].events;
+ int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+ (ev & EPOLLOUT ? G_IO_OUT : 0) |
+ (ev & EPOLLHUP ? G_IO_HUP : 0) |
+ (ev & EPOLLERR ? G_IO_ERR : 0);
+
node = events[i].data.ptr;
- node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
- (ev & EPOLLOUT ? G_IO_OUT : 0) |
- (ev & EPOLLHUP ? G_IO_HUP : 0) |
- (ev & EPOLLERR ? G_IO_ERR : 0);
+ add_ready_handler(ready_list, node, revents);
}
}
out:
@@ -167,8 +186,8 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
}
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
- unsigned npfd, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+ int64_t timeout)
{
assert(false);
}
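
The reworked aio_epoll() above waits in two steps: given a positive timeout it first polls the epoll fd itself (an epoll fd becomes readable exactly when events are pending), then drains those events with a non-blocking epoll_wait(). A minimal standalone sketch of that control flow, using plain poll() as a stand-in for qemu_poll_ns() and illustrative names throughout:

#include <poll.h>
#include <sys/epoll.h>

static int wait_then_drain(int epollfd, struct epoll_event *events,
                           int maxevents, int timeout_ms)
{
    struct pollfd pfd = { .fd = epollfd, .events = POLLIN };
    int ret = 0;

    if (timeout_ms > 0) {
        ret = poll(&pfd, 1, timeout_ms);
        if (ret > 0) {
            timeout_ms = 0;   /* events are pending; don't block again below */
        }
    }
    if (timeout_ms <= 0 || ret > 0) {
        /* -1 blocks, 0 only drains, mirroring the aio_epoll() flow above */
        ret = epoll_wait(epollfd, events, maxevents, timeout_ms);
    }
    return ret;
}
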
@@ -191,9 +210,11 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
AioHandler *node;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (node->pfd.fd == fd)
- if (!node->deleted)
+ if (node->pfd.fd == fd) {
+ if (!QLIST_IS_INSERTED(node, node_deleted)) {
return node;
+ }
+ }
}
return NULL;
@@ -212,7 +233,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
/* If a read is in progress, just mark the node as deleted */
if (qemu_lockcnt_count(&ctx->list_lock)) {
- node->deleted = 1;
+ QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
node->pfd.revents = 0;
return false;
}
@@ -354,7 +375,7 @@ static void poll_set_started(AioContext *ctx, bool started)
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
IOHandler *fn;
- if (node->deleted) {
+ if (QLIST_IS_INSERTED(node, node_deleted)) {
continue;
}
@@ -411,43 +432,82 @@ bool aio_pending(AioContext *ctx)
return result;
}
-static bool aio_dispatch_handlers(AioContext *ctx)
+static void aio_free_deleted_handlers(AioContext *ctx)
{
- AioHandler *node, *tmp;
- bool progress = false;
+ AioHandler *node;
- QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
- int revents;
+ if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
+ return;
+ }
+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+ return; /* we are nested, let the parent do the freeing */
+ }
- revents = node->pfd.revents & node->pfd.events;
- node->pfd.revents = 0;
+ while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
+ QLIST_REMOVE(node, node);
+ QLIST_REMOVE(node, node_deleted);
+ g_free(node);
+ }
- if (!node->deleted &&
- (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
- aio_node_check(ctx, node->is_external) &&
- node->io_read) {
- node->io_read(node->opaque);
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+}
- /* aio_notify() does not count as progress */
- if (node->opaque != &ctx->notifier) {
- progress = true;
- }
- }
- if (!node->deleted &&
- (revents & (G_IO_OUT | G_IO_ERR)) &&
- aio_node_check(ctx, node->is_external) &&
- node->io_write) {
- node->io_write(node->opaque);
+static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
+{
+ bool progress = false;
+ int revents;
+
+ revents = node->pfd.revents & node->pfd.events;
+ node->pfd.revents = 0;
+
+ if (!QLIST_IS_INSERTED(node, node_deleted) &&
+ (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+ aio_node_check(ctx, node->is_external) &&
+ node->io_read) {
+ node->io_read(node->opaque);
+
+ /* aio_notify() does not count as progress */
+ if (node->opaque != &ctx->notifier) {
progress = true;
}
+ }
+ if (!QLIST_IS_INSERTED(node, node_deleted) &&
+ (revents & (G_IO_OUT | G_IO_ERR)) &&
+ aio_node_check(ctx, node->is_external) &&
+ node->io_write) {
+ node->io_write(node->opaque);
+ progress = true;
+ }
- if (node->deleted) {
- if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
- QLIST_REMOVE(node, node);
- g_free(node);
- qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
- }
- }
+ return progress;
+}
+
+/*
+ * If we have a list of ready handlers then this is more efficient than
+ * scanning all handlers with aio_dispatch_handlers().
+ */
+static bool aio_dispatch_ready_handlers(AioContext *ctx,
+ AioHandlerList *ready_list)
+{
+ bool progress = false;
+ AioHandler *node;
+
+ while ((node = QLIST_FIRST(ready_list))) {
+ QLIST_SAFE_REMOVE(node, node_ready);
+ progress = aio_dispatch_handler(ctx, node) || progress;
+ }
+
+ return progress;
+}
+
+/* Slower than aio_dispatch_ready_handlers() but only used via glib */
+static bool aio_dispatch_handlers(AioContext *ctx)
+{
+ AioHandler *node, *tmp;
+ bool progress = false;
+
+ QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+ progress = aio_dispatch_handler(ctx, node) || progress;
}
return progress;
@@ -458,6 +518,7 @@ void aio_dispatch(AioContext *ctx)
qemu_lockcnt_inc(&ctx->list_lock);
aio_bh_poll(ctx);
aio_dispatch_handlers(ctx);
+ aio_free_deleted_handlers(ctx);
qemu_lockcnt_dec(&ctx->list_lock);
timerlistgroup_run_timers(&ctx->tlg);
@@ -514,8 +575,18 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
bool progress = false;
AioHandler *node;
+ /*
+ * Optimization: ->io_poll() handlers often contain RCU read critical
+ * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
+ * -> rcu_read_lock() -> ... sequences with expensive memory
+ * synchronization primitives. Make the entire polling loop an RCU
+ * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
+ * are cheap.
+ */
+ RCU_READ_LOCK_GUARD();
+
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
- if (!node->deleted && node->io_poll &&
+ if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
aio_node_check(ctx, node->is_external) &&
node->io_poll(node->opaque)) {
/*
@@ -609,6 +680,7 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
bool aio_poll(AioContext *ctx, bool blocking)
{
+ AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
AioHandler *node;
int i;
int ret = 0;
@@ -649,7 +721,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
if (!aio_epoll_enabled(ctx)) {
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
- if (!node->deleted && node->pfd.events
+ if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
&& aio_node_check(ctx, node->is_external)) {
add_pollfd(node);
}
@@ -658,13 +730,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* wait until next event */
if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
- AioHandler epoll_handler;
-
- epoll_handler.pfd.fd = ctx->epollfd;
- epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
- npfd = 0;
- add_pollfd(&epoll_handler);
- ret = aio_epoll(ctx, pollfds, npfd, timeout);
+ npfd = 0; /* pollfds[] is not being used */
+ ret = aio_epoll(ctx, &ready_list, timeout);
} else {
ret = qemu_poll_ns(pollfds, npfd, timeout);
}
@@ -719,7 +786,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
/* if we have any readable fds, dispatch event */
if (ret > 0) {
for (i = 0; i < npfd; i++) {
- nodes[i]->pfd.revents = pollfds[i].revents;
+ int revents = pollfds[i].revents;
+
+ if (revents) {
+ add_ready_handler(&ready_list, nodes[i], revents);
+ }
}
}
@@ -728,9 +799,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
progress |= aio_bh_poll(ctx);
if (ret > 0) {
- progress |= aio_dispatch_handlers(ctx);
+ progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
}
+ aio_free_deleted_handlers(ctx);
+
qemu_lockcnt_dec(&ctx->list_lock);
progress |= timerlistgroup_run_timers(&ctx->tlg);
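
For reference, a minimal standalone sketch of the ready-list idiom this patch introduces: the poller moves fired handlers onto a local list, and dispatch pops that list instead of rescanning every registered handler. All names below are illustrative stand-ins for the QLIST-based QEMU code; the on_ready_list flag plays the role of QLIST_IS_INSERTED()/QLIST_SAFE_REMOVE():

#include <stdio.h>
#include <sys/queue.h>

typedef struct Handler {
    int fd;
    int revents;
    LIST_ENTRY(Handler) node_ready;
    int on_ready_list;              /* stand-in for QLIST_IS_INSERTED() */
} Handler;

typedef LIST_HEAD(, Handler) HandlerList;

/* Analogous to add_ready_handler(): safe to call on an already-queued node */
static void add_ready(HandlerList *ready, Handler *h, int revents)
{
    if (h->on_ready_list) {         /* like QLIST_SAFE_REMOVE() */
        LIST_REMOVE(h, node_ready);
    }
    h->revents = revents;
    LIST_INSERT_HEAD(ready, h, node_ready);
    h->on_ready_list = 1;
}

/* Analogous to aio_dispatch_ready_handlers(): O(ready), not O(all handlers) */
static void dispatch_ready(HandlerList *ready)
{
    Handler *h;

    while ((h = LIST_FIRST(ready)) != NULL) {
        LIST_REMOVE(h, node_ready);
        h->on_ready_list = 0;
        printf("fd %d ready, revents 0x%x\n", h->fd, h->revents);
    }
}

int main(void)
{
    HandlerList ready = LIST_HEAD_INITIALIZER(ready);
    Handler a = { .fd = 3 }, b = { .fd = 4 };

    add_ready(&ready, &a, 1);
    add_ready(&ready, &b, 1);
    add_ready(&ready, &a, 3);       /* re-adding only updates revents */
    dispatch_ready(&ready);
    return 0;
}
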
diff --git a/util/async.c b/util/async.c
index c192a24..b94518b 100644
--- a/util/async.c
+++ b/util/async.c
@@ -29,6 +29,7 @@
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "trace.h"
@@ -36,16 +37,76 @@
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
+/* QEMUBH::flags values */
+enum {
+ /* Already enqueued and waiting for aio_bh_poll() */
+ BH_PENDING = (1 << 0),
+
+ /* Invoke the callback */
+ BH_SCHEDULED = (1 << 1),
+
+ /* Delete without invoking callback */
+ BH_DELETED = (1 << 2),
+
+ /* Delete after invoking callback */
+ BH_ONESHOT = (1 << 3),
+
+ /* Schedule periodically when the event loop is idle */
+ BH_IDLE = (1 << 4),
+};
+
struct QEMUBH {
AioContext *ctx;
QEMUBHFunc *cb;
void *opaque;
- QEMUBH *next;
- bool scheduled;
- bool idle;
- bool deleted;
+ QSLIST_ENTRY(QEMUBH) next;
+ unsigned flags;
};
+/* Called concurrently from any thread */
+static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
+{
+ AioContext *ctx = bh->ctx;
+ unsigned old_flags;
+
+ /*
+ * The memory barrier implicit in atomic_fetch_or makes sure that:
+ * 1. idle & any writes needed by the callback are done before the
+ * locations are read in the aio_bh_poll.
+ * 2. ctx is loaded before the callback has a chance to execute and bh
+ * could be freed.
+ */
+ old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+ if (!(old_flags & BH_PENDING)) {
+ QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
+ }
+
+ aio_notify(ctx);
+}
+
+/* Only called from aio_bh_poll() and aio_ctx_finalize() */
+static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
+{
+ QEMUBH *bh = QSLIST_FIRST_RCU(head);
+
+ if (!bh) {
+ return NULL;
+ }
+
+ QSLIST_REMOVE_HEAD(head, next);
+
+ /*
+ * The atomic_and is paired with aio_bh_enqueue(). The implicit memory
+ * barrier ensures that the callback sees all writes done by the scheduling
+ * thread. It also ensures that the scheduling thread sees the cleared
+ * flag before bh->cb has run, and thus will call aio_notify again if
+ * necessary.
+ */
+ *flags = atomic_fetch_and(&bh->flags,
+ ~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
+ return bh;
+}
+
void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
QEMUBH *bh;
@@ -55,15 +116,7 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
.cb = cb,
.opaque = opaque,
};
- qemu_lockcnt_lock(&ctx->list_lock);
- bh->next = ctx->first_bh;
- bh->scheduled = 1;
- bh->deleted = 1;
- /* Make sure that the members are ready before putting bh into list */
- smp_wmb();
- ctx->first_bh = bh;
- qemu_lockcnt_unlock(&ctx->list_lock);
- aio_notify(ctx);
+ aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
}
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
@@ -75,12 +128,6 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
.cb = cb,
.opaque = opaque,
};
- qemu_lockcnt_lock(&ctx->list_lock);
- bh->next = ctx->first_bh;
- /* Make sure that the members are ready before putting bh into list */
- smp_wmb();
- ctx->first_bh = bh;
- qemu_lockcnt_unlock(&ctx->list_lock);
return bh;
}
@@ -89,91 +136,56 @@ void aio_bh_call(QEMUBH *bh)
bh->cb(bh->opaque);
}
-/* Multiple occurrences of aio_bh_poll cannot be called concurrently.
- * The count in ctx->list_lock is incremented before the call, and is
- * not affected by the call.
- */
+/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
int aio_bh_poll(AioContext *ctx)
{
- QEMUBH *bh, **bhp, *next;
- int ret;
- bool deleted = false;
-
- ret = 0;
- for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
- next = atomic_rcu_read(&bh->next);
- /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
- * implicit memory barrier ensures that the callback sees all writes
- * done by the scheduling thread. It also ensures that the scheduling
- * thread sees the zero before bh->cb has run, and thus will call
- * aio_notify again if necessary.
- */
- if (atomic_xchg(&bh->scheduled, 0)) {
+ BHListSlice slice;
+ BHListSlice *s;
+ int ret = 0;
+
+ QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
+ QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+
+ while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
+ QEMUBH *bh;
+ unsigned flags;
+
+ bh = aio_bh_dequeue(&s->bh_list, &flags);
+ if (!bh) {
+ QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next);
+ continue;
+ }
+
+ if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
/* Idle BHs don't count as progress */
- if (!bh->idle) {
+ if (!(flags & BH_IDLE)) {
ret = 1;
}
- bh->idle = 0;
aio_bh_call(bh);
}
- if (bh->deleted) {
- deleted = true;
+ if (flags & (BH_DELETED | BH_ONESHOT)) {
+ g_free(bh);
}
}
- /* remove deleted bhs */
- if (!deleted) {
- return ret;
- }
-
- if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
- bhp = &ctx->first_bh;
- while (*bhp) {
- bh = *bhp;
- if (bh->deleted && !bh->scheduled) {
- *bhp = bh->next;
- g_free(bh);
- } else {
- bhp = &bh->next;
- }
- }
- qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
- }
return ret;
}
void qemu_bh_schedule_idle(QEMUBH *bh)
{
- bh->idle = 1;
- /* Make sure that idle & any writes needed by the callback are done
- * before the locations are read in the aio_bh_poll.
- */
- atomic_mb_set(&bh->scheduled, 1);
+ aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE);
}
void qemu_bh_schedule(QEMUBH *bh)
{
- AioContext *ctx;
-
- ctx = bh->ctx;
- bh->idle = 0;
- /* The memory barrier implicit in atomic_xchg makes sure that:
- * 1. idle & any writes needed by the callback are done before the
- * locations are read in the aio_bh_poll.
- * 2. ctx is loaded before scheduled is set and the callback has a chance
- * to execute.
- */
- if (atomic_xchg(&bh->scheduled, 1) == 0) {
- aio_notify(ctx);
- }
+ aio_bh_enqueue(bh, BH_SCHEDULED);
}
-
/* This func is async.
*/
void qemu_bh_cancel(QEMUBH *bh)
{
- atomic_mb_set(&bh->scheduled, 0);
+ atomic_and(&bh->flags, ~BH_SCHEDULED);
}
/* This func is async. The bottom half will do the delete action at the final
@@ -181,21 +193,16 @@ void qemu_bh_cancel(QEMUBH *bh)
*/
void qemu_bh_delete(QEMUBH *bh)
{
- bh->scheduled = 0;
- bh->deleted = 1;
+ aio_bh_enqueue(bh, BH_DELETED);
}
-int64_t
-aio_compute_timeout(AioContext *ctx)
+static int64_t aio_compute_bh_timeout(BHList *head, int timeout)
{
- int64_t deadline;
- int timeout = -1;
QEMUBH *bh;
- for (bh = atomic_rcu_read(&ctx->first_bh); bh;
- bh = atomic_rcu_read(&bh->next)) {
- if (bh->scheduled) {
- if (bh->idle) {
+ QSLIST_FOREACH_RCU(bh, head, next) {
+ if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+ if (bh->flags & BH_IDLE) {
/* idle bottom halves will be polled at least
* every 10ms */
timeout = 10000000;
@@ -207,6 +214,28 @@ aio_compute_timeout(AioContext *ctx)
}
}
+ return timeout;
+}
+
+int64_t
+aio_compute_timeout(AioContext *ctx)
+{
+ BHListSlice *s;
+ int64_t deadline;
+ int timeout = -1;
+
+ timeout = aio_compute_bh_timeout(&ctx->bh_list, timeout);
+ if (timeout == 0) {
+ return 0;
+ }
+
+ QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
+ timeout = aio_compute_bh_timeout(&s->bh_list, timeout);
+ if (timeout == 0) {
+ return 0;
+ }
+ }
+
deadline = timerlistgroup_deadline_ns(&ctx->tlg);
if (deadline == 0) {
return 0;
@@ -237,15 +266,24 @@ aio_ctx_check(GSource *source)
{
AioContext *ctx = (AioContext *) source;
QEMUBH *bh;
+ BHListSlice *s;
atomic_and(&ctx->notify_me, ~1);
aio_notify_accept(ctx);
- for (bh = ctx->first_bh; bh; bh = bh->next) {
- if (bh->scheduled) {
+ QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
+ if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
return true;
}
}
+
+ QSIMPLEQ_FOREACH(s, &ctx->bh_slice_list, next) {
+ QSLIST_FOREACH_RCU(bh, &s->bh_list, next) {
+ if ((bh->flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) {
+ return true;
+ }
+ }
+ }
return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}
@@ -265,6 +303,8 @@ static void
aio_ctx_finalize(GSource *source)
{
AioContext *ctx = (AioContext *) source;
+ QEMUBH *bh;
+ unsigned flags;
thread_pool_free(ctx->thread_pool);
@@ -287,18 +327,15 @@ aio_ctx_finalize(GSource *source)
assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
qemu_bh_delete(ctx->co_schedule_bh);
- qemu_lockcnt_lock(&ctx->list_lock);
- assert(!qemu_lockcnt_count(&ctx->list_lock));
- while (ctx->first_bh) {
- QEMUBH *next = ctx->first_bh->next;
+ /* There must be no aio_bh_poll() calls going on */
+ assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
+ while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
/* qemu_bh_delete() must have been called on BHs in this AioContext */
- assert(ctx->first_bh->deleted);
+ assert(flags & BH_DELETED);
- g_free(ctx->first_bh);
- ctx->first_bh = next;
+ g_free(bh);
}
- qemu_lockcnt_unlock(&ctx->list_lock);
aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
event_notifier_cleanup(&ctx->notifier);
@@ -445,6 +482,8 @@ AioContext *aio_context_new(Error **errp)
AioContext *ctx;
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+ QSLIST_INIT(&ctx->bh_list);
+ QSIMPLEQ_INIT(&ctx->bh_slice_list);
aio_context_setup(ctx);
ret = event_notifier_init(&ctx->notifier, false);
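
The bottom-half rework above hinges on a single atomic read-modify-write per enqueue: whichever thread flips BH_PENDING from 0 to 1 wins the right to insert the BH, so concurrent schedulers can never double-insert it. A compact sketch of that pattern with C11 atomics (a toy model under assumed names, not the QEMU types):

#include <stdatomic.h>
#include <stdbool.h>

enum { PENDING = 1u << 0, SCHEDULED = 1u << 1 };

typedef struct BH {
    _Atomic unsigned flags;
} BH;

/* Returns true only for the one caller that observed PENDING go 0 -> 1 */
static bool bh_mark_pending(BH *bh, unsigned new_flags)
{
    unsigned old = atomic_fetch_or(&bh->flags, PENDING | new_flags);
    return !(old & PENDING);
}

/* Dequeue side: take a snapshot and clear, as aio_bh_dequeue() does */
static unsigned bh_take_flags(BH *bh)
{
    return atomic_fetch_and(&bh->flags, ~(PENDING | SCHEDULED));
}

The caller that gets true back from bh_mark_pending() performs the single list insertion and the aio_notify(); every other concurrent scheduler merely ORs its flags into an already-pending BH.
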
diff --git a/util/log.c b/util/log.c
index 47f2827..2da6cb3 100644
--- a/util/log.c
+++ b/util/log.c
@@ -332,6 +332,8 @@ const QEMULogItem qemu_log_items[] = {
#ifdef CONFIG_PLUGIN
{ CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
#endif
+ { LOG_STRACE, "strace",
+ "log every user-mode syscall, its input, and its result" },
{ 0, NULL, NULL },
};
diff --git a/util/module.c b/util/module.c
index 8c5315a..236a7bb 100644
--- a/util/module.c
+++ b/util/module.c
@@ -30,6 +30,7 @@ typedef struct ModuleEntry
typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+static bool modules_init_done[MODULE_INIT_MAX];
static ModuleTypeList dso_init_list;
@@ -91,11 +92,17 @@ void module_call_init(module_init_type type)
ModuleTypeList *l;
ModuleEntry *e;
+ if (modules_init_done[type]) {
+ return;
+ }
+
l = find_type(type);
QTAILQ_FOREACH(e, l, node) {
e->init();
}
+
+ modules_init_done[type] = true;
}
#ifdef CONFIG_MODULES
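
The module.c change is a run-once guard: module_call_init() remembers which init types already ran and returns early on repeat calls. A toy illustration of the pattern (illustrative names, not the QEMU API):

#include <stdbool.h>
#include <stdio.h>

enum { INIT_QOM, INIT_MAX };
static bool init_done[INIT_MAX];

static void call_init(int type)
{
    if (init_done[type]) {
        return;                 /* constructors for this type already ran */
    }
    printf("running type-%d constructors\n", type);
    init_done[type] = true;
}

int main(void)
{
    call_init(INIT_QOM);
    call_init(INIT_QOM);        /* second call is a no-op */
    return 0;
}
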
diff --git a/util/nvdimm-utils.c b/util/nvdimm-utils.c
new file mode 100644
index 0000000..5cc768c
--- /dev/null
+++ b/util/nvdimm-utils.c
@@ -0,0 +1,29 @@
+#include "qemu/nvdimm-utils.h"
+#include "hw/mem/nvdimm.h"
+
+static int nvdimm_device_list(Object *obj, void *opaque)
+{
+ GSList **list = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
+ *list = g_slist_append(*list, DEVICE(obj));
+ }
+
+ object_child_foreach(obj, nvdimm_device_list, opaque);
+ return 0;
+}
+
+/*
+ * Enumerate NVDIMM devices and link them into the list that is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * a memory leak.
+ */
+GSList *nvdimm_get_device_list(void)
+{
+ GSList *list = NULL;
+
+ object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list);
+ return list;
+}
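
A hedged usage sketch for the new helper, assuming a QEMU build environment for the headers below: the caller owns only the list container, so g_slist_free() releases the links without touching the devices themselves:

#include "qemu/osdep.h"
#include "qemu/nvdimm-utils.h"

static unsigned count_nvdimms(void)
{
    GSList *list = nvdimm_get_device_list();
    unsigned n = g_slist_length(list);

    g_slist_free(list);     /* frees the links, not the DEVICE()s */
    return n;
}
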
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 5a291cc..897e8f3 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -76,6 +76,10 @@ static MemsetThread *memset_thread;
static int memset_num_threads;
static bool memset_thread_failed;
+static QemuMutex page_mutex;
+static QemuCond page_cond;
+static bool threads_created_flag;
+
int qemu_get_thread_id(void)
{
#if defined(__linux__)
@@ -403,6 +407,17 @@ static void *do_touch_pages(void *arg)
MemsetThread *memset_args = (MemsetThread *)arg;
sigset_t set, oldset;
+ /*
+ * On Linux, the page faults from the loop below can cause mmap_sem
+ * contention with allocation of the thread stacks. Do not start
+ * clearing until all threads have been created.
+ */
+ qemu_mutex_lock(&page_mutex);
+ while (!threads_created_flag) {
+ qemu_cond_wait(&page_cond, &page_mutex);
+ }
+ qemu_mutex_unlock(&page_mutex);
+
/* unblock SIGBUS */
sigemptyset(&set);
sigaddset(&set, SIGBUS);
@@ -451,27 +466,28 @@ static inline int get_memset_num_threads(int smp_cpus)
static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
int smp_cpus)
{
- size_t numpages_per_thread;
- size_t size_per_thread;
+ size_t numpages_per_thread, leftover;
char *addr = area;
int i = 0;
memset_thread_failed = false;
+ threads_created_flag = false;
memset_num_threads = get_memset_num_threads(smp_cpus);
memset_thread = g_new0(MemsetThread, memset_num_threads);
- numpages_per_thread = (numpages / memset_num_threads);
- size_per_thread = (hpagesize * numpages_per_thread);
+ numpages_per_thread = numpages / memset_num_threads;
+ leftover = numpages % memset_num_threads;
for (i = 0; i < memset_num_threads; i++) {
memset_thread[i].addr = addr;
- memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
- numpages : numpages_per_thread;
+ memset_thread[i].numpages = numpages_per_thread + (i < leftover);
memset_thread[i].hpagesize = hpagesize;
qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
do_touch_pages, &memset_thread[i],
QEMU_THREAD_JOINABLE);
- addr += size_per_thread;
- numpages -= numpages_per_thread;
+ addr += memset_thread[i].numpages * hpagesize;
}
+ threads_created_flag = true;
+ qemu_cond_broadcast(&page_cond);
+
for (i = 0; i < memset_num_threads; i++) {
qemu_thread_join(&memset_thread[i].pgthread);
}
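
The new numpages_per_thread + (i < leftover) expression spreads the remainder pages one per thread instead of giving them all to the last thread. A standalone sketch with a worked example, 10 pages over 4 threads yielding 3,3,2,2 rather than the old 2,2,2,4:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    size_t numpages = 10;
    size_t hpagesize = 2u << 20;                /* 2 MiB huge pages, say */
    int nthreads = 4;
    size_t per_thread = numpages / nthreads;    /* 2 */
    size_t leftover = numpages % nthreads;      /* 2 */
    size_t off = 0;

    for (int i = 0; i < nthreads; i++) {
        /* the first `leftover` threads each take one extra page */
        size_t n = per_thread + (i < leftover);
        printf("thread %d: %zu pages at offset %zu\n", i, n, off);
        off += n * hpagesize;
    }
    return 0;
}
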
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 813f7ec..ddd9a96 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -545,7 +545,7 @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
trace_qemu_vfio_do_mapping(s, host, size, iova);
if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
- error_report("VFIO_MAP_DMA: %d", -errno);
+ error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
return -errno;
}
return 0;
@@ -570,7 +570,7 @@ static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
assert(index >= 0 && index < s->nr_mappings);
if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- error_setg(errp, "VFIO_UNMAP_DMA failed: %d", -errno);
+ error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
}
memmove(mapping, &s->mappings[index + 1],
sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
@@ -669,7 +669,7 @@ int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
trace_qemu_vfio_dma_reset_temporary(s);
qemu_mutex_lock(&s->lock);
if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- error_report("VFIO_UNMAP_DMA: %d", -errno);
+ error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
qemu_mutex_unlock(&s->lock);
return -errno;
}
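
The vfio-helpers changes replace raw negative errno values in messages with strerror()/error_setg_errno(). A standalone sketch of the same reporting style, including the save-errno-early habit that keeps the value from being clobbered by later calls (plain POSIX, not QEMU's error API):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

static int checked_ioctl(int fd, unsigned long req, void *arg)
{
    if (ioctl(fd, req, arg) < 0) {
        int saved = errno;          /* save before any call can clobber it */
        fprintf(stderr, "ioctl failed: %s\n", strerror(saved));
        return -saved;              /* callers still receive -errno */
    }
    return 0;
}
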