Diffstat (limited to 'util')
-rw-r--r--  util/Makefile.objs  |   1
-rw-r--r--  util/aio-posix.c    | 187
-rw-r--r--  util/async.c        | 237
-rw-r--r--  util/log.c          |   2
-rw-r--r--  util/module.c       |   7
-rw-r--r--  util/nvdimm-utils.c |  29
-rw-r--r--  util/oslib-posix.c  |  32
-rw-r--r--  util/vfio-helpers.c |   6
8 files changed, 334 insertions, 167 deletions
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 11262aa..6b38b67 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -20,6 +20,7 @@ util-obj-y += envlist.o path.o module.o
 util-obj-y += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
+util-obj-y += nvdimm-utils.o
 util-obj-y += cacheinfo.o
 util-obj-y += error.o qemu-error.o
 util-obj-y += qemu-print.o
diff --git a/util/aio-posix.c b/util/aio-posix.c
index a4977f5..9e1befc 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -15,6 +15,7 @@
 
 #include "qemu/osdep.h"
 #include "block/block.h"
+#include "qemu/rcu.h"
 #include "qemu/rcu_queue.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
@@ -31,12 +32,23 @@ struct AioHandler
     AioPollFn *io_poll;
     IOHandler *io_poll_begin;
     IOHandler *io_poll_end;
-    int deleted;
     void *opaque;
     bool is_external;
     QLIST_ENTRY(AioHandler) node;
+    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
+    QLIST_ENTRY(AioHandler) node_deleted;
 };
 
+/* Add a handler to a ready list */
+static void add_ready_handler(AioHandlerList *ready_list,
+                              AioHandler *node,
+                              int revents)
+{
+    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+    node->pfd.revents = revents;
+    QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
 #ifdef CONFIG_EPOLL_CREATE1
 
 /* The fd number threshold to switch to epoll */
@@ -67,7 +79,7 @@ static bool aio_epoll_try_enable(AioContext *ctx)
 
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         int r;
-        if (node->deleted || !node->pfd.events) {
+        if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
             continue;
         }
         event.events = epoll_events_from_pfd(node->pfd.events);
@@ -104,17 +116,22 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
     }
 }
 
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
-                     unsigned npfd, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+                     int64_t timeout)
 {
+    GPollFD pfd = {
+        .fd = ctx->epollfd,
+        .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
+    };
     AioHandler *node;
     int i, ret = 0;
     struct epoll_event events[128];
 
-    assert(npfd == 1);
-    assert(pfds[0].fd == ctx->epollfd);
     if (timeout > 0) {
-        ret = qemu_poll_ns(pfds, npfd, timeout);
+        ret = qemu_poll_ns(&pfd, 1, timeout);
+        if (ret > 0) {
+            timeout = 0;
+        }
     }
     if (timeout <= 0 || ret > 0) {
         ret = epoll_wait(ctx->epollfd, events,
@@ -125,11 +142,13 @@ static int aio_epoll(AioContext *ctx, GPollFD *pfds,
         }
         for (i = 0; i < ret; i++) {
             int ev = events[i].events;
+            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
+                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
+                          (ev & EPOLLERR ? G_IO_ERR : 0);
+
             node = events[i].data.ptr;
-            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
-                (ev & EPOLLOUT ? G_IO_OUT : 0) |
-                (ev & EPOLLHUP ? G_IO_HUP : 0) |
-                (ev & EPOLLERR ? G_IO_ERR : 0);
+            add_ready_handler(ready_list, node, revents);
         }
     }
 out:
@@ -167,8 +186,8 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
 {
 }
 
-static int aio_epoll(AioContext *ctx, GPollFD *pfds,
-                     unsigned npfd, int64_t timeout)
+static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
+                     int64_t timeout)
 {
     assert(false);
 }
@@ -191,9 +210,11 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
     AioHandler *node;
 
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        if (node->pfd.fd == fd)
-            if (!node->deleted)
+        if (node->pfd.fd == fd) {
+            if (!QLIST_IS_INSERTED(node, node_deleted)) {
                 return node;
+            }
+        }
     }
 
     return NULL;
@@ -212,7 +233,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
 
     /* If a read is in progress, just mark the node as deleted */
     if (qemu_lockcnt_count(&ctx->list_lock)) {
-        node->deleted = 1;
+        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
         node->pfd.revents = 0;
         return false;
     }
@@ -354,7 +375,7 @@ static void poll_set_started(AioContext *ctx, bool started)
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
         IOHandler *fn;
 
-        if (node->deleted) {
+        if (QLIST_IS_INSERTED(node, node_deleted)) {
             continue;
         }
 
@@ -411,43 +432,82 @@ bool aio_pending(AioContext *ctx)
     return result;
 }
 
-static bool aio_dispatch_handlers(AioContext *ctx)
+static void aio_free_deleted_handlers(AioContext *ctx)
 {
-    AioHandler *node, *tmp;
-    bool progress = false;
+    AioHandler *node;
 
-    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
-        int revents;
+    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
+        return;
+    }
+    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+        return; /* we are nested, let the parent do the freeing */
+    }
 
-        revents = node->pfd.revents & node->pfd.events;
-        node->pfd.revents = 0;
+    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
+        QLIST_REMOVE(node, node);
+        QLIST_REMOVE(node, node_deleted);
+        g_free(node);
+    }
 
-        if (!node->deleted &&
-            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_read) {
-            node->io_read(node->opaque);
+    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+}
 
-            /* aio_notify() does not count as progress */
-            if (node->opaque != &ctx->notifier) {
-                progress = true;
-            }
-        }
-        if (!node->deleted &&
-            (revents & (G_IO_OUT | G_IO_ERR)) &&
-            aio_node_check(ctx, node->is_external) &&
-            node->io_write) {
-            node->io_write(node->opaque);
+static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
+{
+    bool progress = false;
+    int revents;
+
+    revents = node->pfd.revents & node->pfd.events;
+    node->pfd.revents = 0;
+
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_read) {
+        node->io_read(node->opaque);
+
+        /* aio_notify() does not count as progress */
+        if (node->opaque != &ctx->notifier) {
             progress = true;
         }
+    }
+    if (!QLIST_IS_INSERTED(node, node_deleted) &&
+        (revents & (G_IO_OUT | G_IO_ERR)) &&
+        aio_node_check(ctx, node->is_external) &&
+        node->io_write) {
+        node->io_write(node->opaque);
+        progress = true;
+    }
 
-        if (node->deleted) {
-            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
-                QLIST_REMOVE(node, node);
-                g_free(node);
-                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
-            }
-        }
+    return progress;
+}
+
+/*
+ * If we have a list of ready handlers then this is more efficient than
+ * scanning all handlers with aio_dispatch_handlers().
+ */
+static bool aio_dispatch_ready_handlers(AioContext *ctx,
+                                        AioHandlerList *ready_list)
+{
+    bool progress = false;
+    AioHandler *node;
+
+    while ((node = QLIST_FIRST(ready_list))) {
+        QLIST_SAFE_REMOVE(node, node_ready);
+        progress = aio_dispatch_handler(ctx, node) || progress;
+    }
+
+    return progress;
+}
+
+/* Slower than aio_dispatch_ready_handlers() but only used via glib */
+static bool aio_dispatch_handlers(AioContext *ctx)
+{
+    AioHandler *node, *tmp;
+    bool progress = false;
+
+    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
+        progress = aio_dispatch_handler(ctx, node) || progress;
     }
 
     return progress;
@@ -458,6 +518,7 @@ void aio_dispatch(AioContext *ctx)
     qemu_lockcnt_inc(&ctx->list_lock);
     aio_bh_poll(ctx);
     aio_dispatch_handlers(ctx);
+    aio_free_deleted_handlers(ctx);
     qemu_lockcnt_dec(&ctx->list_lock);
 
     timerlistgroup_run_timers(&ctx->tlg);
@@ -514,8 +575,18 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
     bool progress = false;
     AioHandler *node;
 
+    /*
+     * Optimization: ->io_poll() handlers often contain RCU read critical
+     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
+     * -> rcu_read_lock() -> ... sequences with expensive memory
+     * synchronization primitives.  Make the entire polling loop an RCU
+     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
+     * are cheap.
+     */
+    RCU_READ_LOCK_GUARD();
+
     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-        if (!node->deleted && node->io_poll &&
+        if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
             aio_node_check(ctx, node->is_external) &&
             node->io_poll(node->opaque)) {
             /*
@@ -609,6 +680,7 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
 
 bool aio_poll(AioContext *ctx, bool blocking)
 {
+    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
     AioHandler *node;
     int i;
     int ret = 0;
@@ -649,7 +721,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
         if (!aio_epoll_enabled(ctx)) {
             QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-                if (!node->deleted && node->pfd.events
+                if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
                     && aio_node_check(ctx, node->is_external)) {
                     add_pollfd(node);
                 }
@@ -658,13 +730,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     /* wait until next event */
     if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
-        AioHandler epoll_handler;
-
-        epoll_handler.pfd.fd = ctx->epollfd;
-        epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
-        npfd = 0;
-        add_pollfd(&epoll_handler);
-        ret = aio_epoll(ctx, pollfds, npfd, timeout);
+        npfd = 0; /* pollfds[] is not being used */
+        ret = aio_epoll(ctx, &ready_list, timeout);
     } else {
         ret = qemu_poll_ns(pollfds, npfd, timeout);
     }
@@ -719,7 +786,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
         for (i = 0; i < npfd; i++) {
-            nodes[i]->pfd.revents = pollfds[i].revents;
+            int revents = pollfds[i].revents;
+
+            if (revents) {
+                add_ready_handler(&ready_list, nodes[i], revents);
+            }
         }
     }
 
@@ -728,9 +799,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress |= aio_bh_poll(ctx);
 
     if (ret > 0) {
-        progress |= aio_dispatch_handlers(ctx);
+        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
    }
 
+    aio_free_deleted_handlers(ctx);
+
     qemu_lockcnt_dec(&ctx->list_lock);
 
     progress |= timerlistgroup_run_timers(&ctx->tlg);
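
The aio-posix changes above stop rescanning every registered handler after epoll_wait(): handlers that epoll reports as ready are linked onto a temporary ready list and only that list is dispatched, making dispatch O(number of ready fds) instead of O(all fds). A minimal standalone sketch of the same collect-then-dispatch pattern, using the BSD <sys/queue.h> LIST macros instead of QEMU's queue.h/rcu_queue.h (all names here are illustrative, not QEMU API):

    #include <sys/epoll.h>
    #include <sys/queue.h>

    typedef struct Handler Handler;
    struct Handler {
        int fd;
        unsigned revents;               /* events reported by epoll */
        void (*cb)(Handler *h);
        LIST_ENTRY(Handler) node_ready; /* valid only while on the ready list */
    };
    LIST_HEAD(ReadyList, Handler);

    /* Drain the ready list, unlinking each handler before its callback runs */
    static void dispatch_ready(struct ReadyList *ready)
    {
        Handler *h;

        while ((h = LIST_FIRST(ready))) {
            LIST_REMOVE(h, node_ready);
            h->cb(h);
        }
    }

    /* One poll iteration: collect ready handlers, then dispatch only those */
    static int poll_once(int epollfd, int timeout_ms)
    {
        struct ReadyList ready = LIST_HEAD_INITIALIZER(ready);
        struct epoll_event events[128];
        int n = epoll_wait(epollfd, events, 128, timeout_ms);

        for (int i = 0; i < n; i++) {
            /* handler registered as epoll_ctl() data.ptr, as in aio-posix */
            Handler *h = events[i].data.ptr;

            h->revents = events[i].events;
            LIST_INSERT_HEAD(&ready, h, node_ready);
        }
        dispatch_ready(&ready);
        return n;
    }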
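
Deletion becomes O(1) for the same reason: aio_remove_fd_handler() just links the node onto ctx->deleted_aio_handlers when a list walk is in progress, and aio_free_deleted_handlers() later frees the backlog once it is safe. A reduced model of that idea, with a plain C11 reader counter standing in for QEMU's qemu_lockcnt (an assumption made purely to keep the sketch self-contained):

    #include <stdatomic.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    typedef struct Node Node;
    struct Node {
        LIST_ENTRY(Node) node;         /* on the active handler list */
        LIST_ENTRY(Node) node_deleted; /* on the deleted list, if marked */
    };
    LIST_HEAD(NodeList, Node);

    static struct NodeList handlers = LIST_HEAD_INITIALIZER(handlers);
    static struct NodeList deleted  = LIST_HEAD_INITIALIZER(deleted);
    static atomic_int walkers;         /* incremented around each list walk */

    /* O(1): defer the free if any walk might still dereference the node */
    static void remove_handler(Node *n)
    {
        if (atomic_load(&walkers) > 0) {
            LIST_INSERT_HEAD(&deleted, n, node_deleted);
        } else {
            LIST_REMOVE(n, node);
            free(n);
        }
    }

    /* Called by a walker after its pass; only the outermost one frees */
    static void free_deleted(void)
    {
        Node *n;

        if (atomic_load(&walkers) != 1) {
            return; /* nested walk: let the outermost caller do it */
        }
        while ((n = LIST_FIRST(&deleted))) {
            LIST_REMOVE(n, node_deleted);
            LIST_REMOVE(n, node); /* also unlink from the active list */
            free(n);
        }
    }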
#include "qemu/main-loop.h" #include "qemu/atomic.h" +#include "qemu/rcu_queue.h" #include "block/raw-aio.h" #include "qemu/coroutine_int.h" #include "trace.h" @@ -36,16 +37,76 @@ /***********************************************************/ /* bottom halves (can be seen as timers which expire ASAP) */ +/* QEMUBH::flags values */ +enum { + /* Already enqueued and waiting for aio_bh_poll() */ + BH_PENDING = (1 << 0), + + /* Invoke the callback */ + BH_SCHEDULED = (1 << 1), + + /* Delete without invoking callback */ + BH_DELETED = (1 << 2), + + /* Delete after invoking callback */ + BH_ONESHOT = (1 << 3), + + /* Schedule periodically when the event loop is idle */ + BH_IDLE = (1 << 4), +}; + struct QEMUBH { AioContext *ctx; QEMUBHFunc *cb; void *opaque; - QEMUBH *next; - bool scheduled; - bool idle; - bool deleted; + QSLIST_ENTRY(QEMUBH) next; + unsigned flags; }; +/* Called concurrently from any thread */ +static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) +{ + AioContext *ctx = bh->ctx; + unsigned old_flags; + + /* + * The memory barrier implicit in atomic_fetch_or makes sure that: + * 1. idle & any writes needed by the callback are done before the + * locations are read in the aio_bh_poll. + * 2. ctx is loaded before the callback has a chance to execute and bh + * could be freed. + */ + old_flags = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags); + if (!(old_flags & BH_PENDING)) { + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); + } + + aio_notify(ctx); +} + +/* Only called from aio_bh_poll() and aio_ctx_finalize() */ +static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) +{ + QEMUBH *bh = QSLIST_FIRST_RCU(head); + + if (!bh) { + return NULL; + } + + QSLIST_REMOVE_HEAD(head, next); + + /* + * The atomic_and is paired with aio_bh_enqueue(). The implicit memory + * barrier ensures that the callback sees all writes done by the scheduling + * thread. It also ensures that the scheduling thread sees the cleared + * flag before bh->cb has run, and thus will call aio_notify again if + * necessary. + */ + *flags = atomic_fetch_and(&bh->flags, + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); + return bh; +} + void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) { QEMUBH *bh; @@ -55,15 +116,7 @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) .cb = cb, .opaque = opaque, }; - qemu_lockcnt_lock(&ctx->list_lock); - bh->next = ctx->first_bh; - bh->scheduled = 1; - bh->deleted = 1; - /* Make sure that the members are ready before putting bh into list */ - smp_wmb(); - ctx->first_bh = bh; - qemu_lockcnt_unlock(&ctx->list_lock); - aio_notify(ctx); + aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT); } QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) @@ -75,12 +128,6 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) .cb = cb, .opaque = opaque, }; - qemu_lockcnt_lock(&ctx->list_lock); - bh->next = ctx->first_bh; - /* Make sure that the members are ready before putting bh into list */ - smp_wmb(); - ctx->first_bh = bh; - qemu_lockcnt_unlock(&ctx->list_lock); return bh; } @@ -89,91 +136,56 @@ void aio_bh_call(QEMUBH *bh) bh->cb(bh->opaque); } -/* Multiple occurrences of aio_bh_poll cannot be called concurrently. - * The count in ctx->list_lock is incremented before the call, and is - * not affected by the call. - */ +/* Multiple occurrences of aio_bh_poll cannot be called concurrently. 
*/ int aio_bh_poll(AioContext *ctx) { - QEMUBH *bh, **bhp, *next; - int ret; - bool deleted = false; - - ret = 0; - for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) { - next = atomic_rcu_read(&bh->next); - /* The atomic_xchg is paired with the one in qemu_bh_schedule. The - * implicit memory barrier ensures that the callback sees all writes - * done by the scheduling thread. It also ensures that the scheduling - * thread sees the zero before bh->cb has run, and thus will call - * aio_notify again if necessary. - */ - if (atomic_xchg(&bh->scheduled, 0)) { + BHListSlice slice; + BHListSlice *s; + int ret = 0; + + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); + + while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) { + QEMUBH *bh; + unsigned flags; + + bh = aio_bh_dequeue(&s->bh_list, &flags); + if (!bh) { + QSIMPLEQ_REMOVE_HEAD(&ctx->bh_slice_list, next); + continue; + } + + if ((flags & (BH_SCHEDULED | BH_DELETED)) == BH_SCHEDULED) { /* Idle BHs don't count as progress */ - if (!bh->idle) { + if (!(flags & BH_IDLE)) { ret = 1; } - bh->idle = 0; aio_bh_call(bh); } - if (bh->deleted) { - deleted = true; + if (flags & (BH_DELETED | BH_ONESHOT)) { + g_free(bh); } } - /* remove deleted bhs */ - if (!deleted) { - return ret; - } - - if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { - bhp = &ctx->first_bh; - while (*bhp) { - bh = *bhp; - if (bh->deleted && !bh->scheduled) { - *bhp = bh->next; - g_free(bh); - } else { - bhp = &bh->next; - } - } - qemu_lockcnt_inc_and_unlock(&ctx->list_lock); - } return ret; } void qemu_bh_schedule_idle(QEMUBH *bh) { - bh->idle = 1; - /* Make sure that idle & any writes needed by the callback are done - * before the locations are read in the aio_bh_poll. - */ - atomic_mb_set(&bh->scheduled, 1); + aio_bh_enqueue(bh, BH_SCHEDULED | BH_IDLE); } void qemu_bh_schedule(QEMUBH *bh) { - AioContext *ctx; - - ctx = bh->ctx; - bh->idle = 0; - /* The memory barrier implicit in atomic_xchg makes sure that: - * 1. idle & any writes needed by the callback are done before the - * locations are read in the aio_bh_poll. - * 2. ctx is loaded before scheduled is set and the callback has a chance - * to execute. - */ - if (atomic_xchg(&bh->scheduled, 1) == 0) { - aio_notify(ctx); - } + aio_bh_enqueue(bh, BH_SCHEDULED); } - /* This func is async. 
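
In the async.c rewrite above, one flags word replaces the separate scheduled/idle/deleted booleans, so a single atomic read-modify-write decides both what to do with the BH and which thread gets to insert it. A reduced C11 model of the enqueue side (the list push and the wakeup are simplified stand-ins, not QEMU API):

    #include <stdatomic.h>
    #include <stddef.h>

    enum {
        BH_PENDING   = 1 << 0, /* already on the list, awaiting poll */
        BH_SCHEDULED = 1 << 1, /* run the callback at the next poll */
    };

    typedef struct BH BH;
    struct BH {
        _Atomic unsigned flags;
        BH *next;
        void (*cb)(void *opaque);
        void *opaque;
    };

    static _Atomic(BH *) bh_list; /* lock-free singly-linked pending list */

    static void notify_event_loop(void)
    {
        /* stand-in for aio_notify(): e.g. write to an eventfd */
    }

    /* Callable concurrently from any thread, like aio_bh_enqueue() */
    static void bh_enqueue(BH *bh, unsigned new_flags)
    {
        unsigned old = atomic_fetch_or(&bh->flags, BH_PENDING | new_flags);

        /*
         * Exactly one thread observes BH_PENDING going 0 -> 1 and performs
         * the insertion; concurrent schedulers only OR in their flag bits.
         */
        if (!(old & BH_PENDING)) {
            BH *head = atomic_load(&bh_list);
            do {
                bh->next = head;
            } while (!atomic_compare_exchange_weak(&bh_list, &head, bh));
        }
        notify_event_loop();
    }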
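
On the consumer side, aio_bh_poll() steals the whole pending list in one atomic operation (QSLIST_MOVE_ATOMIC) and then drains the stolen snapshot; QEMU additionally chains such snapshots on ctx->bh_slice_list so that a nested aio_bh_poll() preserves ordering. Continuing the sketch above, with a plain atomic_exchange and without the slice chaining (so re-entrancy and FIFO order are not modelled):

    /* Dequeue side, modelling the steal-then-drain of aio_bh_poll() */
    static void bh_poll(void)
    {
        /* Steal every currently-pending BH at once */
        BH *slice = atomic_exchange(&bh_list, NULL);

        while (slice) {
            BH *bh = slice;
            unsigned flags;

            slice = bh->next;
            /* Clear PENDING/SCHEDULED, mirroring aio_bh_dequeue() */
            flags = atomic_fetch_and(&bh->flags,
                                     ~(BH_PENDING | BH_SCHEDULED));
            if (flags & BH_SCHEDULED) {
                bh->cb(bh->opaque);
            }
        }
    }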
diff --git a/util/log.c b/util/log.c
--- a/util/log.c
+++ b/util/log.c
@@ -332,6 +332,8 @@ const QEMULogItem qemu_log_items[] = {
 #ifdef CONFIG_PLUGIN
     { CPU_LOG_PLUGIN, "plugin", "output from TCG plugins\n"},
 #endif
+    { LOG_STRACE, "strace",
+      "log every user-mode syscall, its input, and its result" },
     { 0, NULL, NULL },
 };
 
diff --git a/util/module.c b/util/module.c
index 8c5315a..236a7bb 100644
--- a/util/module.c
+++ b/util/module.c
@@ -30,6 +30,7 @@ typedef struct ModuleEntry
 typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
 
 static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+static bool modules_init_done[MODULE_INIT_MAX];
 
 static ModuleTypeList dso_init_list;
 
@@ -91,11 +92,17 @@ void module_call_init(module_init_type type)
     ModuleTypeList *l;
     ModuleEntry *e;
 
+    if (modules_init_done[type]) {
+        return;
+    }
+
     l = find_type(type);
 
     QTAILQ_FOREACH(e, l, node) {
         e->init();
     }
+
+    modules_init_done[type] = true;
 }
 
 #ifdef CONFIG_MODULES
diff --git a/util/nvdimm-utils.c b/util/nvdimm-utils.c
new file mode 100644
index 0000000..5cc768c
--- /dev/null
+++ b/util/nvdimm-utils.c
@@ -0,0 +1,29 @@
+#include "qemu/nvdimm-utils.h"
+#include "hw/mem/nvdimm.h"
+
+static int nvdimm_device_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
+        *list = g_slist_append(*list, DEVICE(obj));
+    }
+
+    object_child_foreach(obj, nvdimm_device_list, opaque);
+    return 0;
+}
+
+/*
+ * inquire NVDIMM devices and link them into the list which is
+ * returned to the caller.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+GSList *nvdimm_get_device_list(void)
+{
+    GSList *list = NULL;
+
+    object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list);
+    return list;
+}
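
nvdimm_device_list() above recurses by re-invoking object_child_foreach() from inside its own callback, so the whole QOM composition tree below the machine is visited and every NVDIMM collected. The shape of that traversal, reduced to a plain tree with hypothetical types (nothing here is QEMU API):

    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct Node Node;
    struct Node {
        bool is_match;      /* stands in for object_dynamic_cast() != NULL */
        Node **children;
        size_t n_children;
    };

    typedef struct List {
        Node *item;
        struct List *next;
    } List;

    /* Collect matching nodes from the whole subtree rooted at obj */
    static void visit(Node *obj, List **list)
    {
        if (obj->is_match) {
            List *cell = malloc(sizeof(*cell));

            cell->item = obj;
            cell->next = *list; /* prepend; the QEMU helper appends */
            *list = cell;
        }
        /* Recurse, as the callback re-calls object_child_foreach() */
        for (size_t i = 0; i < obj->n_children; i++) {
            visit(obj->children[i], list);
        }
    }

As in nvdimm_get_device_list(), the caller owns the resulting list and must free it.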
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 5a291cc..897e8f3 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -76,6 +76,10 @@ static MemsetThread *memset_thread;
 static int memset_num_threads;
 static bool memset_thread_failed;
 
+static QemuMutex page_mutex;
+static QemuCond page_cond;
+static bool threads_created_flag;
+
 int qemu_get_thread_id(void)
 {
 #if defined(__linux__)
@@ -403,6 +407,17 @@ static void *do_touch_pages(void *arg)
     MemsetThread *memset_args = (MemsetThread *)arg;
     sigset_t set, oldset;
 
+    /*
+     * On Linux, the page faults from the loop below can cause mmap_sem
+     * contention with allocation of the thread stacks. Do not start
+     * clearing until all threads have been created.
+     */
+    qemu_mutex_lock(&page_mutex);
+    while(!threads_created_flag){
+        qemu_cond_wait(&page_cond, &page_mutex);
+    }
+    qemu_mutex_unlock(&page_mutex);
+
     /* unblock SIGBUS */
     sigemptyset(&set);
     sigaddset(&set, SIGBUS);
@@ -451,27 +466,28 @@ static inline int get_memset_num_threads(int smp_cpus)
 static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                             int smp_cpus)
 {
-    size_t numpages_per_thread;
-    size_t size_per_thread;
+    size_t numpages_per_thread, leftover;
     char *addr = area;
     int i = 0;
 
     memset_thread_failed = false;
+    threads_created_flag = false;
     memset_num_threads = get_memset_num_threads(smp_cpus);
     memset_thread = g_new0(MemsetThread, memset_num_threads);
-    numpages_per_thread = (numpages / memset_num_threads);
-    size_per_thread = (hpagesize * numpages_per_thread);
+    numpages_per_thread = numpages / memset_num_threads;
+    leftover = numpages % memset_num_threads;
     for (i = 0; i < memset_num_threads; i++) {
         memset_thread[i].addr = addr;
-        memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
-                                    numpages : numpages_per_thread;
+        memset_thread[i].numpages = numpages_per_thread + (i < leftover);
         memset_thread[i].hpagesize = hpagesize;
         qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
                            do_touch_pages, &memset_thread[i],
                            QEMU_THREAD_JOINABLE);
-        addr += size_per_thread;
-        numpages -= numpages_per_thread;
+        addr += memset_thread[i].numpages * hpagesize;
    }
+
+    threads_created_flag = true;
+    qemu_cond_broadcast(&page_cond);
+
     for (i = 0; i < memset_num_threads; i++) {
         qemu_thread_join(&memset_thread[i].pgthread);
     }
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 813f7ec..ddd9a96 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -545,7 +545,7 @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
 
     trace_qemu_vfio_do_mapping(s, host, size, iova);
     if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
-        error_report("VFIO_MAP_DMA: %d", -errno);
+        error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
         return -errno;
     }
     return 0;
@@ -570,7 +570,7 @@ static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
     assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
     assert(index >= 0 && index < s->nr_mappings);
     if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
-        error_setg(errp, "VFIO_UNMAP_DMA failed: %d", -errno);
+        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
     }
     memmove(mapping, &s->mappings[index + 1],
             sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
@@ -669,7 +669,7 @@ int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
     trace_qemu_vfio_dma_reset_temporary(s);
     qemu_mutex_lock(&s->lock);
     if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
-        error_report("VFIO_UNMAP_DMA: %d", -errno);
+        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
         qemu_mutex_unlock(&s->lock);
         return -errno;
     }
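
Two things change in touch_all_pages() above: leftover pages (numpages % threads) are now spread one-per-thread across the first threads instead of all landing on the last one (10 pages over 4 threads become 3,3,2,2 rather than 2,2,2,4), and no thread starts faulting pages until all of them have been created, avoiding mmap_sem contention with the thread-stack allocations. A self-contained pthread sketch of both ideas (QEMU's qemu_mutex/qemu_cond wrappers are replaced by raw pthreads here, and the broadcast is done under the mutex, a small deliberate difference):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t page_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  page_cond  = PTHREAD_COND_INITIALIZER;
    static bool threads_created;

    /* Worker: wait until every sibling exists, then touch its page range */
    static void *worker(void *arg)
    {
        pthread_mutex_lock(&page_mutex);
        while (!threads_created) {
            pthread_cond_wait(&page_cond, &page_mutex);
        }
        pthread_mutex_unlock(&page_mutex);
        /* ... fault in this thread's share of pages here ... */
        return NULL;
    }

    int main(void)
    {
        enum { NTHREADS = 4 };
        size_t numpages   = 10;
        size_t per_thread = numpages / NTHREADS; /* 2 */
        size_t leftover   = numpages % NTHREADS; /* 2 */
        pthread_t tid[NTHREADS];

        for (size_t i = 0; i < NTHREADS; i++) {
            /* first `leftover` threads take one extra page: 3,3,2,2 */
            size_t share = per_thread + (i < leftover);

            printf("thread %zu: %zu pages\n", i, share);
            pthread_create(&tid[i], NULL, worker, NULL);
        }

        /* All threads exist: release them */
        pthread_mutex_lock(&page_mutex);
        threads_created = true;
        pthread_cond_broadcast(&page_cond);
        pthread_mutex_unlock(&page_mutex);

        for (size_t i = 0; i < NTHREADS; i++) {
            pthread_join(tid[i], NULL);
        }
        return 0;
    }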