diff options
138 files changed, 3818 insertions, 2386 deletions
@@ -171,8 +171,7 @@ endif qemu-img.o: qemu-img-cmds.h tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ - qemu-timer-common.o main-loop.o notify.o \ - iohandler.o cutils.o iov.o async.o error.o + main-loop.o iohandler.o error.o tools-obj-$(CONFIG_POSIX) += compatfd.o qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) @@ -181,7 +180,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y) qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o -vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o +vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) libcacard/vscclient.o $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS)," LINK $@") fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o oslib-posix.o $(trace-obj-y) @@ -224,7 +223,7 @@ $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py) QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h) $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN) -qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y) +qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(oslib-obj-y) $(trace-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y) QEMULIBS=libuser libdis libdis-user diff --git a/Makefile.objs b/Makefile.objs index 9eca179..2b5427e 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -19,7 +19,7 @@ universal-obj-y += $(qom-obj-y) ####################################################################### # oslib-obj-y is code depending on the OS (win32 vs posix) -oslib-obj-y = osdep.o +oslib-obj-y = osdep.o cutils.o qemu-timer-common.o oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o @@ -41,12 +41,12 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### # block-obj-y is code used by both qemu system emulation and qemu-img -block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o -block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o -block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o +block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o +block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o +block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) -block-obj-$(CONFIG_POSIX) += posix-aio-compat.o -block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o +block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o +block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o block-obj-y += block/ block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o @@ -92,9 +92,8 @@ common-obj-y += ui/ common-obj-y += bt-host.o bt-vhci.o common-obj-y += dma-helpers.o -common-obj-y += iov.o acl.o +common-obj-y += acl.o common-obj-$(CONFIG_POSIX) += compatfd.o -common-obj-y += event_notifier.o common-obj-y += qemu-timer.o qemu-timer-common.o common-obj-y += qtest.o common-obj-y += vl.o @@ -113,7 +112,7 @@ endif user-obj-y = user-obj-y += envlist.o path.o user-obj-y += tcg-runtime.o host-utils.o -user-obj-y += cutils.o iov.o cache-utils.o +user-obj-y += cache-utils.o user-obj-y += module.o user-obj-y += qemu-user.o user-obj-y += $(trace-obj-y) @@ -228,9 +227,8 @@ universal-obj-y += $(qapi-obj-y) ###################################################################### # guest agent -qga-obj-y = qga/ qemu-ga.o module.o -qga-obj-$(CONFIG_WIN32) += oslib-win32.o -qga-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-sockets.o qemu-option.o +qga-obj-y = qga/ qemu-ga.o module.o qemu-tool.o +qga-obj-$(CONFIG_POSIX) += qemu-sockets.o qemu-option.o vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS) diff --git a/aio-posix.c b/aio-posix.c new file mode 100644 index 0000000..05cc84e --- /dev/null +++ b/aio-posix.c @@ -0,0 +1,268 @@ +/* + * QEMU aio implementation + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" +#include "block.h" +#include "qemu-queue.h" +#include "qemu_socket.h" + +struct AioHandler +{ + GPollFD pfd; + IOHandler *io_read; + IOHandler *io_write; + AioFlushHandler *io_flush; + int deleted; + void *opaque; + QLIST_ENTRY(AioHandler) node; +}; + +static AioHandler *find_aio_handler(AioContext *ctx, int fd) +{ + AioHandler *node; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (node->pfd.fd == fd) + if (!node->deleted) + return node; + } + + return NULL; +} + +void aio_set_fd_handler(AioContext *ctx, + int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque) +{ + AioHandler *node; + + node = find_aio_handler(ctx, fd); + + /* Are we deleting the fd handler? */ + if (!io_read && !io_write) { + if (node) { + g_source_remove_poll(&ctx->source, &node->pfd); + + /* If the lock is held, just mark the node as deleted */ + if (ctx->walking_handlers) { + node->deleted = 1; + node->pfd.revents = 0; + } else { + /* Otherwise, delete it for real. We can't just mark it as + * deleted because deleted nodes are only cleaned up after + * releasing the walking_handlers lock. + */ + QLIST_REMOVE(node, node); + g_free(node); + } + } + } else { + if (node == NULL) { + /* Alloc and insert if it's not already there */ + node = g_malloc0(sizeof(AioHandler)); + node->pfd.fd = fd; + QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + + g_source_add_poll(&ctx->source, &node->pfd); + } + /* Update handler with latest information */ + node->io_read = io_read; + node->io_write = io_write; + node->io_flush = io_flush; + node->opaque = opaque; + + node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0); + node->pfd.events |= (io_write ? G_IO_OUT : 0); + } + + aio_notify(ctx); +} + +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush) +{ + aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), + (IOHandler *)io_read, NULL, + (AioFlushHandler *)io_flush, notifier); +} + +bool aio_pending(AioContext *ctx) +{ + AioHandler *node; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + int revents; + + /* + * FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because + * main-loop.c is still select based (due to the slirp legacy). + * If main-loop.c ever switches to poll, G_IO_ERR should be + * tested too. Dispatching G_IO_ERR to both handlers should be + * okay, since handlers need to be ready for spurious wakeups. + */ + revents = node->pfd.revents & node->pfd.events; + if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) { + return true; + } + if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) { + return true; + } + } + + return false; +} + +bool aio_poll(AioContext *ctx, bool blocking) +{ + static struct timeval tv0; + AioHandler *node; + fd_set rdfds, wrfds; + int max_fd = -1; + int ret; + bool busy, progress; + + progress = false; + + /* + * If there are callbacks left that have been queued, we need to call then. + * Do not call select in this case, because it is possible that the caller + * does not need a complete flush (as is the case for qemu_aio_wait loops). + */ + if (aio_bh_poll(ctx)) { + blocking = false; + progress = true; + } + + /* + * Then dispatch any pending callbacks from the GSource. + * + * We have to walk very carefully in case qemu_aio_set_fd_handler is + * called while we're walking. + */ + node = QLIST_FIRST(&ctx->aio_handlers); + while (node) { + AioHandler *tmp; + int revents; + + ctx->walking_handlers++; + + revents = node->pfd.revents & node->pfd.events; + node->pfd.revents = 0; + + /* See comment in aio_pending. */ + if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) { + node->io_read(node->opaque); + progress = true; + } + if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) { + node->io_write(node->opaque); + progress = true; + } + + tmp = node; + node = QLIST_NEXT(node, node); + + ctx->walking_handlers--; + + if (!ctx->walking_handlers && tmp->deleted) { + QLIST_REMOVE(tmp, node); + g_free(tmp); + } + } + + if (progress && !blocking) { + return true; + } + + ctx->walking_handlers++; + + FD_ZERO(&rdfds); + FD_ZERO(&wrfds); + + /* fill fd sets */ + busy = false; + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + /* If there aren't pending AIO operations, don't invoke callbacks. + * Otherwise, if there are no AIO requests, qemu_aio_wait() would + * wait indefinitely. + */ + if (!node->deleted && node->io_flush) { + if (node->io_flush(node->opaque) == 0) { + continue; + } + busy = true; + } + if (!node->deleted && node->io_read) { + FD_SET(node->pfd.fd, &rdfds); + max_fd = MAX(max_fd, node->pfd.fd + 1); + } + if (!node->deleted && node->io_write) { + FD_SET(node->pfd.fd, &wrfds); + max_fd = MAX(max_fd, node->pfd.fd + 1); + } + } + + ctx->walking_handlers--; + + /* No AIO operations? Get us out of here */ + if (!busy) { + return progress; + } + + /* wait until next event */ + ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0); + + /* if we have any readable fds, dispatch event */ + if (ret > 0) { + /* we have to walk very carefully in case + * qemu_aio_set_fd_handler is called while we're walking */ + node = QLIST_FIRST(&ctx->aio_handlers); + while (node) { + AioHandler *tmp; + + ctx->walking_handlers++; + + if (!node->deleted && + FD_ISSET(node->pfd.fd, &rdfds) && + node->io_read) { + node->io_read(node->opaque); + progress = true; + } + if (!node->deleted && + FD_ISSET(node->pfd.fd, &wrfds) && + node->io_write) { + node->io_write(node->opaque); + progress = true; + } + + tmp = node; + node = QLIST_NEXT(node, node); + + ctx->walking_handlers--; + + if (!ctx->walking_handlers && tmp->deleted) { + QLIST_REMOVE(tmp, node); + g_free(tmp); + } + } + } + + return progress; +} diff --git a/aio-win32.c b/aio-win32.c new file mode 100644 index 0000000..a84eb71 --- /dev/null +++ b/aio-win32.c @@ -0,0 +1,215 @@ +/* + * QEMU aio implementation + * + * Copyright IBM Corp., 2008 + * Copyright Red Hat Inc., 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" +#include "block.h" +#include "qemu-queue.h" +#include "qemu_socket.h" + +struct AioHandler { + EventNotifier *e; + EventNotifierHandler *io_notify; + AioFlushEventNotifierHandler *io_flush; + GPollFD pfd; + int deleted; + QLIST_ENTRY(AioHandler) node; +}; + +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *e, + EventNotifierHandler *io_notify, + AioFlushEventNotifierHandler *io_flush) +{ + AioHandler *node; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (node->e == e && !node->deleted) { + break; + } + } + + /* Are we deleting the fd handler? */ + if (!io_notify) { + if (node) { + g_source_remove_poll(&ctx->source, &node->pfd); + + /* If the lock is held, just mark the node as deleted */ + if (ctx->walking_handlers) { + node->deleted = 1; + node->pfd.revents = 0; + } else { + /* Otherwise, delete it for real. We can't just mark it as + * deleted because deleted nodes are only cleaned up after + * releasing the walking_handlers lock. + */ + QLIST_REMOVE(node, node); + g_free(node); + } + } + } else { + if (node == NULL) { + /* Alloc and insert if it's not already there */ + node = g_malloc0(sizeof(AioHandler)); + node->e = e; + node->pfd.fd = (uintptr_t)event_notifier_get_handle(e); + node->pfd.events = G_IO_IN; + QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); + + g_source_add_poll(&ctx->source, &node->pfd); + } + /* Update handler with latest information */ + node->io_notify = io_notify; + node->io_flush = io_flush; + } + + aio_notify(ctx); +} + +bool aio_pending(AioContext *ctx) +{ + AioHandler *node; + + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (node->pfd.revents && node->io_notify) { + return true; + } + } + + return false; +} + +bool aio_poll(AioContext *ctx, bool blocking) +{ + AioHandler *node; + HANDLE events[MAXIMUM_WAIT_OBJECTS + 1]; + bool busy, progress; + int count; + + progress = false; + + /* + * If there are callbacks left that have been queued, we need to call then. + * Do not call select in this case, because it is possible that the caller + * does not need a complete flush (as is the case for qemu_aio_wait loops). + */ + if (aio_bh_poll(ctx)) { + blocking = false; + progress = true; + } + + /* + * Then dispatch any pending callbacks from the GSource. + * + * We have to walk very carefully in case qemu_aio_set_fd_handler is + * called while we're walking. + */ + node = QLIST_FIRST(&ctx->aio_handlers); + while (node) { + AioHandler *tmp; + + ctx->walking_handlers++; + + if (node->pfd.revents && node->io_notify) { + node->pfd.revents = 0; + node->io_notify(node->e); + progress = true; + } + + tmp = node; + node = QLIST_NEXT(node, node); + + ctx->walking_handlers--; + + if (!ctx->walking_handlers && tmp->deleted) { + QLIST_REMOVE(tmp, node); + g_free(tmp); + } + } + + if (progress && !blocking) { + return true; + } + + ctx->walking_handlers++; + + /* fill fd sets */ + busy = false; + count = 0; + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + /* If there aren't pending AIO operations, don't invoke callbacks. + * Otherwise, if there are no AIO requests, qemu_aio_wait() would + * wait indefinitely. + */ + if (!node->deleted && node->io_flush) { + if (node->io_flush(node->e) == 0) { + continue; + } + busy = true; + } + if (!node->deleted && node->io_notify) { + events[count++] = event_notifier_get_handle(node->e); + } + } + + ctx->walking_handlers--; + + /* No AIO operations? Get us out of here */ + if (!busy) { + return progress; + } + + /* wait until next event */ + for (;;) { + int timeout = blocking ? INFINITE : 0; + int ret = WaitForMultipleObjects(count, events, FALSE, timeout); + + /* if we have any signaled events, dispatch event */ + if ((DWORD) (ret - WAIT_OBJECT_0) >= count) { + break; + } + + blocking = false; + + /* we have to walk very carefully in case + * qemu_aio_set_fd_handler is called while we're walking */ + node = QLIST_FIRST(&ctx->aio_handlers); + while (node) { + AioHandler *tmp; + + ctx->walking_handlers++; + + if (!node->deleted && + event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] && + node->io_notify) { + node->io_notify(node->e); + progress = true; + } + + tmp = node; + node = QLIST_NEXT(node, node); + + ctx->walking_handlers--; + + if (!ctx->walking_handlers && tmp->deleted) { + QLIST_REMOVE(tmp, node); + g_free(tmp); + } + } + } + + return progress; +} @@ -1,194 +0,0 @@ -/* - * QEMU aio implementation - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu-common.h" -#include "block.h" -#include "qemu-queue.h" -#include "qemu_socket.h" - -typedef struct AioHandler AioHandler; - -/* The list of registered AIO handlers */ -static QLIST_HEAD(, AioHandler) aio_handlers; - -/* This is a simple lock used to protect the aio_handlers list. Specifically, - * it's used to ensure that no callbacks are removed while we're walking and - * dispatching callbacks. - */ -static int walking_handlers; - -struct AioHandler -{ - int fd; - IOHandler *io_read; - IOHandler *io_write; - AioFlushHandler *io_flush; - int deleted; - void *opaque; - QLIST_ENTRY(AioHandler) node; -}; - -static AioHandler *find_aio_handler(int fd) -{ - AioHandler *node; - - QLIST_FOREACH(node, &aio_handlers, node) { - if (node->fd == fd) - if (!node->deleted) - return node; - } - - return NULL; -} - -int qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - AioFlushHandler *io_flush, - void *opaque) -{ - AioHandler *node; - - node = find_aio_handler(fd); - - /* Are we deleting the fd handler? */ - if (!io_read && !io_write) { - if (node) { - /* If the lock is held, just mark the node as deleted */ - if (walking_handlers) - node->deleted = 1; - else { - /* Otherwise, delete it for real. We can't just mark it as - * deleted because deleted nodes are only cleaned up after - * releasing the walking_handlers lock. - */ - QLIST_REMOVE(node, node); - g_free(node); - } - } - } else { - if (node == NULL) { - /* Alloc and insert if it's not already there */ - node = g_malloc0(sizeof(AioHandler)); - node->fd = fd; - QLIST_INSERT_HEAD(&aio_handlers, node, node); - } - /* Update handler with latest information */ - node->io_read = io_read; - node->io_write = io_write; - node->io_flush = io_flush; - node->opaque = opaque; - } - - qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque); - - return 0; -} - -void qemu_aio_flush(void) -{ - while (qemu_aio_wait()); -} - -bool qemu_aio_wait(void) -{ - AioHandler *node; - fd_set rdfds, wrfds; - int max_fd = -1; - int ret; - bool busy; - - /* - * If there are callbacks left that have been queued, we need to call then. - * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for qemu_aio_wait loops). - */ - if (qemu_bh_poll()) { - return true; - } - - walking_handlers++; - - FD_ZERO(&rdfds); - FD_ZERO(&wrfds); - - /* fill fd sets */ - busy = false; - QLIST_FOREACH(node, &aio_handlers, node) { - /* If there aren't pending AIO operations, don't invoke callbacks. - * Otherwise, if there are no AIO requests, qemu_aio_wait() would - * wait indefinitely. - */ - if (node->io_flush) { - if (node->io_flush(node->opaque) == 0) { - continue; - } - busy = true; - } - if (!node->deleted && node->io_read) { - FD_SET(node->fd, &rdfds); - max_fd = MAX(max_fd, node->fd + 1); - } - if (!node->deleted && node->io_write) { - FD_SET(node->fd, &wrfds); - max_fd = MAX(max_fd, node->fd + 1); - } - } - - walking_handlers--; - - /* No AIO operations? Get us out of here */ - if (!busy) { - return false; - } - - /* wait until next event */ - ret = select(max_fd, &rdfds, &wrfds, NULL, NULL); - - /* if we have any readable fds, dispatch event */ - if (ret > 0) { - /* we have to walk very carefully in case - * qemu_aio_set_fd_handler is called while we're walking */ - node = QLIST_FIRST(&aio_handlers); - while (node) { - AioHandler *tmp; - - walking_handlers++; - - if (!node->deleted && - FD_ISSET(node->fd, &rdfds) && - node->io_read) { - node->io_read(node->opaque); - } - if (!node->deleted && - FD_ISSET(node->fd, &wrfds) && - node->io_write) { - node->io_write(node->opaque); - } - - tmp = node; - node = QLIST_NEXT(node, node); - - walking_handlers--; - - if (!walking_handlers && tmp->deleted) { - QLIST_REMOVE(tmp, node); - g_free(tmp); - } - } - } - - return true; -} diff --git a/arch_init.h b/arch_init.h index d9c572a..5fc780c 100644 --- a/arch_init.h +++ b/arch_init.h @@ -34,6 +34,6 @@ int tcg_available(void); int kvm_available(void); int xen_available(void); -CpuDefinitionInfoList GCC_WEAK_DECL *arch_query_cpu_definitions(Error **errp); +CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp); #endif @@ -26,13 +26,11 @@ #include "qemu-aio.h" #include "main-loop.h" -/* Anchor of the list of Bottom Halves belonging to the context */ -static struct QEMUBH *first_bh; - /***********************************************************/ /* bottom halves (can be seen as timers which expire ASAP) */ struct QEMUBH { + AioContext *ctx; QEMUBHFunc *cb; void *opaque; QEMUBH *next; @@ -41,27 +39,27 @@ struct QEMUBH { bool deleted; }; -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) +QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) { QEMUBH *bh; bh = g_malloc0(sizeof(QEMUBH)); + bh->ctx = ctx; bh->cb = cb; bh->opaque = opaque; - bh->next = first_bh; - first_bh = bh; + bh->next = ctx->first_bh; + ctx->first_bh = bh; return bh; } -int qemu_bh_poll(void) +int aio_bh_poll(AioContext *ctx) { QEMUBH *bh, **bhp, *next; int ret; - static int nesting = 0; - nesting++; + ctx->walking_bh++; ret = 0; - for (bh = first_bh; bh; bh = next) { + for (bh = ctx->first_bh; bh; bh = next) { next = bh->next; if (!bh->deleted && bh->scheduled) { bh->scheduled = 0; @@ -72,11 +70,11 @@ int qemu_bh_poll(void) } } - nesting--; + ctx->walking_bh--; /* remove deleted bhs */ - if (!nesting) { - bhp = &first_bh; + if (!ctx->walking_bh) { + bhp = &ctx->first_bh; while (*bhp) { bh = *bhp; if (bh->deleted) { @@ -105,8 +103,7 @@ void qemu_bh_schedule(QEMUBH *bh) return; bh->scheduled = 1; bh->idle = 0; - /* stop the currently executing CPU to execute the BH ASAP */ - qemu_notify_event(); + aio_notify(bh->ctx); } void qemu_bh_cancel(QEMUBH *bh) @@ -120,16 +117,20 @@ void qemu_bh_delete(QEMUBH *bh) bh->deleted = 1; } -void qemu_bh_update_timeout(uint32_t *timeout) +static gboolean +aio_ctx_prepare(GSource *source, gint *timeout) { + AioContext *ctx = (AioContext *) source; QEMUBH *bh; + bool scheduled = false; - for (bh = first_bh; bh; bh = bh->next) { + for (bh = ctx->first_bh; bh; bh = bh->next) { if (!bh->deleted && bh->scheduled) { + scheduled = true; if (bh->idle) { /* idle bottom halves will be polled at least * every 10ms */ - *timeout = MIN(10, *timeout); + *timeout = 10; } else { /* non-idle bottom halves will be executed * immediately */ @@ -138,5 +139,86 @@ void qemu_bh_update_timeout(uint32_t *timeout) } } } + + return scheduled; +} + +static gboolean +aio_ctx_check(GSource *source) +{ + AioContext *ctx = (AioContext *) source; + QEMUBH *bh; + + for (bh = ctx->first_bh; bh; bh = bh->next) { + if (!bh->deleted && bh->scheduled) { + return true; + } + } + return aio_pending(ctx); +} + +static gboolean +aio_ctx_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + AioContext *ctx = (AioContext *) source; + + assert(callback == NULL); + aio_poll(ctx, false); + return true; +} + +static void +aio_ctx_finalize(GSource *source) +{ + AioContext *ctx = (AioContext *) source; + + aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); + event_notifier_cleanup(&ctx->notifier); +} + +static GSourceFuncs aio_source_funcs = { + aio_ctx_prepare, + aio_ctx_check, + aio_ctx_dispatch, + aio_ctx_finalize +}; + +GSource *aio_get_g_source(AioContext *ctx) +{ + g_source_ref(&ctx->source); + return &ctx->source; +} + +void aio_notify(AioContext *ctx) +{ + event_notifier_set(&ctx->notifier); +} + +AioContext *aio_context_new(void) +{ + AioContext *ctx; + ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); + event_notifier_init(&ctx->notifier, false); + aio_set_event_notifier(ctx, &ctx->notifier, + (EventNotifierHandler *) + event_notifier_test_and_clear, NULL); + + return ctx; } +void aio_context_ref(AioContext *ctx) +{ + g_source_ref(&ctx->source); +} + +void aio_context_unref(AioContext *ctx) +{ + g_source_unref(&ctx->source); +} + +void aio_flush(AioContext *ctx) +{ + while (aio_poll(ctx, true)); +} diff --git a/block/Makefile.objs b/block/Makefile.objs index 806e526..7f01510 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,13 +2,18 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-$(CONFIG_WIN32) += raw-win32.o +block-obj-y += parallels.o blkdebug.o blkverify.o +block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o +block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o + +ifeq ($(CONFIG_POSIX),y) +block-obj-y += nbd.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o +endif common-obj-y += stream.o common-obj-y += commit.o diff --git a/linux-aio.c b/block/linux-aio.c index ce9b5d4..6ca984d 100644 --- a/linux-aio.c +++ b/block/linux-aio.c @@ -9,9 +9,10 @@ */ #include "qemu-common.h" #include "qemu-aio.h" -#include "block/raw-posix-aio.h" +#include "qemu-queue.h" +#include "block/raw-aio.h" +#include "event_notifier.h" -#include <sys/eventfd.h> #include <libaio.h> /* @@ -37,7 +38,7 @@ struct qemu_laiocb { struct qemu_laio_state { io_context_t ctx; - int efd; + EventNotifier e; int count; }; @@ -76,29 +77,17 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, qemu_aio_release(laiocb); } -static void qemu_laio_completion_cb(void *opaque) +static void qemu_laio_completion_cb(EventNotifier *e) { - struct qemu_laio_state *s = opaque; + struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e); - while (1) { + while (event_notifier_test_and_clear(&s->e)) { struct io_event events[MAX_EVENTS]; - uint64_t val; - ssize_t ret; struct timespec ts = { 0 }; int nevents, i; do { - ret = read(s->efd, &val, sizeof(val)); - } while (ret == -1 && errno == EINTR); - - if (ret == -1 && errno == EAGAIN) - break; - - if (ret != 8) - break; - - do { - nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts); + nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts); } while (nevents == -EINTR); for (i = 0; i < nevents; i++) { @@ -112,9 +101,9 @@ static void qemu_laio_completion_cb(void *opaque) } } -static int qemu_laio_flush_cb(void *opaque) +static int qemu_laio_flush_cb(EventNotifier *e) { - struct qemu_laio_state *s = opaque; + struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e); return (s->count > 0) ? 1 : 0; } @@ -146,8 +135,9 @@ static void laio_cancel(BlockDriverAIOCB *blockacb) * We might be able to do this slightly more optimal by removing the * O_NONBLOCK flag. */ - while (laiocb->ret == -EINPROGRESS) - qemu_laio_completion_cb(laiocb->ctx); + while (laiocb->ret == -EINPROGRESS) { + qemu_laio_completion_cb(&laiocb->ctx->e); + } } static AIOPool laio_pool = { @@ -186,7 +176,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, __func__, type); goto out_free_aiocb; } - io_set_eventfd(&laiocb->iocb, s->efd); + io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); s->count++; if (io_submit(s->ctx, 1, &iocbs) < 0) @@ -205,21 +195,21 @@ void *laio_init(void) struct qemu_laio_state *s; s = g_malloc0(sizeof(*s)); - s->efd = eventfd(0, 0); - if (s->efd == -1) + if (event_notifier_init(&s->e, false) < 0) { goto out_free_state; - fcntl(s->efd, F_SETFL, O_NONBLOCK); + } - if (io_setup(MAX_EVENTS, &s->ctx) != 0) + if (io_setup(MAX_EVENTS, &s->ctx) != 0) { goto out_close_efd; + } - qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL, - qemu_laio_flush_cb, s); + qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb, + qemu_laio_flush_cb); return s; out_close_efd: - close(s->efd); + event_notifier_cleanup(&s->e); out_free_state: g_free(s); return NULL; diff --git a/block/raw-posix-aio.h b/block/raw-aio.h index ba118f6..e77f361 100644 --- a/block/raw-posix-aio.h +++ b/block/raw-aio.h @@ -1,5 +1,5 @@ /* - * QEMU Posix block I/O backend AIO support + * Declarations for AIO in the raw protocol * * Copyright IBM, Corp. 2008 * @@ -12,8 +12,8 @@ * Contributions after 2012-01-13 are licensed under the terms of the * GNU GPL, version 2 or (at your option) any later version. */ -#ifndef QEMU_RAW_POSIX_AIO_H -#define QEMU_RAW_POSIX_AIO_H +#ifndef QEMU_RAW_AIO_H +#define QEMU_RAW_AIO_H /* AIO request types */ #define QEMU_AIO_READ 0x0001 @@ -27,19 +27,22 @@ #define QEMU_AIO_MISALIGNED 0x1000 -/* posix-aio-compat.c - thread pool based implementation */ -int paio_init(void); -BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int type); -BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque); - /* linux-aio.c - Linux native implementation */ +#ifdef CONFIG_LINUX_AIO void *laio_init(void); BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type); +#endif + +#ifdef _WIN32 +typedef struct QEMUWin32AIOState QEMUWin32AIOState; +QEMUWin32AIOState *win32_aio_init(void); +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile); +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type); +#endif -#endif /* QEMU_RAW_POSIX_AIO_H */ +#endif /* QEMU_RAW_AIO_H */ diff --git a/block/raw-posix.c b/block/raw-posix.c index 28d439f..f2f0404 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -27,7 +27,10 @@ #include "qemu-log.h" #include "block_int.h" #include "module.h" -#include "block/raw-posix-aio.h" +#include "trace.h" +#include "thread-pool.h" +#include "iov.h" +#include "raw-aio.h" #if defined(__APPLE__) && (__MACH__) #include <paths.h> @@ -149,6 +152,20 @@ typedef struct BDRVRawReopenState { static int fd_open(BlockDriverState *bs); static int64_t raw_getlength(BlockDriverState *bs); +typedef struct RawPosixAIOData { + BlockDriverState *bs; + int aio_fildes; + union { + struct iovec *aio_iov; + void *aio_ioctl_buf; + }; + int aio_niov; + size_t aio_nbytes; +#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ + off_t aio_offset; + int aio_type; +} RawPosixAIOData; + #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) static int cdrom_reopen(BlockDriverState *bs); #endif @@ -266,14 +283,10 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } s->fd = fd; - /* We're falling back to POSIX AIO in some cases so init always */ - if (paio_init() < 0) { - goto out_close; - } - #ifdef CONFIG_LINUX_AIO if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) { - goto out_close; + qemu_close(fd); + return -errno; } #endif @@ -284,10 +297,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, #endif return 0; - -out_close: - qemu_close(fd); - return -errno; } static int raw_open(BlockDriverState *bs, const char *filename, int flags) @@ -434,6 +443,283 @@ static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return 1; } +static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) +{ + int ret; + + ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); + if (ret == -1) { + return -errno; + } + + /* + * This looks weird, but the aio code only considers a request + * successful if it has written the full number of bytes. + * + * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, + * so in fact we return the ioctl command here to make posix_aio_read() + * happy.. + */ + return aiocb->aio_nbytes; +} + +static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) +{ + int ret; + + ret = qemu_fdatasync(aiocb->aio_fildes); + if (ret == -1) { + return -errno; + } + return 0; +} + +#ifdef CONFIG_PREADV + +static bool preadv_present = true; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return preadv(fd, iov, nr_iov, offset); +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return pwritev(fd, iov, nr_iov, offset); +} + +#else + +static bool preadv_present = false; + +static ssize_t +qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +static ssize_t +qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) +{ + return -ENOSYS; +} + +#endif + +static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) +{ + ssize_t len; + + do { + if (aiocb->aio_type & QEMU_AIO_WRITE) + len = qemu_pwritev(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset); + else + len = qemu_preadv(aiocb->aio_fildes, + aiocb->aio_iov, + aiocb->aio_niov, + aiocb->aio_offset); + } while (len == -1 && errno == EINTR); + + if (len == -1) { + return -errno; + } + return len; +} + +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) +{ + ssize_t offset = 0; + ssize_t len; + + while (offset < aiocb->aio_nbytes) { + if (aiocb->aio_type & QEMU_AIO_WRITE) { + len = pwrite(aiocb->aio_fildes, + (const char *)buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } else { + len = pread(aiocb->aio_fildes, + buf + offset, + aiocb->aio_nbytes - offset, + aiocb->aio_offset + offset); + } + if (len == -1 && errno == EINTR) { + continue; + } else if (len == -1) { + offset = -errno; + break; + } else if (len == 0) { + break; + } + offset += len; + } + + return offset; +} + +static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) +{ + ssize_t nbytes; + char *buf; + + if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) { + /* + * If there is just a single buffer, and it is properly aligned + * we can just use plain pread/pwrite without any problems. + */ + if (aiocb->aio_niov == 1) { + return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); + } + /* + * We have more than one iovec, and all are properly aligned. + * + * Try preadv/pwritev first and fall back to linearizing the + * buffer if it's not supported. + */ + if (preadv_present) { + nbytes = handle_aiocb_rw_vector(aiocb); + if (nbytes == aiocb->aio_nbytes || + (nbytes < 0 && nbytes != -ENOSYS)) { + return nbytes; + } + preadv_present = false; + } + + /* + * XXX(hch): short read/write. no easy way to handle the reminder + * using these interfaces. For now retry using plain + * pread/pwrite? + */ + } + + /* + * Ok, we have to do it the hard way, copy all segments into + * a single aligned buffer. + */ + buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes); + if (aiocb->aio_type & QEMU_AIO_WRITE) { + char *p = buf; + int i; + + for (i = 0; i < aiocb->aio_niov; ++i) { + memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); + p += aiocb->aio_iov[i].iov_len; + } + } + + nbytes = handle_aiocb_rw_linear(aiocb, buf); + if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { + char *p = buf; + size_t count = aiocb->aio_nbytes, copy; + int i; + + for (i = 0; i < aiocb->aio_niov && count; ++i) { + copy = count; + if (copy > aiocb->aio_iov[i].iov_len) { + copy = aiocb->aio_iov[i].iov_len; + } + memcpy(aiocb->aio_iov[i].iov_base, p, copy); + p += copy; + count -= copy; + } + } + qemu_vfree(buf); + + return nbytes; +} + +static int aio_worker(void *arg) +{ + RawPosixAIOData *aiocb = arg; + ssize_t ret = 0; + + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { + case QEMU_AIO_READ: + ret = handle_aiocb_rw(aiocb); + if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) { + iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, + 0, aiocb->aio_nbytes - ret); + + ret = aiocb->aio_nbytes; + } + if (ret == aiocb->aio_nbytes) { + ret = 0; + } else if (ret >= 0 && ret < aiocb->aio_nbytes) { + ret = -EINVAL; + } + break; + case QEMU_AIO_WRITE: + ret = handle_aiocb_rw(aiocb); + if (ret == aiocb->aio_nbytes) { + ret = 0; + } else if (ret >= 0 && ret < aiocb->aio_nbytes) { + ret = -EINVAL; + } + break; + case QEMU_AIO_FLUSH: + ret = handle_aiocb_flush(aiocb); + break; + case QEMU_AIO_IOCTL: + ret = handle_aiocb_ioctl(aiocb); + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } + + g_slice_free(RawPosixAIOData, aiocb); + return ret; +} + +static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + + acb->bs = bs; + acb->aio_type = type; + acb->aio_fildes = fd; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } + acb->aio_nbytes = nb_sectors * 512; + acb->aio_offset = sector_num * 512; + + trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); +} + +static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque) +{ + RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + + acb->bs = bs; + acb->aio_type = QEMU_AIO_IOCTL; + acb->aio_fildes = fd; + acb->aio_offset = 0; + acb->aio_ioctl_buf = buf; + acb->aio_ioctl_cmd = req; + + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); +} + static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) diff --git a/block/raw-win32.c b/block/raw-win32.c index 78c8306..0c05c58 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -25,6 +25,10 @@ #include "qemu-timer.h" #include "block_int.h" #include "module.h" +#include "raw-aio.h" +#include "trace.h" +#include "thread-pool.h" +#include "iov.h" #include <windows.h> #include <winioctl.h> @@ -32,12 +36,130 @@ #define FTYPE_CD 1 #define FTYPE_HARDDISK 2 +static QEMUWin32AIOState *aio; + +typedef struct RawWin32AIOData { + BlockDriverState *bs; + HANDLE hfile; + struct iovec *aio_iov; + int aio_niov; + size_t aio_nbytes; + off64_t aio_offset; + int aio_type; +} RawWin32AIOData; + typedef struct BDRVRawState { HANDLE hfile; int type; char drive_path[16]; /* format: "d:\" */ + QEMUWin32AIOState *aio; } BDRVRawState; +/* + * Read/writes the data to/from a given linear buffer. + * + * Returns the number of bytes handles or -errno in case of an error. Short + * reads are only returned if the end of the file is reached. + */ +static size_t handle_aiocb_rw(RawWin32AIOData *aiocb) +{ + size_t offset = 0; + int i; + + for (i = 0; i < aiocb->aio_niov; i++) { + OVERLAPPED ov; + DWORD ret, ret_count, len; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = (aiocb->aio_offset + offset); + ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32; + len = aiocb->aio_iov[i].iov_len; + if (aiocb->aio_type & QEMU_AIO_WRITE) { + ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } else { + ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base, + len, &ret_count, &ov); + } + if (!ret) { + ret_count = 0; + } + if (ret_count != len) { + break; + } + offset += len; + } + + return offset; +} + +static int aio_worker(void *arg) +{ + RawWin32AIOData *aiocb = arg; + ssize_t ret = 0; + size_t count; + + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { + case QEMU_AIO_READ: + count = handle_aiocb_rw(aiocb); + if (count < aiocb->aio_nbytes && aiocb->bs->growable) { + /* A short read means that we have reached EOF. Pad the buffer + * with zeros for bytes after EOF. */ + iov_memset(aiocb->aio_iov, aiocb->aio_niov, count, + 0, aiocb->aio_nbytes - count); + + count = aiocb->aio_nbytes; + } + if (count == aiocb->aio_nbytes) { + ret = 0; + } else { + ret = -EINVAL; + } + break; + case QEMU_AIO_WRITE: + count = handle_aiocb_rw(aiocb); + if (count == aiocb->aio_nbytes) { + count = 0; + } else { + count = -EINVAL; + } + break; + case QEMU_AIO_FLUSH: + if (!FlushFileBuffers(aiocb->hfile)) { + return -EIO; + } + break; + default: + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); + ret = -EINVAL; + break; + } + + g_slice_free(RawWin32AIOData, aiocb); + return ret; +} + +static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + RawWin32AIOData *acb = g_slice_new(RawWin32AIOData); + + acb->bs = bs; + acb->hfile = hfile; + acb->aio_type = type; + + if (qiov) { + acb->aio_iov = qiov->iov; + acb->aio_niov = qiov->niov; + } + acb->aio_nbytes = nb_sectors * 512; + acb->aio_offset = sector_num * 512; + + trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); +} + int qemu_ftruncate64(int fd, int64_t length) { LARGE_INTEGER li; @@ -89,6 +211,9 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) } *overlapped = FILE_ATTRIBUTE_NORMAL; + if (flags & BDRV_O_NATIVE_AIO) { + *overlapped |= FILE_FLAG_OVERLAPPED; + } if (flags & BDRV_O_NOCACHE) { *overlapped |= FILE_FLAG_NO_BUFFERING; } @@ -103,6 +228,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) s->type = FTYPE_FILE; raw_parse_flags(flags, &access_flags, &overlapped); + + if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) { + aio = win32_aio_init(); + if (aio == NULL) { + return -EINVAL; + } + } s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, @@ -112,64 +244,53 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) if (err == ERROR_ACCESS_DENIED) return -EACCES; - return -1; + return -EINVAL; + } + + if (flags & BDRV_O_NATIVE_AIO) { + int ret = win32_aio_attach(aio, s->hfile); + if (ret < 0) { + CloseHandle(s->hfile); + return ret; + } + s->aio = aio; } return 0; } -static int raw_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = ReadFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_READ); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_READ); + } } -static int raw_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = WriteFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; + if (s->aio) { + return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov, + nb_sectors, cb, opaque, QEMU_AIO_WRITE); + } else { + return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors, + cb, opaque, QEMU_AIO_WRITE); + } } -static int raw_flush(BlockDriverState *bs) +static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; - int ret; - - ret = FlushFileBuffers(s->hfile); - if (ret == 0) { - return -EIO; - } - - return 0; + return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH); } static void raw_close(BlockDriverState *bs) @@ -290,9 +411,9 @@ static BlockDriver bdrv_file = { .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_co_flush_to_disk = raw_flush, + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -413,9 +534,9 @@ static BlockDriver bdrv_host_device = { .bdrv_close = raw_close, .bdrv_has_zero_init = hdev_has_zero_init, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_co_flush_to_disk = raw_flush, + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_flush = raw_aio_flush, .bdrv_getlength = raw_getlength, .bdrv_get_allocated_file_size diff --git a/block/win32-aio.c b/block/win32-aio.c new file mode 100644 index 0000000..c34dc73 --- /dev/null +++ b/block/win32-aio.c @@ -0,0 +1,226 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu-timer.h" +#include "block_int.h" +#include "module.h" +#include "qemu-common.h" +#include "qemu-aio.h" +#include "raw-aio.h" +#include "event_notifier.h" +#include <windows.h> +#include <winioctl.h> + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +struct QEMUWin32AIOState { + HANDLE hIOCP; + EventNotifier e; + int count; +}; + +typedef struct QEMUWin32AIOCB { + BlockDriverAIOCB common; + struct QEMUWin32AIOState *ctx; + int nbytes; + OVERLAPPED ov; + QEMUIOVector *qiov; + void *buf; + bool is_read; + bool is_linear; +} QEMUWin32AIOCB; + +/* + * Completes an AIO request (calls the callback and frees the ACB). + */ +static void win32_aio_process_completion(QEMUWin32AIOState *s, + QEMUWin32AIOCB *waiocb, DWORD count) +{ + int ret; + s->count--; + + if (waiocb->ov.Internal != 0) { + ret = -EIO; + } else { + ret = 0; + if (count < waiocb->nbytes) { + /* Short reads mean EOF, pad with zeros. */ + if (waiocb->is_read) { + qemu_iovec_memset(waiocb->qiov, count, 0, + waiocb->qiov->size - count); + } else { + ret = -EINVAL; + } + } + } + + if (!waiocb->is_linear) { + if (ret == 0 && waiocb->is_read) { + QEMUIOVector *qiov = waiocb->qiov; + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + g_free(waiocb->buf); + } + } + + + waiocb->common.cb(waiocb->common.opaque, ret); + qemu_aio_release(waiocb); +} + +static void win32_aio_completion_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + DWORD count; + ULONG_PTR key; + OVERLAPPED *ov; + + event_notifier_test_and_clear(&s->e); + while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) { + QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov); + + win32_aio_process_completion(s, waiocb, count); + } +} + +static int win32_aio_flush_cb(EventNotifier *e) +{ + QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e); + + return (s->count > 0) ? 1 : 0; +} + +static void win32_aio_cancel(BlockDriverAIOCB *blockacb) +{ + QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb; + + /* + * CancelIoEx is only supported in Vista and newer. For now, just + * wait for completion. + */ + while (!HasOverlappedIoCompleted(&waiocb->ov)) { + qemu_aio_wait(); + } +} + +static AIOPool win32_aio_pool = { + .aiocb_size = sizeof(QEMUWin32AIOCB), + .cancel = win32_aio_cancel, +}; + +BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, + QEMUWin32AIOState *aio, HANDLE hfile, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int type) +{ + struct QEMUWin32AIOCB *waiocb; + uint64_t offset = sector_num * 512; + DWORD rc; + + waiocb = qemu_aio_get(&win32_aio_pool, bs, cb, opaque); + waiocb->nbytes = nb_sectors * 512; + waiocb->qiov = qiov; + waiocb->is_read = (type == QEMU_AIO_READ); + + if (qiov->niov > 1) { + waiocb->buf = qemu_blockalign(bs, qiov->size); + if (type & QEMU_AIO_WRITE) { + char *p = waiocb->buf; + int i; + + for (i = 0; i < qiov->niov; ++i) { + memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); + p += qiov->iov[i].iov_len; + } + } + waiocb->is_linear = false; + } else { + waiocb->buf = qiov->iov[0].iov_base; + waiocb->is_linear = true; + } + + waiocb->ov = (OVERLAPPED) { + .Offset = (DWORD) offset, + .OffsetHigh = (DWORD) (offset >> 32), + .hEvent = event_notifier_get_handle(&aio->e) + }; + aio->count++; + + if (type & QEMU_AIO_READ) { + rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } else { + rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov); + } + if(rc == 0 && GetLastError() != ERROR_IO_PENDING) { + goto out_dec_count; + } + return &waiocb->common; + +out_dec_count: + aio->count--; + qemu_aio_release(waiocb); + return NULL; +} + +int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) +{ + if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) { + return -EINVAL; + } else { + return 0; + } +} + +QEMUWin32AIOState *win32_aio_init(void) +{ + QEMUWin32AIOState *s; + + s = g_malloc0(sizeof(*s)); + if (event_notifier_init(&s->e, false) < 0) { + goto out_free_state; + } + + s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (s->hIOCP == NULL) { + goto out_close_efd; + } + + qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb, + win32_aio_flush_cb); + + return s; + +out_close_efd: + event_notifier_cleanup(&s->e); +out_free_state: + g_free(s); + return NULL; +} @@ -433,6 +433,12 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } + if (qemu_opt_get(opts, "boot") != NULL) { + fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " + "ignored. Future versions will reject this parameter. Please " + "update your scripts.\n"); + } + on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { @@ -50,16 +50,13 @@ # define __printf__ __gnu_printf__ # endif # endif -#if defined(_WIN32) -#define GCC_WEAK __attribute__((weak)) -#define GCC_WEAK_DECL GCC_WEAK -#else -#define GCC_WEAK __attribute__((weak)) -#define GCC_WEAK_DECL -#endif +# define QEMU_WEAK_ALIAS(newname, oldname) \ + typeof(oldname) newname __attribute__((weak, alias (#oldname))) #else #define GCC_ATTR /**/ #define GCC_FMT_ATTR(n, m) +#define QEMU_WEAK_ALIAS(newname, oldname) \ + _Pragma("weak " #newname "=" #oldname) #endif #endif /* COMPILER_H */ @@ -438,8 +438,6 @@ void cpu_reset_interrupt(CPUArchState *env, int mask); void cpu_exit(CPUArchState *s); -bool qemu_cpu_has_work(CPUArchState *env); - /* Breakpoint/watchpoint flags */ #define BP_MEM_READ 0x01 #define BP_MEM_WRITE 0x02 @@ -466,8 +464,6 @@ void cpu_watchpoint_remove_all(CPUArchState *env, int mask); #define SSTEP_NOTIMER 0x4 /* Do not Timers while single stepping */ void cpu_single_step(CPUArchState *env, int enabled); -int cpu_is_stopped(CPUArchState *env); -void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data); #if !defined(CONFIG_USER_ONLY) @@ -201,15 +201,9 @@ typedef struct CPUWatchpoint { int nr_cores; /* number of cores within this CPU package */ \ int nr_threads;/* number of threads within this CPU */ \ int running; /* Nonzero if cpu is currently running(usermode). */ \ - int thread_id; \ /* user data */ \ void *opaque; \ \ - uint32_t created; \ - uint32_t stop; /* Stop request */ \ - uint32_t stopped; /* Artificially stopped */ \ - struct QemuCond *halt_cond; \ - struct qemu_work_item *queued_work_first, *queued_work_last; \ const char *cpu_model_str; \ struct KVMState *kvm_state; \ struct kvm_run *kvm_run; \ @@ -27,9 +27,9 @@ int tb_invalidated_flag; //#define CONFIG_DEBUG_EXEC -bool qemu_cpu_has_work(CPUArchState *env) +bool qemu_cpu_has_work(CPUState *cpu) { - return cpu_has_work(env); + return cpu_has_work(cpu); } void cpu_loop_exit(CPUArchState *env) @@ -181,16 +181,14 @@ volatile sig_atomic_t exit_request; int cpu_exec(CPUArchState *env) { -#ifdef TARGET_PPC CPUState *cpu = ENV_GET_CPU(env); -#endif int ret, interrupt_request; TranslationBlock *tb; uint8_t *tc_ptr; tcg_target_ulong next_tb; if (env->halted) { - if (!cpu_has_work(env)) { + if (!cpu_has_work(cpu)) { return EXCP_HALTED; } @@ -64,13 +64,15 @@ static CPUArchState *next_cpu; static bool cpu_thread_is_idle(CPUArchState *env) { - if (env->stop || env->queued_work_first) { + CPUState *cpu = ENV_GET_CPU(env); + + if (cpu->stop || cpu->queued_work_first) { return false; } - if (env->stopped || !runstate_is_running()) { + if (cpu->stopped || !runstate_is_running()) { return true; } - if (!env->halted || qemu_cpu_has_work(env) || + if (!env->halted || qemu_cpu_has_work(cpu) || kvm_async_interrupts_enabled()) { return false; } @@ -428,9 +430,9 @@ void cpu_synchronize_all_post_init(void) } } -int cpu_is_stopped(CPUArchState *env) +bool cpu_is_stopped(CPUState *cpu) { - return !runstate_is_running() || env->stopped; + return !runstate_is_running() || cpu->stopped; } static void do_vm_stop(RunState state) @@ -446,22 +448,24 @@ static void do_vm_stop(RunState state) } } -static int cpu_can_run(CPUArchState *env) +static bool cpu_can_run(CPUState *cpu) { - if (env->stop) { - return 0; + if (cpu->stop) { + return false; } - if (env->stopped || !runstate_is_running()) { - return 0; + if (cpu->stopped || !runstate_is_running()) { + return false; } - return 1; + return true; } static void cpu_handle_guest_debug(CPUArchState *env) { + CPUState *cpu = ENV_GET_CPU(env); + gdb_set_stop_cpu(env); qemu_system_debug_request(); - env->stopped = 1; + cpu->stopped = true; } static void cpu_signal(int sig) @@ -636,27 +640,27 @@ void qemu_init_cpu_loop(void) qemu_thread_get_self(&io_thread); } -void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data) +void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) { struct qemu_work_item wi; - if (qemu_cpu_is_self(env)) { + if (qemu_cpu_is_self(cpu)) { func(data); return; } wi.func = func; wi.data = data; - if (!env->queued_work_first) { - env->queued_work_first = &wi; + if (cpu->queued_work_first == NULL) { + cpu->queued_work_first = &wi; } else { - env->queued_work_last->next = &wi; + cpu->queued_work_last->next = &wi; } - env->queued_work_last = &wi; + cpu->queued_work_last = &wi; wi.next = NULL; wi.done = false; - qemu_cpu_kick(env); + qemu_cpu_kick(cpu); while (!wi.done) { CPUArchState *self_env = cpu_single_env; @@ -665,33 +669,31 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data) } } -static void flush_queued_work(CPUArchState *env) +static void flush_queued_work(CPUState *cpu) { struct qemu_work_item *wi; - if (!env->queued_work_first) { + if (cpu->queued_work_first == NULL) { return; } - while ((wi = env->queued_work_first)) { - env->queued_work_first = wi->next; + while ((wi = cpu->queued_work_first)) { + cpu->queued_work_first = wi->next; wi->func(wi->data); wi->done = true; } - env->queued_work_last = NULL; + cpu->queued_work_last = NULL; qemu_cond_broadcast(&qemu_work_cond); } -static void qemu_wait_io_event_common(CPUArchState *env) +static void qemu_wait_io_event_common(CPUState *cpu) { - CPUState *cpu = ENV_GET_CPU(env); - - if (env->stop) { - env->stop = 0; - env->stopped = 1; + if (cpu->stop) { + cpu->stop = false; + cpu->stopped = true; qemu_cond_signal(&qemu_pause_cond); } - flush_queued_work(env); + flush_queued_work(cpu); cpu->thread_kicked = false; } @@ -711,18 +713,20 @@ static void qemu_tcg_wait_io_event(void) } for (env = first_cpu; env != NULL; env = env->next_cpu) { - qemu_wait_io_event_common(env); + qemu_wait_io_event_common(ENV_GET_CPU(env)); } } static void qemu_kvm_wait_io_event(CPUArchState *env) { + CPUState *cpu = ENV_GET_CPU(env); + while (cpu_thread_is_idle(env)) { - qemu_cond_wait(env->halt_cond, &qemu_global_mutex); + qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } qemu_kvm_eat_signals(env); - qemu_wait_io_event_common(env); + qemu_wait_io_event_common(cpu); } static void *qemu_kvm_cpu_thread_fn(void *arg) @@ -733,7 +737,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) qemu_mutex_lock(&qemu_global_mutex); qemu_thread_get_self(cpu->thread); - env->thread_id = qemu_get_thread_id(); + cpu->thread_id = qemu_get_thread_id(); cpu_single_env = env; r = kvm_init_vcpu(env); @@ -745,11 +749,11 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) qemu_kvm_init_cpu_signals(env); /* signal CPU creation */ - env->created = 1; + cpu->created = true; qemu_cond_signal(&qemu_cpu_cond); while (1) { - if (cpu_can_run(env)) { + if (cpu_can_run(cpu)) { r = kvm_cpu_exec(env); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(env); @@ -774,13 +778,13 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_mutex_lock_iothread(); qemu_thread_get_self(cpu->thread); - env->thread_id = qemu_get_thread_id(); + cpu->thread_id = qemu_get_thread_id(); sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); /* signal CPU creation */ - env->created = 1; + cpu->created = true; qemu_cond_signal(&qemu_cpu_cond); cpu_single_env = env; @@ -797,7 +801,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) } qemu_mutex_lock_iothread(); cpu_single_env = env; - qemu_wait_io_event_common(env); + qemu_wait_io_event_common(cpu); } return NULL; @@ -808,8 +812,8 @@ static void tcg_exec_all(void); static void *qemu_tcg_cpu_thread_fn(void *arg) { - CPUArchState *env = arg; - CPUState *cpu = ENV_GET_CPU(env); + CPUState *cpu = arg; + CPUArchState *env; qemu_tcg_init_cpu_signals(); qemu_thread_get_self(cpu->thread); @@ -817,18 +821,19 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* signal CPU creation */ qemu_mutex_lock(&qemu_global_mutex); for (env = first_cpu; env != NULL; env = env->next_cpu) { - env->thread_id = qemu_get_thread_id(); - env->created = 1; + cpu = ENV_GET_CPU(env); + cpu->thread_id = qemu_get_thread_id(); + cpu->created = true; } qemu_cond_signal(&qemu_cpu_cond); /* wait for initial kick-off after machine start */ - while (first_cpu->stopped) { + while (ENV_GET_CPU(first_cpu)->stopped) { qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex); /* process any pending work */ for (env = first_cpu; env != NULL; env = env->next_cpu) { - qemu_wait_io_event_common(env); + qemu_wait_io_event_common(ENV_GET_CPU(env)); } } @@ -843,9 +848,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) return NULL; } -static void qemu_cpu_kick_thread(CPUArchState *env) +static void qemu_cpu_kick_thread(CPUState *cpu) { - CPUState *cpu = ENV_GET_CPU(env); #ifndef _WIN32 int err; @@ -855,7 +859,7 @@ static void qemu_cpu_kick_thread(CPUArchState *env) exit(1); } #else /* _WIN32 */ - if (!qemu_cpu_is_self(env)) { + if (!qemu_cpu_is_self(cpu)) { SuspendThread(cpu->hThread); cpu_signal(0); ResumeThread(cpu->hThread); @@ -863,14 +867,11 @@ static void qemu_cpu_kick_thread(CPUArchState *env) #endif } -void qemu_cpu_kick(void *_env) +void qemu_cpu_kick(CPUState *cpu) { - CPUArchState *env = _env; - CPUState *cpu = ENV_GET_CPU(env); - - qemu_cond_broadcast(env->halt_cond); + qemu_cond_broadcast(cpu->halt_cond); if (!tcg_enabled() && !cpu->thread_kicked) { - qemu_cpu_kick_thread(env); + qemu_cpu_kick_thread(cpu); cpu->thread_kicked = true; } } @@ -882,7 +883,7 @@ void qemu_cpu_kick_self(void) CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env); if (!cpu_single_cpu->thread_kicked) { - qemu_cpu_kick_thread(cpu_single_env); + qemu_cpu_kick_thread(cpu_single_cpu); cpu_single_cpu->thread_kicked = true; } #else @@ -890,17 +891,14 @@ void qemu_cpu_kick_self(void) #endif } -int qemu_cpu_is_self(void *_env) +bool qemu_cpu_is_self(CPUState *cpu) { - CPUArchState *env = _env; - CPUState *cpu = ENV_GET_CPU(env); - return qemu_thread_is_self(cpu->thread); } static bool qemu_in_vcpu_thread(void) { - return cpu_single_env && qemu_cpu_is_self(cpu_single_env); + return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env)); } void qemu_mutex_lock_iothread(void) @@ -910,7 +908,7 @@ void qemu_mutex_lock_iothread(void) } else { iothread_requesting_mutex = true; if (qemu_mutex_trylock(&qemu_global_mutex)) { - qemu_cpu_kick_thread(first_cpu); + qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu)); qemu_mutex_lock(&qemu_global_mutex); } iothread_requesting_mutex = false; @@ -928,7 +926,8 @@ static int all_vcpus_paused(void) CPUArchState *penv = first_cpu; while (penv) { - if (!penv->stopped) { + CPUState *pcpu = ENV_GET_CPU(penv); + if (!pcpu->stopped) { return 0; } penv = penv->next_cpu; @@ -943,8 +942,9 @@ void pause_all_vcpus(void) qemu_clock_enable(vm_clock, false); while (penv) { - penv->stop = 1; - qemu_cpu_kick(penv); + CPUState *pcpu = ENV_GET_CPU(penv); + pcpu->stop = true; + qemu_cpu_kick(pcpu); penv = penv->next_cpu; } @@ -952,8 +952,9 @@ void pause_all_vcpus(void) cpu_stop_current(); if (!kvm_enabled()) { while (penv) { - penv->stop = 0; - penv->stopped = 1; + CPUState *pcpu = ENV_GET_CPU(penv); + pcpu->stop = 0; + pcpu->stopped = true; penv = penv->next_cpu; } return; @@ -964,7 +965,7 @@ void pause_all_vcpus(void) qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); penv = first_cpu; while (penv) { - qemu_cpu_kick(penv); + qemu_cpu_kick(ENV_GET_CPU(penv)); penv = penv->next_cpu; } } @@ -976,36 +977,34 @@ void resume_all_vcpus(void) qemu_clock_enable(vm_clock, true); while (penv) { - penv->stop = 0; - penv->stopped = 0; - qemu_cpu_kick(penv); + CPUState *pcpu = ENV_GET_CPU(penv); + pcpu->stop = false; + pcpu->stopped = false; + qemu_cpu_kick(pcpu); penv = penv->next_cpu; } } -static void qemu_tcg_init_vcpu(void *_env) +static void qemu_tcg_init_vcpu(CPUState *cpu) { - CPUArchState *env = _env; - CPUState *cpu = ENV_GET_CPU(env); - /* share a single thread for all cpus with TCG */ if (!tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); - env->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(env->halt_cond); - tcg_halt_cond = env->halt_cond; - qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env, + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + tcg_halt_cond = cpu->halt_cond; + qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu, QEMU_THREAD_JOINABLE); #ifdef _WIN32 cpu->hThread = qemu_thread_get_handle(cpu->thread); #endif - while (env->created == 0) { + while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } tcg_cpu_thread = cpu->thread; } else { cpu->thread = tcg_cpu_thread; - env->halt_cond = tcg_halt_cond; + cpu->halt_cond = tcg_halt_cond; } } @@ -1014,11 +1013,11 @@ static void qemu_kvm_start_vcpu(CPUArchState *env) CPUState *cpu = ENV_GET_CPU(env); cpu->thread = g_malloc0(sizeof(QemuThread)); - env->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(env->halt_cond); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env, QEMU_THREAD_JOINABLE); - while (env->created == 0) { + while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } } @@ -1028,11 +1027,11 @@ static void qemu_dummy_start_vcpu(CPUArchState *env) CPUState *cpu = ENV_GET_CPU(env); cpu->thread = g_malloc0(sizeof(QemuThread)); - env->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(env->halt_cond); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env, QEMU_THREAD_JOINABLE); - while (env->created == 0) { + while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } } @@ -1040,14 +1039,15 @@ static void qemu_dummy_start_vcpu(CPUArchState *env) void qemu_init_vcpu(void *_env) { CPUArchState *env = _env; + CPUState *cpu = ENV_GET_CPU(env); env->nr_cores = smp_cores; env->nr_threads = smp_threads; - env->stopped = 1; + cpu->stopped = true; if (kvm_enabled()) { qemu_kvm_start_vcpu(env); } else if (tcg_enabled()) { - qemu_tcg_init_vcpu(env); + qemu_tcg_init_vcpu(cpu); } else { qemu_dummy_start_vcpu(env); } @@ -1056,8 +1056,9 @@ void qemu_init_vcpu(void *_env) void cpu_stop_current(void) { if (cpu_single_env) { - cpu_single_env->stop = 0; - cpu_single_env->stopped = 1; + CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env); + cpu_single_cpu->stop = false; + cpu_single_cpu->stopped = true; cpu_exit(cpu_single_env); qemu_cond_signal(&qemu_pause_cond); } @@ -1138,17 +1139,18 @@ static void tcg_exec_all(void) } for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) { CPUArchState *env = next_cpu; + CPUState *cpu = ENV_GET_CPU(env); qemu_clock_enable(vm_clock, (env->singlestep_enabled & SSTEP_NOTIMER) == 0); - if (cpu_can_run(env)) { + if (cpu_can_run(cpu)) { r = tcg_cpu_exec(env); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(env); break; } - } else if (env->stop || env->stopped) { + } else if (cpu->stop || cpu->stopped) { break; } } @@ -1203,7 +1205,8 @@ CpuInfoList *qmp_query_cpus(Error **errp) CpuInfoList *head = NULL, *cur_item = NULL; CPUArchState *env; - for(env = first_cpu; env != NULL; env = env->next_cpu) { + for (env = first_cpu; env != NULL; env = env->next_cpu) { + CPUState *cpu = ENV_GET_CPU(env); CpuInfoList *info; cpu_synchronize_state(env); @@ -1213,7 +1216,7 @@ CpuInfoList *qmp_query_cpus(Error **errp) info->value->CPU = env->cpu_index; info->value->current = (env == first_cpu); info->value->halted = env->halted; - info->value->thread_id = env->thread_id; + info->value->thread_id = cpu->thread_id; #if defined(TARGET_I386) info->value->has_pc = true; info->value->pc = env->eip + env->segs[R_CS].base; @@ -142,109 +142,6 @@ int qemu_fdatasync(int fd) #endif } -/* io vectors */ - -void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) -{ - qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec)); - qiov->niov = 0; - qiov->nalloc = alloc_hint; - qiov->size = 0; -} - -void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) -{ - int i; - - qiov->iov = iov; - qiov->niov = niov; - qiov->nalloc = -1; - qiov->size = 0; - for (i = 0; i < niov; i++) - qiov->size += iov[i].iov_len; -} - -void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) -{ - assert(qiov->nalloc != -1); - - if (qiov->niov == qiov->nalloc) { - qiov->nalloc = 2 * qiov->nalloc + 1; - qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec)); - } - qiov->iov[qiov->niov].iov_base = base; - qiov->iov[qiov->niov].iov_len = len; - qiov->size += len; - ++qiov->niov; -} - -/* - * Concatenates (partial) iovecs from src to the end of dst. - * It starts copying after skipping `soffset' bytes at the - * beginning of src and adds individual vectors from src to - * dst copies up to `sbytes' bytes total, or up to the end - * of src if it comes first. This way, it is okay to specify - * very large value for `sbytes' to indicate "up to the end - * of src". - * Only vector pointers are processed, not the actual data buffers. - */ -void qemu_iovec_concat(QEMUIOVector *dst, - QEMUIOVector *src, size_t soffset, size_t sbytes) -{ - int i; - size_t done; - struct iovec *siov = src->iov; - assert(dst->nalloc != -1); - assert(src->size >= soffset); - for (i = 0, done = 0; done < sbytes && i < src->niov; i++) { - if (soffset < siov[i].iov_len) { - size_t len = MIN(siov[i].iov_len - soffset, sbytes - done); - qemu_iovec_add(dst, siov[i].iov_base + soffset, len); - done += len; - soffset = 0; - } else { - soffset -= siov[i].iov_len; - } - } - /* return done; */ -} - -void qemu_iovec_destroy(QEMUIOVector *qiov) -{ - assert(qiov->nalloc != -1); - - qemu_iovec_reset(qiov); - g_free(qiov->iov); - qiov->nalloc = 0; - qiov->iov = NULL; -} - -void qemu_iovec_reset(QEMUIOVector *qiov) -{ - assert(qiov->nalloc != -1); - - qiov->niov = 0; - qiov->size = 0; -} - -size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, - void *buf, size_t bytes) -{ - return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); -} - -size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, - const void *buf, size_t bytes) -{ - return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); -} - -size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, - int fillc, size_t bytes) -{ - return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); -} - /* * Checks if a buffer is all zeroes * @@ -383,11 +280,6 @@ int qemu_parse_fd(const char *param) return fd; } -int qemu_parse_fdset(const char *param) -{ - return qemu_parse_fd(param); -} - /* round down to the nearest power of 2*/ int64_t pow2floor(int64_t value) { diff --git a/event_notifier-posix.c b/event_notifier-posix.c new file mode 100644 index 0000000..6f3239a --- /dev/null +++ b/event_notifier-posix.c @@ -0,0 +1,120 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "event_notifier.h" +#include "qemu-char.h" + +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif + +void event_notifier_init_fd(EventNotifier *e, int fd) +{ + e->rfd = fd; + e->wfd = fd; +} + +int event_notifier_init(EventNotifier *e, int active) +{ + int fds[2]; + int ret; + +#ifdef CONFIG_EVENTFD + ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); +#else + ret = -1; + errno = ENOSYS; +#endif + if (ret >= 0) { + e->rfd = e->wfd = ret; + } else { + if (errno != ENOSYS) { + return -errno; + } + if (qemu_pipe(fds) < 0) { + return -errno; + } + ret = fcntl_setfl(fds[0], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + ret = fcntl_setfl(fds[1], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + e->rfd = fds[0]; + e->wfd = fds[1]; + } + if (active) { + event_notifier_set(e); + } + return 0; + +fail: + close(fds[0]); + close(fds[1]); + return ret; +} + +void event_notifier_cleanup(EventNotifier *e) +{ + if (e->rfd != e->wfd) { + close(e->rfd); + } + close(e->wfd); +} + +int event_notifier_get_fd(EventNotifier *e) +{ + return e->rfd; +} + +int event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler) +{ + return qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e); +} + +int event_notifier_set(EventNotifier *e) +{ + static const uint64_t value = 1; + ssize_t ret; + + do { + ret = write(e->wfd, &value, sizeof(value)); + } while (ret < 0 && errno == EINTR); + + /* EAGAIN is fine, a read must be pending. */ + if (ret < 0 && errno != EAGAIN) { + return -errno; + } + return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ + int value; + ssize_t len; + char buffer[512]; + + /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */ + value = 0; + do { + len = read(e->rfd, buffer, sizeof(buffer)); + value |= (len > 0); + } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); + + return value; +} diff --git a/event_notifier-win32.c b/event_notifier-win32.c new file mode 100644 index 0000000..c723dad --- /dev/null +++ b/event_notifier-win32.c @@ -0,0 +1,59 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "event_notifier.h" +#include "main-loop.h" + +int event_notifier_init(EventNotifier *e, int active) +{ + e->event = CreateEvent(NULL, FALSE, FALSE, NULL); + assert(e->event); + return 0; +} + +void event_notifier_cleanup(EventNotifier *e) +{ + CloseHandle(e->event); +} + +HANDLE event_notifier_get_handle(EventNotifier *e) +{ + return e->event; +} + +int event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler) +{ + if (handler) { + return qemu_add_wait_object(e->event, (IOHandler *)handler, e); + } else { + qemu_del_wait_object(e->event, (IOHandler *)handler, e); + return 0; + } +} + +int event_notifier_set(EventNotifier *e) +{ + SetEvent(e->event); + return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ + int ret = WaitForSingleObject(e->event, 0); + if (ret == WAIT_OBJECT_0) { + ResetEvent(e->event); + return true; + } + return false; +} diff --git a/event_notifier.c b/event_notifier.c deleted file mode 100644 index 2c207e1..0000000 --- a/event_notifier.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * event notifier support - * - * Copyright Red Hat, Inc. 2010 - * - * Authors: - * Michael S. Tsirkin <mst@redhat.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu-common.h" -#include "event_notifier.h" -#include "qemu-char.h" - -#ifdef CONFIG_EVENTFD -#include <sys/eventfd.h> -#endif - -void event_notifier_init_fd(EventNotifier *e, int fd) -{ - e->fd = fd; -} - -int event_notifier_init(EventNotifier *e, int active) -{ -#ifdef CONFIG_EVENTFD - int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) - return -errno; - e->fd = fd; - return 0; -#else - return -ENOSYS; -#endif -} - -void event_notifier_cleanup(EventNotifier *e) -{ - close(e->fd); -} - -int event_notifier_get_fd(EventNotifier *e) -{ - return e->fd; -} - -int event_notifier_set_handler(EventNotifier *e, - EventNotifierHandler *handler) -{ - return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e); -} - -int event_notifier_set(EventNotifier *e) -{ - uint64_t value = 1; - int r = write(e->fd, &value, sizeof(value)); - return r == sizeof(value); -} - -int event_notifier_test_and_clear(EventNotifier *e) -{ - uint64_t value; - int r = read(e->fd, &value, sizeof(value)); - return r == sizeof(value); -} diff --git a/event_notifier.h b/event_notifier.h index f0ec2f2..88b57af 100644 --- a/event_notifier.h +++ b/event_notifier.h @@ -15,18 +15,32 @@ #include "qemu-common.h" +#ifdef _WIN32 +#include <windows.h> +#endif + struct EventNotifier { - int fd; +#ifdef _WIN32 + HANDLE event; +#else + int rfd; + int wfd; +#endif }; typedef void EventNotifierHandler(EventNotifier *); -void event_notifier_init_fd(EventNotifier *, int fd); int event_notifier_init(EventNotifier *, int active); void event_notifier_cleanup(EventNotifier *); -int event_notifier_get_fd(EventNotifier *); int event_notifier_set(EventNotifier *); int event_notifier_test_and_clear(EventNotifier *); int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *); +#ifdef CONFIG_POSIX +void event_notifier_init_fd(EventNotifier *, int fd); +int event_notifier_get_fd(EventNotifier *); +#else +HANDLE event_notifier_get_handle(EventNotifier *); +#endif + #endif @@ -689,6 +689,9 @@ CPUArchState *qemu_get_cpu(int cpu) void cpu_exec_init(CPUArchState *env) { +#ifndef CONFIG_USER_ONLY + CPUState *cpu = ENV_GET_CPU(env); +#endif CPUArchState **penv; int cpu_index; @@ -707,7 +710,7 @@ void cpu_exec_init(CPUArchState *env) QTAILQ_INIT(&env->breakpoints); QTAILQ_INIT(&env->watchpoints); #ifndef CONFIG_USER_ONLY - env->thread_id = qemu_get_thread_id(); + cpu->thread_id = qemu_get_thread_id(); #endif *penv = env; #if defined(CONFIG_USER_ONLY) @@ -1693,6 +1696,7 @@ static void cpu_unlink_tb(CPUArchState *env) /* mask must never be zero, except for A20 change call */ static void tcg_handle_interrupt(CPUArchState *env, int mask) { + CPUState *cpu = ENV_GET_CPU(env); int old_mask; old_mask = env->interrupt_request; @@ -1702,8 +1706,8 @@ static void tcg_handle_interrupt(CPUArchState *env, int mask) * If called from iothread context, wake the target cpu in * case its halted. */ - if (!qemu_cpu_is_self(env)) { - qemu_cpu_kick(env); + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); return; } @@ -107,7 +107,7 @@ static void apic_sync_vapic(APICCommonState *s, int sync_type) length = offsetof(VAPICState, enabled) - offsetof(VAPICState, isr); if (sync_type & SYNC_TO_VAPIC) { - assert(qemu_cpu_is_self(s->cpu_env)); + assert(qemu_cpu_is_self(CPU(s->cpu))); vapic_state.tpr = s->tpr; vapic_state.enabled = 1; @@ -151,15 +151,15 @@ static void apic_local_deliver(APICCommonState *s, int vector) switch ((lvt >> 8) & 7) { case APIC_DM_SMI: - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SMI); + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_SMI); break; case APIC_DM_NMI: - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_NMI); + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_NMI); break; case APIC_DM_EXTINT: - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD); + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_HARD); break; case APIC_DM_FIXED: @@ -187,7 +187,7 @@ void apic_deliver_pic_intr(DeviceState *d, int level) reset_bit(s->irr, lvt & 0xff); /* fall through */ case APIC_DM_EXTINT: - cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_HARD); + cpu_reset_interrupt(&s->cpu->env, CPU_INTERRUPT_HARD); break; } } @@ -248,18 +248,22 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, case APIC_DM_SMI: foreach_apic(apic_iter, deliver_bitmask, - cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_SMI) ); + cpu_interrupt(&apic_iter->cpu->env, CPU_INTERRUPT_SMI) + ); return; case APIC_DM_NMI: foreach_apic(apic_iter, deliver_bitmask, - cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_NMI) ); + cpu_interrupt(&apic_iter->cpu->env, CPU_INTERRUPT_NMI) + ); return; case APIC_DM_INIT: /* normal INIT IPI sent to processors */ foreach_apic(apic_iter, deliver_bitmask, - cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_INIT) ); + cpu_interrupt(&apic_iter->cpu->env, + CPU_INTERRUPT_INIT) + ); return; case APIC_DM_EXTINT: @@ -293,7 +297,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val) /* if disabled, cannot be enabled again */ if (!(val & MSR_IA32_APICBASE_ENABLE)) { s->apicbase &= ~MSR_IA32_APICBASE_ENABLE; - cpu_clear_apic_feature(s->cpu_env); + cpu_clear_apic_feature(&s->cpu->env); s->spurious_vec &= ~APIC_SV_ENABLE; } } @@ -359,13 +363,15 @@ static int apic_irq_pending(APICCommonState *s) /* signal the CPU if an irq is pending */ static void apic_update_irq(APICCommonState *s) { + CPUState *cpu = CPU(s->cpu); + if (!(s->spurious_vec & APIC_SV_ENABLE)) { return; } - if (!qemu_cpu_is_self(s->cpu_env)) { - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_POLL); + if (!qemu_cpu_is_self(cpu)) { + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_POLL); } else if (apic_irq_pending(s) > 0) { - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD); + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_HARD); } } @@ -472,18 +478,18 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, static void apic_startup(APICCommonState *s, int vector_num) { s->sipi_vector = vector_num; - cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI); + cpu_interrupt(&s->cpu->env, CPU_INTERRUPT_SIPI); } void apic_sipi(DeviceState *d) { APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); - cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI); + cpu_reset_interrupt(&s->cpu->env, CPU_INTERRUPT_SIPI); if (!s->wait_for_sipi) return; - cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector); + cpu_x86_load_seg_cache_sipi(s->cpu, s->sipi_vector); s->wait_for_sipi = 0; } @@ -672,7 +678,7 @@ static uint32_t apic_mem_readl(void *opaque, hwaddr addr) case 0x08: apic_sync_vapic(s, SYNC_FROM_VAPIC); if (apic_report_tpr_access) { - cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ); + cpu_report_tpr_access(&s->cpu->env, TPR_ACCESS_READ); } val = s->tpr; break; @@ -774,7 +780,7 @@ static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val) break; case 0x08: if (apic_report_tpr_access) { - cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE); + cpu_report_tpr_access(&s->cpu->env, TPR_ACCESS_WRITE); } s->tpr = val; apic_sync_vapic(s, SYNC_TO_VAPIC); diff --git a/hw/apic_common.c b/hw/apic_common.c index d68116d..5f54276 100644 --- a/hw/apic_common.c +++ b/hw/apic_common.c @@ -103,7 +103,7 @@ void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, { APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d); - vapic_report_tpr_access(s->vapic, s->cpu_env, ip, access); + vapic_report_tpr_access(s->vapic, &s->cpu->env, ip, access); } void apic_report_irq_delivered(int delivered) @@ -217,7 +217,7 @@ static void apic_reset_common(DeviceState *d) APICCommonClass *info = APIC_COMMON_GET_CLASS(s); bool bsp; - bsp = cpu_is_bsp(x86_env_get_cpu(s->cpu_env)); + bsp = cpu_is_bsp(s->cpu); s->apicbase = 0xfee00000 | (bsp ? MSR_IA32_APICBASE_BSP : 0) | MSR_IA32_APICBASE_ENABLE; @@ -368,7 +368,6 @@ static const VMStateDescription vmstate_apic_common = { static Property apic_properties_common[] = { DEFINE_PROP_UINT8("id", APICCommonState, id, -1), - DEFINE_PROP_PTR("cpu_env", APICCommonState, cpu_env), DEFINE_PROP_BIT("vapic", APICCommonState, vapic_control, VAPIC_ENABLE_BIT, true), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/apic_internal.h b/hw/apic_internal.h index 30932a3..79e2de2 100644 --- a/hw/apic_internal.h +++ b/hw/apic_internal.h @@ -95,8 +95,9 @@ typedef struct APICCommonClass struct APICCommonState { SysBusDevice busdev; + MemoryRegion io_memory; - void *cpu_env; + X86CPU *cpu; uint32_t apicbase; uint8_t id; uint8_t arb_id; diff --git a/hw/arm-misc.h b/hw/arm-misc.h index d02f7f0..adb1665 100644 --- a/hw/arm-misc.h +++ b/hw/arm-misc.h @@ -56,6 +56,7 @@ struct arm_boot_info { const struct arm_boot_info *info); /* Used internally by arm_boot.c */ int is_linux; + hwaddr initrd_start; hwaddr initrd_size; hwaddr entry; }; diff --git a/hw/arm11mpcore.c b/hw/arm11mpcore.c index 105f158..640ed20 100644 --- a/hw/arm11mpcore.c +++ b/hw/arm11mpcore.c @@ -44,7 +44,9 @@ static uint64_t mpcore_scu_read(void *opaque, hwaddr offset, case 0x0c: /* Invalidate all. */ return 0; default: - hw_error("mpcore_priv_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "mpcore_priv_read: Bad offset %x\n", (int)offset); + return 0; } } @@ -61,7 +63,8 @@ static void mpcore_scu_write(void *opaque, hwaddr offset, /* This is a no-op as cache is not emulated. */ break; default: - hw_error("mpcore_priv_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "mpcore_priv_read: Bad offset %x\n", (int)offset); } } diff --git a/hw/arm_boot.c b/hw/arm_boot.c index 09bf6c5..92e2cab 100644 --- a/hw/arm_boot.c +++ b/hw/arm_boot.c @@ -18,7 +18,6 @@ #define KERNEL_ARGS_ADDR 0x100 #define KERNEL_LOAD_ADDR 0x00010000 -#define INITRD_LOAD_ADDR 0x00d00000 /* The worlds second smallest bootloader. Set r0-r2, then jump to kernel. */ static uint32_t bootloader[] = { @@ -109,7 +108,7 @@ static void set_kernel_args(const struct arm_boot_info *info) /* ATAG_INITRD2 */ WRITE_WORD(p, 4); WRITE_WORD(p, 0x54420005); - WRITE_WORD(p, info->loader_start + INITRD_LOAD_ADDR); + WRITE_WORD(p, info->initrd_start); WRITE_WORD(p, initrd_size); } if (info->kernel_cmdline && *info->kernel_cmdline) { @@ -185,10 +184,11 @@ static void set_kernel_args_old(const struct arm_boot_info *info) /* pages_in_vram */ WRITE_WORD(p, 0); /* initrd_start */ - if (initrd_size) - WRITE_WORD(p, info->loader_start + INITRD_LOAD_ADDR); - else + if (initrd_size) { + WRITE_WORD(p, info->initrd_start); + } else { WRITE_WORD(p, 0); + } /* initrd_size */ WRITE_WORD(p, initrd_size); /* rd_start */ @@ -281,14 +281,13 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo) if (binfo->initrd_size) { rc = qemu_devtree_setprop_cell(fdt, "/chosen", "linux,initrd-start", - binfo->loader_start + INITRD_LOAD_ADDR); + binfo->initrd_start); if (rc < 0) { fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); } rc = qemu_devtree_setprop_cell(fdt, "/chosen", "linux,initrd-end", - binfo->loader_start + INITRD_LOAD_ADDR + - binfo->initrd_size); + binfo->initrd_start + binfo->initrd_size); if (rc < 0) { fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); } @@ -375,6 +374,19 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info) big_endian = 0; #endif + /* We want to put the initrd far enough into RAM that when the + * kernel is uncompressed it will not clobber the initrd. However + * on boards without much RAM we must ensure that we still leave + * enough room for a decent sized initrd, and on boards with large + * amounts of RAM we must avoid the initrd being so far up in RAM + * that it is outside lowmem and inaccessible to the kernel. + * So for boards with less than 256MB of RAM we put the initrd + * halfway into RAM, and for boards with 256MB of RAM or more we put + * the initrd at 128MB. + */ + info->initrd_start = info->loader_start + + MIN(info->ram_size / 2, 128 * 1024 * 1024); + /* Assume that raw images are linux kernels, and ELF images are not. */ kernel_size = load_elf(info->kernel_filename, NULL, NULL, &elf_entry, NULL, NULL, big_endian, ELF_MACHINE, 1); @@ -398,10 +410,9 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info) if (is_linux) { if (info->initrd_filename) { initrd_size = load_image_targphys(info->initrd_filename, - info->loader_start - + INITRD_LOAD_ADDR, - info->ram_size - - INITRD_LOAD_ADDR); + info->initrd_start, + info->ram_size - + info->initrd_start); if (initrd_size < 0) { fprintf(stderr, "qemu: could not load initrd '%s'\n", info->initrd_filename); @@ -419,9 +430,8 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info) */ if (info->dtb_filename) { /* Place the DTB after the initrd in memory */ - hwaddr dtb_start = TARGET_PAGE_ALIGN(info->loader_start - + INITRD_LOAD_ADDR - + initrd_size); + hwaddr dtb_start = TARGET_PAGE_ALIGN(info->initrd_start + + initrd_size); if (load_dtb(dtb_start, info)) { exit(1); } diff --git a/hw/arm_gic.c b/hw/arm_gic.c index ce16e83..f9e423f 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -324,7 +324,8 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset) } return res; bad_reg: - hw_error("gic_dist_readb: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "gic_dist_readb: Bad offset %x\n", (int)offset); return 0; } @@ -487,7 +488,8 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, gic_update(s); return; bad_reg: - hw_error("gic_dist_writeb: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "gic_dist_writeb: Bad offset %x\n", (int)offset); } static void gic_dist_writew(void *opaque, hwaddr offset, @@ -556,7 +558,8 @@ static uint32_t gic_cpu_read(GICState *s, int cpu, int offset) case 0x18: /* Highest Pending Interrupt */ return s->current_pending[cpu]; default: - hw_error("gic_cpu_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "gic_cpu_read: Bad offset %x\n", (int)offset); return 0; } } @@ -577,7 +580,8 @@ static void gic_cpu_write(GICState *s, int cpu, int offset, uint32_t value) case 0x10: /* End Of Interrupt */ return gic_complete_irq(s, cpu, value & 0x3ff); default: - hw_error("gic_cpu_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "gic_cpu_write: Bad offset %x\n", (int)offset); return; } gic_update(s); diff --git a/hw/arm_l2x0.c b/hw/arm_l2x0.c index 8f5921c..6abf0ee 100644 --- a/hw/arm_l2x0.c +++ b/hw/arm_l2x0.c @@ -87,7 +87,8 @@ static uint64_t l2x0_priv_read(void *opaque, hwaddr offset, case 0xF80: return 0; default: - fprintf(stderr, "l2x0_priv_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "l2x0_priv_read: Bad offset %x\n", (int)offset); break; } return 0; @@ -128,7 +129,8 @@ static void l2x0_priv_write(void *opaque, hwaddr offset, case 0xF80: return; default: - fprintf(stderr, "l2x0_priv_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "l2x0_priv_write: Bad offset %x\n", (int)offset); break; } } diff --git a/hw/arm_sysctl.c b/hw/arm_sysctl.c index 26318e1..58eb982 100644 --- a/hw/arm_sysctl.c +++ b/hw/arm_sysctl.c @@ -184,7 +184,9 @@ static uint64_t arm_sysctl_read(void *opaque, hwaddr offset, return s->sys_cfgstat; default: bad_reg: - printf ("arm_sysctl_read: Bad register offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "arm_sysctl_read: Bad register offset 0x%x\n", + (int)offset); return 0; } } @@ -339,7 +341,9 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, return; default: bad_reg: - printf ("arm_sysctl_write: Bad register offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "arm_sysctl_write: Bad register offset 0x%x\n", + (int)offset); return; } } diff --git a/hw/arm_timer.c b/hw/arm_timer.c index 2e13621..af339d3 100644 --- a/hw/arm_timer.c +++ b/hw/arm_timer.c @@ -64,7 +64,8 @@ static uint32_t arm_timer_read(void *opaque, hwaddr offset) return 0; return s->int_level; default: - hw_error("%s: Bad offset %x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset %x\n", __func__, (int)offset); return 0; } } @@ -131,7 +132,8 @@ static void arm_timer_write(void *opaque, hwaddr offset, arm_timer_recalibrate(s, 0); break; default: - hw_error("%s: Bad offset %x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset %x\n", __func__, (int)offset); } arm_timer_update(s); } @@ -223,10 +225,14 @@ static uint64_t sp804_read(void *opaque, hwaddr offset, /* Integration Test control registers, which we won't support */ case 0xf00: /* TimerITCR */ case 0xf04: /* TimerITOP (strictly write only but..) */ + qemu_log_mask(LOG_UNIMP, + "%s: integration test registers unimplemented\n", + __func__); return 0; } - hw_error("%s: Bad offset %x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset %x\n", __func__, (int)offset); return 0; } @@ -246,7 +252,8 @@ static void sp804_write(void *opaque, hwaddr offset, } /* Technically we could be writing to the Test Registers, but not likely */ - hw_error("%s: Bad offset %x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", + __func__, (int)offset); } static const MemoryRegionOps sp804_ops = { @@ -300,7 +307,7 @@ static uint64_t icp_pit_read(void *opaque, hwaddr offset, /* ??? Don't know the PrimeCell ID for this device. */ n = offset >> 8; if (n > 2) { - hw_error("%s: Bad timer %d\n", __func__, n); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n); } return arm_timer_read(s->timer[n], offset & 0xff); @@ -314,7 +321,7 @@ static void icp_pit_write(void *opaque, hwaddr offset, n = offset >> 8; if (n > 2) { - hw_error("%s: Bad timer %d\n", __func__, n); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n); } arm_timer_write(s->timer[n], offset & 0xff, value); diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c index 35c1aa6..f0a2e7b 100644 --- a/hw/armv7m_nvic.c +++ b/hw/armv7m_nvic.c @@ -138,9 +138,8 @@ void armv7m_nvic_complete_irq(void *opaque, int irq) gic_complete_irq(&s->gic, 0, irq); } -static uint32_t nvic_readl(void *opaque, uint32_t offset) +static uint32_t nvic_readl(nvic_state *s, uint32_t offset) { - nvic_state *s = (nvic_state *)opaque; uint32_t val; int irq; @@ -216,14 +215,6 @@ static uint32_t nvic_readl(void *opaque, uint32_t offset) case 0xd14: /* Configuration Control. */ /* TODO: Implement Configuration Control bits. */ return 0; - case 0xd18: case 0xd1c: case 0xd20: /* System Handler Priority. */ - irq = offset - 0xd14; - val = 0; - val |= s->gic.priority1[irq++][0]; - val |= s->gic.priority1[irq++][0] << 8; - val |= s->gic.priority1[irq++][0] << 16; - val |= s->gic.priority1[irq][0] << 24; - return val; case 0xd24: /* System Handler Status. */ val = 0; if (s->gic.irq_state[ARMV7M_EXCP_MEM].active) val |= (1 << 0); @@ -243,7 +234,7 @@ static uint32_t nvic_readl(void *opaque, uint32_t offset) return val; case 0xd28: /* Configurable Fault Status. */ /* TODO: Implement Fault Status. */ - hw_error("Not implemented: Configurable Fault Status."); + qemu_log_mask(LOG_UNIMP, "Configurable Fault Status unimplemented\n"); return 0; case 0xd2c: /* Hard Fault Status. */ case 0xd30: /* Debug Fault Status. */ @@ -251,7 +242,8 @@ static uint32_t nvic_readl(void *opaque, uint32_t offset) case 0xd38: /* Bus Fault Address. */ case 0xd3c: /* Aux Fault Status. */ /* TODO: Implement fault status registers. */ - goto bad_reg; + qemu_log_mask(LOG_UNIMP, "Fault status registers unimplemented\n"); + return 0; case 0xd40: /* PFR0. */ return 0x00000030; case 0xd44: /* PRF1. */ @@ -280,14 +272,13 @@ static uint32_t nvic_readl(void *opaque, uint32_t offset) return 0x01310102; /* TODO: Implement debug registers. */ default: - bad_reg: - hw_error("NVIC: Bad read offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad read offset 0x%x\n", offset); + return 0; } } -static void nvic_writel(void *opaque, uint32_t offset, uint32_t value) +static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) { - nvic_state *s = (nvic_state *)opaque; uint32_t oldval; switch (offset) { case 0x10: /* SysTick Control and Status. */ @@ -345,27 +336,17 @@ static void nvic_writel(void *opaque, uint32_t offset, uint32_t value) case 0xd0c: /* Application Interrupt/Reset Control. */ if ((value >> 16) == 0x05fa) { if (value & 2) { - hw_error("VECTCLRACTIVE not implemented"); + qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n"); } if (value & 5) { - hw_error("System reset"); + qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n"); } } break; case 0xd10: /* System Control. */ case 0xd14: /* Configuration Control. */ /* TODO: Implement control registers. */ - goto bad_reg; - case 0xd18: case 0xd1c: case 0xd20: /* System Handler Priority. */ - { - int irq; - irq = offset - 0xd14; - s->gic.priority1[irq++][0] = value & 0xff; - s->gic.priority1[irq++][0] = (value >> 8) & 0xff; - s->gic.priority1[irq++][0] = (value >> 16) & 0xff; - s->gic.priority1[irq][0] = (value >> 24) & 0xff; - gic_update(&s->gic); - } + qemu_log_mask(LOG_UNIMP, "NVIC: SCR and CCR unimplemented\n"); break; case 0xd24: /* System Handler Control. */ /* TODO: Real hardware allows you to set/clear the active bits @@ -380,47 +361,71 @@ static void nvic_writel(void *opaque, uint32_t offset, uint32_t value) case 0xd34: /* Mem Manage Address. */ case 0xd38: /* Bus Fault Address. */ case 0xd3c: /* Aux Fault Status. */ - goto bad_reg; + qemu_log_mask(LOG_UNIMP, + "NVIC: fault status registers unimplemented\n"); + break; case 0xf00: /* Software Triggered Interrupt Register */ if ((value & 0x1ff) < s->num_irq) { gic_set_pending_private(&s->gic, 0, value & 0x1ff); } break; default: - bad_reg: - hw_error("NVIC: Bad write offset 0x%x\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "NVIC: Bad write offset 0x%x\n", offset); } } static uint64_t nvic_sysreg_read(void *opaque, hwaddr addr, unsigned size) { - /* At the moment we only support the ID registers for byte/word access. - * This is not strictly correct as a few of the other registers also - * allow byte access. - */ + nvic_state *s = (nvic_state *)opaque; uint32_t offset = addr; - if (offset >= 0xfe0) { + int i; + uint32_t val; + + switch (offset) { + case 0xd18 ... 0xd23: /* System Handler Priority. */ + val = 0; + for (i = 0; i < size; i++) { + val |= s->gic.priority1[(offset - 0xd14) + i][0] << (i * 8); + } + return val; + case 0xfe0 ... 0xfff: /* ID. */ if (offset & 3) { return 0; } return nvic_id[(offset - 0xfe0) >> 2]; } if (size == 4) { - return nvic_readl(opaque, offset); + return nvic_readl(s, offset); } - hw_error("NVIC: Bad read of size %d at offset 0x%x\n", size, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "NVIC: Bad read of size %d at offset 0x%x\n", size, offset); + return 0; } static void nvic_sysreg_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { + nvic_state *s = (nvic_state *)opaque; uint32_t offset = addr; + int i; + + switch (offset) { + case 0xd18 ... 0xd23: /* System Handler Priority. */ + for (i = 0; i < size; i++) { + s->gic.priority1[(offset - 0xd14) + i][0] = + (value >> (i * 8)) & 0xff; + } + gic_update(&s->gic); + return; + } if (size == 4) { - nvic_writel(opaque, offset, value); + nvic_writel(s, offset, value); return; } - hw_error("NVIC: Bad write of size %d at offset 0x%x\n", size, offset); + qemu_log_mask(LOG_GUEST_ERROR, + "NVIC: Bad write of size %d at offset 0x%x\n", size, offset); } static const MemoryRegionOps nvic_sysreg_ops = { diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index e4af2e9..9bef96e 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -42,8 +42,6 @@ //#define DEBUG_CIRRUS //#define DEBUG_BITBLT -#define VGA_RAM_SIZE (8192 * 1024) - /*************************************** * * definitions @@ -2856,7 +2854,8 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci, /* I/O handler for LFB */ memory_region_init_io(&s->cirrus_linear_io, &cirrus_linear_io_ops, s, - "cirrus-linear-io", VGA_RAM_SIZE); + "cirrus-linear-io", s->vga.vram_size_mb + * 1024 * 1024); memory_region_set_flush_coalesced(&s->cirrus_linear_io); /* I/O handler for LFB */ @@ -2899,7 +2898,6 @@ static int vga_initfn(ISADevice *dev) ISACirrusVGAState *d = DO_UPCAST(ISACirrusVGAState, dev, dev); VGACommonState *s = &d->cirrus_vga.vga; - s->vram_size_mb = VGA_RAM_SIZE >> 20; vga_common_init(s); cirrus_init_common(&d->cirrus_vga, CIRRUS_ID_CLGD5430, 0, isa_address_space(dev)); @@ -2912,6 +2910,12 @@ static int vga_initfn(ISADevice *dev) return 0; } +static Property isa_vga_cirrus_properties[] = { + DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, + cirrus_vga.vga.vram_size_mb, 8), + DEFINE_PROP_END_OF_LIST(), +}; + static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) { ISADeviceClass *k = ISA_DEVICE_CLASS(klass); @@ -2919,6 +2923,7 @@ static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_cirrus_vga; k->init = vga_initfn; + dc->props = isa_vga_cirrus_properties; } static TypeInfo isa_cirrus_vga_info = { @@ -2942,7 +2947,6 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev) int16_t device_id = pc->device_id; /* setup VGA */ - s->vga.vram_size_mb = VGA_RAM_SIZE >> 20; vga_common_init(&s->vga); cirrus_init_common(s, device_id, 1, pci_address_space(dev)); s->vga.ds = graphic_console_init(s->vga.update, s->vga.invalidate, @@ -2969,6 +2973,12 @@ static int pci_cirrus_vga_initfn(PCIDevice *dev) return 0; } +static Property pci_vga_cirrus_properties[] = { + DEFINE_PROP_UINT32("vgamem_mb", struct PCICirrusVGAState, + cirrus_vga.vga.vram_size_mb, 8), + DEFINE_PROP_END_OF_LIST(), +}; + static void cirrus_vga_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2982,6 +2992,7 @@ static void cirrus_vga_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_DISPLAY_VGA; dc->desc = "Cirrus CLGD 54xx VGA"; dc->vmsd = &vmstate_pci_cirrus_vga; + dc->props = pci_vga_cirrus_properties; } static TypeInfo cirrus_vga_info = { diff --git a/hw/exynos4_boards.c b/hw/exynos4_boards.c index 4951064..bc815bb 100644 --- a/hw/exynos4_boards.c +++ b/hw/exynos4_boards.c @@ -93,11 +93,8 @@ static void lan9215_init(uint32_t base, qemu_irq irq) } } -static Exynos4210State *exynos4_boards_init_common( - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, - Exynos4BoardType board_type) +static Exynos4210State *exynos4_boards_init_common(QEMUMachineInitArgs *args, + Exynos4BoardType board_type) { if (smp_cpus != EXYNOS4210_NCPUS) { fprintf(stderr, "%s board supports only %d CPU cores. Ignoring smp_cpus" @@ -110,9 +107,9 @@ static Exynos4210State *exynos4_boards_init_common( exynos4_board_binfo.board_id = exynos4_board_id[board_type]; exynos4_board_binfo.smp_bootreg_addr = exynos4_board_smp_bootreg_addr[board_type]; - exynos4_board_binfo.kernel_filename = kernel_filename; - exynos4_board_binfo.initrd_filename = initrd_filename; - exynos4_board_binfo.kernel_cmdline = kernel_cmdline; + exynos4_board_binfo.kernel_filename = args->kernel_filename; + exynos4_board_binfo.initrd_filename = args->initrd_filename; + exynos4_board_binfo.kernel_cmdline = args->kernel_cmdline; exynos4_board_binfo.gic_cpu_if_addr = EXYNOS4210_SMP_PRIVATE_BASE_ADDR + 0x100; @@ -122,9 +119,9 @@ static Exynos4210State *exynos4_boards_init_common( " initrd_filename: %s\n", exynos4_board_ram_size[board_type] / 1048576, exynos4_board_ram_size[board_type], - kernel_filename, - kernel_cmdline, - initrd_filename); + args->kernel_filename, + args->kernel_cmdline, + args->initrd_filename); return exynos4210_init(get_system_memory(), exynos4_board_ram_size[board_type]); @@ -132,22 +129,15 @@ static Exynos4210State *exynos4_boards_init_common( static void nuri_init(QEMUMachineInitArgs *args) { - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - exynos4_boards_init_common(kernel_filename, kernel_cmdline, - initrd_filename, EXYNOS4_BOARD_NURI); + exynos4_boards_init_common(args, EXYNOS4_BOARD_NURI); arm_load_kernel(arm_env_get_cpu(first_cpu), &exynos4_board_binfo); } static void smdkc210_init(QEMUMachineInitArgs *args) { - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - Exynos4210State *s = exynos4_boards_init_common(kernel_filename, - kernel_cmdline, initrd_filename, EXYNOS4_BOARD_SMDKC210); + Exynos4210State *s = exynos4_boards_init_common(args, + EXYNOS4_BOARD_SMDKC210); lan9215_init(SMDK_LAN9118_BASE_ADDR, qemu_irq_invert(s->irq_table[exynos4210_get_irq(37, 1)])); @@ -10,6 +10,7 @@ #include "ioport.h" #include "irq.h" +#include "qemu-aio.h" #include "qemu-file.h" #include "vmstate.h" #include "qemu-log.h" diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c index dbac7ff..8b65d51 100644 --- a/hw/kvm/apic.c +++ b/hw/kvm/apic.c @@ -104,7 +104,7 @@ static void kvm_apic_enable_tpr_reporting(APICCommonState *s, bool enable) .enabled = enable }; - kvm_vcpu_ioctl(s->cpu_env, KVM_TPR_ACCESS_REPORTING, &ctl); + kvm_vcpu_ioctl(&s->cpu->env, KVM_TPR_ACCESS_REPORTING, &ctl); } static void kvm_apic_vapic_base_update(APICCommonState *s) @@ -114,7 +114,7 @@ static void kvm_apic_vapic_base_update(APICCommonState *s) }; int ret; - ret = kvm_vcpu_ioctl(s->cpu_env, KVM_SET_VAPIC_ADDR, &vapid_addr); + ret = kvm_vcpu_ioctl(&s->cpu->env, KVM_SET_VAPIC_ADDR, &vapid_addr); if (ret < 0) { fprintf(stderr, "KVM: setting VAPIC address failed (%s)\n", strerror(-ret)); @@ -125,7 +125,7 @@ static void kvm_apic_vapic_base_update(APICCommonState *s) static void do_inject_external_nmi(void *data) { APICCommonState *s = data; - CPUX86State *env = s->cpu_env; + CPUX86State *env = &s->cpu->env; uint32_t lvt; int ret; @@ -143,7 +143,7 @@ static void do_inject_external_nmi(void *data) static void kvm_apic_external_nmi(APICCommonState *s) { - run_on_cpu(s->cpu_env, do_inject_external_nmi, s); + run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s); } static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr, diff --git a/hw/kvmvapic.c b/hw/kvmvapic.c index 5e0a7c9..dc111ee 100644 --- a/hw/kvmvapic.c +++ b/hw/kvmvapic.c @@ -475,11 +475,13 @@ static void vapic_enable_tpr_reporting(bool enable) VAPICEnableTPRReporting info = { .enable = enable, }; + X86CPU *cpu; CPUX86State *env; for (env = first_cpu; env != NULL; env = env->next_cpu) { + cpu = x86_env_get_cpu(env); info.apic = env->apic_state; - run_on_cpu(env, vapic_do_enable_tpr_reporting, &info); + run_on_cpu(CPU(cpu), vapic_do_enable_tpr_reporting, &info); } } @@ -717,7 +719,7 @@ static int vapic_post_load(void *opaque, int version_id) } if (s->state == VAPIC_ACTIVE) { if (smp_cpus == 1) { - run_on_cpu(first_cpu, do_vapic_enable, s); + run_on_cpu(ENV_GET_CPU(first_cpu), do_vapic_enable, s); } else { zero = g_malloc0(s->rom_state.vapic_size); cpu_physical_memory_rw(s->vapic_paddr, zero, diff --git a/hw/mainstone.c b/hw/mainstone.c index 3266946..5bbecb7 100644 --- a/hw/mainstone.c +++ b/hw/mainstone.c @@ -95,10 +95,8 @@ static struct arm_boot_info mainstone_binfo = { }; static void mainstone_common_init(MemoryRegion *address_space_mem, - ram_addr_t ram_size, - const char *kernel_filename, - const char *kernel_cmdline, const char *initrd_filename, - const char *cpu_model, enum mainstone_model_e model, int arm_id) + QEMUMachineInitArgs *args, + enum mainstone_model_e model, int arm_id) { uint32_t sector_len = 256 * 1024; hwaddr mainstone_flash_base[] = { MST_FLASH_0, MST_FLASH_1 }; @@ -108,6 +106,7 @@ static void mainstone_common_init(MemoryRegion *address_space_mem, int i; int be; MemoryRegion *rom = g_new(MemoryRegion, 1); + const char *cpu_model = args->cpu_model; if (!cpu_model) cpu_model = "pxa270-c5"; @@ -164,22 +163,16 @@ static void mainstone_common_init(MemoryRegion *address_space_mem, smc91c111_init(&nd_table[0], MST_ETH_PHYS, qdev_get_gpio_in(mst_irq, ETHERNET_IRQ)); - mainstone_binfo.kernel_filename = kernel_filename; - mainstone_binfo.kernel_cmdline = kernel_cmdline; - mainstone_binfo.initrd_filename = initrd_filename; + mainstone_binfo.kernel_filename = args->kernel_filename; + mainstone_binfo.kernel_cmdline = args->kernel_cmdline; + mainstone_binfo.initrd_filename = args->initrd_filename; mainstone_binfo.board_id = arm_id; arm_load_kernel(mpu->cpu, &mainstone_binfo); } static void mainstone_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - mainstone_common_init(get_system_memory(), ram_size, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, mainstone, 0x196); + mainstone_common_init(get_system_memory(), args, mainstone, 0x196); } static QEMUMachine mainstone2_machine = { diff --git a/hw/nseries.c b/hw/nseries.c index 9306aa1..652d9da 100644 --- a/hw/nseries.c +++ b/hw/nseries.c @@ -1284,17 +1284,15 @@ static int n810_atag_setup(const struct arm_boot_info *info, void *p) return n8x0_atag_setup(p, 810); } -static void n8x0_init(ram_addr_t ram_size, const char *boot_device, - const char *kernel_filename, - const char *kernel_cmdline, const char *initrd_filename, - const char *cpu_model, struct arm_boot_info *binfo, int model) +static void n8x0_init(QEMUMachineInitArgs *args, + struct arm_boot_info *binfo, int model) { MemoryRegion *sysmem = get_system_memory(); struct n800_s *s = (struct n800_s *) g_malloc0(sizeof(*s)); int sdram_size = binfo->ram_size; DisplayState *ds; - s->mpu = omap2420_mpu_init(sysmem, sdram_size, cpu_model); + s->mpu = omap2420_mpu_init(sysmem, sdram_size, args->cpu_model); /* Setup peripherals * @@ -1338,17 +1336,18 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device, n8x0_usb_setup(s); } - if (kernel_filename) { + if (args->kernel_filename) { /* Or at the linux loader. */ - binfo->kernel_filename = kernel_filename; - binfo->kernel_cmdline = kernel_cmdline; - binfo->initrd_filename = initrd_filename; + binfo->kernel_filename = args->kernel_filename; + binfo->kernel_cmdline = args->kernel_cmdline; + binfo->initrd_filename = args->initrd_filename; arm_load_kernel(s->mpu->cpu, binfo); qemu_register_reset(n8x0_boot_init, s); } - if (option_rom[0].name && (boot_device[0] == 'n' || !kernel_filename)) { + if (option_rom[0].name && + (args->boot_device[0] == 'n' || !args->kernel_filename)) { int rom_size; uint8_t nolo_tags[0x10000]; /* No, wait, better start at the ROM. */ @@ -1400,28 +1399,12 @@ static struct arm_boot_info n810_binfo = { static void n800_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - return n8x0_init(ram_size, boot_device, - kernel_filename, kernel_cmdline, initrd_filename, - cpu_model, &n800_binfo, 800); + return n8x0_init(args, &n800_binfo, 800); } static void n810_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - return n8x0_init(ram_size, boot_device, - kernel_filename, kernel_cmdline, initrd_filename, - cpu_model, &n810_binfo, 810); + return n8x0_init(args, &n810_binfo, 810); } static QEMUMachine n800_machine = { diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c index eb2bf05..21a5bbb 100644 --- a/hw/omap_sx1.c +++ b/hw/omap_sx1.c @@ -97,11 +97,7 @@ static struct arm_boot_info sx1_binfo = { .board_id = 0x265, }; -static void sx1_init(ram_addr_t ram_size, - const char *boot_device, - const char *kernel_filename, const char *kernel_cmdline, - const char *initrd_filename, const char *cpu_model, - const int version) +static void sx1_init(QEMUMachineInitArgs *args, const int version) { struct omap_mpu_state_s *mpu; MemoryRegion *address_space = get_system_memory(); @@ -121,7 +117,7 @@ static void sx1_init(ram_addr_t ram_size, flash_size = flash2_size; } - mpu = omap310_mpu_init(address_space, sx1_binfo.ram_size, cpu_model); + mpu = omap310_mpu_init(address_space, sx1_binfo.ram_size, args->cpu_model); /* External Flash (EMIFS) */ memory_region_init_ram(flash, "omap_sx1.flash0-0", flash_size); @@ -192,16 +188,16 @@ static void sx1_init(ram_addr_t ram_size, OMAP_CS1_BASE, &cs[1]); } - if (!kernel_filename && !fl_idx) { + if (!args->kernel_filename && !fl_idx) { fprintf(stderr, "Kernel or Flash image must be specified\n"); exit(1); } /* Load the kernel. */ - if (kernel_filename) { - sx1_binfo.kernel_filename = kernel_filename; - sx1_binfo.kernel_cmdline = kernel_cmdline; - sx1_binfo.initrd_filename = initrd_filename; + if (args->kernel_filename) { + sx1_binfo.kernel_filename = args->kernel_filename; + sx1_binfo.kernel_cmdline = args->kernel_cmdline; + sx1_binfo.initrd_filename = args->initrd_filename; arm_load_kernel(mpu->cpu, &sx1_binfo); } @@ -211,26 +207,12 @@ static void sx1_init(ram_addr_t ram_size, static void sx1_init_v1(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - sx1_init(ram_size, boot_device, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, 1); + sx1_init(args, 1); } static void sx1_init_v2(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - sx1_init(ram_size, boot_device, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, 2); + sx1_init(args, 2); } static QEMUMachine sx1_machine_v2 = { @@ -71,8 +71,6 @@ #define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) -#define MSI_ADDR_BASE 0xfee00000 - #define E820_NR_ENTRIES 16 struct e820_entry { @@ -849,35 +847,6 @@ DeviceState *cpu_get_current_apic(void) } } -static DeviceState *apic_init(void *env, uint8_t apic_id) -{ - DeviceState *dev; - static int apic_mapped; - - if (kvm_irqchip_in_kernel()) { - dev = qdev_create(NULL, "kvm-apic"); - } else if (xen_enabled()) { - dev = qdev_create(NULL, "xen-apic"); - } else { - dev = qdev_create(NULL, "apic"); - } - - qdev_prop_set_uint8(dev, "id", apic_id); - qdev_prop_set_ptr(dev, "cpu_env", env); - qdev_init_nofail(dev); - - /* XXX: mapping more APICs at the same memory location */ - if (apic_mapped == 0) { - /* NOTE: the APIC is directly connected to the CPU - it is not - on the global memory bus. */ - /* XXX: what if the base changes? */ - sysbus_mmio_map(sysbus_from_qdev(dev), 0, MSI_ADDR_BASE); - apic_mapped = 1; - } - - return dev; -} - void pc_acpi_smi_interrupt(void *opaque, int irq, int level) { CPUX86State *s = opaque; @@ -887,24 +856,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -static X86CPU *pc_new_cpu(const char *cpu_model) -{ - X86CPU *cpu; - CPUX86State *env; - - cpu = cpu_x86_init(cpu_model); - if (cpu == NULL) { - fprintf(stderr, "Unable to find x86 CPU definition\n"); - exit(1); - } - env = &cpu->env; - if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) { - env->apic_state = apic_init(env, env->cpuid_apic_id); - } - cpu_reset(CPU(cpu)); - return cpu; -} - void pc_cpus_init(const char *cpu_model) { int i; @@ -918,8 +869,11 @@ void pc_cpus_init(const char *cpu_model) #endif } - for(i = 0; i < smp_cpus; i++) { - pc_new_cpu(cpu_model); + for (i = 0; i < smp_cpus; i++) { + if (!cpu_x86_init(cpu_model)) { + fprintf(stderr, "Unable to find x86 CPU definition\n"); + exit(1); + } } } diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c index 5e3a409..7d040b5 100644 --- a/hw/pflash_cfi01.c +++ b/hw/pflash_cfi01.c @@ -42,6 +42,7 @@ #include "qemu-timer.h" #include "exec-memory.h" #include "host-utils.h" +#include "sysbus.h" #define PFLASH_BUG(fmt, ...) \ do { \ @@ -60,23 +61,28 @@ do { \ #endif struct pflash_t { + SysBusDevice busdev; BlockDriverState *bs; - hwaddr base; - hwaddr sector_len; - hwaddr total_len; - int width; + uint32_t nb_blocs; + uint64_t sector_len; + uint8_t width; + uint8_t be; int wcycle; /* if 0, the flash is read normally */ int bypass; int ro; uint8_t cmd; uint8_t status; - uint16_t ident[4]; + uint16_t ident0; + uint16_t ident1; + uint16_t ident2; + uint16_t ident3; uint8_t cfi_len; uint8_t cfi_table[0x52]; hwaddr counter; unsigned int writeblock_size; QEMUTimer *timer; MemoryRegion mem; + char *name; void *storage; }; @@ -168,15 +174,16 @@ static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, case 0x90: switch (boff) { case 0: - ret = pfl->ident[0] << 8 | pfl->ident[1]; + ret = pfl->ident0 << 8 | pfl->ident1; DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret); break; case 1: - ret = pfl->ident[2] << 8 | pfl->ident[3]; + ret = pfl->ident2 << 8 | pfl->ident3; DPRINTF("%s: Device ID Code %04x\n", __func__, ret); break; default: - DPRINTF("%s: Read Device Information boff=%x\n", __func__, boff); + DPRINTF("%s: Read Device Information boff=%x\n", __func__, + (unsigned)boff); ret = 0; break; } @@ -279,9 +286,8 @@ static void pflash_write(pflash_t *pfl, hwaddr offset, p = pfl->storage; offset &= ~(pfl->sector_len - 1); - DPRINTF("%s: block erase at " TARGET_FMT_plx " bytes " - TARGET_FMT_plx "\n", - __func__, offset, pfl->sector_len); + DPRINTF("%s: block erase at " TARGET_FMT_plx " bytes %x\n", + __func__, offset, (unsigned)pfl->sector_len); if (!pfl->ro) { memset(p + offset, 0xff, pfl->sector_len); @@ -543,19 +549,13 @@ static const MemoryRegionOps pflash_cfi01_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -pflash_t *pflash_cfi01_register(hwaddr base, - DeviceState *qdev, const char *name, - hwaddr size, - BlockDriverState *bs, uint32_t sector_len, - int nb_blocs, int width, - uint16_t id0, uint16_t id1, - uint16_t id2, uint16_t id3, int be) +static int pflash_cfi01_init(SysBusDevice *dev) { - pflash_t *pfl; - hwaddr total_len; + pflash_t *pfl = FROM_SYSBUS(typeof(*pfl), dev); + uint64_t total_len; int ret; - total_len = sector_len * nb_blocs; + total_len = pfl->sector_len * pfl->nb_blocs; /* XXX: to be fixed */ #if 0 @@ -564,27 +564,22 @@ pflash_t *pflash_cfi01_register(hwaddr base, return NULL; #endif - pfl = g_malloc0(sizeof(pflash_t)); - memory_region_init_rom_device( - &pfl->mem, be ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, pfl, - name, size); - vmstate_register_ram(&pfl->mem, qdev); + &pfl->mem, pfl->be ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, pfl, + pfl->name, total_len); + vmstate_register_ram(&pfl->mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->mem); - memory_region_add_subregion(get_system_memory(), base, &pfl->mem); + sysbus_init_mmio(dev, &pfl->mem); - pfl->bs = bs; if (pfl->bs) { /* read the initial flash content */ ret = bdrv_read(pfl->bs, 0, pfl->storage, total_len >> 9); + if (ret < 0) { - memory_region_del_subregion(get_system_memory(), &pfl->mem); - vmstate_unregister_ram(&pfl->mem, qdev); + vmstate_unregister_ram(&pfl->mem, DEVICE(pfl)); memory_region_destroy(&pfl->mem); - g_free(pfl); - return NULL; + return 1; } - bdrv_attach_dev_nofail(pfl->bs, pfl); } if (pfl->bs) { @@ -594,17 +589,9 @@ pflash_t *pflash_cfi01_register(hwaddr base, } pfl->timer = qemu_new_timer_ns(vm_clock, pflash_timer, pfl); - pfl->base = base; - pfl->sector_len = sector_len; - pfl->total_len = total_len; - pfl->width = width; pfl->wcycle = 0; pfl->cmd = 0; pfl->status = 0; - pfl->ident[0] = id0; - pfl->ident[1] = id1; - pfl->ident[2] = id2; - pfl->ident[3] = id3; /* Hardcoded CFI table */ pfl->cfi_len = 0x52; /* Standard "QRY" string */ @@ -653,7 +640,7 @@ pflash_t *pflash_cfi01_register(hwaddr base, pfl->cfi_table[0x28] = 0x02; pfl->cfi_table[0x29] = 0x00; /* Max number of bytes in multi-bytes write */ - if (width == 1) { + if (pfl->width == 1) { pfl->cfi_table[0x2A] = 0x08; } else { pfl->cfi_table[0x2A] = 0x0B; @@ -664,10 +651,10 @@ pflash_t *pflash_cfi01_register(hwaddr base, /* Number of erase block regions (uniform) */ pfl->cfi_table[0x2C] = 0x01; /* Erase block region 1 */ - pfl->cfi_table[0x2D] = nb_blocs - 1; - pfl->cfi_table[0x2E] = (nb_blocs - 1) >> 8; - pfl->cfi_table[0x2F] = sector_len >> 8; - pfl->cfi_table[0x30] = sector_len >> 16; + pfl->cfi_table[0x2D] = pfl->nb_blocs - 1; + pfl->cfi_table[0x2E] = (pfl->nb_blocs - 1) >> 8; + pfl->cfi_table[0x2F] = pfl->sector_len >> 8; + pfl->cfi_table[0x30] = pfl->sector_len >> 16; /* Extended */ pfl->cfi_table[0x31] = 'P'; @@ -689,6 +676,75 @@ pflash_t *pflash_cfi01_register(hwaddr base, pfl->cfi_table[0x3f] = 0x01; /* Number of protection fields */ + return 0; +} + +static Property pflash_cfi01_properties[] = { + DEFINE_PROP_DRIVE("drive", struct pflash_t, bs), + DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), + DEFINE_PROP_UINT64("sector-length", struct pflash_t, sector_len, 0), + DEFINE_PROP_UINT8("width", struct pflash_t, width, 0), + DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0), + DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), + DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), + DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), + DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), + DEFINE_PROP_STRING("name", struct pflash_t, name), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi01_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = pflash_cfi01_init; + dc->props = pflash_cfi01_properties; +} + + +static const TypeInfo pflash_cfi01_info = { + .name = "cfi.pflash01", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(struct pflash_t), + .class_init = pflash_cfi01_class_init, +}; + +static void pflash_cfi01_register_types(void) +{ + type_register_static(&pflash_cfi01_info); +} + +type_init(pflash_cfi01_register_types) + +pflash_t *pflash_cfi01_register(hwaddr base, + DeviceState *qdev, const char *name, + hwaddr size, + BlockDriverState *bs, + uint32_t sector_len, int nb_blocs, int width, + uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, int be) +{ + DeviceState *dev = qdev_create(NULL, "cfi.pflash01"); + SysBusDevice *busdev = sysbus_from_qdev(dev); + pflash_t *pfl = (pflash_t *)object_dynamic_cast(OBJECT(dev), + "cfi.pflash01"); + + if (bs && qdev_prop_set_drive(dev, "drive", bs)) { + abort(); + } + qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); + qdev_prop_set_uint64(dev, "sector-length", sector_len); + qdev_prop_set_uint8(dev, "width", width); + qdev_prop_set_uint8(dev, "big-endian", !!be); + qdev_prop_set_uint16(dev, "id0", id0); + qdev_prop_set_uint16(dev, "id1", id1); + qdev_prop_set_uint16(dev, "id2", id2); + qdev_prop_set_uint16(dev, "id3", id3); + qdev_prop_set_string(dev, "name", name); + qdev_init_nofail(dev); + + sysbus_mmio_map(busdev, 0, base); return pfl; } diff --git a/hw/pflash_cfi02.c b/hw/pflash_cfi02.c index 9f94c06..f918e36 100644 --- a/hw/pflash_cfi02.c +++ b/hw/pflash_cfi02.c @@ -41,6 +41,7 @@ #include "block.h" #include "exec-memory.h" #include "host-utils.h" +#include "sysbus.h" //#define PFLASH_DEBUG #ifdef PFLASH_DEBUG @@ -55,19 +56,26 @@ do { \ #define PFLASH_LAZY_ROMD_THRESHOLD 42 struct pflash_t { + SysBusDevice busdev; BlockDriverState *bs; - hwaddr base; uint32_t sector_len; + uint32_t nb_blocs; uint32_t chip_len; - int mappings; - int width; + uint8_t mappings; + uint8_t width; + uint8_t be; int wcycle; /* if 0, the flash is read normally */ int bypass; int ro; uint8_t cmd; uint8_t status; - uint16_t ident[4]; - uint16_t unlock_addr[2]; + /* FIXME: implement array device properties */ + uint16_t ident0; + uint16_t ident1; + uint16_t ident2; + uint16_t ident3; + uint16_t unlock_addr0; + uint16_t unlock_addr1; uint8_t cfi_len; uint8_t cfi_table[0x52]; QEMUTimer *timer; @@ -80,6 +88,7 @@ struct pflash_t { MemoryRegion orig_mem; int rom_mode; int read_counter; /* used for lazy switch-back to rom mode */ + char *name; void *storage; }; @@ -190,16 +199,17 @@ static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, switch (boff) { case 0x00: case 0x01: - ret = pfl->ident[boff & 0x01]; + ret = boff & 0x01 ? pfl->ident1 : pfl->ident0; break; case 0x02: ret = 0x00; /* Pretend all sectors are unprotected */ break; case 0x0E: case 0x0F: - if (pfl->ident[2 + (boff & 0x01)] == (uint8_t)-1) + ret = boff & 0x01 ? pfl->ident3 : pfl->ident2; + if (ret == (uint8_t)-1) { goto flash_read; - ret = pfl->ident[2 + (boff & 0x01)]; + } break; default: goto flash_read; @@ -283,9 +293,9 @@ static void pflash_write (pflash_t *pfl, hwaddr offset, pfl->cmd = 0x98; return; } - if (boff != pfl->unlock_addr[0] || cmd != 0xAA) { + if (boff != pfl->unlock_addr0 || cmd != 0xAA) { DPRINTF("%s: unlock0 failed " TARGET_FMT_plx " %02x %04x\n", - __func__, boff, cmd, pfl->unlock_addr[0]); + __func__, boff, cmd, pfl->unlock_addr0); goto reset_flash; } DPRINTF("%s: unlock sequence started\n", __func__); @@ -293,7 +303,7 @@ static void pflash_write (pflash_t *pfl, hwaddr offset, case 1: /* We started an unlock sequence */ check_unlock1: - if (boff != pfl->unlock_addr[1] || cmd != 0x55) { + if (boff != pfl->unlock_addr1 || cmd != 0x55) { DPRINTF("%s: unlock1 failed " TARGET_FMT_plx " %02x\n", __func__, boff, cmd); goto reset_flash; @@ -302,7 +312,7 @@ static void pflash_write (pflash_t *pfl, hwaddr offset, break; case 2: /* We finished an unlock sequence */ - if (!pfl->bypass && boff != pfl->unlock_addr[0]) { + if (!pfl->bypass && boff != pfl->unlock_addr0) { DPRINTF("%s: command failed " TARGET_FMT_plx " %02x\n", __func__, boff, cmd); goto reset_flash; @@ -400,7 +410,7 @@ static void pflash_write (pflash_t *pfl, hwaddr offset, case 5: switch (cmd) { case 0x10: - if (boff != pfl->unlock_addr[0]) { + if (boff != pfl->unlock_addr0) { DPRINTF("%s: chip erase: invalid address " TARGET_FMT_plx "\n", __func__, offset); goto reset_flash; @@ -575,50 +585,38 @@ static const MemoryRegionOps pflash_cfi02_ops_le = { .endianness = DEVICE_NATIVE_ENDIAN, }; -pflash_t *pflash_cfi02_register(hwaddr base, - DeviceState *qdev, const char *name, - hwaddr size, - BlockDriverState *bs, uint32_t sector_len, - int nb_blocs, int nb_mappings, int width, - uint16_t id0, uint16_t id1, - uint16_t id2, uint16_t id3, - uint16_t unlock_addr0, uint16_t unlock_addr1, - int be) +static int pflash_cfi02_init(SysBusDevice *dev) { - pflash_t *pfl; - int32_t chip_len; + pflash_t *pfl = FROM_SYSBUS(typeof(*pfl), dev); + uint32_t chip_len; int ret; - chip_len = sector_len * nb_blocs; + chip_len = pfl->sector_len * pfl->nb_blocs; /* XXX: to be fixed */ #if 0 if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024)) return NULL; #endif - pfl = g_malloc0(sizeof(pflash_t)); - memory_region_init_rom_device( - &pfl->orig_mem, be ? &pflash_cfi02_ops_be : &pflash_cfi02_ops_le, pfl, - name, size); - vmstate_register_ram(&pfl->orig_mem, qdev); + + memory_region_init_rom_device(&pfl->orig_mem, pfl->be ? + &pflash_cfi02_ops_be : &pflash_cfi02_ops_le, + pfl, pfl->name, chip_len); + vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); - pfl->base = base; pfl->chip_len = chip_len; - pfl->mappings = nb_mappings; - pfl->bs = bs; if (pfl->bs) { /* read the initial flash content */ ret = bdrv_read(pfl->bs, 0, pfl->storage, chip_len >> 9); if (ret < 0) { g_free(pfl); - return NULL; + return 1; } - bdrv_attach_dev_nofail(pfl->bs, pfl); } pflash_setup_mappings(pfl); pfl->rom_mode = 1; - memory_region_add_subregion(get_system_memory(), pfl->base, &pfl->mem); + sysbus_init_mmio(dev, &pfl->mem); if (pfl->bs) { pfl->ro = bdrv_is_read_only(pfl->bs); @@ -627,17 +625,9 @@ pflash_t *pflash_cfi02_register(hwaddr base, } pfl->timer = qemu_new_timer_ns(vm_clock, pflash_timer, pfl); - pfl->sector_len = sector_len; - pfl->width = width; pfl->wcycle = 0; pfl->cmd = 0; pfl->status = 0; - pfl->ident[0] = id0; - pfl->ident[1] = id1; - pfl->ident[2] = id2; - pfl->ident[3] = id3; - pfl->unlock_addr[0] = unlock_addr0; - pfl->unlock_addr[1] = unlock_addr1; /* Hardcoded CFI table (mostly from SG29 Spansion flash) */ pfl->cfi_len = 0x52; /* Standard "QRY" string */ @@ -693,10 +683,10 @@ pflash_t *pflash_cfi02_register(hwaddr base, /* Number of erase block regions (uniform) */ pfl->cfi_table[0x2C] = 0x01; /* Erase block region 1 */ - pfl->cfi_table[0x2D] = nb_blocs - 1; - pfl->cfi_table[0x2E] = (nb_blocs - 1) >> 8; - pfl->cfi_table[0x2F] = sector_len >> 8; - pfl->cfi_table[0x30] = sector_len >> 16; + pfl->cfi_table[0x2D] = pfl->nb_blocs - 1; + pfl->cfi_table[0x2E] = (pfl->nb_blocs - 1) >> 8; + pfl->cfi_table[0x2F] = pfl->sector_len >> 8; + pfl->cfi_table[0x30] = pfl->sector_len >> 16; /* Extended */ pfl->cfi_table[0x31] = 'P'; @@ -716,5 +706,81 @@ pflash_t *pflash_cfi02_register(hwaddr base, pfl->cfi_table[0x3b] = 0x00; pfl->cfi_table[0x3c] = 0x00; + return 0; +} + +static Property pflash_cfi02_properties[] = { + DEFINE_PROP_DRIVE("drive", struct pflash_t, bs), + DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), + DEFINE_PROP_UINT32("sector-length", struct pflash_t, sector_len, 0), + DEFINE_PROP_UINT8("width", struct pflash_t, width, 0), + DEFINE_PROP_UINT8("mappings", struct pflash_t, mappings, 0), + DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0), + DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), + DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), + DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), + DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), + DEFINE_PROP_UINT16("unlock-addr0", struct pflash_t, unlock_addr0, 0), + DEFINE_PROP_UINT16("unlock-addr1", struct pflash_t, unlock_addr1, 0), + DEFINE_PROP_STRING("name", struct pflash_t, name), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi02_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + + k->init = pflash_cfi02_init; + dc->props = pflash_cfi02_properties; +} + +static const TypeInfo pflash_cfi02_info = { + .name = "cfi.pflash02", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(struct pflash_t), + .class_init = pflash_cfi02_class_init, +}; + +static void pflash_cfi02_register_types(void) +{ + type_register_static(&pflash_cfi02_info); +} + +type_init(pflash_cfi02_register_types) + +pflash_t *pflash_cfi02_register(hwaddr base, + DeviceState *qdev, const char *name, + hwaddr size, + BlockDriverState *bs, uint32_t sector_len, + int nb_blocs, int nb_mappings, int width, + uint16_t id0, uint16_t id1, + uint16_t id2, uint16_t id3, + uint16_t unlock_addr0, uint16_t unlock_addr1, + int be) +{ + DeviceState *dev = qdev_create(NULL, "cfi.pflash02"); + SysBusDevice *busdev = sysbus_from_qdev(dev); + pflash_t *pfl = (pflash_t *)object_dynamic_cast(OBJECT(dev), + "cfi.pflash02"); + + if (bs && qdev_prop_set_drive(dev, "drive", bs)) { + abort(); + } + qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); + qdev_prop_set_uint32(dev, "sector-length", sector_len); + qdev_prop_set_uint8(dev, "width", width); + qdev_prop_set_uint8(dev, "mappings", nb_mappings); + qdev_prop_set_uint8(dev, "big-endian", !!be); + qdev_prop_set_uint16(dev, "id0", id0); + qdev_prop_set_uint16(dev, "id1", id1); + qdev_prop_set_uint16(dev, "id2", id2); + qdev_prop_set_uint16(dev, "id3", id3); + qdev_prop_set_uint16(dev, "unlock-addr0", unlock_addr0); + qdev_prop_set_uint16(dev, "unlock-addr1", unlock_addr1); + qdev_prop_set_string(dev, "name", name); + qdev_init_nofail(dev); + + sysbus_mmio_map(busdev, 0, base); return pfl; } @@ -95,7 +95,8 @@ static uint64_t pl050_read(void *opaque, hwaddr offset, case 4: /* KMIIR */ return s->pending | 2; default: - hw_error("pl050_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl050_read: Bad offset %x\n", (int)offset); return 0; } } @@ -123,7 +124,8 @@ static void pl050_write(void *opaque, hwaddr offset, s->clk = value; return; default: - hw_error("pl050_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl050_write: Bad offset %x\n", (int)offset); } } static const MemoryRegionOps pl050_ops = { @@ -164,7 +164,8 @@ static uint64_t pl061_read(void *opaque, hwaddr offset, case 0x528: /* Analog mode select */ return s->amsel; default: - hw_error("pl061_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_read: Bad offset %x\n", (int)offset); return 0; } } @@ -239,7 +240,8 @@ static void pl061_write(void *opaque, hwaddr offset, s->amsel = value & 0xff; break; default: - hw_error("pl061_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_write: Bad offset %x\n", (int)offset); } pl061_update(s); } @@ -281,7 +281,8 @@ static uint64_t pl080_read(void *opaque, hwaddr offset, return s->sync; default: bad_offset: - hw_error("pl080_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl080_read: Bad offset %x\n", (int)offset); return 0; } } @@ -327,12 +328,13 @@ static void pl080_write(void *opaque, hwaddr offset, case 10: /* SoftLBReq */ case 11: /* SoftLSReq */ /* ??? Implement these. */ - hw_error("pl080_write: Soft DMA not implemented\n"); + qemu_log_mask(LOG_UNIMP, "pl080_write: Soft DMA not implemented\n"); break; case 12: /* Configuration */ s->conf = value; if (s->conf & (PL080_CONF_M1 | PL080_CONF_M1)) { - hw_error("pl080_write: Big-endian DMA not implemented\n"); + qemu_log_mask(LOG_UNIMP, + "pl080_write: Big-endian DMA not implemented\n"); } pl080_run(s); break; @@ -341,7 +343,8 @@ static void pl080_write(void *opaque, hwaddr offset, break; default: bad_offset: - hw_error("pl080_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl080_write: Bad offset %x\n", (int)offset); } pl080_update(s); } @@ -349,7 +349,8 @@ static uint64_t pl110_read(void *opaque, hwaddr offset, case 12: /* LCDLPCURR */ return s->lpbase; default: - hw_error("pl110_read: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl110_read: Bad offset %x\n", (int)offset); return 0; } } @@ -417,7 +418,8 @@ static void pl110_write(void *opaque, hwaddr offset, pl110_update(s); break; default: - hw_error("pl110_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "pl110_write: Bad offset %x\n", (int)offset); } } @@ -199,7 +199,7 @@ static void pl190_write(void *opaque, hwaddr offset, break; case 0xc0: /* ITCR */ if (val) { - hw_error("pl190: Test mode not implemented\n"); + qemu_log_mask(LOG_UNIMP, "pl190: Test mode not implemented\n"); } break; default: @@ -75,9 +75,10 @@ void ppc_set_irq(CPUPPCState *env, int n_IRQ, int level) } /* PowerPC 6xx / 7xx internal IRQ controller */ -static void ppc6xx_set_irq (void *opaque, int pin, int level) +static void ppc6xx_set_irq(void *opaque, int pin, int level) { - CPUPPCState *env = opaque; + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; int cur_level; LOG_IRQ("%s: env %p pin %d level %d\n", __func__, @@ -151,17 +152,20 @@ static void ppc6xx_set_irq (void *opaque, int pin, int level) } } -void ppc6xx_irq_init (CPUPPCState *env) +void ppc6xx_irq_init(CPUPPCState *env) { - env->irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, env, + PowerPCCPU *cpu = ppc_env_get_cpu(env); + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, cpu, PPC6xx_INPUT_NB); } #if defined(TARGET_PPC64) /* PowerPC 970 internal IRQ controller */ -static void ppc970_set_irq (void *opaque, int pin, int level) +static void ppc970_set_irq(void *opaque, int pin, int level) { - CPUPPCState *env = opaque; + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; int cur_level; LOG_IRQ("%s: env %p pin %d level %d\n", __func__, @@ -202,7 +206,7 @@ static void ppc970_set_irq (void *opaque, int pin, int level) } else { LOG_IRQ("%s: restart the CPU\n", __func__); env->halted = 0; - qemu_cpu_kick(env); + qemu_cpu_kick(CPU(cpu)); } break; case PPC970_INPUT_HRESET: @@ -233,16 +237,19 @@ static void ppc970_set_irq (void *opaque, int pin, int level) } } -void ppc970_irq_init (CPUPPCState *env) +void ppc970_irq_init(CPUPPCState *env) { - env->irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, env, + PowerPCCPU *cpu = ppc_env_get_cpu(env); + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, cpu, PPC970_INPUT_NB); } /* POWER7 internal IRQ controller */ -static void power7_set_irq (void *opaque, int pin, int level) +static void power7_set_irq(void *opaque, int pin, int level) { - CPUPPCState *env = opaque; + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; LOG_IRQ("%s: env %p pin %d level %d\n", __func__, env, pin, level); @@ -266,17 +273,20 @@ static void power7_set_irq (void *opaque, int pin, int level) } } -void ppcPOWER7_irq_init (CPUPPCState *env) +void ppcPOWER7_irq_init(CPUPPCState *env) { - env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, env, + PowerPCCPU *cpu = ppc_env_get_cpu(env); + + env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu, POWER7_INPUT_NB); } #endif /* defined(TARGET_PPC64) */ /* PowerPC 40x internal IRQ controller */ -static void ppc40x_set_irq (void *opaque, int pin, int level) +static void ppc40x_set_irq(void *opaque, int pin, int level) { - CPUPPCState *env = opaque; + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; int cur_level; LOG_IRQ("%s: env %p pin %d level %d\n", __func__, @@ -325,7 +335,7 @@ static void ppc40x_set_irq (void *opaque, int pin, int level) } else { LOG_IRQ("%s: restart the CPU\n", __func__); env->halted = 0; - qemu_cpu_kick(env); + qemu_cpu_kick(CPU(cpu)); } break; case PPC40x_INPUT_DEBUG: @@ -346,16 +356,19 @@ static void ppc40x_set_irq (void *opaque, int pin, int level) } } -void ppc40x_irq_init (CPUPPCState *env) +void ppc40x_irq_init(CPUPPCState *env) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc40x_set_irq, - env, PPC40x_INPUT_NB); + cpu, PPC40x_INPUT_NB); } /* PowerPC E500 internal IRQ controller */ -static void ppce500_set_irq (void *opaque, int pin, int level) +static void ppce500_set_irq(void *opaque, int pin, int level) { - CPUPPCState *env = opaque; + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; int cur_level; LOG_IRQ("%s: env %p pin %d level %d\n", __func__, @@ -407,10 +420,12 @@ static void ppce500_set_irq (void *opaque, int pin, int level) } } -void ppce500_irq_init (CPUPPCState *env) +void ppce500_irq_init(CPUPPCState *env) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); + env->irq_inputs = (void **)qemu_allocate_irqs(&ppce500_set_irq, - env, PPCE500_INPUT_NB); + cpu, PPCE500_INPUT_NB); } /*****************************************************************************/ /* PowerPC time base and decrementer emulation */ diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c index 55aa9dc..c1a155b 100644 --- a/hw/ppce500_spin.c +++ b/hw/ppce500_spin.c @@ -49,7 +49,7 @@ typedef struct spin_state { } SpinState; typedef struct spin_kick { - CPUPPCState *env; + PowerPCCPU *cpu; SpinInfo *spin; } SpinKick; @@ -92,7 +92,8 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env, static void spin_kick(void *data) { SpinKick *kick = data; - CPUPPCState *env = kick->env; + CPUState *cpu = CPU(kick->cpu); + CPUPPCState *env = &kick->cpu->env; SpinInfo *curspin = kick->spin; hwaddr map_size = 64 * 1024 * 1024; hwaddr map_start; @@ -113,8 +114,8 @@ static void spin_kick(void *data) env->halted = 0; env->exception_index = -1; - env->stopped = 0; - qemu_cpu_kick(env); + cpu->stopped = false; + qemu_cpu_kick(cpu); } static void spin_write(void *opaque, hwaddr addr, uint64_t value, @@ -158,11 +159,11 @@ static void spin_write(void *opaque, hwaddr addr, uint64_t value, if (!(ldq_p(&curspin->addr) & 1)) { /* run CPU */ SpinKick kick = { - .env = env, + .cpu = ppc_env_get_cpu(env), .spin = curspin, }; - run_on_cpu(env, spin_kick, &kick); + run_on_cpu(CPU(kick.cpu), spin_kick, &kick); } } diff --git a/hw/realview.c b/hw/realview.c index b5cb08c..e789c15 100644 --- a/hw/realview.c +++ b/hw/realview.c @@ -44,11 +44,8 @@ static const int realview_board_id[] = { 0x76d }; -static void realview_init(ram_addr_t ram_size, - const char *boot_device, - const char *kernel_filename, const char *kernel_cmdline, - const char *initrd_filename, const char *cpu_model, - enum realview_board_type board_type) +static void realview_init(QEMUMachineInitArgs *args, + enum realview_board_type board_type) { ARMCPU *cpu = NULL; CPUARMState *env; @@ -73,6 +70,7 @@ static void realview_init(ram_addr_t ram_size, uint32_t proc_id = 0; uint32_t sys_id; ram_addr_t low_ram_size; + ram_addr_t ram_size = args->ram_size; switch (board_type) { case BOARD_EB: @@ -89,7 +87,7 @@ static void realview_init(ram_addr_t ram_size, break; } for (n = 0; n < smp_cpus; n++) { - cpu = cpu_arm_init(cpu_model); + cpu = cpu_arm_init(args->cpu_model); if (!cpu) { fprintf(stderr, "Unable to find CPU definition\n"); exit(1); @@ -321,9 +319,9 @@ static void realview_init(ram_addr_t ram_size, memory_region_add_subregion(sysmem, SMP_BOOT_ADDR, ram_hack); realview_binfo.ram_size = ram_size; - realview_binfo.kernel_filename = kernel_filename; - realview_binfo.kernel_cmdline = kernel_cmdline; - realview_binfo.initrd_filename = initrd_filename; + realview_binfo.kernel_filename = args->kernel_filename; + realview_binfo.kernel_cmdline = args->kernel_cmdline; + realview_binfo.initrd_filename = args->initrd_filename; realview_binfo.nb_cpus = smp_cpus; realview_binfo.board_id = realview_board_id[board_type]; realview_binfo.loader_start = (board_type == BOARD_PB_A8 ? 0x70000000 : 0); @@ -332,62 +330,34 @@ static void realview_init(ram_addr_t ram_size, static void realview_eb_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - if (!cpu_model) { - cpu_model = "arm926"; + if (!args->cpu_model) { + args->cpu_model = "arm926"; } - realview_init(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, BOARD_EB); + realview_init(args, BOARD_EB); } static void realview_eb_mpcore_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - if (!cpu_model) { - cpu_model = "arm11mpcore"; + if (!args->cpu_model) { + args->cpu_model = "arm11mpcore"; } - realview_init(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, BOARD_EB_MPCORE); + realview_init(args, BOARD_EB_MPCORE); } static void realview_pb_a8_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - if (!cpu_model) { - cpu_model = "cortex-a8"; + if (!args->cpu_model) { + args->cpu_model = "cortex-a8"; } - realview_init(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, BOARD_PB_A8); + realview_init(args, BOARD_PB_A8); } static void realview_pbx_a9_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - if (!cpu_model) { - cpu_model = "cortex-a9"; + if (!args->cpu_model) { + args->cpu_model = "cortex-a9"; } - realview_init(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, BOARD_PBX_A9); + realview_init(args, BOARD_PBX_A9); } static QEMUMachine realview_eb_machine = { @@ -55,24 +55,28 @@ typedef enum { sd_illegal = -2, } sd_rsp_type_t; +enum SDCardModes { + sd_inactive, + sd_card_identification_mode, + sd_data_transfer_mode, +}; + +enum SDCardStates { + sd_inactive_state = -1, + sd_idle_state = 0, + sd_ready_state, + sd_identification_state, + sd_standby_state, + sd_transfer_state, + sd_sendingdata_state, + sd_receivingdata_state, + sd_programming_state, + sd_disconnect_state, +}; + struct SDState { - enum { - sd_inactive, - sd_card_identification_mode, - sd_data_transfer_mode, - } mode; - enum { - sd_inactive_state = -1, - sd_idle_state = 0, - sd_ready_state, - sd_identification_state, - sd_standby_state, - sd_transfer_state, - sd_sendingdata_state, - sd_receivingdata_state, - sd_programming_state, - sd_disconnect_state, - } state; + uint32_t mode; /* current card mode, one of SDCardModes */ + int32_t state; /* current card state, one of SDCardStates */ uint32_t ocr; uint8_t scr[8]; uint8_t cid[16]; @@ -83,21 +87,22 @@ struct SDState { uint32_t vhs; bool wp_switch; unsigned long *wp_groups; + int32_t wpgrps_size; uint64_t size; - int blk_len; + uint32_t blk_len; uint32_t erase_start; uint32_t erase_end; uint8_t pwd[16]; - int pwd_len; - int function_group[6]; + uint32_t pwd_len; + uint8_t function_group[6]; bool spi; - int current_cmd; + uint8_t current_cmd; /* True if we will handle the next command as an ACMD. Note that this does * *not* track the APP_CMD status bit! */ bool expecting_acmd; - int blk_written; + uint32_t blk_written; uint64_t data_start; uint32_t data_offset; uint8_t data[512]; @@ -421,8 +426,9 @@ static void sd_reset(SDState *sd, BlockDriverState *bdrv) if (sd->wp_groups) g_free(sd->wp_groups); sd->wp_switch = bdrv ? bdrv_is_read_only(bdrv) : false; - sd->wp_groups = bitmap_new(sect); - memset(sd->function_group, 0, sizeof(int) * 6); + sd->wpgrps_size = sect; + sd->wp_groups = bitmap_new(sd->wpgrps_size); + memset(sd->function_group, 0, sizeof(sd->function_group)); sd->erase_start = 0; sd->erase_end = 0; sd->size = size; @@ -446,6 +452,38 @@ static const BlockDevOps sd_block_ops = { .change_media_cb = sd_cardchange, }; +static const VMStateDescription sd_vmstate = { + .name = "sd-card", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(mode, SDState), + VMSTATE_INT32(state, SDState), + VMSTATE_UINT8_ARRAY(cid, SDState, 16), + VMSTATE_UINT8_ARRAY(csd, SDState, 16), + VMSTATE_UINT16(rca, SDState), + VMSTATE_UINT32(card_status, SDState), + VMSTATE_PARTIAL_BUFFER(sd_status, SDState, 1), + VMSTATE_UINT32(vhs, SDState), + VMSTATE_BITMAP(wp_groups, SDState, 0, wpgrps_size), + VMSTATE_UINT32(blk_len, SDState), + VMSTATE_UINT32(erase_start, SDState), + VMSTATE_UINT32(erase_end, SDState), + VMSTATE_UINT8_ARRAY(pwd, SDState, 16), + VMSTATE_UINT32(pwd_len, SDState), + VMSTATE_UINT8_ARRAY(function_group, SDState, 6), + VMSTATE_UINT8(current_cmd, SDState), + VMSTATE_BOOL(expecting_acmd, SDState), + VMSTATE_UINT32(blk_written, SDState), + VMSTATE_UINT64(data_start, SDState), + VMSTATE_UINT32(data_offset, SDState), + VMSTATE_UINT8_ARRAY(data, SDState, 512), + VMSTATE_BUFFER_UNSAFE(buf, SDState, 1, 512), + VMSTATE_BOOL(enable, SDState), + VMSTATE_END_OF_LIST() + } +}; + /* We do not model the chip select pin, so allow the board to select whether card should be in SSI or MMC/SD mode. It is also up to the board to ensure that ssi transfers only occur when the chip select @@ -463,6 +501,7 @@ SDState *sd_init(BlockDriverState *bs, bool is_spi) bdrv_attach_dev_nofail(sd->bdrv, sd); bdrv_set_dev_ops(sd->bdrv, &sd_block_ops, sd); } + vmstate_register(NULL, -1, &sd_vmstate, sd); return sd; } @@ -476,19 +515,28 @@ void sd_set_cb(SDState *sd, qemu_irq readonly, qemu_irq insert) static void sd_erase(SDState *sd) { - int i, start, end; + int i; + uint64_t erase_start = sd->erase_start; + uint64_t erase_end = sd->erase_end; + if (!sd->erase_start || !sd->erase_end) { sd->card_status |= ERASE_SEQ_ERROR; return; } - start = sd_addr_to_wpnum(sd->erase_start); - end = sd_addr_to_wpnum(sd->erase_end); + if (extract32(sd->ocr, OCR_CCS_BITN, 1)) { + /* High capacity memory card: erase units are 512 byte blocks */ + erase_start *= 512; + erase_end *= 512; + } + + erase_start = sd_addr_to_wpnum(erase_start); + erase_end = sd_addr_to_wpnum(erase_end); sd->erase_start = 0; sd->erase_end = 0; sd->csd[14] |= 0x40; - for (i = start; i <= end; i++) { + for (i = erase_start; i <= erase_end; i++) { if (test_bit(i, sd->wp_groups)) { sd->card_status |= WP_ERASE_SKIP; } @@ -567,7 +615,7 @@ static void sd_lock_command(SDState *sd) sd->card_status |= LOCK_UNLOCK_FAILED; return; } - bitmap_zero(sd->wp_groups, sd_addr_to_wpnum(sd->size) + 1); + bitmap_zero(sd->wp_groups, sd->wpgrps_size); sd->csd[14] &= ~0x10; sd->card_status &= ~CARD_IS_LOCKED; sd->pwd_len = 0; @@ -50,6 +50,7 @@ #define READY_FOR_DATA (1 << 8) #define APP_CMD (1 << 5) #define AKE_SEQ_ERROR (1 << 3) +#define OCR_CCS_BITN 30 typedef enum { sd_none = -1, @@ -576,13 +576,15 @@ static uint64_t translate_kernel_address(void *opaque, uint64_t addr) return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; } -static void emulate_spapr_hypercall(CPUPPCState *env) +static void emulate_spapr_hypercall(PowerPCCPU *cpu) { + CPUPPCState *env = &cpu->env; + if (msr_pr) { hcall_dprintf("Hypercall made with MSR[PR]=1\n"); env->gpr[3] = H_PRIVILEGE; } else { - env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]); + env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]); } } @@ -286,12 +286,12 @@ extern sPAPREnvironment *spapr; do { } while (0) #endif -typedef target_ulong (*spapr_hcall_fn)(CPUPPCState *env, sPAPREnvironment *spapr, +typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args); void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); -target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode, +target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); int spapr_allocate_irq(int hint, bool lsi); diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index 621dabd..63cadb8 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -75,9 +75,10 @@ static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r, return rb; } -static target_ulong h_enter(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; target_ulong pteh = args[2]; @@ -192,9 +193,10 @@ static target_ulong remove_hpte(CPUPPCState *env, target_ulong ptex, return REMOVE_SUCCESS; } -static target_ulong h_remove(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; target_ulong avpn = args[2]; @@ -238,9 +240,10 @@ static target_ulong h_remove(CPUPPCState *env, sPAPREnvironment *spapr, #define H_BULK_REMOVE_MAX_BATCH 4 -static target_ulong h_bulk_remove(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; int i; for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { @@ -284,9 +287,10 @@ static target_ulong h_bulk_remove(CPUPPCState *env, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; target_ulong avpn = args[2]; @@ -321,7 +325,7 @@ static target_ulong h_protect(CPUPPCState *env, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_set_dabr(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { /* FIXME: actually implement this */ @@ -457,7 +461,7 @@ static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) return H_SUCCESS; } -static target_ulong h_register_vpa(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong flags = args[0]; @@ -505,12 +509,14 @@ static target_ulong h_register_vpa(CPUPPCState *env, sPAPREnvironment *spapr, return ret; } -static target_ulong h_cede(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; + env->msr |= (1ULL << MSR_EE); hreg_compute_hflags(env); - if (!cpu_has_work(env)) { + if (!cpu_has_work(CPU(cpu))) { env->halted = 1; env->exception_index = EXCP_HLT; env->exit_request = 1; @@ -518,7 +524,7 @@ static target_ulong h_cede(CPUPPCState *env, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_rtas(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong rtas_r3 = args[0]; @@ -530,7 +536,7 @@ static target_ulong h_rtas(CPUPPCState *env, sPAPREnvironment *spapr, nret, rtas_r3 + 12 + 4*nargs); } -static target_ulong h_logical_load(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong size = args[0]; @@ -553,7 +559,7 @@ static target_ulong h_logical_load(CPUPPCState *env, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_store(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong size = args[0]; @@ -577,7 +583,7 @@ static target_ulong h_logical_store(CPUPPCState *env, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_memop(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong dst = args[0]; /* Destination address */ @@ -644,14 +650,14 @@ static target_ulong h_logical_memop(CPUPPCState *env, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_logical_icbi(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ return H_SUCCESS; } -static target_ulong h_logical_dcbf(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ @@ -679,7 +685,7 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) *slot = fn; } -target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode, +target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { if ((opcode <= MAX_HCALL_OPCODE) @@ -687,14 +693,14 @@ target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode, spapr_hcall_fn fn = papr_hypercall_table[opcode / 4]; if (fn) { - return fn(env, spapr, opcode, args); + return fn(cpu, spapr, opcode, args); } } else if ((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX)) { spapr_hcall_fn fn = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE]; if (fn) { - return fn(env, spapr, opcode, args); + return fn(cpu, spapr, opcode, args); } } diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c index 86dc8f9..02d78cc 100644 --- a/hw/spapr_iommu.c +++ b/hw/spapr_iommu.c @@ -204,7 +204,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, return H_SUCCESS; } -static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; diff --git a/hw/spapr_llan.c b/hw/spapr_llan.c index bd3f131..09ad69f 100644 --- a/hw/spapr_llan.c +++ b/hw/spapr_llan.c @@ -264,7 +264,7 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd, return 0; } -static target_ulong h_register_logical_lan(CPUPPCState *env, +static target_ulong h_register_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) @@ -328,7 +328,7 @@ static target_ulong h_register_logical_lan(CPUPPCState *env, } -static target_ulong h_free_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -349,7 +349,7 @@ static target_ulong h_free_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr return H_SUCCESS; } -static target_ulong h_add_logical_lan_buffer(CPUPPCState *env, +static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) @@ -398,7 +398,7 @@ static target_ulong h_add_logical_lan_buffer(CPUPPCState *env, return H_SUCCESS; } -static target_ulong h_send_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -467,7 +467,7 @@ static target_ulong h_send_logical_lan(CPUPPCState *env, sPAPREnvironment *spapr return H_SUCCESS; } -static target_ulong h_multicast_ctrl(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index c2c3079..a08ed11 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -439,6 +439,43 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, int level) qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level); } +static uint64_t spapr_io_read(void *opaque, hwaddr addr, + unsigned size) +{ + switch (size) { + case 1: + return cpu_inb(addr); + case 2: + return cpu_inw(addr); + case 4: + return cpu_inl(addr); + } + assert(0); +} + +static void spapr_io_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + switch (size) { + case 1: + cpu_outb(addr, data); + return; + case 2: + cpu_outw(addr, data); + return; + case 4: + cpu_outl(addr, data); + return; + } + assert(0); +} + +static const MemoryRegionOps spapr_io_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + .read = spapr_io_read, + .write = spapr_io_write +}; + /* * MSI/MSIX memory region implementation. * The handler handles both MSI and MSIX. @@ -508,9 +545,14 @@ static int spapr_phb_init(SysBusDevice *s) * old_portion are updated */ sprintf(namebuf, "%s.io", sphb->dtbusname); memory_region_init(&sphb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE); + /* FIXME: fix to support multiple PHBs */ + memory_region_add_subregion(get_system_io(), 0, &sphb->iospace); + sprintf(namebuf, "%s.io-alias", sphb->dtbusname); + memory_region_init_io(&sphb->iowindow, &spapr_io_ops, sphb, + namebuf, SPAPR_PCI_IO_WIN_SIZE); memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, - &sphb->iospace); + &sphb->iowindow); /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, * we need to allocate some memory to catch those writes coming diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h index a77d7d5..e307ac8 100644 --- a/hw/spapr_pci.h +++ b/hw/spapr_pci.h @@ -44,7 +44,7 @@ typedef struct sPAPRPHBState { MemoryRegion memspace, iospace; hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; hwaddr msi_win_addr; - MemoryRegion memwindow, msiwindow; + MemoryRegion memwindow, iowindow, msiwindow; uint32_t dma_liobn; uint64_t dma_window_start; diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index ce76c58..6d5c48a 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -163,6 +163,7 @@ static void rtas_start_cpu(sPAPREnvironment *spapr, uint32_t nret, target_ulong rets) { target_ulong id, start, r3; + CPUState *cpu; CPUPPCState *env; if (nargs != 3 || nret != 1) { @@ -175,6 +176,8 @@ static void rtas_start_cpu(sPAPREnvironment *spapr, r3 = rtas_ld(args, 2); for (env = first_cpu; env; env = env->next_cpu) { + cpu = ENV_GET_CPU(env); + if (env->cpu_index != id) { continue; } @@ -194,7 +197,7 @@ static void rtas_start_cpu(sPAPREnvironment *spapr, env->gpr[3] = r3; env->halted = 0; - qemu_cpu_kick(env); + qemu_cpu_kick(cpu); rtas_st(rets, 0, 0); return; diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c index 848806d..1f19fed 100644 --- a/hw/spapr_vio.c +++ b/hw/spapr_vio.c @@ -161,7 +161,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev, /* * CRQ handling */ -static target_ulong h_reg_crq(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -219,7 +219,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev) return H_SUCCESS; } -static target_ulong h_free_crq(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -233,7 +233,7 @@ static target_ulong h_free_crq(CPUPPCState *env, sPAPREnvironment *spapr, return free_crq(dev); } -static target_ulong h_send_crq(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -256,7 +256,7 @@ static target_ulong h_send_crq(CPUPPCState *env, sPAPREnvironment *spapr, return H_HARDWARE; } -static target_ulong h_enable_crq(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -463,7 +463,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev) return pc->init(dev); } -static target_ulong h_vio_signal(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c index 5da17a3..14f862f 100644 --- a/hw/spapr_vty.c +++ b/hw/spapr_vty.c @@ -70,7 +70,7 @@ static int spapr_vty_init(VIOsPAPRDevice *sdev) } /* Forward declaration */ -static target_ulong h_put_term_char(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -97,7 +97,7 @@ static target_ulong h_put_term_char(CPUPPCState *env, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_get_term_char(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -879,15 +879,14 @@ static struct arm_boot_info spitz_binfo = { .ram_size = 0x04000000, }; -static void spitz_common_init(ram_addr_t ram_size, - const char *kernel_filename, - const char *kernel_cmdline, const char *initrd_filename, - const char *cpu_model, enum spitz_model_e model, int arm_id) +static void spitz_common_init(QEMUMachineInitArgs *args, + enum spitz_model_e model, int arm_id) { PXA2xxState *mpu; DeviceState *scp0, *scp1 = NULL; MemoryRegion *address_space_mem = get_system_memory(); MemoryRegion *rom = g_new(MemoryRegion, 1); + const char *cpu_model = args->cpu_model; if (!cpu_model) cpu_model = (model == terrier) ? "pxa270-c5" : "pxa270-c0"; @@ -928,9 +927,9 @@ static void spitz_common_init(ram_addr_t ram_size, /* A 4.0 GB microdrive is permanently sitting in CF slot 0. */ spitz_microdrive_attach(mpu, 0); - spitz_binfo.kernel_filename = kernel_filename; - spitz_binfo.kernel_cmdline = kernel_cmdline; - spitz_binfo.initrd_filename = initrd_filename; + spitz_binfo.kernel_filename = args->kernel_filename; + spitz_binfo.kernel_cmdline = args->kernel_cmdline; + spitz_binfo.initrd_filename = args->initrd_filename; spitz_binfo.board_id = arm_id; arm_load_kernel(mpu->cpu, &spitz_binfo); sl_bootparam_write(SL_PXA_PARAM_BASE); @@ -938,46 +937,22 @@ static void spitz_common_init(ram_addr_t ram_size, static void spitz_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - spitz_common_init(ram_size, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, spitz, 0x2c9); + spitz_common_init(args, spitz, 0x2c9); } static void borzoi_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - spitz_common_init(ram_size, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, borzoi, 0x33f); + spitz_common_init(args, borzoi, 0x33f); } static void akita_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - spitz_common_init(ram_size, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, akita, 0x2e8); + spitz_common_init(args, akita, 0x2e8); } static void terrier_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - spitz_common_init(ram_size, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model, terrier, 0x33f); + spitz_common_init(args, terrier, 0x33f); } static QEMUMachine akitapda_machine = { @@ -259,7 +259,7 @@ static void cpu_kick_irq(SPARCCPU *cpu) env->halted = 0; cpu_check_irqs(env); - qemu_cpu_kick(env); + qemu_cpu_kick(CPU(cpu)); } static void cpu_set_irq(void *opaque, int irq, int level) @@ -317,7 +317,7 @@ static void cpu_kick_irq(SPARCCPU *cpu) env->halted = 0; cpu_check_irqs(env); - qemu_cpu_kick(env); + qemu_cpu_kick(CPU(cpu)); } static void cpu_set_ivec_irq(void *opaque, int irq, int level) diff --git a/hw/versatile_i2c.c b/hw/versatile_i2c.c index 44e7e40..ad71e9d 100644 --- a/hw/versatile_i2c.c +++ b/hw/versatile_i2c.c @@ -40,7 +40,8 @@ static uint64_t versatile_i2c_read(void *opaque, hwaddr offset, if (offset == 0) { return (s->out & 1) | (s->in << 1); } else { - hw_error("%s: Bad offset 0x%x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%x\n", __func__, (int)offset); return -1; } } @@ -58,7 +59,8 @@ static void versatile_i2c_write(void *opaque, hwaddr offset, s->out &= ~value; break; default: - hw_error("%s: Bad offset 0x%x\n", __func__, (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%x\n", __func__, (int)offset); } bitbang_i2c_set(s->bitbang, BITBANG_I2C_SCL, (s->out & 1) != 0); s->in = bitbang_i2c_set(s->bitbang, BITBANG_I2C_SDA, (s->out & 2) != 0); diff --git a/hw/versatilepb.c b/hw/versatilepb.c index e85f982..25e652b 100644 --- a/hw/versatilepb.c +++ b/hw/versatilepb.c @@ -167,11 +167,7 @@ static int vpb_sic_init(SysBusDevice *dev) static struct arm_boot_info versatile_binfo; -static void versatile_init(ram_addr_t ram_size, - const char *boot_device, - const char *kernel_filename, const char *kernel_cmdline, - const char *initrd_filename, const char *cpu_model, - int board_id) +static void versatile_init(QEMUMachineInitArgs *args, int board_id) { ARMCPU *cpu; MemoryRegion *sysmem = get_system_memory(); @@ -189,15 +185,15 @@ static void versatile_init(ram_addr_t ram_size, int done_smc = 0; DriveInfo *dinfo; - if (!cpu_model) { - cpu_model = "arm926"; + if (!args->cpu_model) { + args->cpu_model = "arm926"; } - cpu = cpu_arm_init(cpu_model); + cpu = cpu_arm_init(args->cpu_model); if (!cpu) { fprintf(stderr, "Unable to find CPU definition\n"); exit(1); } - memory_region_init_ram(ram, "versatile.ram", ram_size); + memory_region_init_ram(ram, "versatile.ram", args->ram_size); vmstate_register_ram_global(ram); /* ??? RAM should repeat to fill physical memory space. */ /* SDRAM at address zero. */ @@ -340,40 +336,22 @@ static void versatile_init(ram_addr_t ram_size, fprintf(stderr, "qemu: Error registering flash memory.\n"); } - versatile_binfo.ram_size = ram_size; - versatile_binfo.kernel_filename = kernel_filename; - versatile_binfo.kernel_cmdline = kernel_cmdline; - versatile_binfo.initrd_filename = initrd_filename; + versatile_binfo.ram_size = args->ram_size; + versatile_binfo.kernel_filename = args->kernel_filename; + versatile_binfo.kernel_cmdline = args->kernel_cmdline; + versatile_binfo.initrd_filename = args->initrd_filename; versatile_binfo.board_id = board_id; arm_load_kernel(cpu, &versatile_binfo); } static void vpb_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - versatile_init(ram_size, - boot_device, - kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, 0x183); + versatile_init(args, 0x183); } static void vab_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - versatile_init(ram_size, - boot_device, - kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, 0x25e); + versatile_init(args, 0x25e); } static QEMUMachine versatilepb_machine = { diff --git a/hw/vexpress.c b/hw/vexpress.c index 3f7cb66..d93f057 100644 --- a/hw/vexpress.c +++ b/hw/vexpress.c @@ -348,12 +348,7 @@ static const VEDBoardInfo a15_daughterboard = { }; static void vexpress_common_init(const VEDBoardInfo *daughterboard, - ram_addr_t ram_size, - const char *boot_device, - const char *kernel_filename, - const char *kernel_cmdline, - const char *initrd_filename, - const char *cpu_model) + QEMUMachineInitArgs *args) { DeviceState *dev, *sysctl, *pl041; qemu_irq pic[64]; @@ -366,7 +361,8 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, MemoryRegion *sram = g_new(MemoryRegion, 1); const hwaddr *map = daughterboard->motherboard_map; - daughterboard->init(daughterboard, ram_size, cpu_model, pic, &proc_id); + daughterboard->init(daughterboard, args->ram_size, args->cpu_model, + pic, &proc_id); /* Motherboard peripherals: the wiring is the same but the * addresses vary between the legacy and A-Series memory maps. @@ -454,10 +450,10 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, /* VE_DAPROM: not modelled */ - vexpress_binfo.ram_size = ram_size; - vexpress_binfo.kernel_filename = kernel_filename; - vexpress_binfo.kernel_cmdline = kernel_cmdline; - vexpress_binfo.initrd_filename = initrd_filename; + vexpress_binfo.ram_size = args->ram_size; + vexpress_binfo.kernel_filename = args->kernel_filename; + vexpress_binfo.kernel_cmdline = args->kernel_cmdline; + vexpress_binfo.initrd_filename = args->initrd_filename; vexpress_binfo.nb_cpus = smp_cpus; vexpress_binfo.board_id = VEXPRESS_BOARD_ID; vexpress_binfo.loader_start = daughterboard->loader_start; @@ -469,28 +465,12 @@ static void vexpress_common_init(const VEDBoardInfo *daughterboard, static void vexpress_a9_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - vexpress_common_init(&a9_daughterboard, - ram_size, boot_device, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model); + vexpress_common_init(&a9_daughterboard, args); } static void vexpress_a15_init(QEMUMachineInitArgs *args) { - ram_addr_t ram_size = args->ram_size; - const char *cpu_model = args->cpu_model; - const char *kernel_filename = args->kernel_filename; - const char *kernel_cmdline = args->kernel_cmdline; - const char *initrd_filename = args->initrd_filename; - const char *boot_device = args->boot_device; - vexpress_common_init(&a15_daughterboard, - ram_size, boot_device, kernel_filename, - kernel_cmdline, initrd_filename, cpu_model); + vexpress_common_init(&a15_daughterboard, args); } static QEMUMachine vexpress_a9_machine = { @@ -108,13 +108,13 @@ static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr) } } -static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr) +static void icp_set_mfrr(struct icp_state *icp, int server, uint8_t mfrr) { - struct icp_server_state *ss = icp->ss + nr; + struct icp_server_state *ss = icp->ss + server; ss->mfrr = mfrr; if (mfrr < CPPR(ss)) { - icp_check_ipi(icp, nr); + icp_check_ipi(icp, server); } } @@ -326,8 +326,7 @@ static void ics_eoi(struct ics_state *ics, int nr) qemu_irq xics_get_qirq(struct icp_state *icp, int irq) { - if ((irq < icp->ics->offset) - || (irq >= (icp->ics->offset + icp->ics->nr_irqs))) { + if (!ics_valid_irq(icp->ics, irq)) { return NULL; } @@ -336,22 +335,22 @@ qemu_irq xics_get_qirq(struct icp_state *icp, int irq) void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi) { - assert((irq >= icp->ics->offset) - && (irq < (icp->ics->offset + icp->ics->nr_irqs))); + assert(ics_valid_irq(icp->ics, irq)); icp->ics->irqs[irq - icp->ics->offset].lsi = lsi; } -static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong cppr = args[0]; icp_set_cppr(spapr->icp, env->cpu_index, cppr); return H_SUCCESS; } -static target_ulong h_ipi(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { target_ulong server = args[0]; @@ -366,18 +365,20 @@ static target_ulong h_ipi(CPUPPCState *env, sPAPREnvironment *spapr, } -static target_ulong h_xirr(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; uint32_t xirr = icp_accept(spapr->icp->ss + env->cpu_index); args[0] = xirr; return H_SUCCESS; } -static target_ulong h_eoi(CPUPPCState *env, sPAPREnvironment *spapr, +static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong xirr = args[0]; icp_eoi(spapr->icp, env->cpu_index, xirr); diff --git a/hw/xtensa_pic.c b/hw/xtensa_pic.c index 653ded6..1ec70cd 100644 --- a/hw/xtensa_pic.c +++ b/hw/xtensa_pic.c @@ -125,12 +125,13 @@ void xtensa_rearm_ccompare_timer(CPUXtensaState *env) static void xtensa_ccompare_cb(void *opaque) { - CPUXtensaState *env = opaque; + XtensaCPU *cpu = opaque; + CPUXtensaState *env = &cpu->env; if (env->halted) { env->halt_clock = qemu_get_clock_ns(vm_clock); xtensa_advance_ccount(env, env->wake_ccount - env->sregs[CCOUNT]); - if (!cpu_has_work(env)) { + if (!cpu_has_work(CPU(cpu))) { env->sregs[CCOUNT] = env->wake_ccount + 1; xtensa_rearm_ccompare_timer(env); } @@ -139,12 +140,14 @@ static void xtensa_ccompare_cb(void *opaque) void xtensa_irq_init(CPUXtensaState *env) { + XtensaCPU *cpu = xtensa_env_get_cpu(env); + env->irq_inputs = (void **)qemu_allocate_irqs( xtensa_set_irq, env, env->config->ninterrupt); if (xtensa_option_enabled(env->config, XTENSA_OPTION_TIMER_INTERRUPT) && env->config->nccompare > 0) { env->ccompare_timer = - qemu_new_timer_ns(vm_clock, &xtensa_ccompare_cb, env); + qemu_new_timer_ns(vm_clock, &xtensa_ccompare_cb, cpu); } } diff --git a/include/qemu/cpu.h b/include/qemu/cpu.h index ad706a6..61b7698 100644 --- a/include/qemu/cpu.h +++ b/include/qemu/cpu.h @@ -54,6 +54,9 @@ typedef struct CPUClass { /** * CPUState: + * @created: Indicates whether the CPU thread has been successfully created. + * @stop: Indicates a pending stop request. + * @stopped: Indicates the CPU has been artificially stopped. * * State of one CPU core or thread. */ @@ -66,7 +69,13 @@ struct CPUState { #ifdef _WIN32 HANDLE hThread; #endif + int thread_id; + struct QemuCond *halt_cond; + struct qemu_work_item *queued_work_first, *queued_work_last; bool thread_kicked; + bool created; + bool stop; + bool stopped; /* TODO Move common fields from CPUArchState here. */ }; @@ -78,5 +87,54 @@ struct CPUState { */ void cpu_reset(CPUState *cpu); +/** + * qemu_cpu_has_work: + * @cpu: The vCPU to check. + * + * Checks whether the CPU has work to do. + * + * Returns: %true if the CPU has work, %false otherwise. + */ +bool qemu_cpu_has_work(CPUState *cpu); + +/** + * qemu_cpu_is_self: + * @cpu: The vCPU to check against. + * + * Checks whether the caller is executing on the vCPU thread. + * + * Returns: %true if called from @cpu's thread, %false otherwise. + */ +bool qemu_cpu_is_self(CPUState *cpu); + +/** + * qemu_cpu_kick: + * @cpu: The vCPU to kick. + * + * Kicks @cpu's thread. + */ +void qemu_cpu_kick(CPUState *cpu); + +/** + * cpu_is_stopped: + * @cpu: The CPU to check. + * + * Checks whether the CPU is stopped. + * + * Returns: %true if run state is not running or if artificially stopped; + * %false otherwise. + */ +bool cpu_is_stopped(CPUState *cpu); + +/** + * run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu. + */ +void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); + #endif diff --git a/iohandler.c b/iohandler.c index a2d871b..60460a6 100644 --- a/iohandler.c +++ b/iohandler.c @@ -26,6 +26,7 @@ #include "qemu-common.h" #include "qemu-char.h" #include "qemu-queue.h" +#include "qemu-aio.h" #include "main-loop.h" #ifndef _WIN32 @@ -251,3 +251,106 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt, assert(offset == 0); return j; } + +/* io vectors */ + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) +{ + qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec)); + qiov->niov = 0; + qiov->nalloc = alloc_hint; + qiov->size = 0; +} + +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) +{ + int i; + + qiov->iov = iov; + qiov->niov = niov; + qiov->nalloc = -1; + qiov->size = 0; + for (i = 0; i < niov; i++) + qiov->size += iov[i].iov_len; +} + +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) +{ + assert(qiov->nalloc != -1); + + if (qiov->niov == qiov->nalloc) { + qiov->nalloc = 2 * qiov->nalloc + 1; + qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec)); + } + qiov->iov[qiov->niov].iov_base = base; + qiov->iov[qiov->niov].iov_len = len; + qiov->size += len; + ++qiov->niov; +} + +/* + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. + */ +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes) +{ + int i; + size_t done; + struct iovec *siov = src->iov; + assert(dst->nalloc != -1); + assert(src->size >= soffset); + for (i = 0, done = 0; done < sbytes && i < src->niov; i++) { + if (soffset < siov[i].iov_len) { + size_t len = MIN(siov[i].iov_len - soffset, sbytes - done); + qemu_iovec_add(dst, siov[i].iov_base + soffset, len); + done += len; + soffset = 0; + } else { + soffset -= siov[i].iov_len; + } + } + /* return done; */ +} + +void qemu_iovec_destroy(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qemu_iovec_reset(qiov); + g_free(qiov->iov); + qiov->nalloc = 0; + qiov->iov = NULL; +} + +void qemu_iovec_reset(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qiov->niov = 0; + qiov->size = 0; +} + +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes) +{ + return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes) +{ + return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes) +{ + return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); +} @@ -828,10 +828,12 @@ static MemoryListener kvm_io_listener = { static void kvm_handle_interrupt(CPUArchState *env, int mask) { + CPUState *cpu = ENV_GET_CPU(env); + env->interrupt_request |= mask; - if (!qemu_cpu_is_self(env)) { - qemu_cpu_kick(env); + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); } } @@ -1498,8 +1500,10 @@ static void do_kvm_cpu_synchronize_state(void *_env) void kvm_cpu_synchronize_state(CPUArchState *env) { + CPUState *cpu = ENV_GET_CPU(env); + if (!env->kvm_vcpu_dirty) { - run_on_cpu(env, do_kvm_cpu_synchronize_state, env); + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, env); } } @@ -1785,6 +1789,7 @@ static void kvm_invoke_set_guest_debug(void *data) int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap) { + CPUState *cpu = ENV_GET_CPU(env); struct kvm_set_guest_debug_data data; data.dbg.control = reinject_trap; @@ -1795,7 +1800,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap) kvm_arch_update_guest_debug(env, &data.dbg); data.env = env; - run_on_cpu(env, kvm_invoke_set_guest_debug, &data); + run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data); return data.err; } @@ -20,6 +20,7 @@ #ifdef CONFIG_KVM #include <linux/kvm.h> +#include <linux/kvm_para.h> #endif extern int kvm_allowed; diff --git a/main-loop.c b/main-loop.c index eb3b6e6..e43c7c8 100644 --- a/main-loop.c +++ b/main-loop.c @@ -26,75 +26,12 @@ #include "qemu-timer.h" #include "slirp/slirp.h" #include "main-loop.h" +#include "qemu-aio.h" #ifndef _WIN32 #include "compatfd.h" -static int io_thread_fd = -1; - -void qemu_notify_event(void) -{ - /* Write 8 bytes to be compatible with eventfd. */ - static const uint64_t val = 1; - ssize_t ret; - - if (io_thread_fd == -1) { - return; - } - do { - ret = write(io_thread_fd, &val, sizeof(val)); - } while (ret < 0 && errno == EINTR); - - /* EAGAIN is fine, a read must be pending. */ - if (ret < 0 && errno != EAGAIN) { - fprintf(stderr, "qemu_notify_event: write() failed: %s\n", - strerror(errno)); - exit(1); - } -} - -static void qemu_event_read(void *opaque) -{ - int fd = (intptr_t)opaque; - ssize_t len; - char buffer[512]; - - /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */ - do { - len = read(fd, buffer, sizeof(buffer)); - } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); -} - -static int qemu_event_init(void) -{ - int err; - int fds[2]; - - err = qemu_eventfd(fds); - if (err == -1) { - return -errno; - } - err = fcntl_setfl(fds[0], O_NONBLOCK); - if (err < 0) { - goto fail; - } - err = fcntl_setfl(fds[1], O_NONBLOCK); - if (err < 0) { - goto fail; - } - qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL, - (void *)(intptr_t)fds[0]); - - io_thread_fd = fds[1]; - return 0; - -fail: - close(fds[0]); - close(fds[1]); - return err; -} - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. @@ -164,44 +101,29 @@ static int qemu_signal_init(void) #else /* _WIN32 */ -static HANDLE qemu_event_handle = NULL; - -static void dummy_event_handler(void *opaque) -{ -} - -static int qemu_event_init(void) +static int qemu_signal_init(void) { - qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL); - if (!qemu_event_handle) { - fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError()); - return -1; - } - qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL); return 0; } +#endif + +static AioContext *qemu_aio_context; void qemu_notify_event(void) { - if (!qemu_event_handle) { + if (!qemu_aio_context) { return; } - if (!SetEvent(qemu_event_handle)) { - fprintf(stderr, "qemu_notify_event: SetEvent failed: %ld\n", - GetLastError()); - exit(1); - } -} - -static int qemu_signal_init(void) -{ - return 0; + aio_notify(qemu_aio_context); } -#endif -int main_loop_init(void) +int qemu_init_main_loop(void) { int ret; + GSource *src; + + init_clocks(); + init_timer_alarm(); qemu_mutex_lock_iothread(); ret = qemu_signal_init(); @@ -209,12 +131,10 @@ int main_loop_init(void) return ret; } - /* Note eventfd must be drained before signalfd handlers run */ - ret = qemu_event_init(); - if (ret) { - return ret; - } - + qemu_aio_context = aio_context_new(); + src = aio_get_g_source(qemu_aio_context); + g_source_attach(src, NULL); + g_source_unref(src); return 0; } @@ -400,7 +320,8 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque) void qemu_fd_register(int fd) { - WSAEventSelect(fd, qemu_event_handle, FD_READ | FD_ACCEPT | FD_CLOSE | + WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier), + FD_READ | FD_ACCEPT | FD_CLOSE | FD_CONNECT | FD_WRITE | FD_OOB); } @@ -477,8 +398,6 @@ int main_loop_wait(int nonblocking) if (nonblocking) { timeout = 0; - } else { - qemu_bh_update_timeout(&timeout); } /* poll any events */ @@ -501,9 +420,41 @@ int main_loop_wait(int nonblocking) qemu_run_all_timers(); - /* Check bottom-halves last in case any of the earlier events triggered - them. */ - qemu_bh_poll(); - return ret; } + +/* Functions to operate on the main QEMU AioContext. */ + +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) +{ + return aio_bh_new(qemu_aio_context, cb, opaque); +} + +void qemu_aio_flush(void) +{ + aio_flush(qemu_aio_context); +} + +bool qemu_aio_wait(void) +{ + return aio_poll(qemu_aio_context, true); +} + +#ifdef CONFIG_POSIX +void qemu_aio_set_fd_handler(int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque) +{ + aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, io_flush, + opaque); +} +#endif + +void qemu_aio_set_event_notifier(EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush) +{ + aio_set_event_notifier(qemu_aio_context, notifier, io_read, io_flush); +} diff --git a/main-loop.h b/main-loop.h index dce1cd9..326c742 100644 --- a/main-loop.h +++ b/main-loop.h @@ -25,6 +25,8 @@ #ifndef QEMU_MAIN_LOOP_H #define QEMU_MAIN_LOOP_H 1 +#include "qemu-aio.h" + #define SIG_IPI SIGUSR1 /** @@ -43,16 +45,6 @@ int qemu_init_main_loop(void); /** - * main_loop_init: Initializes main loop - * - * Internal (but shared for compatibility reasons) initialization routine - * for the main loop. This should not be used by applications directly, - * use qemu_init_main_loop() instead. - * - */ -int main_loop_init(void); - -/** * main_loop_wait: Run one iteration of the main loop. * * If @nonblocking is true, poll for events, otherwise suspend until @@ -173,7 +165,6 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque); typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size); typedef int IOCanReadHandler(void *opaque); -typedef void IOHandler(void *opaque); /** * qemu_set_fd_handler2: Register a file descriptor with the main loop @@ -254,56 +245,6 @@ int qemu_set_fd_handler(int fd, IOHandler *fd_write, void *opaque); -typedef struct QEMUBH QEMUBH; -typedef void QEMUBHFunc(void *opaque); - -/** - * qemu_bh_new: Allocate a new bottom half structure. - * - * Bottom halves are lightweight callbacks whose invocation is guaranteed - * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure - * is opaque and must be allocated prior to its use. - */ -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); - -/** - * qemu_bh_schedule: Schedule a bottom half. - * - * Scheduling a bottom half interrupts the main loop and causes the - * execution of the callback that was passed to qemu_bh_new. - * - * Bottom halves that are scheduled from a bottom half handler are instantly - * invoked. This can create an infinite loop if a bottom half handler - * schedules itself. - * - * @bh: The bottom half to be scheduled. - */ -void qemu_bh_schedule(QEMUBH *bh); - -/** - * qemu_bh_cancel: Cancel execution of a bottom half. - * - * Canceling execution of a bottom half undoes the effect of calls to - * qemu_bh_schedule without freeing its resources yet. While cancellation - * itself is also wait-free and thread-safe, it can of course race with the - * loop that executes bottom halves unless you are holding the iothread - * mutex. This makes it mostly useless if you are not holding the mutex. - * - * @bh: The bottom half to be canceled. - */ -void qemu_bh_cancel(QEMUBH *bh); - -/** - *qemu_bh_delete: Cancel execution of a bottom half and free its resources. - * - * Deleting a bottom half frees the memory that was allocated for it by - * qemu_bh_new. It also implies canceling the bottom half if it was - * scheduled. - * - * @bh: The bottom half to be deleted. - */ -void qemu_bh_delete(QEMUBH *bh); - #ifdef CONFIG_POSIX /** * qemu_add_child_watch: Register a child process for reaping. @@ -359,8 +300,7 @@ void qemu_fd_register(int fd); void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds); void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc); +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); void qemu_bh_schedule_idle(QEMUBH *bh); -int qemu_bh_poll(void); -void qemu_bh_update_timeout(uint32_t *timeout); #endif @@ -1988,7 +1988,8 @@ static void do_acl_remove(Monitor *mon, const QDict *qdict) #if defined(TARGET_I386) static void do_inject_mce(Monitor *mon, const QDict *qdict) { - CPUArchState *cenv; + X86CPU *cpu; + CPUX86State *cenv; int cpu_index = qdict_get_int(qdict, "cpu_index"); int bank = qdict_get_int(qdict, "bank"); uint64_t status = qdict_get_int(qdict, "status"); @@ -2001,8 +2002,9 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) flags |= MCE_INJECT_BROADCAST; } for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { + cpu = x86_env_get_cpu(cenv); if (cenv->cpu_index == cpu_index) { - cpu_x86_inject_mce(mon, cenv, bank, status, mcg_status, addr, misc, + cpu_x86_inject_mce(mon, cpu, bank, status, mcg_status, addr, misc, flags); break; } @@ -134,6 +134,11 @@ fail: errno = serrno; return -1; } + +static int qemu_parse_fdset(const char *param) +{ + return qemu_parse_fd(param); +} #endif /* @@ -394,3 +399,28 @@ bool fips_get_state(void) { return fips_enabled; } + + +static int default_fdset_get_fd(int64_t fdset_id, int flags) +{ + return -1; +} +QEMU_WEAK_ALIAS(monitor_fdset_get_fd, default_fdset_get_fd); + +static int default_fdset_dup_fd_add(int64_t fdset_id, int dup_fd) +{ + return -1; +} +QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_add, default_fdset_dup_fd_add); + +static int default_fdset_dup_fd_remove(int dup_fd) +{ + return -1; +} +QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_remove, default_fdset_dup_fd_remove); + +static int default_fdset_dup_fd_find(int dup_fd) +{ + return -1; +} +QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_find, default_fdset_dup_fd_find); diff --git a/oslib-posix.c b/oslib-posix.c index dbeb627..9db9c3d 100644 --- a/oslib-posix.c +++ b/oslib-posix.c @@ -61,9 +61,6 @@ static int running_on_valgrind = -1; #ifdef CONFIG_LINUX #include <sys/syscall.h> #endif -#ifdef CONFIG_EVENTFD -#include <sys/eventfd.h> -#endif int qemu_get_thread_id(void) { @@ -183,34 +180,6 @@ int qemu_pipe(int pipefd[2]) return ret; } -/* - * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set. - */ -int qemu_eventfd(int fds[2]) -{ -#ifdef CONFIG_EVENTFD - int ret; - - ret = eventfd(0, 0); - if (ret >= 0) { - fds[0] = ret; - fds[1] = dup(ret); - if (fds[1] == -1) { - close(ret); - return -1; - } - qemu_set_cloexec(ret); - qemu_set_cloexec(fds[1]); - return 0; - } - if (errno != ENOSYS) { - return -1; - } -#endif - - return qemu_pipe(fds); -} - int qemu_utimens(const char *path, const struct timespec *times) { struct timeval tv[2], tv_now; diff --git a/oslib-win32.c b/oslib-win32.c index 51b33e8..9ca83df 100644 --- a/oslib-win32.c +++ b/oslib-win32.c @@ -150,3 +150,8 @@ int qemu_get_thread_id(void) { return GetCurrentThreadId(); } + +static void default_qemu_fd_register(int fd) +{ +} +QEMU_WEAK_ALIAS(qemu_fd_register, default_qemu_fd_register); diff --git a/posix-aio-compat.c b/posix-aio-compat.c deleted file mode 100644 index 96e4daf..0000000 --- a/posix-aio-compat.c +++ /dev/null @@ -1,679 +0,0 @@ -/* - * QEMU posix-aio emulation - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include <sys/ioctl.h> -#include <sys/types.h> -#include <pthread.h> -#include <unistd.h> -#include <errno.h> -#include <time.h> -#include <string.h> -#include <stdlib.h> -#include <stdio.h> - -#include "qemu-queue.h" -#include "osdep.h" -#include "sysemu.h" -#include "qemu-common.h" -#include "trace.h" -#include "block_int.h" -#include "iov.h" - -#include "block/raw-posix-aio.h" - -static void do_spawn_thread(void); - -struct qemu_paiocb { - BlockDriverAIOCB common; - int aio_fildes; - union { - struct iovec *aio_iov; - void *aio_ioctl_buf; - }; - int aio_niov; - size_t aio_nbytes; -#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ - off_t aio_offset; - - QTAILQ_ENTRY(qemu_paiocb) node; - int aio_type; - ssize_t ret; - int active; - struct qemu_paiocb *next; -}; - -typedef struct PosixAioState { - int rfd, wfd; - struct qemu_paiocb *first_aio; -} PosixAioState; - - -static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -static pthread_t thread_id; -static pthread_attr_t attr; -static int max_threads = 64; -static int cur_threads = 0; -static int idle_threads = 0; -static int new_threads = 0; /* backlog of threads we need to create */ -static int pending_threads = 0; /* threads created but not running yet */ -static QEMUBH *new_thread_bh; -static QTAILQ_HEAD(, qemu_paiocb) request_list; - -#ifdef CONFIG_PREADV -static int preadv_present = 1; -#else -static int preadv_present = 0; -#endif - -static void die2(int err, const char *what) -{ - fprintf(stderr, "%s failed: %s\n", what, strerror(err)); - abort(); -} - -static void die(const char *what) -{ - die2(errno, what); -} - -static void mutex_lock(pthread_mutex_t *mutex) -{ - int ret = pthread_mutex_lock(mutex); - if (ret) die2(ret, "pthread_mutex_lock"); -} - -static void mutex_unlock(pthread_mutex_t *mutex) -{ - int ret = pthread_mutex_unlock(mutex); - if (ret) die2(ret, "pthread_mutex_unlock"); -} - -static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, - struct timespec *ts) -{ - int ret = pthread_cond_timedwait(cond, mutex, ts); - if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait"); - return ret; -} - -static void cond_signal(pthread_cond_t *cond) -{ - int ret = pthread_cond_signal(cond); - if (ret) die2(ret, "pthread_cond_signal"); -} - -static void thread_create(pthread_t *thread, pthread_attr_t *attr, - void *(*start_routine)(void*), void *arg) -{ - int ret = pthread_create(thread, attr, start_routine, arg); - if (ret) die2(ret, "pthread_create"); -} - -static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) -{ - int ret; - - ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); - if (ret == -1) - return -errno; - - /* - * This looks weird, but the aio code only considers a request - * successful if it has written the full number of bytes. - * - * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command, - * so in fact we return the ioctl command here to make posix_aio_read() - * happy.. - */ - return aiocb->aio_nbytes; -} - -static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb) -{ - int ret; - - ret = qemu_fdatasync(aiocb->aio_fildes); - if (ret == -1) - return -errno; - return 0; -} - -#ifdef CONFIG_PREADV - -static ssize_t -qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) -{ - return preadv(fd, iov, nr_iov, offset); -} - -static ssize_t -qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) -{ - return pwritev(fd, iov, nr_iov, offset); -} - -#else - -static ssize_t -qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset) -{ - return -ENOSYS; -} - -static ssize_t -qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) -{ - return -ENOSYS; -} - -#endif - -static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb) -{ - ssize_t len; - - do { - if (aiocb->aio_type & QEMU_AIO_WRITE) - len = qemu_pwritev(aiocb->aio_fildes, - aiocb->aio_iov, - aiocb->aio_niov, - aiocb->aio_offset); - else - len = qemu_preadv(aiocb->aio_fildes, - aiocb->aio_iov, - aiocb->aio_niov, - aiocb->aio_offset); - } while (len == -1 && errno == EINTR); - - if (len == -1) - return -errno; - return len; -} - -/* - * Read/writes the data to/from a given linear buffer. - * - * Returns the number of bytes handles or -errno in case of an error. Short - * reads are only returned if the end of the file is reached. - */ -static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf) -{ - ssize_t offset = 0; - ssize_t len; - - while (offset < aiocb->aio_nbytes) { - if (aiocb->aio_type & QEMU_AIO_WRITE) - len = pwrite(aiocb->aio_fildes, - (const char *)buf + offset, - aiocb->aio_nbytes - offset, - aiocb->aio_offset + offset); - else - len = pread(aiocb->aio_fildes, - buf + offset, - aiocb->aio_nbytes - offset, - aiocb->aio_offset + offset); - - if (len == -1 && errno == EINTR) - continue; - else if (len == -1) { - offset = -errno; - break; - } else if (len == 0) - break; - - offset += len; - } - - return offset; -} - -static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb) -{ - ssize_t nbytes; - char *buf; - - if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) { - /* - * If there is just a single buffer, and it is properly aligned - * we can just use plain pread/pwrite without any problems. - */ - if (aiocb->aio_niov == 1) - return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); - - /* - * We have more than one iovec, and all are properly aligned. - * - * Try preadv/pwritev first and fall back to linearizing the - * buffer if it's not supported. - */ - if (preadv_present) { - nbytes = handle_aiocb_rw_vector(aiocb); - if (nbytes == aiocb->aio_nbytes) - return nbytes; - if (nbytes < 0 && nbytes != -ENOSYS) - return nbytes; - preadv_present = 0; - } - - /* - * XXX(hch): short read/write. no easy way to handle the reminder - * using these interfaces. For now retry using plain - * pread/pwrite? - */ - } - - /* - * Ok, we have to do it the hard way, copy all segments into - * a single aligned buffer. - */ - buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes); - if (aiocb->aio_type & QEMU_AIO_WRITE) { - char *p = buf; - int i; - - for (i = 0; i < aiocb->aio_niov; ++i) { - memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); - p += aiocb->aio_iov[i].iov_len; - } - } - - nbytes = handle_aiocb_rw_linear(aiocb, buf); - if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { - char *p = buf; - size_t count = aiocb->aio_nbytes, copy; - int i; - - for (i = 0; i < aiocb->aio_niov && count; ++i) { - copy = count; - if (copy > aiocb->aio_iov[i].iov_len) - copy = aiocb->aio_iov[i].iov_len; - memcpy(aiocb->aio_iov[i].iov_base, p, copy); - p += copy; - count -= copy; - } - } - qemu_vfree(buf); - - return nbytes; -} - -static void posix_aio_notify_event(void); - -static void *aio_thread(void *unused) -{ - mutex_lock(&lock); - pending_threads--; - mutex_unlock(&lock); - do_spawn_thread(); - - while (1) { - struct qemu_paiocb *aiocb; - ssize_t ret = 0; - qemu_timeval tv; - struct timespec ts; - - qemu_gettimeofday(&tv); - ts.tv_sec = tv.tv_sec + 10; - ts.tv_nsec = 0; - - mutex_lock(&lock); - - while (QTAILQ_EMPTY(&request_list) && - !(ret == ETIMEDOUT)) { - idle_threads++; - ret = cond_timedwait(&cond, &lock, &ts); - idle_threads--; - } - - if (QTAILQ_EMPTY(&request_list)) - break; - - aiocb = QTAILQ_FIRST(&request_list); - QTAILQ_REMOVE(&request_list, aiocb, node); - aiocb->active = 1; - mutex_unlock(&lock); - - switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { - case QEMU_AIO_READ: - ret = handle_aiocb_rw(aiocb); - if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) { - /* A short read means that we have reached EOF. Pad the buffer - * with zeros for bytes after EOF. */ - iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, - 0, aiocb->aio_nbytes - ret); - - ret = aiocb->aio_nbytes; - } - break; - case QEMU_AIO_WRITE: - ret = handle_aiocb_rw(aiocb); - break; - case QEMU_AIO_FLUSH: - ret = handle_aiocb_flush(aiocb); - break; - case QEMU_AIO_IOCTL: - ret = handle_aiocb_ioctl(aiocb); - break; - default: - fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); - ret = -EINVAL; - break; - } - - mutex_lock(&lock); - aiocb->ret = ret; - mutex_unlock(&lock); - - posix_aio_notify_event(); - } - - cur_threads--; - mutex_unlock(&lock); - - return NULL; -} - -static void do_spawn_thread(void) -{ - sigset_t set, oldset; - - mutex_lock(&lock); - if (!new_threads) { - mutex_unlock(&lock); - return; - } - - new_threads--; - pending_threads++; - - mutex_unlock(&lock); - - /* block all signals */ - if (sigfillset(&set)) die("sigfillset"); - if (sigprocmask(SIG_SETMASK, &set, &oldset)) die("sigprocmask"); - - thread_create(&thread_id, &attr, aio_thread, NULL); - - if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore"); -} - -static void spawn_thread_bh_fn(void *opaque) -{ - do_spawn_thread(); -} - -static void spawn_thread(void) -{ - cur_threads++; - new_threads++; - /* If there are threads being created, they will spawn new workers, so - * we don't spend time creating many threads in a loop holding a mutex or - * starving the current vcpu. - * - * If there are no idle threads, ask the main thread to create one, so we - * inherit the correct affinity instead of the vcpu affinity. - */ - if (!pending_threads) { - qemu_bh_schedule(new_thread_bh); - } -} - -static void qemu_paio_submit(struct qemu_paiocb *aiocb) -{ - aiocb->ret = -EINPROGRESS; - aiocb->active = 0; - mutex_lock(&lock); - if (idle_threads == 0 && cur_threads < max_threads) - spawn_thread(); - QTAILQ_INSERT_TAIL(&request_list, aiocb, node); - mutex_unlock(&lock); - cond_signal(&cond); -} - -static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb) -{ - ssize_t ret; - - mutex_lock(&lock); - ret = aiocb->ret; - mutex_unlock(&lock); - - return ret; -} - -static int qemu_paio_error(struct qemu_paiocb *aiocb) -{ - ssize_t ret = qemu_paio_return(aiocb); - - if (ret < 0) - ret = -ret; - else - ret = 0; - - return ret; -} - -static void posix_aio_read(void *opaque) -{ - PosixAioState *s = opaque; - struct qemu_paiocb *acb, **pacb; - int ret; - ssize_t len; - - /* read all bytes from signal pipe */ - for (;;) { - char bytes[16]; - - len = read(s->rfd, bytes, sizeof(bytes)); - if (len == -1 && errno == EINTR) - continue; /* try again */ - if (len == sizeof(bytes)) - continue; /* more to read */ - break; - } - - for(;;) { - pacb = &s->first_aio; - for(;;) { - acb = *pacb; - if (!acb) - return; - - ret = qemu_paio_error(acb); - if (ret == ECANCELED) { - /* remove the request */ - *pacb = acb->next; - qemu_aio_release(acb); - } else if (ret != EINPROGRESS) { - /* end of aio */ - if (ret == 0) { - ret = qemu_paio_return(acb); - if (ret == acb->aio_nbytes) - ret = 0; - else - ret = -EINVAL; - } else { - ret = -ret; - } - - trace_paio_complete(acb, acb->common.opaque, ret); - - /* remove the request */ - *pacb = acb->next; - /* call the callback */ - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); - break; - } else { - pacb = &acb->next; - } - } - } -} - -static int posix_aio_flush(void *opaque) -{ - PosixAioState *s = opaque; - return !!s->first_aio; -} - -static PosixAioState *posix_aio_state; - -static void posix_aio_notify_event(void) -{ - char byte = 0; - ssize_t ret; - - ret = write(posix_aio_state->wfd, &byte, sizeof(byte)); - if (ret < 0 && errno != EAGAIN) - die("write()"); -} - -static void paio_remove(struct qemu_paiocb *acb) -{ - struct qemu_paiocb **pacb; - - /* remove the callback from the queue */ - pacb = &posix_aio_state->first_aio; - for(;;) { - if (*pacb == NULL) { - fprintf(stderr, "paio_remove: aio request not found!\n"); - break; - } else if (*pacb == acb) { - *pacb = acb->next; - qemu_aio_release(acb); - break; - } - pacb = &(*pacb)->next; - } -} - -static void paio_cancel(BlockDriverAIOCB *blockacb) -{ - struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb; - int active = 0; - - trace_paio_cancel(acb, acb->common.opaque); - - mutex_lock(&lock); - if (!acb->active) { - QTAILQ_REMOVE(&request_list, acb, node); - acb->ret = -ECANCELED; - } else if (acb->ret == -EINPROGRESS) { - active = 1; - } - mutex_unlock(&lock); - - if (active) { - /* fail safe: if the aio could not be canceled, we wait for - it */ - while (qemu_paio_error(acb) == EINPROGRESS) - ; - } - - paio_remove(acb); -} - -static AIOPool raw_aio_pool = { - .aiocb_size = sizeof(struct qemu_paiocb), - .cancel = paio_cancel, -}; - -BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int type) -{ - struct qemu_paiocb *acb; - - acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque); - acb->aio_type = type; - acb->aio_fildes = fd; - - if (qiov) { - acb->aio_iov = qiov->iov; - acb->aio_niov = qiov->niov; - } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; - - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - - trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); - qemu_paio_submit(acb); - return &acb->common; -} - -BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - struct qemu_paiocb *acb; - - acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque); - acb->aio_type = QEMU_AIO_IOCTL; - acb->aio_fildes = fd; - acb->aio_offset = 0; - acb->aio_ioctl_buf = buf; - acb->aio_ioctl_cmd = req; - - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - - qemu_paio_submit(acb); - return &acb->common; -} - -int paio_init(void) -{ - PosixAioState *s; - int fds[2]; - int ret; - - if (posix_aio_state) - return 0; - - s = g_malloc(sizeof(PosixAioState)); - - s->first_aio = NULL; - if (qemu_pipe(fds) == -1) { - fprintf(stderr, "failed to create pipe\n"); - g_free(s); - return -1; - } - - s->rfd = fds[0]; - s->wfd = fds[1]; - - fcntl(s->rfd, F_SETFL, O_NONBLOCK); - fcntl(s->wfd, F_SETFL, O_NONBLOCK); - - qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); - - ret = pthread_attr_init(&attr); - if (ret) - die2(ret, "pthread_attr_init"); - - ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - if (ret) - die2(ret, "pthread_attr_setdetachstate"); - - QTAILQ_INIT(&request_list); - new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL); - - posix_aio_state = s; - return 0; -} diff --git a/qapi/Makefile.objs b/qapi/Makefile.objs index 5f5846e..f9bd3b9 100644 --- a/qapi/Makefile.objs +++ b/qapi/Makefile.objs @@ -1,3 +1,5 @@ qapi-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qmp-input-visitor.o qapi-obj-y += qmp-output-visitor.o qmp-registry.o qmp-dispatch.o -qapi-obj-y += string-input-visitor.o string-output-visitor.o opts-visitor.o +qapi-obj-y += string-input-visitor.o string-output-visitor.o + +common-obj-y += opts-visitor.o @@ -15,7 +15,8 @@ #define QEMU_AIO_H #include "qemu-common.h" -#include "qemu-char.h" +#include "qemu-queue.h" +#include "event_notifier.h" typedef struct BlockDriverAIOCB BlockDriverAIOCB; typedef void BlockDriverCompletionFunc(void *opaque, int ret); @@ -38,20 +39,162 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque); void qemu_aio_release(void *p); +typedef struct AioHandler AioHandler; +typedef void QEMUBHFunc(void *opaque); +typedef void IOHandler(void *opaque); + +typedef struct AioContext { + GSource source; + + /* The list of registered AIO handlers */ + QLIST_HEAD(, AioHandler) aio_handlers; + + /* This is a simple lock used to protect the aio_handlers list. + * Specifically, it's used to ensure that no callbacks are removed while + * we're walking and dispatching callbacks. + */ + int walking_handlers; + + /* Anchor of the list of Bottom Halves belonging to the context */ + struct QEMUBH *first_bh; + + /* A simple lock used to protect the first_bh list, and ensure that + * no callbacks are removed while we're walking and dispatching callbacks. + */ + int walking_bh; + + /* Used for aio_notify. */ + EventNotifier notifier; +} AioContext; + /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ -typedef int (AioFlushHandler)(void *opaque); +typedef int (AioFlushEventNotifierHandler)(EventNotifier *e); + +/** + * aio_context_new: Allocate a new AioContext. + * + * AioContext provide a mini event-loop that can be waited on synchronously. + * They also provide bottom halves, a service to execute a piece of code + * as soon as possible. + */ +AioContext *aio_context_new(void); + +/** + * aio_context_ref: + * @ctx: The AioContext to operate on. + * + * Add a reference to an AioContext. + */ +void aio_context_ref(AioContext *ctx); + +/** + * aio_context_unref: + * @ctx: The AioContext to operate on. + * + * Drop a reference to an AioContext. + */ +void aio_context_unref(AioContext *ctx); + +/** + * aio_bh_new: Allocate a new bottom half structure. + * + * Bottom halves are lightweight callbacks whose invocation is guaranteed + * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure + * is opaque and must be allocated prior to its use. + */ +QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * aio_notify: Force processing of pending events. + * + * Similar to signaling a condition variable, aio_notify forces + * aio_wait to exit, so that the next call will re-examine pending events. + * The caller of aio_notify will usually call aio_wait again very soon, + * or go through another iteration of the GLib main loop. Hence, aio_notify + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling aio_notify is rarely necessary, because for example scheduling + * a bottom half calls it already. + */ +void aio_notify(AioContext *ctx); + +/** + * aio_bh_poll: Poll bottom halves for an AioContext. + * + * These are internal functions used by the QEMU main loop. + */ +int aio_bh_poll(AioContext *ctx); + +/** + * qemu_bh_schedule: Schedule a bottom half. + * + * Scheduling a bottom half interrupts the main loop and causes the + * execution of the callback that was passed to qemu_bh_new. + * + * Bottom halves that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. + * + * @bh: The bottom half to be scheduled. + */ +void qemu_bh_schedule(QEMUBH *bh); + +/** + * qemu_bh_cancel: Cancel execution of a bottom half. + * + * Canceling execution of a bottom half undoes the effect of calls to + * qemu_bh_schedule without freeing its resources yet. While cancellation + * itself is also wait-free and thread-safe, it can of course race with the + * loop that executes bottom halves unless you are holding the iothread + * mutex. This makes it mostly useless if you are not holding the mutex. + * + * @bh: The bottom half to be canceled. + */ +void qemu_bh_cancel(QEMUBH *bh); + +/** + *qemu_bh_delete: Cancel execution of a bottom half and free its resources. + * + * Deleting a bottom half frees the memory that was allocated for it by + * qemu_bh_new. It also implies canceling the bottom half if it was + * scheduled. + * + * @bh: The bottom half to be deleted. + */ +void qemu_bh_delete(QEMUBH *bh); /* Flush any pending AIO operation. This function will block until all * outstanding AIO operations have been completed or cancelled. */ -void qemu_aio_flush(void); +void aio_flush(AioContext *ctx); -/* Wait for a single AIO completion to occur. This function will wait - * until a single AIO event has completed and it will ensure something - * has moved before returning. This can issue new pending aio as - * result of executing I/O completion or bh callbacks. +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext. * - * Return whether there is still any pending AIO operation. */ -bool qemu_aio_wait(void); + * This is used internally in the implementation of the GSource. + */ +bool aio_pending(AioContext *ctx); + +/* Progress in completing AIO work to occur. This can issue new pending + * aio as a result of executing I/O completion or bh callbacks. + * + * If there is no pending AIO operation or completion (bottom half), + * return false. If there are pending bottom halves, return true. + * + * If there are no pending bottom halves, but there are pending AIO + * operations, it may not be possible to make any progress without + * blocking. If @blocking is true, this function will wait until one + * or more AIO events have completed, to ensure something has moved + * before returning. + * + * If @blocking is false, this function will also return false if the + * function cannot make any progress without blocking. + */ +bool aio_poll(AioContext *ctx, bool blocking); + +#ifdef CONFIG_POSIX +/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ +typedef int (AioFlushHandler)(void *opaque); /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will @@ -60,10 +203,45 @@ bool qemu_aio_wait(void); * Code that invokes AIO completion functions should rely on this function * instead of qemu_set_fd_handler[2]. */ -int qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - AioFlushHandler *io_flush, - void *opaque); +void aio_set_fd_handler(AioContext *ctx, + int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque); +#endif + +/* Register an event notifier and associated callbacks. Behaves very similarly + * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks + * will be invoked when using either qemu_aio_wait() or qemu_aio_flush(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of event_notifier_set_handler. + */ +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush); + +/* Return a GSource that lets the main loop poll the file descriptors attached + * to this AioContext. + */ +GSource *aio_get_g_source(AioContext *ctx); + +/* Functions to operate on the main QEMU AioContext. */ + +void qemu_aio_flush(void); +bool qemu_aio_wait(void); +void qemu_aio_set_event_notifier(EventNotifier *notifier, + EventNotifierHandler *io_read, + AioFlushEventNotifierHandler *io_flush); + +#ifdef CONFIG_POSIX +void qemu_aio_set_fd_handler(int fd, + IOHandler *io_read, + IOHandler *io_write, + AioFlushHandler *io_flush, + void *opaque); +#endif #endif diff --git a/qemu-char.h b/qemu-char.h index 297dd98..a121e04 100644 --- a/qemu-char.h +++ b/qemu-char.h @@ -5,6 +5,7 @@ #include "qemu-queue.h" #include "qemu-option.h" #include "qemu-config.h" +#include "qemu-aio.h" #include "qobject.h" #include "qstring.h" #include "main-loop.h" diff --git a/qemu-common.h b/qemu-common.h index b54612b..bef5826 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -14,6 +14,7 @@ typedef struct QEMUTimer QEMUTimer; typedef struct QEMUFile QEMUFile; +typedef struct QEMUBH QEMUBH; typedef struct DeviceState DeviceState; struct Monitor; @@ -167,7 +168,6 @@ int qemu_fls(int i); int qemu_fdatasync(int fd); int fcntl_setfl(int fd, int flag); int qemu_parse_fd(const char *param); -int qemu_parse_fdset(const char *param); /* * strtosz() suffixes used to specify the default treatment of an @@ -218,7 +218,6 @@ ssize_t qemu_recv_full(int fd, void *buf, size_t count, int flags) QEMU_WARN_UNUSED_RESULT; #ifndef _WIN32 -int qemu_eventfd(int pipefd[2]); int qemu_pipe(int pipefd[2]); #endif @@ -324,9 +323,7 @@ void cpu_save(QEMUFile *f, void *opaque); int cpu_load(QEMUFile *f, void *opaque, int version_id); /* Unblock cpu */ -void qemu_cpu_kick(void *env); void qemu_cpu_kick_self(void); -int qemu_cpu_is_self(void *env); /* work queue */ struct qemu_work_item { diff --git a/qemu-config.c b/qemu-config.c index e854fff..3154cac 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -114,6 +114,10 @@ static QemuOptsList qemu_drive_opts = { .name = "copy-on-read", .type = QEMU_OPT_BOOL, .help = "copy read data from backing file into image file", + },{ + .name = "boot", + .type = QEMU_OPT_BOOL, + .help = "(deprecated, ignored)", }, { /* end of list */ } }, diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c index 26ad76b..9dda3f8 100644 --- a/qemu-coroutine-lock.c +++ b/qemu-coroutine-lock.c @@ -26,7 +26,7 @@ #include "qemu-coroutine.h" #include "qemu-coroutine-int.h" #include "qemu-queue.h" -#include "main-loop.h" +#include "qemu-aio.h" #include "trace.h" static QTAILQ_HEAD(, Coroutine) unlock_bh_queue = diff --git a/qemu-options.hx b/qemu-options.hx index a67a255..fe8f15c 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2888,6 +2888,22 @@ STEXI Enable FIPS 140-2 compliance mode. ETEXI +HXCOMM Deprecated by -machine accel=tcg property +DEF("no-kvm", HAS_ARG, QEMU_OPTION_no_kvm, "", QEMU_ARCH_I386) + +HXCOMM Deprecated by kvm-pit driver properties +DEF("no-kvm-pit-reinjection", HAS_ARG, QEMU_OPTION_no_kvm_pit_reinjection, + "", QEMU_ARCH_I386) + +HXCOMM Deprecated (ignored) +DEF("no-kvm-pit", HAS_ARG, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386) + +HXCOMM Deprecated by -machine kernel_irqchip=on|off property +DEF("no-kvm-irqchip", HAS_ARG, QEMU_OPTION_no_kvm_irqchip, "", QEMU_ARCH_I386) + +HXCOMM Deprecated (ignored) +DEF("tdf", 0, QEMU_OPTION_tdf,"", QEMU_ARCH_ALL) + HXCOMM This is the last statement. Insert new options before this line! STEXI @end table diff --git a/qemu-os-win32.h b/qemu-os-win32.h index 8ba466d..d0e9234d 100644 --- a/qemu-os-win32.h +++ b/qemu-os-win32.h @@ -28,7 +28,6 @@ #include <windows.h> #include <winsock2.h> -#include "main-loop.h" /* Workaround for older versions of MinGW. */ #ifndef ECONNREFUSED diff --git a/qemu-sockets.c b/qemu-sockets.c index cfed9c5..f2a6371 100644 --- a/qemu-sockets.c +++ b/qemu-sockets.c @@ -967,3 +967,21 @@ int socket_init(void) #endif return 0; } + +static int default_monitor_get_fd(Monitor *mon, const char *name, Error **errp) +{ + error_setg(errp, "only QEMU supports file descriptor passing"); + return -1; +} +QEMU_WEAK_ALIAS(monitor_get_fd, default_monitor_get_fd); + +static int default_qemu_set_fd_handler2(int fd, + IOCanReadHandler *fd_read_poll, + IOHandler *fd_read, + IOHandler *fd_write, + void *opaque) + +{ + abort(); +} +QEMU_WEAK_ALIAS(qemu_set_fd_handler2, default_qemu_set_fd_handler2); diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c index 8fbabda..6a3d3a1 100644 --- a/qemu-thread-posix.c +++ b/qemu-thread-posix.c @@ -17,6 +17,9 @@ #include <signal.h> #include <stdint.h> #include <string.h> +#include <limits.h> +#include <unistd.h> +#include <sys/time.h> #include "qemu-thread.h" static void error_exit(int err, const char *msg) @@ -115,6 +118,83 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) error_exit(err, __func__); } +void qemu_sem_init(QemuSemaphore *sem, int init) +{ + int rc; + + rc = sem_init(&sem->sem, 0, init); + if (rc < 0) { + error_exit(errno, __func__); + } +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ + int rc; + + rc = sem_destroy(&sem->sem); + if (rc < 0) { + error_exit(errno, __func__); + } +} + +void qemu_sem_post(QemuSemaphore *sem) +{ + int rc; + + rc = sem_post(&sem->sem); + if (rc < 0) { + error_exit(errno, __func__); + } +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ + int rc; + + if (ms <= 0) { + /* This is cheaper than sem_timedwait. */ + do { + rc = sem_trywait(&sem->sem); + } while (rc == -1 && errno == EINTR); + if (rc == -1 && errno == EAGAIN) { + return -1; + } + } else { + struct timeval tv; + struct timespec ts; + gettimeofday(&tv, NULL); + ts.tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000; + ts.tv_sec = tv.tv_sec + ms / 1000; + if (ts.tv_nsec >= 1000000000) { + ts.tv_sec++; + ts.tv_nsec -= 1000000000; + } + do { + rc = sem_timedwait(&sem->sem, &ts); + } while (rc == -1 && errno == EINTR); + if (rc == -1 && errno == ETIMEDOUT) { + return -1; + } + } + if (rc < 0) { + error_exit(errno, __func__); + } + return 0; +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ + int rc; + + do { + rc = sem_wait(&sem->sem); + } while (rc == -1 && errno == EINTR); + if (rc < 0) { + error_exit(errno, __func__); + } +} + void qemu_thread_create(QemuThread *thread, void *(*start_routine)(void*), void *arg, int mode) diff --git a/qemu-thread-posix.h b/qemu-thread-posix.h index ee4618e..2542c15 100644 --- a/qemu-thread-posix.h +++ b/qemu-thread-posix.h @@ -1,6 +1,7 @@ #ifndef __QEMU_THREAD_POSIX_H #define __QEMU_THREAD_POSIX_H 1 #include "pthread.h" +#include <semaphore.h> struct QemuMutex { pthread_mutex_t lock; @@ -10,6 +11,10 @@ struct QemuCond { pthread_cond_t cond; }; +struct QemuSemaphore { + sem_t sem; +}; + struct QemuThread { pthread_t thread; }; diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c index 177b398..4b3db60 100644 --- a/qemu-thread-win32.c +++ b/qemu-thread-win32.c @@ -192,6 +192,41 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) qemu_mutex_lock(mutex); } +void qemu_sem_init(QemuSemaphore *sem, int init) +{ + /* Manual reset. */ + sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL); +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ + CloseHandle(sem->sema); +} + +void qemu_sem_post(QemuSemaphore *sem) +{ + ReleaseSemaphore(sem->sema, 1, NULL); +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ + int rc = WaitForSingleObject(sem->sema, ms); + if (rc == WAIT_OBJECT_0) { + return 0; + } + if (rc != WAIT_TIMEOUT) { + error_exit(GetLastError(), __func__); + } + return -1; +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ + if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) { + error_exit(GetLastError(), __func__); + } +} + struct QemuThreadData { /* Passed to win32_start_routine. */ void *(*start_routine)(void *); diff --git a/qemu-thread-win32.h b/qemu-thread-win32.h index b9d1be8..13adb95 100644 --- a/qemu-thread-win32.h +++ b/qemu-thread-win32.h @@ -13,6 +13,10 @@ struct QemuCond { HANDLE continue_event; }; +struct QemuSemaphore { + HANDLE sema; +}; + typedef struct QemuThreadData QemuThreadData; struct QemuThread { QemuThreadData *data; diff --git a/qemu-thread.h b/qemu-thread.h index 05fdaaf..3ee2f6b 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -6,6 +6,7 @@ typedef struct QemuMutex QemuMutex; typedef struct QemuCond QemuCond; +typedef struct QemuSemaphore QemuSemaphore; typedef struct QemuThread QemuThread; #ifdef _WIN32 @@ -38,6 +39,12 @@ void qemu_cond_signal(QemuCond *cond); void qemu_cond_broadcast(QemuCond *cond); void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex); +void qemu_sem_init(QemuSemaphore *sem, int init); +void qemu_sem_post(QemuSemaphore *sem); +void qemu_sem_wait(QemuSemaphore *sem); +int qemu_sem_timedwait(QemuSemaphore *sem, int ms); +void qemu_sem_destroy(QemuSemaphore *sem); + void qemu_thread_create(QemuThread *thread, void *(*start_routine)(void *), void *arg, int mode); diff --git a/qemu-timer.c b/qemu-timer.c index ede84ff..f3426c9 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -430,9 +430,11 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, Notifier *notifier) void init_clocks(void) { - rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME); - vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL); - host_clock = qemu_new_clock(QEMU_CLOCK_HOST); + if (!rt_clock) { + rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME); + vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL); + host_clock = qemu_new_clock(QEMU_CLOCK_HOST); + } } uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts) @@ -745,6 +747,10 @@ int init_timer_alarm(void) struct qemu_alarm_timer *t = NULL; int i, err = -1; + if (alarm_timer) { + return 0; + } + for (i = 0; alarm_timers[i].name; i++) { t = &alarm_timers[i]; diff --git a/qemu-tool.c b/qemu-tool.c index da4c05a..b46631e 100644 --- a/qemu-tool.c +++ b/qemu-tool.c @@ -38,12 +38,6 @@ const char *qemu_get_vm_name(void) Monitor *cur_mon; -int monitor_get_fd(Monitor *mon, const char *name, Error **errp) -{ - error_setg(errp, "only QEMU supports file descriptor passing"); - return -1; -} - void vm_stop(RunState state) { abort(); @@ -74,29 +68,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data) { } -int monitor_fdset_get_fd(int64_t fdset_id, int flags) -{ - return -1; -} - -int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd) -{ - return -1; -} - -int monitor_fdset_dup_fd_remove(int dup_fd) -{ - return -1; -} - -int monitor_fdset_dup_fd_find(int dup_fd) -{ - return -1; -} - int64_t cpu_get_clock(void) { - return qemu_get_clock_ns(rt_clock); + return get_clock_realtime(); } int64_t cpu_get_icount(void) @@ -118,13 +92,6 @@ void qemu_clock_warp(QEMUClock *clock) { } -int qemu_init_main_loop(void) -{ - init_clocks(); - init_timer_alarm(); - return main_loop_init(); -} - void slirp_update_timeout(uint32_t *timeout) { } diff --git a/qemu-user.c b/qemu-user.c index 13fb9ae..08ccb0f 100644 --- a/qemu-user.c +++ b/qemu-user.c @@ -35,23 +35,3 @@ void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) void monitor_set_error(Monitor *mon, QError *qerror) { } - -int monitor_fdset_get_fd(int64_t fdset_id, int flags) -{ - return -1; -} - -int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd) -{ - return -1; -} - -int monitor_fdset_dup_fd_remove(int dup_fd) -{ - return -1; -} - -int monitor_fdset_dup_fd_find(int dup_fd) -{ - return -1; -} @@ -471,11 +471,12 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, return prop_list; } -CpuDefinitionInfoList GCC_WEAK *arch_query_cpu_definitions(Error **errp) +static CpuDefinitionInfoList *default_arch_query_cpu_definitions(Error **errp) { error_set(errp, QERR_NOT_SUPPORTED); return NULL; } +QEMU_WEAK_ALIAS(arch_query_cpu_definitions, default_arch_query_cpu_definitions); CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) { @@ -86,6 +86,7 @@ #include "memory.h" #include "qmp-commands.h" #include "trace.h" +#include "bitops.h" #define SELF_ANNOUNCE_ROUNDS 5 @@ -1132,6 +1133,46 @@ const VMStateInfo vmstate_info_unused_buffer = { .put = put_unused_buffer, }; +/* bitmaps (as defined by bitmap.h). Note that size here is the size + * of the bitmap in bits. The on-the-wire format of a bitmap is 64 + * bit words with the bits in big endian order. The in-memory format + * is an array of 'unsigned long', which may be either 32 or 64 bits. + */ +/* This is the number of 64 bit words sent over the wire */ +#define BITS_TO_U64S(nr) DIV_ROUND_UP(nr, 64) +static int get_bitmap(QEMUFile *f, void *pv, size_t size) +{ + unsigned long *bmp = pv; + int i, idx = 0; + for (i = 0; i < BITS_TO_U64S(size); i++) { + uint64_t w = qemu_get_be64(f); + bmp[idx++] = w; + if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) { + bmp[idx++] = w >> 32; + } + } + return 0; +} + +static void put_bitmap(QEMUFile *f, void *pv, size_t size) +{ + unsigned long *bmp = pv; + int i, idx = 0; + for (i = 0; i < BITS_TO_U64S(size); i++) { + uint64_t w = bmp[idx++]; + if (sizeof(unsigned long) == 4 && idx < BITS_TO_LONGS(size)) { + w |= ((uint64_t)bmp[idx++]) << 32; + } + qemu_put_be64(f, w); + } +} + +const VMStateInfo vmstate_info_bitmap = { + .name = "bitmap", + .get = get_bitmap, + .put = put_bitmap, +}; + typedef struct CompatEntry { char idstr[256]; int instance_id; diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 67be2ef..4c7b566 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -34,7 +34,8 @@ ARCHLIST=$(cd "$linux/arch" && echo *) for arch in $ARCHLIST; do # Discard anything which isn't a KVM-supporting architecture - if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ]; then + if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ] && + ! [ -e "$linux/arch/$arch/include/uapi/asm/kvm.h" ] ; then continue fi diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c index 62d2a66..11a19eb 100644 --- a/target-alpha/cpu.c +++ b/target-alpha/cpu.c @@ -19,7 +19,7 @@ * <http://www.gnu.org/licenses/lgpl-2.1.html> */ -#include "cpu-qom.h" +#include "cpu.h" #include "qemu-common.h" diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index 8f131b7..34221fb 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -510,8 +510,10 @@ static inline void cpu_set_tls(CPUAlphaState *env, target_ulong newtls) } #endif -static inline bool cpu_has_work(CPUAlphaState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUAlphaState *env = &ALPHA_CPU(cpu)->env; + /* Here we are checking to see if the CPU should wake up from HALT. We will have gotten into this state only for WTINT from PALmode. */ /* ??? I'm not sure how the IPL state works with WTINT to keep a CPU diff --git a/target-arm/cpu.h b/target-arm/cpu.h index ff4de10..e4ff918 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -718,8 +718,10 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, } } -static inline bool cpu_has_work(CPUARMState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUARMState *env = &ARM_CPU(cpu)->env; + return env->interrupt_request & (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB); } diff --git a/target-cris/cpu.h b/target-cris/cpu.h index 4f4df6d..2c27506 100644 --- a/target-cris/cpu.h +++ b/target-cris/cpu.h @@ -285,8 +285,10 @@ static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc, #define cpu_list cris_cpu_list void cris_cpu_list(FILE *f, fprintf_function cpu_fprintf); -static inline bool cpu_has_work(CPUCRISState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUCRISState *env = &CRIS_CPU(cpu)->env; + return env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } diff --git a/target-i386/cpu.c b/target-i386/cpu.c index d4f2e65..c46286a 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -37,6 +37,13 @@ #include <linux/kvm_para.h> #endif +#include "sysemu.h" +#ifndef CONFIG_USER_ONLY +#include "hw/xen.h" +#include "hw/sysbus.h" +#include "hw/apic_internal.h" +#endif + /* feature flags taken from "Intel Processor Identification and the CPUID * Instruction" and AMD's "CPUID Specification". In cases of disagreement * between feature naming conventions, aliases may be added. @@ -88,10 +95,14 @@ static const char *ext3_feature_name[] = { }; static const char *kvm_feature_name[] = { - "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", "kvm_asyncpf", NULL, "kvm_pv_eoi", NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", + "kvm_asyncpf", "kvm_steal_time", "kvm_pv_eoi", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, }; static const char *svm_feature_name[] = { @@ -106,8 +117,8 @@ static const char *svm_feature_name[] = { }; static const char *cpuid_7_0_ebx_feature_name[] = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, "smep", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "fsgsbase", NULL, NULL, "bmi1", "hle", "avx2", NULL, "smep", + "bmi2", "erms", "invpcid", "rtm", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "smap", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; @@ -762,13 +773,20 @@ static int cpu_x86_fill_model_id(char *str) return 0; } -static int cpu_x86_fill_host(x86_def_t *x86_cpu_def) +/* Fill a x86_def_t struct with information about the host CPU, and + * the CPU features supported by the host hardware + host kernel + * + * This function may be called only if KVM is enabled. + */ +static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def) { + KVMState *s = kvm_state; uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + assert(kvm_enabled()); + x86_cpu_def->name = "host"; host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->level = eax; x86_cpu_def->vendor1 = ebx; x86_cpu_def->vendor2 = edx; x86_cpu_def->vendor3 = ecx; @@ -777,21 +795,24 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def) x86_cpu_def->family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); x86_cpu_def->model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); x86_cpu_def->stepping = eax & 0x0F; - x86_cpu_def->ext_features = ecx; - x86_cpu_def->features = edx; - if (kvm_enabled() && x86_cpu_def->level >= 7) { - x86_cpu_def->cpuid_7_0_ebx_features = kvm_arch_get_supported_cpuid(kvm_state, 0x7, 0, R_EBX); + x86_cpu_def->level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); + x86_cpu_def->features = kvm_arch_get_supported_cpuid(s, 0x1, 0, R_EDX); + x86_cpu_def->ext_features = kvm_arch_get_supported_cpuid(s, 0x1, 0, R_ECX); + + if (x86_cpu_def->level >= 7) { + x86_cpu_def->cpuid_7_0_ebx_features = + kvm_arch_get_supported_cpuid(s, 0x7, 0, R_EBX); } else { x86_cpu_def->cpuid_7_0_ebx_features = 0; } - host_cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->xlevel = eax; + x86_cpu_def->xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); + x86_cpu_def->ext2_features = + kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX); + x86_cpu_def->ext3_features = + kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX); - host_cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->ext2_features = edx; - x86_cpu_def->ext3_features = ecx; cpu_x86_fill_model_id(x86_cpu_def->model_id); x86_cpu_def->vendor_override = 0; @@ -800,11 +821,13 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def) x86_cpu_def->vendor2 == CPUID_VENDOR_VIA_2 && x86_cpu_def->vendor3 == CPUID_VENDOR_VIA_3) { host_cpuid(0xC0000000, 0, &eax, &ebx, &ecx, &edx); + eax = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); if (eax >= 0xC0000001) { /* Support VIA max extended level */ x86_cpu_def->xlevel2 = eax; host_cpuid(0xC0000001, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_def->ext4_features = edx; + x86_cpu_def->ext4_features = + kvm_arch_get_supported_cpuid(s, 0xC0000001, 0, R_EDX); } } @@ -815,8 +838,6 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def) * unsupported ones later. */ x86_cpu_def->svm_features = -1; - - return 0; } static int unavailable_host_feature(struct model_features_t *f, uint32_t mask) @@ -837,8 +858,10 @@ static int unavailable_host_feature(struct model_features_t *f, uint32_t mask) /* best effort attempt to inform user requested cpu flags aren't making * their way to the guest. Note: ft[].check_feat ideally should be * specified via a guest_def field to suppress report of extraneous flags. + * + * This function may be called only if KVM is enabled. */ -static int check_features_against_host(x86_def_t *guest_def) +static int kvm_check_features_against_host(x86_def_t *guest_def) { x86_def_t host_def; uint32_t mask; @@ -853,7 +876,9 @@ static int check_features_against_host(x86_def_t *guest_def) {&guest_def->ext3_features, &host_def.ext3_features, ~CPUID_EXT3_SVM, ext3_feature_name, 0x80000001}}; - cpu_x86_fill_host(&host_def); + assert(kvm_enabled()); + + kvm_cpu_fill_host(&host_def); for (rv = 0, i = 0; i < ARRAY_SIZE(ft); ++i) for (mask = 1; mask; mask <<= 1) if (ft[i].check_feat & mask && *ft[i].guest_feat & mask && @@ -1140,7 +1165,7 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) if (name && !strcmp(name, def->name)) break; if (kvm_enabled() && name && strcmp(name, "host") == 0) { - cpu_x86_fill_host(x86_cpu_def); + kvm_cpu_fill_host(x86_cpu_def); } else if (!def) { goto error; } else { @@ -1278,8 +1303,8 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) x86_cpu_def->kvm_features &= ~minus_kvm_features; x86_cpu_def->svm_features &= ~minus_svm_features; x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_7_0_ebx_features; - if (check_cpuid) { - if (check_features_against_host(x86_cpu_def) && enforce_cpuid) + if (check_cpuid && kvm_enabled()) { + if (kvm_check_features_against_host(x86_cpu_def) && enforce_cpuid) goto error; } if (x86_cpu_def->cpuid_7_0_ebx_features && x86_cpu_def->level < 7) { @@ -1368,6 +1393,32 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) return cpu_list; } +#ifdef CONFIG_KVM +static void filter_features_for_kvm(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + KVMState *s = kvm_state; + + env->cpuid_features &= + kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); + env->cpuid_ext_features &= + kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); + env->cpuid_ext2_features &= + kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX); + env->cpuid_ext3_features &= + kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX); + env->cpuid_svm_features &= + kvm_arch_get_supported_cpuid(s, 0x8000000A, 0, R_EDX); + env->cpuid_7_0_ebx_features &= + kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX); + env->cpuid_kvm_features &= + kvm_arch_get_supported_cpuid(s, KVM_CPUID_FEATURES, 0, R_EAX); + env->cpuid_ext4_features &= + kvm_arch_get_supported_cpuid(s, 0xC0000001, 0, R_EDX); + +} +#endif + int cpu_x86_register(X86CPU *cpu, const char *cpu_model) { CPUX86State *env = &cpu->env; @@ -1425,9 +1476,14 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model) ); env->cpuid_ext3_features &= TCG_EXT3_FEATURES; env->cpuid_svm_features &= TCG_SVM_FEATURES; + } else { +#ifdef CONFIG_KVM + filter_features_for_kvm(cpu); +#endif } object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error); - if (error_is_set(&error)) { + if (error) { + fprintf(stderr, "%s\n", error_get_pretty(error)); error_free(error); return -1; } @@ -1878,12 +1934,65 @@ static void mce_init(X86CPU *cpu) } } +#define MSI_ADDR_BASE 0xfee00000 + +#ifndef CONFIG_USER_ONLY +static void x86_cpu_apic_init(X86CPU *cpu, Error **errp) +{ + static int apic_mapped; + CPUX86State *env = &cpu->env; + APICCommonState *apic; + const char *apic_type = "apic"; + + if (kvm_irqchip_in_kernel()) { + apic_type = "kvm-apic"; + } else if (xen_enabled()) { + apic_type = "xen-apic"; + } + + env->apic_state = qdev_try_create(NULL, apic_type); + if (env->apic_state == NULL) { + error_setg(errp, "APIC device '%s' could not be created", apic_type); + return; + } + + object_property_add_child(OBJECT(cpu), "apic", + OBJECT(env->apic_state), NULL); + qdev_prop_set_uint8(env->apic_state, "id", env->cpuid_apic_id); + /* TODO: convert to link<> */ + apic = APIC_COMMON(env->apic_state); + apic->cpu = cpu; + + if (qdev_init(env->apic_state)) { + error_setg(errp, "APIC device '%s' could not be initialized", + object_get_typename(OBJECT(env->apic_state))); + return; + } + + /* XXX: mapping more APICs at the same memory location */ + if (apic_mapped == 0) { + /* NOTE: the APIC is directly connected to the CPU - it is not + on the global memory bus. */ + /* XXX: what if the base changes? */ + sysbus_mmio_map(sysbus_from_qdev(env->apic_state), 0, MSI_ADDR_BASE); + apic_mapped = 1; + } +} +#endif + void x86_cpu_realize(Object *obj, Error **errp) { X86CPU *cpu = X86_CPU(obj); #ifndef CONFIG_USER_ONLY qemu_register_reset(x86_cpu_machine_reset_cb, cpu); + + if (cpu->env.cpuid_features & CPUID_APIC || smp_cpus > 1) { + x86_cpu_apic_init(cpu, errp); + if (error_is_set(errp)) { + return; + } + } #endif mce_init(cpu); diff --git a/target-i386/cpu.h b/target-i386/cpu.h index de33303..cdc59dc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -907,9 +907,11 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, } } -static inline void cpu_x86_load_seg_cache_sipi(CPUX86State *env, +static inline void cpu_x86_load_seg_cache_sipi(X86CPU *cpu, int sipi_vector) { + CPUX86State *env = &cpu->env; + env->eip = 0; cpu_x86_load_seg_cache(env, R_CS, sipi_vector << 8, sipi_vector << 12, @@ -1098,8 +1100,10 @@ static inline void cpu_clone_regs(CPUX86State *env, target_ulong newsp) #include "hw/apic.h" #endif -static inline bool cpu_has_work(CPUX86State *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUX86State *env = &X86_CPU(cpu)->env; + return ((env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_POLL)) && (env->eflags & IF_MASK)) || @@ -1131,7 +1135,7 @@ void do_cpu_sipi(X86CPU *cpu); #define MCE_INJECT_BROADCAST 1 #define MCE_INJECT_UNCOND_AO 2 -void cpu_x86_inject_mce(Monitor *mon, CPUX86State *cenv, int bank, +void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags); diff --git a/target-i386/helper.c b/target-i386/helper.c index c5d42c5..bf206cf 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1141,10 +1141,11 @@ static void do_inject_x86_mce(void *data) } } -void cpu_x86_inject_mce(Monitor *mon, CPUX86State *cenv, int bank, +void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags) { + CPUX86State *cenv = &cpu->env; MCEInjectionParams params = { .mon = mon, .env = cenv, @@ -1176,7 +1177,7 @@ void cpu_x86_inject_mce(Monitor *mon, CPUX86State *cenv, int bank, return; } - run_on_cpu(cenv, do_inject_x86_mce, ¶ms); + run_on_cpu(CPU(cpu), do_inject_x86_mce, ¶ms); if (flags & MCE_INJECT_BROADCAST) { params.bank = 1; params.status = MCI_STATUS_VAL | MCI_STATUS_UC; @@ -1188,7 +1189,7 @@ void cpu_x86_inject_mce(Monitor *mon, CPUX86State *cenv, int bank, continue; } params.env = env; - run_on_cpu(cenv, do_inject_x86_mce, ¶ms); + run_on_cpu(CPU(cpu), do_inject_x86_mce, ¶ms); } } } @@ -1243,6 +1244,7 @@ X86CPU *cpu_x86_init(const char *cpu_model) { X86CPU *cpu; CPUX86State *env; + Error *error = NULL; cpu = X86_CPU(object_new(TYPE_X86_CPU)); env = &cpu->env; @@ -1253,8 +1255,12 @@ X86CPU *cpu_x86_init(const char *cpu_model) return NULL; } - x86_cpu_realize(OBJECT(cpu), NULL); - + x86_cpu_realize(OBJECT(cpu), &error); + if (error) { + error_free(error); + object_delete(OBJECT(cpu)); + return NULL; + } return cpu; } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 3aa62b2..73e2035 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -98,6 +98,19 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) return cpuid; } +/* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough + * for all entries. + */ +static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s) +{ + struct kvm_cpuid2 *cpuid; + int max = 1; + while ((cpuid = try_get_cpuid(s, max)) == NULL) { + max *= 2; + } + return cpuid; +} + struct kvm_para_features { int cap; int feature; @@ -123,60 +136,98 @@ static int get_para_features(KVMState *s) } +/* Returns the value for a specific register on the cpuid entry + */ +static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +{ + uint32_t ret = 0; + switch (reg) { + case R_EAX: + ret = entry->eax; + break; + case R_EBX: + ret = entry->ebx; + break; + case R_ECX: + ret = entry->ecx; + break; + case R_EDX: + ret = entry->edx; + break; + } + return ret; +} + +/* Find matching entry for function/index on kvm_cpuid2 struct + */ +static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) +{ + int i; + for (i = 0; i < cpuid->nent; ++i) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + return &cpuid->entries[i]; + } + } + /* not found: */ + return NULL; +} + uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, uint32_t index, int reg) { struct kvm_cpuid2 *cpuid; - int i, max; uint32_t ret = 0; uint32_t cpuid_1_edx; - int has_kvm_features = 0; + bool found = false; - max = 1; - while ((cpuid = try_get_cpuid(s, max)) == NULL) { - max *= 2; + cpuid = get_supported_cpuid(s); + + struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index); + if (entry) { + found = true; + ret = cpuid_entry_get_reg(entry, reg); } - for (i = 0; i < cpuid->nent; ++i) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) { - if (cpuid->entries[i].function == KVM_CPUID_FEATURES) { - has_kvm_features = 1; - } - switch (reg) { - case R_EAX: - ret = cpuid->entries[i].eax; - break; - case R_EBX: - ret = cpuid->entries[i].ebx; - break; - case R_ECX: - ret = cpuid->entries[i].ecx; - break; - case R_EDX: - ret = cpuid->entries[i].edx; - switch (function) { - case 1: - /* KVM before 2.6.30 misreports the following features */ - ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; - break; - case 0x80000001: - /* On Intel, kvm returns cpuid according to the Intel spec, - * so add missing bits according to the AMD spec: - */ - cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); - ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; - break; - } - break; - } + /* Fixups for the data returned by KVM, below */ + + if (function == 1 && reg == R_EDX) { + /* KVM before 2.6.30 misreports the following features */ + ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; + } else if (function == 1 && reg == R_ECX) { + /* We can set the hypervisor flag, even if KVM does not return it on + * GET_SUPPORTED_CPUID + */ + ret |= CPUID_EXT_HYPERVISOR; + /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it + * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER, + * and the irqchip is in the kernel. + */ + if (kvm_irqchip_in_kernel() && + kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { + ret |= CPUID_EXT_TSC_DEADLINE_TIMER; + } + + /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled + * without the in-kernel irqchip + */ + if (!kvm_irqchip_in_kernel()) { + ret &= ~CPUID_EXT_X2APIC; } + } else if (function == 0x80000001 && reg == R_EDX) { + /* On Intel, kvm returns cpuid according to the Intel spec, + * so add missing bits according to the AMD spec: + */ + cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); + ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; } g_free(cpuid); /* fallback for older kernels */ - if (!has_kvm_features && (function == KVM_CPUID_FEATURES)) { + if ((function == KVM_CPUID_FEATURES) && !found) { ret = get_para_features(s); } @@ -229,8 +280,9 @@ static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, return -ENOSYS; } -static void kvm_mce_inject(CPUX86State *env, hwaddr paddr, int code) +static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) { + CPUX86State *env = &cpu->env; uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; uint64_t mcg_status = MCG_STATUS_MCIP; @@ -242,7 +294,7 @@ static void kvm_mce_inject(CPUX86State *env, hwaddr paddr, int code) status |= 0xc0; mcg_status |= MCG_STATUS_RIPV; } - cpu_x86_inject_mce(NULL, env, 9, status, mcg_status, paddr, + cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, (MCM_ADDR_PHYS << 6) | 0xc, cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0); @@ -256,6 +308,7 @@ static void hardware_memory_error(void) int kvm_arch_on_sigbus_vcpu(CPUX86State *env, int code, void *addr) { + X86CPU *cpu = x86_env_get_cpu(env); ram_addr_t ram_addr; hwaddr paddr; @@ -273,7 +326,7 @@ int kvm_arch_on_sigbus_vcpu(CPUX86State *env, int code, void *addr) } } kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(env, paddr, code); + kvm_mce_inject(cpu, paddr, code); } else { if (code == BUS_MCEERR_AO) { return 0; @@ -301,7 +354,7 @@ int kvm_arch_on_sigbus(int code, void *addr) return 0; } kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(first_cpu, paddr, code); + kvm_mce_inject(x86_env_get_cpu(first_cpu), paddr, code); } else { if (code == BUS_MCEERR_AO) { return 0; @@ -359,31 +412,12 @@ int kvm_arch_init_vcpu(CPUX86State *env) struct kvm_cpuid2 cpuid; struct kvm_cpuid_entry2 entries[100]; } QEMU_PACKED cpuid_data; - KVMState *s = env->kvm_state; uint32_t limit, i, j, cpuid_i; uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; int r; - env->cpuid_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); - - i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; - j = env->cpuid_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER; - env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); - env->cpuid_ext_features |= i; - if (j && kvm_irqchip_in_kernel() && - kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { - env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER; - } - - env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001, - 0, R_EDX); - env->cpuid_ext3_features &= kvm_arch_get_supported_cpuid(s, 0x80000001, - 0, R_ECX); - env->cpuid_svm_features &= kvm_arch_get_supported_cpuid(s, 0x8000000A, - 0, R_EDX); - cpuid_i = 0; /* Paravirtualization CPUIDs */ @@ -404,8 +438,7 @@ int kvm_arch_init_vcpu(CPUX86State *env) c = &cpuid_data.entries[cpuid_i++]; memset(c, 0, sizeof(*c)); c->function = KVM_CPUID_FEATURES; - c->eax = env->cpuid_kvm_features & - kvm_arch_get_supported_cpuid(s, KVM_CPUID_FEATURES, 0, R_EAX); + c->eax = env->cpuid_kvm_features; if (hyperv_enabled()) { memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); @@ -526,8 +559,6 @@ int kvm_arch_init_vcpu(CPUX86State *env) /* Call Centaur's CPUID instructions they are supported. */ if (env->cpuid_xlevel2 > 0) { - env->cpuid_ext4_features &= - kvm_arch_get_supported_cpuid(s, 0xC0000001, 0, R_EDX); cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); for (i = 0xC0000000; i <= limit; i++) { @@ -1365,8 +1396,9 @@ static int kvm_put_mp_state(CPUX86State *env) return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state); } -static int kvm_get_mp_state(CPUX86State *env) +static int kvm_get_mp_state(X86CPU *cpu) { + CPUX86State *env = &cpu->env; struct kvm_mp_state mp_state; int ret; @@ -1552,9 +1584,10 @@ static int kvm_get_debugregs(CPUX86State *env) int kvm_arch_put_registers(CPUX86State *env, int level) { + CPUState *cpu = ENV_GET_CPU(env); int ret; - assert(cpu_is_stopped(env) || qemu_cpu_is_self(env)); + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); ret = kvm_getput_regs(env, 1); if (ret < 0) { @@ -1609,9 +1642,10 @@ int kvm_arch_put_registers(CPUX86State *env, int level) int kvm_arch_get_registers(CPUX86State *env) { + X86CPU *cpu = x86_env_get_cpu(env); int ret; - assert(cpu_is_stopped(env) || qemu_cpu_is_self(env)); + assert(cpu_is_stopped(CPU(cpu)) || qemu_cpu_is_self(CPU(cpu))); ret = kvm_getput_regs(env, 0); if (ret < 0) { @@ -1633,7 +1667,7 @@ int kvm_arch_get_registers(CPUX86State *env) if (ret < 0) { return ret; } - ret = kvm_get_mp_state(env); + ret = kvm_get_mp_state(cpu); if (ret < 0) { return ret; } @@ -1781,8 +1815,10 @@ int kvm_arch_process_async_events(CPUX86State *env) return env->halted; } -static int kvm_handle_halt(CPUX86State *env) +static int kvm_handle_halt(X86CPU *cpu) { + CPUX86State *env = &cpu->env; + if (!((env->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && !(env->interrupt_request & CPU_INTERRUPT_NMI)) { @@ -1996,13 +2032,14 @@ static bool host_supports_vmx(void) int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run *run) { + X86CPU *cpu = x86_env_get_cpu(env); uint64_t code; int ret; switch (run->exit_reason) { case KVM_EXIT_HLT: DPRINTF("handle_hlt\n"); - ret = kvm_handle_halt(env); + ret = kvm_handle_halt(cpu); break; case KVM_EXIT_SET_TPR: ret = 0; diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h index da80469..7243b4f 100644 --- a/target-lm32/cpu.h +++ b/target-lm32/cpu.h @@ -253,8 +253,10 @@ static inline void cpu_get_tb_cpu_state(CPULM32State *env, target_ulong *pc, *flags = 0; } -static inline bool cpu_has_work(CPULM32State *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPULM32State *env = &LM32_CPU(cpu)->env; + return env->interrupt_request & CPU_INTERRUPT_HARD; } diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h index 5e6ee50..780e2c9 100644 --- a/target-m68k/cpu.h +++ b/target-m68k/cpu.h @@ -257,8 +257,10 @@ static inline void cpu_get_tb_cpu_state(CPUM68KState *env, target_ulong *pc, | ((env->macsr >> 4) & 0xf); /* Bits 0-3 */ } -static inline bool cpu_has_work(CPUM68KState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUM68KState *env = &M68K_CPU(cpu)->env; + return env->interrupt_request & CPU_INTERRUPT_HARD; } diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h index 37bbdf1..585bbd6 100644 --- a/target-microblaze/cpu.h +++ b/target-microblaze/cpu.h @@ -374,8 +374,10 @@ void cpu_unassigned_access(CPUMBState *env1, hwaddr addr, int is_write, int is_exec, int is_asi, int size); #endif -static inline bool cpu_has_work(CPUMBState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUMBState *env = &MICROBLAZE_CPU(cpu)->env; + return env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI); } diff --git a/target-mips/cpu.h b/target-mips/cpu.h index 4c012d5..aebb2d5 100644 --- a/target-mips/cpu.h +++ b/target-mips/cpu.h @@ -710,16 +710,17 @@ static inline int mips_vpe_active(CPUMIPSState *env) return active; } -static inline int cpu_has_work(CPUMIPSState *env) +static inline bool cpu_has_work(CPUState *cpu) { - int has_work = 0; + CPUMIPSState *env = &MIPS_CPU(cpu)->env; + bool has_work = false; /* It is implementation dependent if non-enabled interrupts wake-up the CPU, however most of the implementations only check for interrupts that can be taken. */ if ((env->interrupt_request & CPU_INTERRUPT_HARD) && cpu_mips_hw_interrupts_pending(env)) { - has_work = 1; + has_work = true; } /* MIPS-MT has the ability to halt the CPU. */ @@ -727,11 +728,11 @@ static inline int cpu_has_work(CPUMIPSState *env) /* The QEMU model will issue an _WAKE request whenever the CPUs should be woken up. */ if (env->interrupt_request & CPU_INTERRUPT_WAKE) { - has_work = 1; + has_work = true; } if (!mips_vpe_active(env)) { - has_work = 0; + has_work = false; } } return has_work; diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h index a701d36..d42ffb0 100644 --- a/target-openrisc/cpu.h +++ b/target-openrisc/cpu.h @@ -437,8 +437,10 @@ static inline int cpu_mmu_index(CPUOpenRISCState *env) } #define CPU_INTERRUPT_TIMER CPU_INTERRUPT_TGT_INT_0 -static inline bool cpu_has_work(CPUOpenRISCState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUOpenRISCState *env = &OPENRISC_CPU(cpu)->env; + return env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_TIMER); } diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 286f42a..c3cbad7 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -963,7 +963,7 @@ struct CPUPPCState { /* floating point registers */ float64 fpr[32]; /* floating point status and control register */ - uint32_t fpscr; + target_ulong fpscr; /* Next instruction pointer */ target_ulong nip; @@ -1014,6 +1014,8 @@ struct CPUPPCState { /* Altivec registers */ ppc_avr_t avr[32]; uint32_t vscr; + /* VSX registers */ + uint64_t vsr[32]; /* SPE registers */ uint64_t spe_acc; uint32_t spe_fscr; @@ -1045,9 +1047,9 @@ struct CPUPPCState { #endif #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) - hwaddr vpa_addr; - hwaddr slb_shadow_addr, slb_shadow_size; - hwaddr dtl_addr, dtl_size; + uint64_t vpa_addr; + uint64_t slb_shadow_addr, slb_shadow_size; + uint64_t dtl_addr, dtl_size; #endif /* TARGET_PPC64 */ int error_code; @@ -2220,10 +2222,12 @@ static inline bool msr_is_64bit(CPUPPCState *env, target_ulong msr) return msr & (1ULL << MSR_SF); } -extern void (*cpu_ppc_hypercall)(CPUPPCState *); +extern void (*cpu_ppc_hypercall)(PowerPCCPU *); -static inline bool cpu_has_work(CPUPPCState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUPPCState *env = &POWERPC_CPU(cpu)->env; + return msr_ee && (env->interrupt_request & CPU_INTERRUPT_HARD); } diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index 1a593f6..5e34ad0 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -33,7 +33,7 @@ /*****************************************************************************/ /* PowerPC Hypercall emulation */ -void (*cpu_ppc_hypercall)(CPUPPCState *); +void (*cpu_ppc_hypercall)(PowerPCCPU *); /*****************************************************************************/ /* Exception processing */ @@ -63,8 +63,9 @@ static inline void dump_syscall(CPUPPCState *env) /* Note that this function should be greatly optimized * when called with a constant excp, from ppc_hw_interrupt */ -static inline void powerpc_excp(CPUPPCState *env, int excp_model, int excp) +static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) { + CPUPPCState *env = &cpu->env; target_ulong msr, new_msr, vector; int srr0, srr1, asrr0, asrr1; int lpes0, lpes1, lev; @@ -238,7 +239,7 @@ static inline void powerpc_excp(CPUPPCState *env, int excp_model, int excp) dump_syscall(env); lev = env->error_code; if ((lev == 1) && cpu_ppc_hypercall) { - cpu_ppc_hypercall(env); + cpu_ppc_hypercall(cpu); return; } if (lev == 1 || (lpes0 == 0 && lpes1 == 0)) { @@ -643,11 +644,14 @@ static inline void powerpc_excp(CPUPPCState *env, int excp_model, int excp) void do_interrupt(CPUPPCState *env) { - powerpc_excp(env, env->excp_model, env->exception_index); + PowerPCCPU *cpu = ppc_env_get_cpu(env); + + powerpc_excp(cpu, env->excp_model, env->exception_index); } void ppc_hw_interrupt(CPUPPCState *env) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); int hdice; #if 0 @@ -658,20 +662,20 @@ void ppc_hw_interrupt(CPUPPCState *env) /* External reset */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_RESET); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_RESET); return; } /* Machine check exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_MCK)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_MCK); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_MCHECK); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_MCHECK); return; } #if 0 /* TODO */ /* External debug exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_DEBUG)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DEBUG); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_DEBUG); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DEBUG); return; } #endif @@ -685,7 +689,7 @@ void ppc_hw_interrupt(CPUPPCState *env) /* Hypervisor decrementer exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDECR)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDECR); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_HDECR); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_HDECR); return; } } @@ -698,7 +702,7 @@ void ppc_hw_interrupt(CPUPPCState *env) #if 0 env->pending_interrupts &= ~(1 << PPC_INTERRUPT_CEXT); #endif - powerpc_excp(env, env->excp_model, POWERPC_EXCP_CRITICAL); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_CRITICAL); return; } } @@ -706,30 +710,30 @@ void ppc_hw_interrupt(CPUPPCState *env) /* Watchdog timer on embedded PowerPC */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_WDT)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_WDT); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_WDT); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_WDT); return; } if (env->pending_interrupts & (1 << PPC_INTERRUPT_CDOORBELL)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_CDOORBELL); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_DOORCI); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORCI); return; } /* Fixed interval timer on embedded PowerPC */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_FIT)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_FIT); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_FIT); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_FIT); return; } /* Programmable interval timer on embedded PowerPC */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_PIT)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PIT); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_PIT); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_PIT); return; } /* Decrementer exception */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_DECR)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DECR); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_DECR); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DECR); return; } /* External interrupt */ @@ -740,23 +744,23 @@ void ppc_hw_interrupt(CPUPPCState *env) #if 0 env->pending_interrupts &= ~(1 << PPC_INTERRUPT_EXT); #endif - powerpc_excp(env, env->excp_model, POWERPC_EXCP_EXTERNAL); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_EXTERNAL); return; } if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_DOORI); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI); return; } if (env->pending_interrupts & (1 << PPC_INTERRUPT_PERFM)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PERFM); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_PERFM); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_PERFM); return; } /* Thermal interrupt */ if (env->pending_interrupts & (1 << PPC_INTERRUPT_THERM)) { env->pending_interrupts &= ~(1 << PPC_INTERRUPT_THERM); - powerpc_excp(env, env->excp_model, POWERPC_EXCP_THERM); + powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_THERM); return; } } diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 5cbe98a..3f5df57 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -73,9 +73,11 @@ static int cap_hior; */ static QEMUTimer *idle_timer; -static void kvm_kick_env(void *env) +static void kvm_kick_cpu(void *opaque) { - qemu_cpu_kick(env); + PowerPCCPU *cpu = opaque; + + qemu_cpu_kick(CPU(cpu)); } int kvm_arch_init(KVMState *s) @@ -375,6 +377,7 @@ static inline void kvm_fixup_page_sizes(CPUPPCState *env) int kvm_arch_init_vcpu(CPUPPCState *cenv) { + PowerPCCPU *cpu = ppc_env_get_cpu(cenv); int ret; /* Gather server mmu info from KVM and update the CPU state */ @@ -386,7 +389,7 @@ int kvm_arch_init_vcpu(CPUPPCState *cenv) return ret; } - idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv); + idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu); /* Some targets support access to KVM's guest TLB. */ switch (cenv->mmu_model) { @@ -814,7 +817,8 @@ int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run) #ifdef CONFIG_PSERIES case KVM_EXIT_PAPR_HCALL: dprintf("handle PAPR hypercall\n"); - run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr, + run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env), + run->papr_hcall.nr, run->papr_hcall.args); ret = 0; break; diff --git a/target-ppc/machine.c b/target-ppc/machine.c index 21ce757..5e7bc00 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -6,6 +6,7 @@ void cpu_save(QEMUFile *f, void *opaque) { CPUPPCState *env = (CPUPPCState *)opaque; unsigned int i, j; + uint32_t fpscr; for (i = 0; i < 32; i++) qemu_put_betls(f, &env->gpr[i]); @@ -30,7 +31,8 @@ void cpu_save(QEMUFile *f, void *opaque) u.d = env->fpr[i]; qemu_put_be64(f, u.l); } - qemu_put_be32s(f, &env->fpscr); + fpscr = env->fpscr; + qemu_put_be32s(f, &fpscr); qemu_put_sbe32s(f, &env->access_type); #if defined(TARGET_PPC64) qemu_put_betls(f, &env->asr); @@ -90,6 +92,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) CPUPPCState *env = (CPUPPCState *)opaque; unsigned int i, j; target_ulong sdr1; + uint32_t fpscr; for (i = 0; i < 32; i++) qemu_get_betls(f, &env->gpr[i]); @@ -114,7 +117,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) u.l = qemu_get_be64(f); env->fpr[i] = u.d; } - qemu_get_be32s(f, &env->fpscr); + qemu_get_be32s(f, &fpscr); + env->fpscr = fpscr; qemu_get_sbe32s(f, &env->access_type); #if defined(TARGET_PPC64) qemu_get_betls(f, &env->asr); diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 1042268..56725e6 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -68,7 +68,7 @@ static TCGv cpu_cfar; #endif static TCGv cpu_xer; static TCGv cpu_reserve; -static TCGv_i32 cpu_fpscr; +static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; #include "gen-icount.h" @@ -163,8 +163,8 @@ void ppc_translate_init(void) offsetof(CPUPPCState, reserve_addr), "reserve_addr"); - cpu_fpscr = tcg_global_mem_new_i32(TCG_AREG0, - offsetof(CPUPPCState, fpscr), "fpscr"); + cpu_fpscr = tcg_global_mem_new(TCG_AREG0, + offsetof(CPUPPCState, fpscr), "fpscr"); cpu_access_type = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUPPCState, access_type), "access_type"); @@ -2302,6 +2302,7 @@ GEN_FLOAT_B(neg, 0x08, 0x01, 0, PPC_FLOAT); /* mcrfs */ static void gen_mcrfs(DisasContext *ctx) { + TCGv tmp = tcg_temp_new(); int bfa; if (unlikely(!ctx->fpu_enabled)) { @@ -2309,9 +2310,11 @@ static void gen_mcrfs(DisasContext *ctx) return; } bfa = 4 * (7 - crfS(ctx->opcode)); - tcg_gen_shri_i32(cpu_crf[crfD(ctx->opcode)], cpu_fpscr, bfa); + tcg_gen_shri_tl(tmp, cpu_fpscr, bfa); + tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], tmp); + tcg_temp_free(tmp); tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 0xf); - tcg_gen_andi_i32(cpu_fpscr, cpu_fpscr, ~(0xF << bfa)); + tcg_gen_andi_tl(cpu_fpscr, cpu_fpscr, ~(0xF << bfa)); } /* mffs */ @@ -2322,7 +2325,7 @@ static void gen_mffs(DisasContext *ctx) return; } gen_reset_fpstatus(); - tcg_gen_extu_i32_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr); + tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr); gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); } @@ -2346,7 +2349,8 @@ static void gen_mtfsb0(DisasContext *ctx) tcg_temp_free_i32(t0); } if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX); + tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } } @@ -2371,7 +2375,8 @@ static void gen_mtfsb1(DisasContext *ctx) tcg_temp_free_i32(t0); } if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX); + tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } /* We can raise a differed exception */ gen_helper_float_check_status(cpu_env); @@ -2397,7 +2402,8 @@ static void gen_mtfsf(DisasContext *ctx) gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0); tcg_temp_free_i32(t0); if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX); + tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } /* We can raise a differed exception */ gen_helper_float_check_status(cpu_env); @@ -2425,7 +2431,8 @@ static void gen_mtfsfi(DisasContext *ctx) tcg_temp_free_i64(t0); tcg_temp_free_i32(t1); if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_shri_i32(cpu_crf[1], cpu_fpscr, FPSCR_OX); + tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); } /* We can raise a differed exception */ gen_helper_float_check_status(cpu_env); @@ -9463,7 +9470,7 @@ void cpu_dump_state (CPUPPCState *env, FILE *f, fprintf_function cpu_fprintf, if ((i & (RFPL - 1)) == (RFPL - 1)) cpu_fprintf(f, "\n"); } - cpu_fprintf(f, "FPSCR %08x\n", env->fpscr); + cpu_fprintf(f, "FPSCR " TARGET_FMT_lx "\n", env->fpscr); #if !defined(CONFIG_USER_ONLY) cpu_fprintf(f, " SRR0 " TARGET_FMT_lx " SRR1 " TARGET_FMT_lx " PVR " TARGET_FMT_lx " VRSAVE " TARGET_FMT_lx "\n", diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 5be6e83..0f9a1f7 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -977,8 +977,10 @@ static inline void cpu_inject_ext(CPUS390XState *env, uint32_t code, uint32_t pa cpu_interrupt(env, CPU_INTERRUPT_HARD); } -static inline bool cpu_has_work(CPUS390XState *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUS390XState *env = &S390_CPU(cpu)->env; + return (env->interrupt_request & CPU_INTERRUPT_HARD) && (env->psw.mask & PSW_MASK_EXT); } diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index a66ac43..94de764 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -403,7 +403,7 @@ static int s390_cpu_restart(S390CPU *cpu) kvm_s390_interrupt(env, KVM_S390_RESTART, 0); s390_add_running_cpu(env); - qemu_cpu_kick(env); + qemu_cpu_kick(CPU(cpu)); dprintf("DONE: SIGP cpu restart: %p\n", env); return 0; } diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h index 782159e..9a0e72b 100644 --- a/target-sh4/cpu.h +++ b/target-sh4/cpu.h @@ -371,8 +371,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ } -static inline bool cpu_has_work(CPUSH4State *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUSH4State *env = &SUPERH_CPU(cpu)->env; + return env->interrupt_request & CPU_INTERRUPT_HARD; } diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index a55fe08..6aa82b3 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -764,8 +764,10 @@ static inline bool tb_am_enabled(int tb_flags) #endif } -static inline bool cpu_has_work(CPUSPARCState *env1) +static inline bool cpu_has_work(CPUState *cpu) { + CPUSPARCState *env1 = &SPARC_CPU(cpu)->env; + return (env1->interrupt_request & CPU_INTERRUPT_HARD) && cpu_interrupts_enabled(env1); } diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c index 3425bbe..884c101 100644 --- a/target-unicore32/cpu.c +++ b/target-unicore32/cpu.c @@ -12,7 +12,7 @@ * or (at your option) any later version. */ -#include "cpu-qom.h" +#include "cpu.h" #include "qemu-common.h" static inline void set_feature(CPUUniCore32State *env, int feature) diff --git a/target-unicore32/cpu.h b/target-unicore32/cpu.h index 06508a1..676c5d9 100644 --- a/target-unicore32/cpu.h +++ b/target-unicore32/cpu.h @@ -181,8 +181,10 @@ void uc32_translate_init(void); void do_interrupt(CPUUniCore32State *); void switch_mode(CPUUniCore32State *, int); -static inline bool cpu_has_work(CPUUniCore32State *env) +static inline bool cpu_has_work(CPUState *cpu) { + CPUUniCore32State *env = &UNICORE32_CPU(cpu)->env; + return env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB); } diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h index 7348277..74e9888 100644 --- a/target-xtensa/cpu.h +++ b/target-xtensa/cpu.h @@ -501,8 +501,10 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, #include "cpu-all.h" #include "exec-all.h" -static inline int cpu_has_work(CPUXtensaState *env) +static inline int cpu_has_work(CPUState *cpu) { + CPUXtensaState *env = &XTENSA_CPU(cpu)->env; + return env->pending_irq_level; } diff --git a/tests/Makefile b/tests/Makefile index 86c9b79..9bf0765 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -36,7 +36,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o -test-qapi-obj-y = $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y) +test-qapi-obj-y = $(qobject-obj-y) $(qapi-obj-y) qemu-tool.o test-qapi-obj-y += tests/test-qapi-visit.o tests/test-qapi-types.o test-qapi-obj-y += module.o @@ -47,8 +47,8 @@ tests/check-qstring$(EXESUF): tests/check-qstring.o qstring.o tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o -tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y) -tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) +tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) qemu-tool.o +tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) $(block-obj-y) iov.o tests/test-iov$(EXESUF): tests/test-iov.o iov.o tests/test-qapi-types.c tests/test-qapi-types.h :\ @@ -81,7 +81,7 @@ TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS))) QTEST_TARGETS=$(foreach TARGET,$(TARGETS), $(if $(check-qtest-$(TARGET)-y), $(TARGET),)) check-qtest-$(CONFIG_POSIX)=$(foreach TARGET,$(TARGETS), $(check-qtest-$(TARGET)-y)) -qtest-obj-y = tests/libqtest.o $(oslib-obj-y) $(tools-obj-y) +qtest-obj-y = tests/libqtest.o $(oslib-obj-y) $(check-qtest-y): $(qtest-obj-y) .PHONY: check-help diff --git a/thread-pool.c b/thread-pool.c new file mode 100644 index 0000000..651b324 --- /dev/null +++ b/thread-pool.c @@ -0,0 +1,289 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include "qemu-common.h" +#include "qemu-queue.h" +#include "qemu-thread.h" +#include "osdep.h" +#include "qemu-coroutine.h" +#include "trace.h" +#include "block_int.h" +#include "event_notifier.h" +#include "thread-pool.h" + +static void do_spawn_thread(void); + +typedef struct ThreadPoolElement ThreadPoolElement; + +enum ThreadState { + THREAD_QUEUED, + THREAD_ACTIVE, + THREAD_DONE, + THREAD_CANCELED, +}; + +struct ThreadPoolElement { + BlockDriverAIOCB common; + ThreadPoolFunc *func; + void *arg; + + /* Moving state out of THREAD_QUEUED is protected by lock. After + * that, only the worker thread can write to it. Reads and writes + * of state and ret are ordered with memory barriers. + */ + enum ThreadState state; + int ret; + + /* Access to this list is protected by lock. */ + QTAILQ_ENTRY(ThreadPoolElement) reqs; + + /* Access to this list is protected by the global mutex. */ + QLIST_ENTRY(ThreadPoolElement) all; +}; + +static EventNotifier notifier; +static QemuMutex lock; +static QemuCond check_cancel; +static QemuSemaphore sem; +static int max_threads = 64; +static QEMUBH *new_thread_bh; + +/* The following variables are protected by the global mutex. */ +static QLIST_HEAD(, ThreadPoolElement) head; + +/* The following variables are protected by lock. */ +static QTAILQ_HEAD(, ThreadPoolElement) request_list; +static int cur_threads; +static int idle_threads; +static int new_threads; /* backlog of threads we need to create */ +static int pending_threads; /* threads created but not running yet */ +static int pending_cancellations; /* whether we need a cond_broadcast */ + +static void *worker_thread(void *unused) +{ + qemu_mutex_lock(&lock); + pending_threads--; + do_spawn_thread(); + + while (1) { + ThreadPoolElement *req; + int ret; + + do { + idle_threads++; + qemu_mutex_unlock(&lock); + ret = qemu_sem_timedwait(&sem, 10000); + qemu_mutex_lock(&lock); + idle_threads--; + } while (ret == -1 && !QTAILQ_EMPTY(&request_list)); + if (ret == -1) { + break; + } + + req = QTAILQ_FIRST(&request_list); + QTAILQ_REMOVE(&request_list, req, reqs); + req->state = THREAD_ACTIVE; + qemu_mutex_unlock(&lock); + + ret = req->func(req->arg); + + req->ret = ret; + /* Write ret before state. */ + smp_wmb(); + req->state = THREAD_DONE; + + qemu_mutex_lock(&lock); + if (pending_cancellations) { + qemu_cond_broadcast(&check_cancel); + } + + event_notifier_set(¬ifier); + } + + cur_threads--; + qemu_mutex_unlock(&lock); + return NULL; +} + +static void do_spawn_thread(void) +{ + QemuThread t; + + /* Runs with lock taken. */ + if (!new_threads) { + return; + } + + new_threads--; + pending_threads++; + + qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED); +} + +static void spawn_thread_bh_fn(void *opaque) +{ + qemu_mutex_lock(&lock); + do_spawn_thread(); + qemu_mutex_unlock(&lock); +} + +static void spawn_thread(void) +{ + cur_threads++; + new_threads++; + /* If there are threads being created, they will spawn new workers, so + * we don't spend time creating many threads in a loop holding a mutex or + * starving the current vcpu. + * + * If there are no idle threads, ask the main thread to create one, so we + * inherit the correct affinity instead of the vcpu affinity. + */ + if (!pending_threads) { + qemu_bh_schedule(new_thread_bh); + } +} + +static void event_notifier_ready(EventNotifier *notifier) +{ + ThreadPoolElement *elem, *next; + + event_notifier_test_and_clear(notifier); +restart: + QLIST_FOREACH_SAFE(elem, &head, all, next) { + if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { + continue; + } + if (elem->state == THREAD_DONE) { + trace_thread_pool_complete(elem, elem->common.opaque, elem->ret); + } + if (elem->state == THREAD_DONE && elem->common.cb) { + QLIST_REMOVE(elem, all); + /* Read state before ret. */ + smp_rmb(); + elem->common.cb(elem->common.opaque, elem->ret); + qemu_aio_release(elem); + goto restart; + } else { + /* remove the request */ + QLIST_REMOVE(elem, all); + qemu_aio_release(elem); + } + } +} + +static int thread_pool_active(EventNotifier *notifier) +{ + return !QLIST_EMPTY(&head); +} + +static void thread_pool_cancel(BlockDriverAIOCB *acb) +{ + ThreadPoolElement *elem = (ThreadPoolElement *)acb; + + trace_thread_pool_cancel(elem, elem->common.opaque); + + qemu_mutex_lock(&lock); + if (elem->state == THREAD_QUEUED && + /* No thread has yet started working on elem. we can try to "steal" + * the item from the worker if we can get a signal from the + * semaphore. Because this is non-blocking, we can do it with + * the lock taken and ensure that elem will remain THREAD_QUEUED. + */ + qemu_sem_timedwait(&sem, 0) == 0) { + QTAILQ_REMOVE(&request_list, elem, reqs); + elem->state = THREAD_CANCELED; + event_notifier_set(¬ifier); + } else { + pending_cancellations++; + while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { + qemu_cond_wait(&check_cancel, &lock); + } + pending_cancellations--; + } + qemu_mutex_unlock(&lock); +} + +static AIOPool thread_pool_cb_pool = { + .aiocb_size = sizeof(ThreadPoolElement), + .cancel = thread_pool_cancel, +}; + +BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, + BlockDriverCompletionFunc *cb, void *opaque) +{ + ThreadPoolElement *req; + + req = qemu_aio_get(&thread_pool_cb_pool, NULL, cb, opaque); + req->func = func; + req->arg = arg; + req->state = THREAD_QUEUED; + + QLIST_INSERT_HEAD(&head, req, all); + + trace_thread_pool_submit(req, arg); + + qemu_mutex_lock(&lock); + if (idle_threads == 0 && cur_threads < max_threads) { + spawn_thread(); + } + QTAILQ_INSERT_TAIL(&request_list, req, reqs); + qemu_mutex_unlock(&lock); + qemu_sem_post(&sem); + return &req->common; +} + +typedef struct ThreadPoolCo { + Coroutine *co; + int ret; +} ThreadPoolCo; + +static void thread_pool_co_cb(void *opaque, int ret) +{ + ThreadPoolCo *co = opaque; + + co->ret = ret; + qemu_coroutine_enter(co->co, NULL); +} + +int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) +{ + ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS }; + assert(qemu_in_coroutine()); + thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc); + qemu_coroutine_yield(); + return tpc.ret; +} + +void thread_pool_submit(ThreadPoolFunc *func, void *arg) +{ + thread_pool_submit_aio(func, arg, NULL, NULL); +} + +static void thread_pool_init(void) +{ + QLIST_INIT(&head); + event_notifier_init(¬ifier, false); + qemu_mutex_init(&lock); + qemu_cond_init(&check_cancel); + qemu_sem_init(&sem, 0); + qemu_aio_set_event_notifier(¬ifier, event_notifier_ready, + thread_pool_active); + + QTAILQ_INIT(&request_list); + new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL); +} + +block_init(thread_pool_init) diff --git a/thread-pool.h b/thread-pool.h new file mode 100644 index 0000000..378a4ac --- /dev/null +++ b/thread-pool.h @@ -0,0 +1,34 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_THREAD_POOL_H +#define QEMU_THREAD_POOL_H 1 + +#include "qemu-common.h" +#include "qemu-queue.h" +#include "qemu-thread.h" +#include "qemu-coroutine.h" +#include "block_int.h" + +typedef int ThreadPoolFunc(void *opaque); + +BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, + BlockDriverCompletionFunc *cb, void *opaque); +int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); +void thread_pool_submit(ThreadPoolFunc *func, void *arg); + +#endif diff --git a/trace-events b/trace-events index 7ee21e5..066cdaf 100644 --- a/trace-events +++ b/trace-events @@ -98,6 +98,11 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" +# thread-pool.c +thread_pool_submit(void *req, void *opaque) "req %p opaque %p" +thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d" +thread_pool_cancel(void *req, void *opaque) "req %p opaque %p" + # posix-aio-compat.c paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d" paio_complete(void *acb, void *opaque, int ret) "acb %p opaque %p ret %d" @@ -2441,11 +2441,6 @@ static void free_and_trace(gpointer mem) free(mem); } -int qemu_init_main_loop(void) -{ - return main_loop_init(); -} - int main(int argc, char **argv, char **envp) { int i; @@ -2574,6 +2569,11 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_M: machine = machine_parse(optarg); break; + case QEMU_OPTION_no_kvm_irqchip: { + olist = qemu_find_opts("machine"); + qemu_opts_parse(olist, "kernel_irqchip=off", 0); + break; + } case QEMU_OPTION_cpu: /* hw initialization will check this */ cpu_model = optarg; @@ -3166,6 +3166,30 @@ int main(int argc, char **argv, char **envp) machine = machine_parse(optarg); } break; + case QEMU_OPTION_no_kvm: + olist = qemu_find_opts("machine"); + qemu_opts_parse(olist, "accel=tcg", 0); + break; + case QEMU_OPTION_no_kvm_pit: { + fprintf(stderr, "Warning: KVM PIT can no longer be disabled " + "separately.\n"); + break; + } + case QEMU_OPTION_no_kvm_pit_reinjection: { + static GlobalProperty kvm_pit_lost_tick_policy[] = { + { + .driver = "kvm-pit", + .property = "lost_tick_policy", + .value = "discard", + }, + { /* end of list */ } + }; + + fprintf(stderr, "Warning: option deprecated, use " + "lost_tick_policy property of kvm-pit instead.\n"); + qdev_prop_register_global_list(kvm_pit_lost_tick_policy); + break; + } case QEMU_OPTION_usb: machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0); if (machine_opts) { @@ -3255,6 +3279,10 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_semihosting: semihosting_enabled = 1; break; + case QEMU_OPTION_tdf: + fprintf(stderr, "Warning: user space PIT time drift fix " + "is no longer supported.\n"); + break; case QEMU_OPTION_name: qemu_name = g_strdup(optarg); { @@ -3418,6 +3446,12 @@ int main(int argc, char **argv, char **envp) } loc_set_none(); + qemu_init_cpu_loop(); + if (qemu_init_main_loop()) { + fprintf(stderr, "qemu_init_main_loop failed\n"); + exit(1); + } + if (qemu_opts_foreach(qemu_find_opts("sandbox"), parse_sandbox, NULL, 0)) { exit(1); } @@ -3585,12 +3619,6 @@ int main(int argc, char **argv, char **envp) configure_accelerator(); - qemu_init_cpu_loop(); - if (qemu_init_main_loop()) { - fprintf(stderr, "qemu_init_main_loop failed\n"); - exit(1); - } - machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0); if (machine_opts) { kernel_filename = qemu_opt_get(machine_opts, "kernel"); @@ -139,6 +139,7 @@ extern const VMStateInfo vmstate_info_uint64; extern const VMStateInfo vmstate_info_timer; extern const VMStateInfo vmstate_info_buffer; extern const VMStateInfo vmstate_info_unused_buffer; +extern const VMStateInfo vmstate_info_bitmap; #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) #define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0) @@ -411,6 +412,18 @@ extern const VMStateInfo vmstate_info_unused_buffer; .flags = VMS_BUFFER, \ } +/* _field_size should be a int32_t field in the _state struct giving the + * size of the bitmap _field in bits. + */ +#define VMSTATE_BITMAP(_field, _state, _version, _field_size) { \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\ + .info = &vmstate_info_bitmap, \ + .flags = VMS_VBUFFER|VMS_POINTER, \ + .offset = offsetof(_state, _field), \ +} + /* _f : field name _f_n : num of elements field_name _n : num of elements |