aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.objs2
-rwxr-xr-xblock/blkreplay.c15
-rw-r--r--bsd-user/main.c33
-rwxr-xr-xconfigure3
-rw-r--r--cpu-exec.c12
-rw-r--r--cpus-common.c352
-rw-r--r--cpus.c100
-rw-r--r--docs/tcg-exclusive.promela225
-rw-r--r--exec.c37
-rw-r--r--hw/i386/amd_iommu.c16
-rw-r--r--hw/i386/intel_iommu.c18
-rw-r--r--hw/i386/kvm/apic.c5
-rw-r--r--hw/i386/kvmvapic.c6
-rw-r--r--hw/intc/ioapic.c2
-rw-r--r--hw/ppc/ppce500_spin.c31
-rw-r--r--hw/ppc/spapr.c6
-rw-r--r--hw/ppc/spapr_hcall.c17
-rw-r--r--hw/ppc/spapr_iommu.c18
-rw-r--r--hw/vfio/common.c4
-rw-r--r--include/exec/cpu-common.h5
-rw-r--r--include/exec/exec-all.h11
-rw-r--r--include/exec/memory.h63
-rw-r--r--include/exec/tb-context.h2
-rw-r--r--include/hw/compat.h4
-rw-r--r--include/hw/vfio/vfio-common.h2
-rw-r--r--include/qemu/compiler.h6
-rw-r--r--include/qom/cpu.h102
-rw-r--r--include/sysemu/replay.h4
-rw-r--r--kvm-all.c21
-rw-r--r--linux-user/main.c130
-rw-r--r--memory.c106
-rw-r--r--migration/ram.c2
-rw-r--r--replay/Makefile.objs1
-rw-r--r--replay/replay-events.c10
-rw-r--r--replay/replay-internal.c20
-rw-r--r--replay/replay-internal.h23
-rw-r--r--replay/replay-snapshot.c61
-rw-r--r--replay/replay-time.c2
-rw-r--r--replay/replay.c16
-rwxr-xr-xscripts/checkpatch.pl2
-rw-r--r--stubs/replay.c5
-rw-r--r--target-i386/helper.c19
-rw-r--r--target-i386/kvm.c6
-rw-r--r--target-s390x/cpu.c4
-rw-r--r--target-s390x/cpu.h7
-rw-r--r--target-s390x/kvm.c98
-rw-r--r--target-s390x/misc_helper.c4
-rw-r--r--translate-all.c38
-rw-r--r--vl.c2
49 files changed, 1157 insertions, 521 deletions
diff --git a/Makefile.objs b/Makefile.objs
index 7301544..a8e0224 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -89,7 +89,7 @@ endif
#######################################################################
# Target-independent parts used in system and user emulation
-common-obj-y += tcg-runtime.o
+common-obj-y += tcg-runtime.o cpus-common.o
common-obj-y += hw/
common-obj-y += qom/
common-obj-y += disas/
diff --git a/block/blkreplay.c b/block/blkreplay.c
index 30f9d5f..a741654 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -20,11 +20,6 @@ typedef struct Request {
QEMUBH *bh;
} Request;
-/* Next request id.
- This counter is global, because requests from different
- block devices should not get overlapping ids. */
-static uint64_t request_id;
-
static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -84,7 +79,7 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- uint64_t reqid = request_id++;
+ uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -95,7 +90,7 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- uint64_t reqid = request_id++;
+ uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -106,7 +101,7 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int count, BdrvRequestFlags flags)
{
- uint64_t reqid = request_id++;
+ uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -117,7 +112,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
int64_t offset, int count)
{
- uint64_t reqid = request_id++;
+ uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -127,7 +122,7 @@ static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
{
- uint64_t reqid = request_id++;
+ uint64_t reqid = blkreplay_next_id();
int ret = bdrv_co_flush(bs->file->bs);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 0fb08e4..35125b7 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -67,23 +67,6 @@ int cpu_get_pic_interrupt(CPUX86State *env)
}
#endif
-/* These are no-ops because we are not threadsafe. */
-static inline void cpu_exec_start(CPUArchState *env)
-{
-}
-
-static inline void cpu_exec_end(CPUArchState *env)
-{
-}
-
-static inline void start_exclusive(void)
-{
-}
-
-static inline void end_exclusive(void)
-{
-}
-
void fork_start(void)
{
}
@@ -95,14 +78,6 @@ void fork_end(int child)
}
}
-void cpu_list_lock(void)
-{
-}
-
-void cpu_list_unlock(void)
-{
-}
-
#ifdef TARGET_I386
/***********************************************************/
/* CPUX86 core interface */
@@ -172,7 +147,11 @@ void cpu_loop(CPUX86State *env)
//target_siginfo_t info;
for(;;) {
+ cpu_exec_start(cs);
trapnr = cpu_exec(cs);
+ cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case 0x80:
/* syscall from int $0x80 */
@@ -513,7 +492,10 @@ void cpu_loop(CPUSPARCState *env)
//target_siginfo_t info;
while (1) {
+ cpu_exec_start(cs);
trapnr = cpu_exec(cs);
+ cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
switch (trapnr) {
#ifndef TARGET_SPARC64
@@ -748,6 +730,7 @@ int main(int argc, char **argv)
if (argc <= 1)
usage();
+ qemu_init_cpu_list();
module_call_init(MODULE_INIT_QOM);
if ((envlist = envlist_create()) == NULL) {
diff --git a/configure b/configure
index 1fb343d..df4a247 100755
--- a/configure
+++ b/configure
@@ -2988,7 +2988,7 @@ for i in $glib_modules; do
if $pkg_config --atleast-version=$glib_req_ver $i; then
glib_cflags=$($pkg_config --cflags $i)
glib_libs=$($pkg_config --libs $i)
- CFLAGS="$glib_cflags $CFLAGS"
+ QEMU_CFLAGS="$glib_cflags $QEMU_CFLAGS"
LIBS="$glib_libs $LIBS"
libs_qga="$glib_libs $libs_qga"
else
@@ -5195,7 +5195,6 @@ fi
if test "$glib_subprocess" = "yes" ; then
echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak
fi
-echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
if test "$gtk" = "yes" ; then
echo "CONFIG_GTK=y" >> $config_host_mak
echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
diff --git a/cpu-exec.c b/cpu-exec.c
index 9f4bd0b..8823d23 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -204,20 +204,16 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
TranslationBlock *orig_tb, bool ignore_icount)
{
TranslationBlock *tb;
- bool old_tb_flushed;
/* Should never happen.
We only end up here when an existing TB is too long. */
if (max_cycles > CF_COUNT_MASK)
max_cycles = CF_COUNT_MASK;
- old_tb_flushed = cpu->tb_flushed;
- cpu->tb_flushed = false;
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
- tb->orig_tb = cpu->tb_flushed ? NULL : orig_tb;
- cpu->tb_flushed |= old_tb_flushed;
+ tb->orig_tb = orig_tb;
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
cpu_tb_exec(cpu, tb);
@@ -338,10 +334,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
tb_lock();
have_tb_lock = true;
}
- /* Check if translation buffer has been flushed */
- if (cpu->tb_flushed) {
- cpu->tb_flushed = false;
- } else if (!tb->invalid) {
+ if (!tb->invalid) {
tb_add_jump(last_tb, tb_exit, tb);
}
}
@@ -606,7 +599,6 @@ int cpu_exec(CPUState *cpu)
break;
}
- atomic_mb_set(&cpu->tb_flushed, false); /* reset before first TB lookup */
for(;;) {
cpu_handle_interrupt(cpu, &last_tb);
tb = tb_find(cpu, last_tb, tb_exit);
diff --git a/cpus-common.c b/cpus-common.c
new file mode 100644
index 0000000..3e11452
--- /dev/null
+++ b/cpus-common.c
@@ -0,0 +1,352 @@
+/*
+ * CPU thread main loop - common bits for user and system mode emulation
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "exec/cpu-common.h"
+#include "qom/cpu.h"
+#include "sysemu/cpus.h"
+
+static QemuMutex qemu_cpu_list_lock;
+static QemuCond exclusive_cond;
+static QemuCond exclusive_resume;
+static QemuCond qemu_work_cond;
+
+/* >= 1 if a thread is inside start_exclusive/end_exclusive. Written
+ * under qemu_cpu_list_lock, read with atomic operations.
+ */
+static int pending_cpus;
+
+void qemu_init_cpu_list(void)
+{
+ /* This is needed because qemu_init_cpu_list is also called by the
+ * child process in a fork. */
+ pending_cpus = 0;
+
+ qemu_mutex_init(&qemu_cpu_list_lock);
+ qemu_cond_init(&exclusive_cond);
+ qemu_cond_init(&exclusive_resume);
+ qemu_cond_init(&qemu_work_cond);
+}
+
+void cpu_list_lock(void)
+{
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+}
+
+void cpu_list_unlock(void)
+{
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+}
+
+static bool cpu_index_auto_assigned;
+
+static int cpu_get_free_index(void)
+{
+ CPUState *some_cpu;
+ int cpu_index = 0;
+
+ cpu_index_auto_assigned = true;
+ CPU_FOREACH(some_cpu) {
+ cpu_index++;
+ }
+ return cpu_index;
+}
+
+static void finish_safe_work(CPUState *cpu)
+{
+ cpu_exec_start(cpu);
+ cpu_exec_end(cpu);
+}
+
+void cpu_list_add(CPUState *cpu)
+{
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
+ cpu->cpu_index = cpu_get_free_index();
+ assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
+ } else {
+ assert(!cpu_index_auto_assigned);
+ }
+ QTAILQ_INSERT_TAIL(&cpus, cpu, node);
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+
+ finish_safe_work(cpu);
+}
+
+void cpu_list_remove(CPUState *cpu)
+{
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ if (!QTAILQ_IN_USE(cpu, node)) {
+ /* there is nothing to undo since cpu_exec_init() hasn't been called */
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+ return;
+ }
+
+ assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
+
+ QTAILQ_REMOVE(&cpus, cpu, node);
+ cpu->cpu_index = UNASSIGNED_CPU_INDEX;
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+}
+
+struct qemu_work_item {
+ struct qemu_work_item *next;
+ run_on_cpu_func func;
+ void *data;
+ bool free, exclusive, done;
+};
+
+static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
+{
+ qemu_mutex_lock(&cpu->work_mutex);
+ if (cpu->queued_work_first == NULL) {
+ cpu->queued_work_first = wi;
+ } else {
+ cpu->queued_work_last->next = wi;
+ }
+ cpu->queued_work_last = wi;
+ wi->next = NULL;
+ wi->done = false;
+ qemu_mutex_unlock(&cpu->work_mutex);
+
+ qemu_cpu_kick(cpu);
+}
+
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
+ QemuMutex *mutex)
+{
+ struct qemu_work_item wi;
+
+ if (qemu_cpu_is_self(cpu)) {
+ func(cpu, data);
+ return;
+ }
+
+ wi.func = func;
+ wi.data = data;
+ wi.done = false;
+ wi.free = false;
+ wi.exclusive = false;
+
+ queue_work_on_cpu(cpu, &wi);
+ while (!atomic_mb_read(&wi.done)) {
+ CPUState *self_cpu = current_cpu;
+
+ qemu_cond_wait(&qemu_work_cond, mutex);
+ current_cpu = self_cpu;
+ }
+}
+
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+ struct qemu_work_item *wi;
+
+ wi = g_malloc0(sizeof(struct qemu_work_item));
+ wi->func = func;
+ wi->data = data;
+ wi->free = true;
+
+ queue_work_on_cpu(cpu, wi);
+}
+
+/* Wait for pending exclusive operations to complete. The CPU list lock
+ must be held. */
+static inline void exclusive_idle(void)
+{
+ while (pending_cpus) {
+ qemu_cond_wait(&exclusive_resume, &qemu_cpu_list_lock);
+ }
+}
+
+/* Start an exclusive operation.
+ Must only be called from outside cpu_exec. */
+void start_exclusive(void)
+{
+ CPUState *other_cpu;
+ int running_cpus;
+
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ exclusive_idle();
+
+ /* Make all other cpus stop executing. */
+ atomic_set(&pending_cpus, 1);
+
+ /* Write pending_cpus before reading other_cpu->running. */
+ smp_mb();
+ running_cpus = 0;
+ CPU_FOREACH(other_cpu) {
+ if (atomic_read(&other_cpu->running)) {
+ other_cpu->has_waiter = true;
+ running_cpus++;
+ qemu_cpu_kick(other_cpu);
+ }
+ }
+
+ atomic_set(&pending_cpus, running_cpus + 1);
+ while (pending_cpus > 1) {
+ qemu_cond_wait(&exclusive_cond, &qemu_cpu_list_lock);
+ }
+
+ /* Can release mutex, no one will enter another exclusive
+ * section until end_exclusive resets pending_cpus to 0.
+ */
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+}
+
+/* Finish an exclusive operation. */
+void end_exclusive(void)
+{
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ atomic_set(&pending_cpus, 0);
+ qemu_cond_broadcast(&exclusive_resume);
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+}
+
+/* Wait for exclusive ops to finish, and begin cpu execution. */
+void cpu_exec_start(CPUState *cpu)
+{
+ atomic_set(&cpu->running, true);
+
+ /* Write cpu->running before reading pending_cpus. */
+ smp_mb();
+
+ /* 1. start_exclusive saw cpu->running == true and pending_cpus >= 1.
+ * After taking the lock we'll see cpu->has_waiter == true and run---not
+ * for long because start_exclusive kicked us. cpu_exec_end will
+ * decrement pending_cpus and signal the waiter.
+ *
+ * 2. start_exclusive saw cpu->running == false but pending_cpus >= 1.
+ * This includes the case when an exclusive item is running now.
+ * Then we'll see cpu->has_waiter == false and wait for the item to
+ * complete.
+ *
+ * 3. pending_cpus == 0. Then start_exclusive is definitely going to
+ * see cpu->running == true, and it will kick the CPU.
+ */
+ if (unlikely(atomic_read(&pending_cpus))) {
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ if (!cpu->has_waiter) {
+ /* Not counted in pending_cpus, let the exclusive item
+ * run. Since we have the lock, just set cpu->running to true
+ * while holding it; no need to check pending_cpus again.
+ */
+ atomic_set(&cpu->running, false);
+ exclusive_idle();
+ /* Now pending_cpus is zero. */
+ atomic_set(&cpu->running, true);
+ } else {
+ /* Counted in pending_cpus, go ahead and release the
+ * waiter at cpu_exec_end.
+ */
+ }
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+ }
+}
+
+/* Mark cpu as not executing, and release pending exclusive ops. */
+void cpu_exec_end(CPUState *cpu)
+{
+ atomic_set(&cpu->running, false);
+
+ /* Write cpu->running before reading pending_cpus. */
+ smp_mb();
+
+ /* 1. start_exclusive saw cpu->running == true. Then it will increment
+ * pending_cpus and wait for exclusive_cond. After taking the lock
+ * we'll see cpu->has_waiter == true.
+ *
+ * 2. start_exclusive saw cpu->running == false but here pending_cpus >= 1.
+ * This includes the case when an exclusive item started after setting
+ * cpu->running to false and before we read pending_cpus. Then we'll see
+ * cpu->has_waiter == false and not touch pending_cpus. The next call to
+ * cpu_exec_start will run exclusive_idle if still necessary, thus waiting
+ * for the item to complete.
+ *
+ * 3. pending_cpus == 0. Then start_exclusive is definitely going to
+ * see cpu->running == false, and it can ignore this CPU until the
+ * next cpu_exec_start.
+ */
+ if (unlikely(atomic_read(&pending_cpus))) {
+ qemu_mutex_lock(&qemu_cpu_list_lock);
+ if (cpu->has_waiter) {
+ cpu->has_waiter = false;
+ atomic_set(&pending_cpus, pending_cpus - 1);
+ if (pending_cpus == 1) {
+ qemu_cond_signal(&exclusive_cond);
+ }
+ }
+ qemu_mutex_unlock(&qemu_cpu_list_lock);
+ }
+}
+
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+ struct qemu_work_item *wi;
+
+ wi = g_malloc0(sizeof(struct qemu_work_item));
+ wi->func = func;
+ wi->data = data;
+ wi->free = true;
+ wi->exclusive = true;
+
+ queue_work_on_cpu(cpu, wi);
+}
+
+void process_queued_cpu_work(CPUState *cpu)
+{
+ struct qemu_work_item *wi;
+
+ if (cpu->queued_work_first == NULL) {
+ return;
+ }
+
+ qemu_mutex_lock(&cpu->work_mutex);
+ while (cpu->queued_work_first != NULL) {
+ wi = cpu->queued_work_first;
+ cpu->queued_work_first = wi->next;
+ if (!cpu->queued_work_first) {
+ cpu->queued_work_last = NULL;
+ }
+ qemu_mutex_unlock(&cpu->work_mutex);
+ if (wi->exclusive) {
+ /* Running work items outside the BQL avoids the following deadlock:
+ * 1) start_exclusive() is called with the BQL taken while another
+ * CPU is running; 2) cpu_exec in the other CPU tries to takes the
+ * BQL, so it goes to sleep; start_exclusive() is sleeping too, so
+ * neither CPU can proceed.
+ */
+ qemu_mutex_unlock_iothread();
+ start_exclusive();
+ wi->func(cpu, wi->data);
+ end_exclusive();
+ qemu_mutex_lock_iothread();
+ } else {
+ wi->func(cpu, wi->data);
+ }
+ qemu_mutex_lock(&cpu->work_mutex);
+ if (wi->free) {
+ g_free(wi);
+ } else {
+ atomic_mb_set(&wi->done, true);
+ }
+ }
+ qemu_mutex_unlock(&cpu->work_mutex);
+ qemu_cond_broadcast(&qemu_work_cond);
+}
diff --git a/cpus.c b/cpus.c
index e39ccb7..b2fbe33 100644
--- a/cpus.c
+++ b/cpus.c
@@ -557,9 +557,8 @@ static const VMStateDescription vmstate_timers = {
}
};
-static void cpu_throttle_thread(void *opaque)
+static void cpu_throttle_thread(CPUState *cpu, void *opaque)
{
- CPUState *cpu = opaque;
double pct;
double throttle_ratio;
long sleeptime_ns;
@@ -589,7 +588,7 @@ static void cpu_throttle_timer_tick(void *opaque)
}
CPU_FOREACH(cpu) {
if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
- async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
+ async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
}
}
@@ -751,6 +750,7 @@ static int do_vm_stop(RunState state)
}
bdrv_drain_all();
+ replay_disable_events();
ret = blk_flush_all();
return ret;
@@ -903,79 +903,21 @@ static QemuThread io_thread;
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
-static QemuCond qemu_work_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
- qemu_cond_init(&qemu_work_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
-void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
- struct qemu_work_item wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi.func = func;
- wi.data = data;
- wi.free = false;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = &wi;
- } else {
- cpu->queued_work_last->next = &wi;
- }
- cpu->queued_work_last = &wi;
- wi.next = NULL;
- wi.done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
- while (!atomic_mb_read(&wi.done)) {
- CPUState *self_cpu = current_cpu;
-
- qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
- current_cpu = self_cpu;
- }
-}
-
-void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
-{
- struct qemu_work_item *wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi = g_malloc0(sizeof(struct qemu_work_item));
- wi->func = func;
- wi->data = data;
- wi->free = true;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = wi;
- } else {
- cpu->queued_work_last->next = wi;
- }
- cpu->queued_work_last = wi;
- wi->next = NULL;
- wi->done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
+ do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
static void qemu_kvm_destroy_vcpu(CPUState *cpu)
@@ -990,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
-static void flush_queued_work(CPUState *cpu)
-{
- struct qemu_work_item *wi;
-
- if (cpu->queued_work_first == NULL) {
- return;
- }
-
- qemu_mutex_lock(&cpu->work_mutex);
- while (cpu->queued_work_first != NULL) {
- wi = cpu->queued_work_first;
- cpu->queued_work_first = wi->next;
- if (!cpu->queued_work_first) {
- cpu->queued_work_last = NULL;
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- wi->func(wi->data);
- qemu_mutex_lock(&cpu->work_mutex);
- if (wi->free) {
- g_free(wi);
- } else {
- atomic_mb_set(&wi->done, true);
- }
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- qemu_cond_broadcast(&qemu_work_cond);
-}
-
static void qemu_wait_io_event_common(CPUState *cpu)
{
if (cpu->stop) {
@@ -1025,7 +939,7 @@ static void qemu_wait_io_event_common(CPUState *cpu)
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
- flush_queued_work(cpu);
+ process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
@@ -1544,7 +1458,9 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
+ cpu_exec_start(cpu);
ret = cpu_exec(cpu);
+ cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
diff --git a/docs/tcg-exclusive.promela b/docs/tcg-exclusive.promela
new file mode 100644
index 0000000..c91cfca
--- /dev/null
+++ b/docs/tcg-exclusive.promela
@@ -0,0 +1,225 @@
+/*
+ * This model describes the implementation of exclusive sections in
+ * cpus-common.c (start_exclusive, end_exclusive, cpu_exec_start,
+ * cpu_exec_end).
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This file is in the public domain. If you really want a license,
+ * the WTFPL will do.
+ *
+ * To verify it:
+ * spin -a docs/tcg-exclusive.promela
+ * gcc pan.c -O2
+ * ./a.out -a
+ *
+ * Tunable processor macros: N_CPUS, N_EXCLUSIVE, N_CYCLES, USE_MUTEX,
+ * TEST_EXPENSIVE.
+ */
+
+// Define the missing parameters for the model
+#ifndef N_CPUS
+#define N_CPUS 2
+#warning defaulting to 2 CPU processes
+#endif
+
+// the expensive test is not so expensive for <= 2 CPUs
+// If the mutex is used, it's also cheap (300 MB / 4 seconds) for 3 CPUs
+// For 3 CPUs and the lock-free option it needs 1.5 GB of RAM
+#if N_CPUS <= 2 || (N_CPUS <= 3 && defined USE_MUTEX)
+#define TEST_EXPENSIVE
+#endif
+
+#ifndef N_EXCLUSIVE
+# if !defined N_CYCLES || N_CYCLES <= 1 || defined TEST_EXPENSIVE
+# define N_EXCLUSIVE 2
+# warning defaulting to 2 concurrent exclusive sections
+# else
+# define N_EXCLUSIVE 1
+# warning defaulting to 1 concurrent exclusive sections
+# endif
+#endif
+#ifndef N_CYCLES
+# if N_EXCLUSIVE <= 1 || defined TEST_EXPENSIVE
+# define N_CYCLES 2
+# warning defaulting to 2 CPU cycles
+# else
+# define N_CYCLES 1
+# warning defaulting to 1 CPU cycles
+# endif
+#endif
+
+
+// synchronization primitives. condition variables require a
+// process-local "cond_t saved;" variable.
+
+#define mutex_t byte
+#define MUTEX_LOCK(m) atomic { m == 0 -> m = 1 }
+#define MUTEX_UNLOCK(m) m = 0
+
+#define cond_t int
+#define COND_WAIT(c, m) { \
+ saved = c; \
+ MUTEX_UNLOCK(m); \
+ c != saved -> MUTEX_LOCK(m); \
+ }
+#define COND_BROADCAST(c) c++
+
+// this is the logic from cpus-common.c
+
+mutex_t mutex;
+cond_t exclusive_cond;
+cond_t exclusive_resume;
+byte pending_cpus;
+
+byte running[N_CPUS];
+byte has_waiter[N_CPUS];
+
+#define exclusive_idle() \
+ do \
+ :: pending_cpus -> COND_WAIT(exclusive_resume, mutex); \
+ :: else -> break; \
+ od
+
+#define start_exclusive() \
+ MUTEX_LOCK(mutex); \
+ exclusive_idle(); \
+ pending_cpus = 1; \
+ \
+ i = 0; \
+ do \
+ :: i < N_CPUS -> { \
+ if \
+ :: running[i] -> has_waiter[i] = 1; pending_cpus++; \
+ :: else -> skip; \
+ fi; \
+ i++; \
+ } \
+ :: else -> break; \
+ od; \
+ \
+ do \
+ :: pending_cpus > 1 -> COND_WAIT(exclusive_cond, mutex); \
+ :: else -> break; \
+ od; \
+ MUTEX_UNLOCK(mutex);
+
+#define end_exclusive() \
+ MUTEX_LOCK(mutex); \
+ pending_cpus = 0; \
+ COND_BROADCAST(exclusive_resume); \
+ MUTEX_UNLOCK(mutex);
+
+#ifdef USE_MUTEX
+// Simple version using mutexes
+#define cpu_exec_start(id) \
+ MUTEX_LOCK(mutex); \
+ exclusive_idle(); \
+ running[id] = 1; \
+ MUTEX_UNLOCK(mutex);
+
+#define cpu_exec_end(id) \
+ MUTEX_LOCK(mutex); \
+ running[id] = 0; \
+ if \
+ :: pending_cpus -> { \
+ pending_cpus--; \
+ if \
+ :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \
+ :: else -> skip; \
+ fi; \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex);
+#else
+// Wait-free fast path, only needs mutex when concurrent with
+// an exclusive section
+#define cpu_exec_start(id) \
+ running[id] = 1; \
+ if \
+ :: pending_cpus -> { \
+ MUTEX_LOCK(mutex); \
+ if \
+ :: !has_waiter[id] -> { \
+ running[id] = 0; \
+ exclusive_idle(); \
+ running[id] = 1; \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex); \
+ } \
+ :: else -> skip; \
+ fi;
+
+#define cpu_exec_end(id) \
+ running[id] = 0; \
+ if \
+ :: pending_cpus -> { \
+ MUTEX_LOCK(mutex); \
+ if \
+ :: has_waiter[id] -> { \
+ has_waiter[id] = 0; \
+ pending_cpus--; \
+ if \
+ :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \
+ :: else -> skip; \
+ fi; \
+ } \
+ :: else -> skip; \
+ fi; \
+ MUTEX_UNLOCK(mutex); \
+ } \
+ :: else -> skip; \
+ fi
+#endif
+
+// Promela processes
+
+byte done_cpu;
+byte in_cpu;
+active[N_CPUS] proctype cpu()
+{
+ byte id = _pid % N_CPUS;
+ byte cycles = 0;
+ cond_t saved;
+
+ do
+ :: cycles == N_CYCLES -> break;
+ :: else -> {
+ cycles++;
+ cpu_exec_start(id)
+ in_cpu++;
+ done_cpu++;
+ in_cpu--;
+ cpu_exec_end(id)
+ }
+ od;
+}
+
+byte done_exclusive;
+byte in_exclusive;
+active[N_EXCLUSIVE] proctype exclusive()
+{
+ cond_t saved;
+ byte i;
+
+ start_exclusive();
+ in_exclusive = 1;
+ done_exclusive++;
+ in_exclusive = 0;
+ end_exclusive();
+}
+
+#define LIVENESS (done_cpu == N_CPUS * N_CYCLES && done_exclusive == N_EXCLUSIVE)
+#define SAFETY !(in_exclusive && in_cpu)
+
+never { /* ! ([] SAFETY && <> [] LIVENESS) */
+ do
+ // once the liveness property is satisfied, this is not executable
+ // and the never clause is not accepted
+ :: ! LIVENESS -> accept_liveness: skip
+ :: 1 -> assert(SAFETY)
+ od;
+}
diff --git a/exec.c b/exec.c
index c81d5ab..c8389f9 100644
--- a/exec.c
+++ b/exec.c
@@ -598,36 +598,11 @@ AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
}
#endif
-static bool cpu_index_auto_assigned;
-
-static int cpu_get_free_index(void)
-{
- CPUState *some_cpu;
- int cpu_index = 0;
-
- cpu_index_auto_assigned = true;
- CPU_FOREACH(some_cpu) {
- cpu_index++;
- }
- return cpu_index;
-}
-
void cpu_exec_exit(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
- cpu_list_lock();
- if (!QTAILQ_IN_USE(cpu, node)) {
- /* there is nothing to undo since cpu_exec_init() hasn't been called */
- cpu_list_unlock();
- return;
- }
-
- assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
-
- QTAILQ_REMOVE(&cpus, cpu, node);
- cpu->cpu_index = UNASSIGNED_CPU_INDEX;
- cpu_list_unlock();
+ cpu_list_remove(cpu);
if (cc->vmsd != NULL) {
vmstate_unregister(NULL, cc->vmsd, cpu);
@@ -663,15 +638,7 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
object_ref(OBJECT(cpu->memory));
#endif
- cpu_list_lock();
- if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) {
- cpu->cpu_index = cpu_get_free_index();
- assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
- } else {
- assert(!cpu_index_auto_assigned);
- }
- QTAILQ_INSERT_TAIL(&cpus, cpu, node);
- cpu_list_unlock();
+ cpu_list_add(cpu);
#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index a91a179..023de52 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -21,6 +21,7 @@
*/
#include "qemu/osdep.h"
#include "hw/i386/amd_iommu.h"
+#include "qemu/error-report.h"
#include "trace.h"
/* used AMD-Vi MMIO registers */
@@ -1066,13 +1067,18 @@ static const MemoryRegionOps mmio_mem_ops = {
}
};
-static void amdvi_iommu_notify_started(MemoryRegion *iommu)
+static void amdvi_iommu_notify_flag_changed(MemoryRegion *iommu,
+ IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new)
{
AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
- hw_error("device %02x.%02x.%x requires iommu notifier which is not "
- "currently supported", as->bus_num, PCI_SLOT(as->devfn),
- PCI_FUNC(as->devfn));
+ if (new & IOMMU_NOTIFIER_MAP) {
+ error_report("device %02x.%02x.%x requires iommu notifier which is not "
+ "currently supported", as->bus_num, PCI_SLOT(as->devfn),
+ PCI_FUNC(as->devfn));
+ exit(1);
+ }
}
static void amdvi_init(AMDVIState *s)
@@ -1080,7 +1086,7 @@ static void amdvi_init(AMDVIState *s)
amdvi_iotlb_reset(s);
s->iommu_ops.translate = amdvi_translate;
- s->iommu_ops.notify_started = amdvi_iommu_notify_started;
+ s->iommu_ops.notify_flag_changed = amdvi_iommu_notify_flag_changed;
s->devtab_len = 0;
s->cmdbuf_len = 0;
s->cmdbuf_head = 0;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d6e02c8..9f4e64a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1974,14 +1974,20 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
return ret;
}
-static void vtd_iommu_notify_started(MemoryRegion *iommu)
+static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
+ IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new)
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
- hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
- "is currently not supported by intel-iommu emulation",
- vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
- PCI_FUNC(vtd_as->devfn));
+ if (new & IOMMU_NOTIFIER_MAP) {
+ error_report("Device at bus %s addr %02x.%d requires iommu "
+ "notifier which is currently not supported by "
+ "intel-iommu emulation",
+ vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ exit(1);
+ }
}
static const VMStateDescription vtd_vmstate = {
@@ -2348,7 +2354,7 @@ static void vtd_init(IntelIOMMUState *s)
memset(s->womask, 0, DMAR_REG_SIZE);
s->iommu_ops.translate = vtd_iommu_translate;
- s->iommu_ops.notify_started = vtd_iommu_notify_started;
+ s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
s->root = 0;
s->root_extended = false;
s->dmar_enabled = false;
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index f57fed1..c016e63 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -125,7 +125,7 @@ static void kvm_apic_vapic_base_update(APICCommonState *s)
}
}
-static void kvm_apic_put(void *data)
+static void kvm_apic_put(CPUState *cs, void *data)
{
APICCommonState *s = data;
struct kvm_lapic_state kapic;
@@ -146,10 +146,9 @@ static void kvm_apic_post_load(APICCommonState *s)
run_on_cpu(CPU(s->cpu), kvm_apic_put, s);
}
-static void do_inject_external_nmi(void *data)
+static void do_inject_external_nmi(CPUState *cpu, void *data)
{
APICCommonState *s = data;
- CPUState *cpu = CPU(s->cpu);
uint32_t lvt;
int ret;
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index a1cd9b5..74a549b 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -483,7 +483,7 @@ typedef struct VAPICEnableTPRReporting {
bool enable;
} VAPICEnableTPRReporting;
-static void vapic_do_enable_tpr_reporting(void *data)
+static void vapic_do_enable_tpr_reporting(CPUState *cpu, void *data)
{
VAPICEnableTPRReporting *info = data;
@@ -734,10 +734,10 @@ static void vapic_realize(DeviceState *dev, Error **errp)
nb_option_roms++;
}
-static void do_vapic_enable(void *data)
+static void do_vapic_enable(CPUState *cs, void *data)
{
VAPICROMState *s = data;
- X86CPU *cpu = X86_CPU(first_cpu);
+ X86CPU *cpu = X86_CPU(cs);
static const uint8_t enabled = 1;
cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 31791b0..fd9208f 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -416,7 +416,7 @@ static void ioapic_realize(DeviceState *dev, Error **errp)
}
static Property ioapic_properties[] = {
- DEFINE_PROP_UINT8("version", IOAPICCommonState, version, 0x11),
+ DEFINE_PROP_UINT8("version", IOAPICCommonState, version, 0x20),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c
index 22c584e..8e16f65 100644
--- a/hw/ppc/ppce500_spin.c
+++ b/hw/ppc/ppce500_spin.c
@@ -54,11 +54,6 @@ typedef struct SpinState {
SpinInfo spin[MAX_CPUS];
} SpinState;
-typedef struct spin_kick {
- PowerPCCPU *cpu;
- SpinInfo *spin;
-} SpinKick;
-
static void spin_reset(void *opaque)
{
SpinState *s = opaque;
@@ -89,16 +84,15 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env,
env->tlb_dirty = true;
}
-static void spin_kick(void *data)
+static void spin_kick(CPUState *cs, void *data)
{
- SpinKick *kick = data;
- CPUState *cpu = CPU(kick->cpu);
- CPUPPCState *env = &kick->cpu->env;
- SpinInfo *curspin = kick->spin;
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ SpinInfo *curspin = data;
hwaddr map_size = 64 * 1024 * 1024;
hwaddr map_start;
- cpu_synchronize_state(cpu);
+ cpu_synchronize_state(cs);
stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]);
env->nip = ldq_p(&curspin->addr) & (map_size - 1);
env->gpr[3] = ldq_p(&curspin->r3);
@@ -112,10 +106,10 @@ static void spin_kick(void *data)
map_start = ldq_p(&curspin->addr) & ~(map_size - 1);
mmubooke_create_initial_mapping(env, 0, map_start, map_size);
- cpu->halted = 0;
- cpu->exception_index = -1;
- cpu->stopped = false;
- qemu_cpu_kick(cpu);
+ cs->halted = 0;
+ cs->exception_index = -1;
+ cs->stopped = false;
+ qemu_cpu_kick(cs);
}
static void spin_write(void *opaque, hwaddr addr, uint64_t value,
@@ -153,12 +147,7 @@ static void spin_write(void *opaque, hwaddr addr, uint64_t value,
if (!(ldq_p(&curspin->addr) & 1)) {
/* run CPU */
- SpinKick kick = {
- .cpu = POWERPC_CPU(cpu),
- .spin = curspin,
- };
-
- run_on_cpu(cpu, spin_kick, &kick);
+ run_on_cpu(cpu, spin_kick, curspin);
}
}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 648576e..14b6821 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2132,10 +2132,8 @@ static void spapr_machine_finalizefn(Object *obj)
g_free(spapr->kvm_type);
}
-static void ppc_cpu_do_nmi_on_cpu(void *arg)
+static void ppc_cpu_do_nmi_on_cpu(CPUState *cs, void *arg)
{
- CPUState *cs = arg;
-
cpu_synchronize_state(cs);
ppc_cpu_do_system_reset(cs);
}
@@ -2145,7 +2143,7 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
CPUState *cs;
CPU_FOREACH(cs) {
- async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
+ async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, NULL);
}
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 290a712..c5e7e8c 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -13,19 +13,18 @@
#include "kvm_ppc.h"
struct SPRSyncState {
- CPUState *cs;
int spr;
target_ulong value;
target_ulong mask;
};
-static void do_spr_sync(void *arg)
+static void do_spr_sync(CPUState *cs, void *arg)
{
struct SPRSyncState *s = arg;
- PowerPCCPU *cpu = POWERPC_CPU(s->cs);
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;
- cpu_synchronize_state(s->cs);
+ cpu_synchronize_state(cs);
env->spr[s->spr] &= ~s->mask;
env->spr[s->spr] |= s->value;
}
@@ -34,7 +33,6 @@ static void set_spr(CPUState *cs, int spr, target_ulong value,
target_ulong mask)
{
struct SPRSyncState s = {
- .cs = cs,
.spr = spr,
.value = value,
.mask = mask
@@ -909,17 +907,17 @@ static target_ulong cas_get_option_vector(int vector, target_ulong table)
}
typedef struct {
- PowerPCCPU *cpu;
uint32_t cpu_version;
Error *err;
} SetCompatState;
-static void do_set_compat(void *arg)
+static void do_set_compat(CPUState *cs, void *arg)
{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
SetCompatState *s = arg;
- cpu_synchronize_state(CPU(s->cpu));
- ppc_set_compat(s->cpu, s->cpu_version, &s->err);
+ cpu_synchronize_state(cs);
+ ppc_set_compat(cpu, s->cpu_version, &s->err);
}
#define get_compat_level(cpuver) ( \
@@ -1015,7 +1013,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
if (old_cpu_version != cpu_version) {
CPU_FOREACH(cs) {
SetCompatState s = {
- .cpu = POWERPC_CPU(cs),
.cpu_version = cpu_version,
.err = NULL,
};
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index f20b0b8..ae30bbe 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -156,14 +156,17 @@ static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu)
return 1ULL << tcet->page_shift;
}
-static void spapr_tce_notify_started(MemoryRegion *iommu)
+static void spapr_tce_notify_flag_changed(MemoryRegion *iommu,
+ IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new)
{
- spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
-}
+ struct sPAPRTCETable *tbl = container_of(iommu, sPAPRTCETable, iommu);
-static void spapr_tce_notify_stopped(MemoryRegion *iommu)
-{
- spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false);
+ if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
+ spapr_tce_set_need_vfio(tbl, true);
+ } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {
+ spapr_tce_set_need_vfio(tbl, false);
+ }
}
static int spapr_tce_table_post_load(void *opaque, int version_id)
@@ -246,8 +249,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
static MemoryRegionIOMMUOps spapr_iommu_ops = {
.translate = spapr_tce_translate_iommu,
.get_min_page_size = spapr_tce_get_min_page_size,
- .notify_started = spapr_tce_notify_started,
- .notify_stopped = spapr_tce_notify_stopped,
+ .notify_flag_changed = spapr_tce_notify_flag_changed,
};
static int spapr_tce_table_realize(DeviceState *dev)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b313e7c..29188a1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -293,11 +293,10 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-static void vfio_iommu_map_notify(Notifier *n, void *data)
+static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainer *container = giommu->container;
- IOMMUTLBEntry *iotlb = data;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
MemoryRegion *mr;
hwaddr xlat;
@@ -454,6 +453,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
section->offset_within_region;
giommu->container = container;
giommu->n.notify = vfio_iommu_map_notify;
+ giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 952bcfe..869ba41 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -23,6 +23,11 @@ typedef struct CPUListState {
FILE *file;
} CPUListState;
+/* The CPU list lock nests outside tb_lock/tb_unlock. */
+void qemu_init_cpu_list(void);
+void cpu_list_lock(void);
+void cpu_list_unlock(void);
+
#if !defined(CONFIG_USER_ONLY)
enum device_endian {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 008e09a..336a57c 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -56,17 +56,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
uint32_t flags,
int cflags);
-#if defined(CONFIG_USER_ONLY)
-void cpu_list_lock(void);
-void cpu_list_unlock(void);
-#else
-static inline void cpu_list_unlock(void)
-{
-}
-static inline void cpu_list_lock(void)
-{
-}
-#endif
void cpu_exec_init(CPUState *cpu, Error **errp);
void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 3e4d416..10d7eac 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -67,6 +67,27 @@ struct IOMMUTLBEntry {
IOMMUAccessFlags perm;
};
+/*
+ * Bitmap for different IOMMUNotifier capabilities. Each notifier can
+ * register with one or multiple IOMMU Notifier capability bit(s).
+ */
+typedef enum {
+ IOMMU_NOTIFIER_NONE = 0,
+ /* Notify cache invalidations */
+ IOMMU_NOTIFIER_UNMAP = 0x1,
+ /* Notify entry changes (newly created entries) */
+ IOMMU_NOTIFIER_MAP = 0x2,
+} IOMMUNotifierFlag;
+
+#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)
+
+struct IOMMUNotifier {
+ void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data);
+ IOMMUNotifierFlag notifier_flags;
+ QLIST_ENTRY(IOMMUNotifier) node;
+};
+typedef struct IOMMUNotifier IOMMUNotifier;
+
/* New-style MMIO accessors can indicate that the transaction failed.
* A zero (MEMTX_OK) response means success; anything else is a failure
* of some kind. The memory subsystem will bitwise-OR together results
@@ -153,10 +174,10 @@ struct MemoryRegionIOMMUOps {
IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
/* Returns minimum supported page size */
uint64_t (*get_min_page_size)(MemoryRegion *iommu);
- /* Called when the first notifier is set */
- void (*notify_started)(MemoryRegion *iommu);
- /* Called when the last notifier is removed */
- void (*notify_stopped)(MemoryRegion *iommu);
+ /* Called when IOMMU Notifier flag changed */
+ void (*notify_flag_changed)(MemoryRegion *iommu,
+ IOMMUNotifierFlag old_flags,
+ IOMMUNotifierFlag new_flags);
};
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -201,7 +222,8 @@ struct MemoryRegion {
const char *name;
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
- NotifierList iommu_notify;
+ QLIST_HEAD(, IOMMUNotifier) iommu_notify;
+ IOMMUNotifierFlag iommu_notify_flags;
};
/**
@@ -607,6 +629,15 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr);
/**
* memory_region_notify_iommu: notify a change in an IOMMU translation entry.
*
+ * The notification type will be decided by entry.perm bits:
+ *
+ * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE.
+ * - For MAP (newly added entry) notifies: set entry.perm to the
+ * permission of the page (which is definitely !IOMMU_NONE).
+ *
+ * Note: for any IOMMU implementation, an in-place mapping change
+ * should be notified with an UNMAP followed by a MAP.
+ *
* @mr: the memory region that was changed
* @entry: the new entry in the IOMMU translation table. The entry
* replaces all old entries for the same virtual I/O address range.
@@ -620,11 +651,12 @@ void memory_region_notify_iommu(MemoryRegion *mr,
* IOMMU translation entries.
*
* @mr: the memory region to observe
- * @n: the notifier to be added; the notifier receives a pointer to an
- * #IOMMUTLBEntry as the opaque value; the pointer ceases to be
- * valid on exit from the notifier.
+ * @n: the IOMMUNotifier to be added; the notify callback receives a
+ * pointer to an #IOMMUTLBEntry as the opaque value; the pointer
+ * ceases to be valid on exit from the notifier.
*/
-void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
+void memory_region_register_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n);
/**
* memory_region_iommu_replay: replay existing IOMMU translations to
@@ -636,7 +668,8 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
* @is_write: Whether to treat the replay as a translate "write"
* through the iommu
*/
-void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write);
+void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
+ bool is_write);
/**
* memory_region_unregister_iommu_notifier: unregister a notifier for
@@ -646,7 +679,8 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write);
* needs to be called
* @n: the notifier to be removed.
*/
-void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n);
+void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n);
/**
* memory_region_name: get a memory region's name
@@ -1154,12 +1188,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size);
/**
- * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
+ * memory_global_dirty_log_sync: synchronize the dirty log for all memory
*
- * Synchronizes the dirty page log for an entire address space.
- * @as: the address space that contains the memory being synchronized
+ * Synchronizes the dirty page log for all address spaces.
*/
-void address_space_sync_dirty_bitmap(AddressSpace *as);
+void memory_global_dirty_log_sync(void);
/**
* memory_region_transaction_begin: Start a transaction.
diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index dce95d9..c7f17f2 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -38,7 +38,7 @@ struct TBContext {
QemuMutex tb_lock;
/* statistics */
- int tb_flush_count;
+ unsigned tb_flush_count;
int tb_phys_invalidate_count;
};
diff --git a/include/hw/compat.h b/include/hw/compat.h
index a1d6694..46412b2 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -6,6 +6,10 @@
.driver = "virtio-pci",\
.property = "page-per-vq",\
.value = "on",\
+ },{\
+ .driver = "ioapic",\
+ .property = "version",\
+ .value = "0x11",\
},
#define HW_COMPAT_2_6 \
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 94dfae3..c17602e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -93,7 +93,7 @@ typedef struct VFIOGuestIOMMU {
VFIOContainer *container;
MemoryRegion *iommu;
hwaddr iommu_offset;
- Notifier n;
+ IOMMUNotifier n;
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
} VFIOGuestIOMMU;
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index 338d3a6..157698b 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -1,4 +1,8 @@
-/* public domain */
+/* compiler.h: macros to abstract away compiler specifics
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
#ifndef COMPILER_H
#define COMPILER_H
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index ce0c406..22b54d6 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -232,13 +232,8 @@ struct kvm_run;
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
/* work queue */
-struct qemu_work_item {
- struct qemu_work_item *next;
- void (*func)(void *data);
- void *data;
- int done;
- bool free;
-};
+typedef void (*run_on_cpu_func)(CPUState *cpu, void *data);
+struct qemu_work_item;
/**
* CPUState:
@@ -247,7 +242,9 @@ struct qemu_work_item {
* @nr_threads: Number of threads within this CPU.
* @numa_node: NUMA node this CPU is belonging to.
* @host_tid: Host thread ID.
- * @running: #true if CPU is currently running (usermode).
+ * @running: #true if CPU is currently running (lockless).
+ * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end;
+ * valid under cpu_list_lock.
* @created: Indicates whether the CPU thread has been successfully created.
* @interrupt_request: Indicates a pending interrupt request.
* @halted: Nonzero if the CPU is in suspended state.
@@ -257,7 +254,6 @@ struct qemu_work_item {
* @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
* @tcg_exit_req: Set to force TCG to stop executing linked TBs for this
* CPU and return to its top level loop.
- * @tb_flushed: Indicates the translation buffer has been flushed.
* @singlestep_enabled: Flags for single-stepping.
* @icount_extra: Instructions until next timer event.
* @icount_decr: Number of cycles left, with interrupt flag in high bit.
@@ -301,7 +297,7 @@ struct CPUState {
#endif
int thread_id;
uint32_t host_tid;
- bool running;
+ bool running, has_waiter;
struct QemuCond *halt_cond;
bool thread_kicked;
bool created;
@@ -310,7 +306,6 @@ struct CPUState {
bool unplug;
bool crash_occurred;
bool exit_request;
- bool tb_flushed;
uint32_t interrupt_request;
int singlestep_enabled;
int64_t icount_extra;
@@ -543,6 +538,18 @@ static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
#endif
/**
+ * cpu_list_add:
+ * @cpu: The CPU to be added to the list of CPUs.
+ */
+void cpu_list_add(CPUState *cpu);
+
+/**
+ * cpu_list_remove:
+ * @cpu: The CPU to be removed from the list of CPUs.
+ */
+void cpu_list_remove(CPUState *cpu);
+
+/**
* cpu_reset:
* @cpu: The CPU whose state is to be reset.
*/
@@ -616,6 +623,18 @@ void qemu_cpu_kick(CPUState *cpu);
bool cpu_is_stopped(CPUState *cpu);
/**
+ * do_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ * @mutex: Mutex to release while waiting for @func to run.
+ *
+ * Used internally in the implementation of run_on_cpu.
+ */
+void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data,
+ QemuMutex *mutex);
+
+/**
* run_on_cpu:
* @cpu: The vCPU to run on.
* @func: The function to be executed.
@@ -623,7 +642,7 @@ bool cpu_is_stopped(CPUState *cpu);
*
* Schedules the function @func for execution on the vCPU @cpu.
*/
-void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
/**
* async_run_on_cpu:
@@ -633,7 +652,21 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
*
* Schedules the function @func for execution on the vCPU @cpu asynchronously.
*/
-void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+
+/**
+ * async_safe_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously,
+ * while all other vCPUs are sleeping.
+ *
+ * Unlike run_on_cpu and async_run_on_cpu, the function is run outside the
+ * BQL.
+ */
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
/**
* qemu_get_cpu:
@@ -794,6 +827,49 @@ void cpu_remove(CPUState *cpu);
void cpu_remove_sync(CPUState *cpu);
/**
+ * process_queued_cpu_work() - process all items on CPU work queue
+ * @cpu: The CPU which work queue to process.
+ */
+void process_queued_cpu_work(CPUState *cpu);
+
+/**
+ * cpu_exec_start:
+ * @cpu: The CPU for the current thread.
+ *
+ * Record that a CPU has started execution and can be interrupted with
+ * cpu_exit.
+ */
+void cpu_exec_start(CPUState *cpu);
+
+/**
+ * cpu_exec_end:
+ * @cpu: The CPU for the current thread.
+ *
+ * Record that a CPU has stopped execution and exclusive sections
+ * can be executed without interrupting it.
+ */
+void cpu_exec_end(CPUState *cpu);
+
+/**
+ * start_exclusive:
+ *
+ * Wait for a concurrent exclusive section to end, and then start
+ * a section of work that is run while other CPUs are not running
+ * between cpu_exec_start and cpu_exec_end. CPUs that are running
+ * cpu_exec are exited immediately. CPUs that call cpu_exec_start
+ * during the exclusive section go to sleep until this CPU calls
+ * end_exclusive.
+ */
+void start_exclusive(void);
+
+/**
+ * end_exclusive:
+ *
+ * Concludes an exclusive execution section started by start_exclusive.
+ */
+void end_exclusive(void);
+
+/**
* qemu_init_vcpu:
* @cpu: The vCPU to initialize.
*
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 0a88393..f80d6d2 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -105,6 +105,8 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint);
/*! Disables storing events in the queue */
void replay_disable_events(void);
+/*! Enables storing events in the queue */
+void replay_enable_events(void);
/*! Returns true when saving events is enabled */
bool replay_events_enabled(void);
/*! Adds bottom half event to the queue */
@@ -115,6 +117,8 @@ void replay_input_event(QemuConsole *src, InputEvent *evt);
void replay_input_sync_event(void);
/*! Adds block layer event to the queue */
void replay_block_event(QEMUBH *bh, uint64_t id);
+/*! Returns ID for the next block event */
+uint64_t blkreplay_next_id(void);
/* Character device */
diff --git a/kvm-all.c b/kvm-all.c
index 8a4382e..fc2898a 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1847,10 +1847,8 @@ void kvm_flush_coalesced_mmio_buffer(void)
s->coalesced_flush_in_progress = false;
}
-static void do_kvm_cpu_synchronize_state(void *arg)
+static void do_kvm_cpu_synchronize_state(CPUState *cpu, void *arg)
{
- CPUState *cpu = arg;
-
if (!cpu->kvm_vcpu_dirty) {
kvm_arch_get_registers(cpu);
cpu->kvm_vcpu_dirty = true;
@@ -1860,34 +1858,30 @@ static void do_kvm_cpu_synchronize_state(void *arg)
void kvm_cpu_synchronize_state(CPUState *cpu)
{
if (!cpu->kvm_vcpu_dirty) {
- run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_state, NULL);
}
}
-static void do_kvm_cpu_synchronize_post_reset(void *arg)
+static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, void *arg)
{
- CPUState *cpu = arg;
-
kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
cpu->kvm_vcpu_dirty = false;
}
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, cpu);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, NULL);
}
-static void do_kvm_cpu_synchronize_post_init(void *arg)
+static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, void *arg)
{
- CPUState *cpu = arg;
-
kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
cpu->kvm_vcpu_dirty = false;
}
void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu);
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, NULL);
}
int kvm_cpu_exec(CPUState *cpu)
@@ -2216,7 +2210,7 @@ struct kvm_set_guest_debug_data {
int err;
};
-static void kvm_invoke_set_guest_debug(void *data)
+static void kvm_invoke_set_guest_debug(CPUState *unused_cpu, void *data)
{
struct kvm_set_guest_debug_data *dbg_data = data;
@@ -2234,7 +2228,6 @@ int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
}
kvm_arch_update_guest_debug(cpu, &data.dbg);
- data.cpu = cpu;
run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
return data.err;
diff --git a/linux-user/main.c b/linux-user/main.c
index 8daebe0..c8f8573 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -107,21 +107,11 @@ int cpu_get_pic_interrupt(CPUX86State *env)
/***********************************************************/
/* Helper routines for implementing atomic operations. */
-/* To implement exclusive operations we force all cpus to syncronise.
- We don't require a full sync, only that no cpus are executing guest code.
- The alternative is to map target atomic ops onto host equivalents,
- which requires quite a lot of per host/target work. */
-static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
-static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
-static int pending_cpus;
-
/* Make sure everything is in a consistent state for calling fork(). */
void fork_start(void)
{
+ cpu_list_lock();
qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
- pthread_mutex_lock(&exclusive_lock);
mmap_fork_start();
}
@@ -137,93 +127,15 @@ void fork_end(int child)
QTAILQ_REMOVE(&cpus, cpu, node);
}
}
- pending_cpus = 0;
- pthread_mutex_init(&exclusive_lock, NULL);
- pthread_mutex_init(&cpu_list_mutex, NULL);
- pthread_cond_init(&exclusive_cond, NULL);
- pthread_cond_init(&exclusive_resume, NULL);
qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_init_cpu_list();
gdbserver_fork(thread_cpu);
} else {
- pthread_mutex_unlock(&exclusive_lock);
qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ cpu_list_unlock();
}
}
-/* Wait for pending exclusive operations to complete. The exclusive lock
- must be held. */
-static inline void exclusive_idle(void)
-{
- while (pending_cpus) {
- pthread_cond_wait(&exclusive_resume, &exclusive_lock);
- }
-}
-
-/* Start an exclusive operation.
- Must only be called from outside cpu_exec. */
-static inline void start_exclusive(void)
-{
- CPUState *other_cpu;
-
- pthread_mutex_lock(&exclusive_lock);
- exclusive_idle();
-
- pending_cpus = 1;
- /* Make all other cpus stop executing. */
- CPU_FOREACH(other_cpu) {
- if (other_cpu->running) {
- pending_cpus++;
- cpu_exit(other_cpu);
- }
- }
- if (pending_cpus > 1) {
- pthread_cond_wait(&exclusive_cond, &exclusive_lock);
- }
-}
-
-/* Finish an exclusive operation. */
-static inline void __attribute__((unused)) end_exclusive(void)
-{
- pending_cpus = 0;
- pthread_cond_broadcast(&exclusive_resume);
- pthread_mutex_unlock(&exclusive_lock);
-}
-
-/* Wait for exclusive ops to finish, and begin cpu execution. */
-static inline void cpu_exec_start(CPUState *cpu)
-{
- pthread_mutex_lock(&exclusive_lock);
- exclusive_idle();
- cpu->running = true;
- pthread_mutex_unlock(&exclusive_lock);
-}
-
-/* Mark cpu as not executing, and release pending exclusive ops. */
-static inline void cpu_exec_end(CPUState *cpu)
-{
- pthread_mutex_lock(&exclusive_lock);
- cpu->running = false;
- if (pending_cpus > 1) {
- pending_cpus--;
- if (pending_cpus == 1) {
- pthread_cond_signal(&exclusive_cond);
- }
- }
- exclusive_idle();
- pthread_mutex_unlock(&exclusive_lock);
-}
-
-void cpu_list_lock(void)
-{
- pthread_mutex_lock(&cpu_list_mutex);
-}
-
-void cpu_list_unlock(void)
-{
- pthread_mutex_unlock(&cpu_list_mutex);
-}
-
-
#ifdef TARGET_I386
/***********************************************************/
/* CPUX86 core interface */
@@ -296,6 +208,8 @@ void cpu_loop(CPUX86State *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case 0x80:
/* linux syscall from int $0x80 */
@@ -737,6 +651,8 @@ void cpu_loop(CPUARMState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case EXCP_UDEF:
{
@@ -1073,6 +989,7 @@ void cpu_loop(CPUARMState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
switch (trapnr) {
case EXCP_SWI:
@@ -1161,6 +1078,8 @@ void cpu_loop(CPUUniCore32State *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch (trapnr) {
case UC32_EXCP_PRIV:
{
@@ -1366,6 +1285,7 @@ void cpu_loop (CPUSPARCState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
/* Compute PSR before exposing state. */
if (env->cc_op != CC_OP_FLAGS) {
@@ -1638,6 +1558,8 @@ void cpu_loop(CPUPPCState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case POWERPC_EXCP_NONE:
/* Just go on */
@@ -2484,6 +2406,8 @@ void cpu_loop(CPUMIPSState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case EXCP_SYSCALL:
env->active_tc.PC += 4;
@@ -2724,6 +2648,7 @@ void cpu_loop(CPUOpenRISCState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
gdbsig = 0;
switch (trapnr) {
@@ -2818,6 +2743,7 @@ void cpu_loop(CPUSH4State *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
switch (trapnr) {
case 0x160:
@@ -2884,6 +2810,8 @@ void cpu_loop(CPUCRISState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch (trapnr) {
case 0xaa:
{
@@ -2949,6 +2877,8 @@ void cpu_loop(CPUMBState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch (trapnr) {
case 0xaa:
{
@@ -3066,6 +2996,8 @@ void cpu_loop(CPUM68KState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch(trapnr) {
case EXCP_ILLEGAL:
{
@@ -3209,6 +3141,7 @@ void cpu_loop(CPUAlphaState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
/* All of the traps imply a transition through PALcode, which
implies an REI instruction has been executed. Which means
@@ -3401,6 +3334,8 @@ void cpu_loop(CPUS390XState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch (trapnr) {
case EXCP_INTERRUPT:
/* Just indicate that signals should be handled asap. */
@@ -3710,6 +3645,8 @@ void cpu_loop(CPUTLGState *env)
cpu_exec_start(cs);
trapnr = cpu_exec(cs);
cpu_exec_end(cs);
+ process_queued_cpu_work(cs);
+
switch (trapnr) {
case TILEGX_EXCP_SYSCALL:
{
@@ -3769,6 +3706,16 @@ void cpu_loop(CPUTLGState *env)
THREAD CPUState *thread_cpu;
+bool qemu_cpu_is_self(CPUState *cpu)
+{
+ return thread_cpu == cpu;
+}
+
+void qemu_cpu_kick(CPUState *cpu)
+{
+ cpu_exit(cpu);
+}
+
void task_settid(TaskState *ts)
{
if (ts->ts_tid == 0) {
@@ -4211,6 +4158,7 @@ int main(int argc, char **argv, char **envp)
int ret;
int execfd;
+ qemu_init_cpu_list();
module_call_init(MODULE_INIT_QOM);
if ((envlist = envlist_create()) == NULL) {
diff --git a/memory.c b/memory.c
index 1a1baf5..58f9269 100644
--- a/memory.c
+++ b/memory.c
@@ -158,14 +158,10 @@ static bool memory_listener_match(MemoryListener *listener,
/* No need to ref/unref .mr, the FlatRange keeps it alive. */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \
- MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) { \
- .mr = (fr)->mr, \
- .address_space = (as), \
- .offset_within_region = (fr)->offset_in_region, \
- .size = (fr)->addr.size, \
- .offset_within_address_space = int128_get64((fr)->addr.start), \
- .readonly = (fr)->readonly, \
- }), ##_args)
+ do { \
+ MemoryRegionSection mrs = section_from_flat_range(fr, as); \
+ MEMORY_LISTENER_CALL(callback, dir, &mrs, ##_args); \
+ } while(0)
struct CoalescedMemoryRange {
AddrRange addr;
@@ -245,6 +241,19 @@ typedef struct AddressSpaceOps AddressSpaceOps;
#define FOR_EACH_FLAT_RANGE(var, view) \
for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)
+static inline MemoryRegionSection
+section_from_flat_range(FlatRange *fr, AddressSpace *as)
+{
+ return (MemoryRegionSection) {
+ .mr = fr->mr,
+ .address_space = as,
+ .offset_within_region = fr->offset_in_region,
+ .size = fr->addr.size,
+ .offset_within_address_space = int128_get64(fr->addr.start),
+ .readonly = fr->readonly,
+ };
+}
+
static bool flatrange_equal(FlatRange *a, FlatRange *b)
{
return a->mr == b->mr
@@ -1413,7 +1422,8 @@ void memory_region_init_iommu(MemoryRegion *mr,
memory_region_init(mr, owner, name, size);
mr->iommu_ops = ops,
mr->terminates = true; /* then re-forwards */
- notifier_list_init(&mr->iommu_notify);
+ QLIST_INIT(&mr->iommu_notify);
+ mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE;
}
static void memory_region_finalize(Object *obj)
@@ -1508,13 +1518,31 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
return memory_region_get_dirty_log_mask(mr) & (1 << client);
}
-void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
+static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
{
- if (mr->iommu_ops->notify_started &&
- QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
- mr->iommu_ops->notify_started(mr);
+ IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
+ IOMMUNotifier *iommu_notifier;
+
+ QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
+ flags |= iommu_notifier->notifier_flags;
+ }
+
+ if (flags != mr->iommu_notify_flags &&
+ mr->iommu_ops->notify_flag_changed) {
+ mr->iommu_ops->notify_flag_changed(mr, mr->iommu_notify_flags,
+ flags);
}
- notifier_list_add(&mr->iommu_notify, n);
+
+ mr->iommu_notify_flags = flags;
+}
+
+void memory_region_register_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n)
+{
+ /* We need to register for at least one bitfield */
+ assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
+ QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
+ memory_region_update_iommu_notify_flags(mr);
}
uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr)
@@ -1526,7 +1554,8 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr)
return TARGET_PAGE_SIZE;
}
-void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write)
+void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
+ bool is_write)
{
hwaddr addr, granularity;
IOMMUTLBEntry iotlb;
@@ -1547,20 +1576,32 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write)
}
}
-void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
+void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n)
{
- notifier_remove(n);
- if (mr->iommu_ops->notify_stopped &&
- QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
- mr->iommu_ops->notify_stopped(mr);
- }
+ QLIST_REMOVE(n, node);
+ memory_region_update_iommu_notify_flags(mr);
}
void memory_region_notify_iommu(MemoryRegion *mr,
IOMMUTLBEntry entry)
{
+ IOMMUNotifier *iommu_notifier;
+ IOMMUNotifierFlag request_flags;
+
assert(memory_region_is_iommu(mr));
- notifier_list_notify(&mr->iommu_notify, &entry);
+
+ if (entry.perm & IOMMU_RW) {
+ request_flags = IOMMU_NOTIFIER_MAP;
+ } else {
+ request_flags = IOMMU_NOTIFIER_UNMAP;
+ }
+
+ QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
+ if (iommu_notifier->notifier_flags & request_flags) {
+ iommu_notifier->notify(iommu_notifier, &entry);
+ }
+ }
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
@@ -2124,16 +2165,27 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr)
return mr && mr != container;
}
-void address_space_sync_dirty_bitmap(AddressSpace *as)
+void memory_global_dirty_log_sync(void)
{
+ MemoryListener *listener;
+ AddressSpace *as;
FlatView *view;
FlatRange *fr;
- view = address_space_get_flatview(as);
- FOR_EACH_FLAT_RANGE(fr, view) {
- MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
+ QTAILQ_FOREACH(listener, &memory_listeners, link) {
+ if (!listener->log_sync) {
+ continue;
+ }
+ /* Global listeners are being phased out. */
+ assert(listener->address_space_filter);
+ as = listener->address_space_filter;
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ MemoryRegionSection mrs = section_from_flat_range(fr, as);
+ listener->log_sync(listener, &mrs);
+ }
+ flatview_unref(view);
}
- flatview_unref(view);
}
void memory_global_dirty_log_start(void)
diff --git a/migration/ram.c b/migration/ram.c
index a6e1c63..c8ec9f2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -626,7 +626,7 @@ static void migration_bitmap_sync(void)
}
trace_migration_bitmap_sync_start();
- address_space_sync_dirty_bitmap(&address_space_memory);
+ memory_global_dirty_log_sync();
qemu_mutex_lock(&migration_bitmap_mutex);
rcu_read_lock();
diff --git a/replay/Makefile.objs b/replay/Makefile.objs
index fcb3f74..c8ad3eb 100644
--- a/replay/Makefile.objs
+++ b/replay/Makefile.objs
@@ -4,3 +4,4 @@ common-obj-y += replay-events.o
common-obj-y += replay-time.o
common-obj-y += replay-input.o
common-obj-y += replay-char.o
+common-obj-y += replay-snapshot.o
diff --git a/replay/replay-events.c b/replay/replay-events.c
index 3807245..c513913 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -279,7 +279,7 @@ static Event *replay_read_event(int checkpoint)
/* Called with replay mutex locked */
void replay_read_events(int checkpoint)
{
- while (replay_data_kind == EVENT_ASYNC) {
+ while (replay_state.data_kind == EVENT_ASYNC) {
Event *event = replay_read_event(checkpoint);
if (!event) {
break;
@@ -309,3 +309,11 @@ bool replay_events_enabled(void)
{
return events_enabled;
}
+
+uint64_t blkreplay_next_id(void)
+{
+ if (replay_events_enabled()) {
+ return replay_state.block_request_id++;
+ }
+ return 0;
+}
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index 5835e8d..bea7b4a 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -16,11 +16,8 @@
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
-unsigned int replay_data_kind = -1;
-static unsigned int replay_has_unread_data;
-
/* Mutex to protect reading and writing events to the log.
- replay_data_kind and replay_has_unread_data are also protected
+ data_kind and has_unread_data are also protected
by this mutex.
It also protects replay events queue which stores events to be
written or read to the log. */
@@ -150,15 +147,16 @@ void replay_check_error(void)
void replay_fetch_data_kind(void)
{
if (replay_file) {
- if (!replay_has_unread_data) {
- replay_data_kind = replay_get_byte();
- if (replay_data_kind == EVENT_INSTRUCTION) {
+ if (!replay_state.has_unread_data) {
+ replay_state.data_kind = replay_get_byte();
+ if (replay_state.data_kind == EVENT_INSTRUCTION) {
replay_state.instructions_count = replay_get_dword();
}
replay_check_error();
- replay_has_unread_data = 1;
- if (replay_data_kind >= EVENT_COUNT) {
- error_report("Replay: unknown event kind %d", replay_data_kind);
+ replay_state.has_unread_data = 1;
+ if (replay_state.data_kind >= EVENT_COUNT) {
+ error_report("Replay: unknown event kind %d",
+ replay_state.data_kind);
exit(1);
}
}
@@ -167,7 +165,7 @@ void replay_fetch_data_kind(void)
void replay_finish_event(void)
{
- replay_has_unread_data = 0;
+ replay_state.has_unread_data = 0;
replay_fetch_data_kind();
}
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index efbf14c..9117e44 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -62,11 +62,19 @@ typedef struct ReplayState {
uint64_t current_step;
/*! Number of instructions to be executed before other events happen. */
int instructions_count;
+ /*! Type of the currently executed event. */
+ unsigned int data_kind;
+ /*! Flag which indicates that event is not processed yet. */
+ unsigned int has_unread_data;
+ /*! Temporary variable for saving current log offset. */
+ uint64_t file_offset;
+ /*! Next block operation id.
+ This counter is global, because requests from different
+ block devices should not get overlapping ids. */
+ uint64_t block_request_id;
} ReplayState;
extern ReplayState replay_state;
-extern unsigned int replay_data_kind;
-
/* File for replay writing */
extern FILE *replay_file;
@@ -98,7 +106,7 @@ void replay_check_error(void);
the next event from the log. */
void replay_finish_event(void);
/*! Reads data type from the file and stores it in the
- replay_data_kind variable. */
+ data_kind variable. */
void replay_fetch_data_kind(void);
/*! Saves queued events (like instructions and sound). */
@@ -119,8 +127,6 @@ void replay_read_next_clock(unsigned int kind);
void replay_init_events(void);
/*! Clears internal data structures for events handling */
void replay_finish_events(void);
-/*! Enables storing events in the queue */
-void replay_enable_events(void);
/*! Flushes events queue */
void replay_flush_events(void);
/*! Clears events list before loading new VM state */
@@ -155,4 +161,11 @@ void replay_event_char_read_save(void *opaque);
/*! Reads char event read from the file. */
void *replay_event_char_read_load(void);
+/* VMState-related functions */
+
+/* Registers replay VMState.
+ Should be called before virtual devices initialization
+ to make cached timers available for post_load functions. */
+void replay_vmstate_register(void);
+
#endif
diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c
new file mode 100644
index 0000000..4980597
--- /dev/null
+++ b/replay/replay-snapshot.c
@@ -0,0 +1,61 @@
+/*
+ * replay-snapshot.c
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "sysemu/replay.h"
+#include "replay-internal.h"
+#include "sysemu/sysemu.h"
+#include "monitor/monitor.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+#include "migration/vmstate.h"
+
+static void replay_pre_save(void *opaque)
+{
+ ReplayState *state = opaque;
+ state->file_offset = ftell(replay_file);
+}
+
+static int replay_post_load(void *opaque, int version_id)
+{
+ ReplayState *state = opaque;
+ fseek(replay_file, state->file_offset, SEEK_SET);
+ /* If this was a vmstate, saved in recording mode,
+ we need to initialize replay data fields. */
+ replay_fetch_data_kind();
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_replay = {
+ .name = "replay",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_save = replay_pre_save,
+ .post_load = replay_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64_ARRAY(cached_clock, ReplayState, REPLAY_CLOCK_COUNT),
+ VMSTATE_UINT64(current_step, ReplayState),
+ VMSTATE_INT32(instructions_count, ReplayState),
+ VMSTATE_UINT32(data_kind, ReplayState),
+ VMSTATE_UINT32(has_unread_data, ReplayState),
+ VMSTATE_UINT64(file_offset, ReplayState),
+ VMSTATE_UINT64(block_request_id, ReplayState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+void replay_vmstate_register(void)
+{
+ vmstate_register(NULL, 0, &vmstate_replay, &replay_state);
+}
diff --git a/replay/replay-time.c b/replay/replay-time.c
index fffe072..f70382a 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -31,7 +31,7 @@ int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
void replay_read_next_clock(ReplayClockKind kind)
{
- unsigned int read_kind = replay_data_kind - EVENT_CLOCK;
+ unsigned int read_kind = replay_state.data_kind - EVENT_CLOCK;
assert(read_kind == kind);
diff --git a/replay/replay.c b/replay/replay.c
index 167fd29..c797aea 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -38,15 +38,15 @@ bool replay_next_event_is(int event)
/* nothing to skip - not all instructions used */
if (replay_state.instructions_count != 0) {
- assert(replay_data_kind == EVENT_INSTRUCTION);
+ assert(replay_state.data_kind == EVENT_INSTRUCTION);
return event == EVENT_INSTRUCTION;
}
while (true) {
- if (event == replay_data_kind) {
+ if (event == replay_state.data_kind) {
res = true;
}
- switch (replay_data_kind) {
+ switch (replay_state.data_kind) {
case EVENT_SHUTDOWN:
replay_finish_event();
qemu_system_shutdown_request();
@@ -85,7 +85,7 @@ void replay_account_executed_instructions(void)
replay_state.instructions_count -= count;
replay_state.current_step += count;
if (replay_state.instructions_count == 0) {
- assert(replay_data_kind == EVENT_INSTRUCTION);
+ assert(replay_state.data_kind == EVENT_INSTRUCTION);
replay_finish_event();
/* Wake up iothread. This is required because
timers will not expire until clock counters
@@ -188,7 +188,7 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
if (replay_mode == REPLAY_MODE_PLAY) {
if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) {
replay_finish_event();
- } else if (replay_data_kind != EVENT_ASYNC) {
+ } else if (replay_state.data_kind != EVENT_ASYNC) {
res = false;
goto out;
}
@@ -196,7 +196,7 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
/* replay_read_events may leave some unread events.
Return false if not all of the events associated with
checkpoint were processed */
- res = replay_data_kind != EVENT_ASYNC;
+ res = replay_state.data_kind != EVENT_ASYNC;
} else if (replay_mode == REPLAY_MODE_RECORD) {
replay_put_event(EVENT_CHECKPOINT + checkpoint);
replay_save_events(checkpoint);
@@ -237,9 +237,10 @@ static void replay_enable(const char *fname, int mode)
replay_filename = g_strdup(fname);
replay_mode = mode;
- replay_data_kind = -1;
+ replay_state.data_kind = -1;
replay_state.instructions_count = 0;
replay_state.current_step = 0;
+ replay_state.has_unread_data = 0;
/* skip file header for RECORD and check it for PLAY */
if (replay_mode == REPLAY_MODE_RECORD) {
@@ -291,6 +292,7 @@ void replay_configure(QemuOpts *opts)
exit(1);
}
+ replay_vmstate_register();
replay_enable(fname, mode);
out:
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index dde3f5f..3afa19a 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2407,7 +2407,7 @@ sub process {
# we have e.g. CONFIG_LINUX and CONFIG_WIN32 for common cases
# where they might be necessary.
if ($line =~ m@^.\s*\#\s*if.*\b__@) {
- ERROR("architecture specific defines should be avoided\n" . $herecurr);
+ WARN("architecture specific defines should be avoided\n" . $herecurr);
}
# Check that the storage class is at the beginning of a declaration
diff --git a/stubs/replay.c b/stubs/replay.c
index de9fa1e..d9a6da9 100644
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -67,3 +67,8 @@ void replay_char_read_all_save_buf(uint8_t *buf, int offset)
void replay_block_event(QEMUBH *bh, uint64_t id)
{
}
+
+uint64_t blkreplay_next_id(void)
+{
+ return 0;
+}
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 1c250b8..9bc961b 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1113,7 +1113,6 @@ out:
typedef struct MCEInjectionParams {
Monitor *mon;
- X86CPU *cpu;
int bank;
uint64_t status;
uint64_t mcg_status;
@@ -1122,14 +1121,14 @@ typedef struct MCEInjectionParams {
int flags;
} MCEInjectionParams;
-static void do_inject_x86_mce(void *data)
+static void do_inject_x86_mce(CPUState *cs, void *data)
{
MCEInjectionParams *params = data;
- CPUX86State *cenv = &params->cpu->env;
- CPUState *cpu = CPU(params->cpu);
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *cenv = &cpu->env;
uint64_t *banks = cenv->mce_banks + 4 * params->bank;
- cpu_synchronize_state(cpu);
+ cpu_synchronize_state(cs);
/*
* If there is an MCE exception being processed, ignore this SRAO MCE
@@ -1149,7 +1148,7 @@ static void do_inject_x86_mce(void *data)
if ((cenv->mcg_cap & MCG_CTL_P) && cenv->mcg_ctl != ~(uint64_t)0) {
monitor_printf(params->mon,
"CPU %d: Uncorrected error reporting disabled\n",
- cpu->cpu_index);
+ cs->cpu_index);
return;
}
@@ -1161,7 +1160,7 @@ static void do_inject_x86_mce(void *data)
monitor_printf(params->mon,
"CPU %d: Uncorrected error reporting disabled for"
" bank %d\n",
- cpu->cpu_index, params->bank);
+ cs->cpu_index, params->bank);
return;
}
@@ -1170,7 +1169,7 @@ static void do_inject_x86_mce(void *data)
monitor_printf(params->mon,
"CPU %d: Previous MCE still in progress, raising"
" triple fault\n",
- cpu->cpu_index);
+ cs->cpu_index);
qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
qemu_system_reset_request();
return;
@@ -1182,7 +1181,7 @@ static void do_inject_x86_mce(void *data)
banks[3] = params->misc;
cenv->mcg_status = params->mcg_status;
banks[1] = params->status;
- cpu_interrupt(cpu, CPU_INTERRUPT_MCE);
+ cpu_interrupt(cs, CPU_INTERRUPT_MCE);
} else if (!(banks[1] & MCI_STATUS_VAL)
|| !(banks[1] & MCI_STATUS_UC)) {
if (banks[1] & MCI_STATUS_VAL) {
@@ -1204,7 +1203,6 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
CPUX86State *cenv = &cpu->env;
MCEInjectionParams params = {
.mon = mon,
- .cpu = cpu,
.bank = bank,
.status = status,
.mcg_status = mcg_status,
@@ -1245,7 +1243,6 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
if (other_cs == cs) {
continue;
}
- params.cpu = X86_CPU(other_cs);
run_on_cpu(other_cs, do_inject_x86_mce, &params);
}
}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 30b63b7..ee1f53e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -150,10 +150,8 @@ static int kvm_get_tsc(CPUState *cs)
return 0;
}
-static inline void do_kvm_synchronize_tsc(void *arg)
+static inline void do_kvm_synchronize_tsc(CPUState *cpu, void *arg)
{
- CPUState *cpu = arg;
-
kvm_get_tsc(cpu);
}
@@ -163,7 +161,7 @@ void kvm_synchronize_all_tsc(void)
if (kvm_enabled()) {
CPU_FOREACH(cpu) {
- run_on_cpu(cpu, do_kvm_synchronize_tsc, cpu);
+ run_on_cpu(cpu, do_kvm_synchronize_tsc, NULL);
}
}
}
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index 2f3c8e2..35ae2ce 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -164,7 +164,7 @@ static void s390_cpu_machine_reset_cb(void *opaque)
{
S390CPU *cpu = opaque;
- run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, CPU(cpu));
+ run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, NULL);
}
#endif
@@ -220,7 +220,7 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp)
s390_cpu_gdb_init(cs);
qemu_init_vcpu(cs);
#if !defined(CONFIG_USER_ONLY)
- run_on_cpu(cs, s390_do_cpu_full_reset, cs);
+ run_on_cpu(cs, s390_do_cpu_full_reset, NULL);
#else
cpu_reset(cs);
#endif
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 5645e06..4fb34b5 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -502,17 +502,14 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb,
#define decode_basedisp_rs decode_basedisp_s
/* helper functions for run_on_cpu() */
-static inline void s390_do_cpu_reset(void *arg)
+static inline void s390_do_cpu_reset(CPUState *cs, void *arg)
{
- CPUState *cs = arg;
S390CPUClass *scc = S390_CPU_GET_CLASS(cs);
scc->cpu_reset(cs);
}
-static inline void s390_do_cpu_full_reset(void *arg)
+static inline void s390_do_cpu_full_reset(CPUState *cs, void *arg)
{
- CPUState *cs = arg;
-
cpu_reset(cs);
}
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 4b847a3..fd929e8 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -1385,7 +1385,6 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb)
}
typedef struct SigpInfo {
- S390CPU *cpu;
uint64_t param;
int cc;
uint64_t *status_reg;
@@ -1398,38 +1397,40 @@ static void set_sigp_status(SigpInfo *si, uint64_t status)
si->cc = SIGP_CC_STATUS_STORED;
}
-static void sigp_start(void *arg)
+static void sigp_start(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
- if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) {
+ if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) {
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
return;
}
- s390_cpu_set_state(CPU_STATE_OPERATING, si->cpu);
+ s390_cpu_set_state(CPU_STATE_OPERATING, cpu);
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_stop(void *arg)
+static void sigp_stop(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
struct kvm_s390_irq irq = {
.type = KVM_S390_SIGP_STOP,
};
- if (s390_cpu_get_state(si->cpu) != CPU_STATE_OPERATING) {
+ if (s390_cpu_get_state(cpu) != CPU_STATE_OPERATING) {
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
return;
}
/* disabled wait - sleeping in user space */
- if (CPU(si->cpu)->halted) {
- s390_cpu_set_state(CPU_STATE_STOPPED, si->cpu);
+ if (cs->halted) {
+ s390_cpu_set_state(CPU_STATE_STOPPED, cpu);
} else {
/* execute the stop function */
- si->cpu->env.sigp_order = SIGP_STOP;
- kvm_s390_vcpu_interrupt(si->cpu, &irq);
+ cpu->env.sigp_order = SIGP_STOP;
+ kvm_s390_vcpu_interrupt(cpu, &irq);
}
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
@@ -1496,56 +1497,58 @@ static int kvm_s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
return 0;
}
-static void sigp_stop_and_store_status(void *arg)
+static void sigp_stop_and_store_status(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
struct kvm_s390_irq irq = {
.type = KVM_S390_SIGP_STOP,
};
/* disabled wait - sleeping in user space */
- if (s390_cpu_get_state(si->cpu) == CPU_STATE_OPERATING &&
- CPU(si->cpu)->halted) {
- s390_cpu_set_state(CPU_STATE_STOPPED, si->cpu);
+ if (s390_cpu_get_state(cpu) == CPU_STATE_OPERATING && cs->halted) {
+ s390_cpu_set_state(CPU_STATE_STOPPED, cpu);
}
- switch (s390_cpu_get_state(si->cpu)) {
+ switch (s390_cpu_get_state(cpu)) {
case CPU_STATE_OPERATING:
- si->cpu->env.sigp_order = SIGP_STOP_STORE_STATUS;
- kvm_s390_vcpu_interrupt(si->cpu, &irq);
+ cpu->env.sigp_order = SIGP_STOP_STORE_STATUS;
+ kvm_s390_vcpu_interrupt(cpu, &irq);
/* store will be performed when handling the stop intercept */
break;
case CPU_STATE_STOPPED:
/* already stopped, just store the status */
- cpu_synchronize_state(CPU(si->cpu));
- kvm_s390_store_status(si->cpu, KVM_S390_STORE_STATUS_DEF_ADDR, true);
+ cpu_synchronize_state(cs);
+ kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_DEF_ADDR, true);
break;
}
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_store_status_at_address(void *arg)
+static void sigp_store_status_at_address(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
uint32_t address = si->param & 0x7ffffe00u;
/* cpu has to be stopped */
- if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) {
+ if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) {
set_sigp_status(si, SIGP_STAT_INCORRECT_STATE);
return;
}
- cpu_synchronize_state(CPU(si->cpu));
+ cpu_synchronize_state(cs);
- if (kvm_s390_store_status(si->cpu, address, false)) {
+ if (kvm_s390_store_status(cpu, address, false)) {
set_sigp_status(si, SIGP_STAT_INVALID_PARAMETER);
return;
}
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_store_adtl_status(void *arg)
+static void sigp_store_adtl_status(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
if (!s390_has_feat(S390_FEAT_VECTOR)) {
@@ -1554,7 +1557,7 @@ static void sigp_store_adtl_status(void *arg)
}
/* cpu has to be stopped */
- if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) {
+ if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) {
set_sigp_status(si, SIGP_STAT_INCORRECT_STATE);
return;
}
@@ -1565,31 +1568,32 @@ static void sigp_store_adtl_status(void *arg)
return;
}
- cpu_synchronize_state(CPU(si->cpu));
+ cpu_synchronize_state(cs);
- if (kvm_s390_store_adtl_status(si->cpu, si->param)) {
+ if (kvm_s390_store_adtl_status(cpu, si->param)) {
set_sigp_status(si, SIGP_STAT_INVALID_PARAMETER);
return;
}
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_restart(void *arg)
+static void sigp_restart(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
struct kvm_s390_irq irq = {
.type = KVM_S390_RESTART,
};
- switch (s390_cpu_get_state(si->cpu)) {
+ switch (s390_cpu_get_state(cpu)) {
case CPU_STATE_STOPPED:
/* the restart irq has to be delivered prior to any other pending irq */
- cpu_synchronize_state(CPU(si->cpu));
- do_restart_interrupt(&si->cpu->env);
- s390_cpu_set_state(CPU_STATE_OPERATING, si->cpu);
+ cpu_synchronize_state(cs);
+ do_restart_interrupt(&cpu->env);
+ s390_cpu_set_state(CPU_STATE_OPERATING, cpu);
break;
case CPU_STATE_OPERATING:
- kvm_s390_vcpu_interrupt(si->cpu, &irq);
+ kvm_s390_vcpu_interrupt(cpu, &irq);
break;
}
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
@@ -1597,20 +1601,18 @@ static void sigp_restart(void *arg)
int kvm_s390_cpu_restart(S390CPU *cpu)
{
- SigpInfo si = {
- .cpu = cpu,
- };
+ SigpInfo si = {};
run_on_cpu(CPU(cpu), sigp_restart, &si);
DPRINTF("DONE: KVM cpu restart: %p\n", &cpu->env);
return 0;
}
-static void sigp_initial_cpu_reset(void *arg)
+static void sigp_initial_cpu_reset(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
+ S390CPUClass *scc = S390_CPU_GET_CLASS(cpu);
SigpInfo *si = arg;
- CPUState *cs = CPU(si->cpu);
- S390CPUClass *scc = S390_CPU_GET_CLASS(si->cpu);
cpu_synchronize_state(cs);
scc->initial_cpu_reset(cs);
@@ -1618,11 +1620,11 @@ static void sigp_initial_cpu_reset(void *arg)
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_cpu_reset(void *arg)
+static void sigp_cpu_reset(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
+ S390CPUClass *scc = S390_CPU_GET_CLASS(cpu);
SigpInfo *si = arg;
- CPUState *cs = CPU(si->cpu);
- S390CPUClass *scc = S390_CPU_GET_CLASS(si->cpu);
cpu_synchronize_state(cs);
scc->cpu_reset(cs);
@@ -1630,12 +1632,13 @@ static void sigp_cpu_reset(void *arg)
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static void sigp_set_prefix(void *arg)
+static void sigp_set_prefix(CPUState *cs, void *arg)
{
+ S390CPU *cpu = S390_CPU(cs);
SigpInfo *si = arg;
uint32_t addr = si->param & 0x7fffe000u;
- cpu_synchronize_state(CPU(si->cpu));
+ cpu_synchronize_state(cs);
if (!address_space_access_valid(&address_space_memory, addr,
sizeof(struct LowCore), false)) {
@@ -1644,13 +1647,13 @@ static void sigp_set_prefix(void *arg)
}
/* cpu has to be stopped */
- if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) {
+ if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) {
set_sigp_status(si, SIGP_STAT_INCORRECT_STATE);
return;
}
- si->cpu->env.psa = addr;
- cpu_synchronize_post_init(CPU(si->cpu));
+ cpu->env.psa = addr;
+ cpu_synchronize_post_init(cs);
si->cc = SIGP_CC_ORDER_CODE_ACCEPTED;
}
@@ -1658,7 +1661,6 @@ static int handle_sigp_single_dst(S390CPU *dst_cpu, uint8_t order,
uint64_t param, uint64_t *status_reg)
{
SigpInfo si = {
- .cpu = dst_cpu,
.param = param,
.status_reg = status_reg,
};
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 86da194..4df2ec6 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -126,7 +126,7 @@ static int modified_clear_reset(S390CPU *cpu)
pause_all_vcpus();
cpu_synchronize_all_states();
CPU_FOREACH(t) {
- run_on_cpu(t, s390_do_cpu_full_reset, t);
+ run_on_cpu(t, s390_do_cpu_full_reset, NULL);
}
s390_cmma_reset();
subsystem_reset();
@@ -145,7 +145,7 @@ static int load_normal_reset(S390CPU *cpu)
pause_all_vcpus();
cpu_synchronize_all_states();
CPU_FOREACH(t) {
- run_on_cpu(t, s390_do_cpu_reset, t);
+ run_on_cpu(t, s390_do_cpu_reset, NULL);
}
s390_cmma_reset();
subsystem_reset();
diff --git a/translate-all.c b/translate-all.c
index e9bc90c..8ca393c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -834,12 +834,19 @@ static void page_flush_tb(void)
}
/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUState *cpu)
+static void do_tb_flush(CPUState *cpu, void *data)
{
- if (!tcg_enabled()) {
- return;
+ unsigned tb_flush_req = (unsigned) (uintptr_t) data;
+
+ tb_lock();
+
+ /* If it's already been done on request of another CPU,
+ * just retry.
+ */
+ if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_req) {
+ goto done;
}
+
#if defined(DEBUG_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
(unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
@@ -858,7 +865,6 @@ void tb_flush(CPUState *cpu)
for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
atomic_set(&cpu->tb_jmp_cache[i], NULL);
}
- atomic_mb_set(&cpu->tb_flushed, true);
}
tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -868,7 +874,19 @@ void tb_flush(CPUState *cpu)
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
/* XXX: flush processor icache at this point if cache flush is
expensive */
- tcg_ctx.tb_ctx.tb_flush_count++;
+ atomic_mb_set(&tcg_ctx.tb_ctx.tb_flush_count,
+ tcg_ctx.tb_ctx.tb_flush_count + 1);
+
+done:
+ tb_unlock();
+}
+
+void tb_flush(CPUState *cpu)
+{
+ if (tcg_enabled()) {
+ uintptr_t tb_flush_req = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count);
+ async_safe_run_on_cpu(cpu, do_tb_flush, (void *) tb_flush_req);
+ }
}
#ifdef DEBUG_TB_CHECK
@@ -1175,9 +1193,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
buffer_overflow:
/* flush must be done */
tb_flush(cpu);
- /* cannot fail at this point */
- tb = tb_alloc(pc);
- assert(tb != NULL);
+ mmap_unlock();
+ cpu_loop_exit(cpu);
}
gen_code_buf = tcg_ctx.code_gen_ptr;
@@ -1775,7 +1792,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
qht_statistics_destroy(&hst);
cpu_fprintf(f, "\nStatistics:\n");
- cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+ cpu_fprintf(f, "TB flush count %u\n",
+ atomic_read(&tcg_ctx.tb_ctx.tb_flush_count));
cpu_fprintf(f, "TB invalidate count %d\n",
tcg_ctx.tb_ctx.tb_phys_invalidate_count);
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
diff --git a/vl.c b/vl.c
index eafda8d..ab0349b 100644
--- a/vl.c
+++ b/vl.c
@@ -784,6 +784,7 @@ void vm_start(void)
if (runstate_is_running()) {
qapi_event_send_stop(&error_abort);
} else {
+ replay_enable_events();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
@@ -3019,6 +3020,7 @@ int main(int argc, char **argv, char **envp)
Error *err = NULL;
bool list_data_dirs = false;
+ qemu_init_cpu_list();
qemu_init_cpu_loop();
qemu_mutex_lock_iothread();