aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-02-25 18:43:52 +0000
committerPeter Maydell <peter.maydell@linaro.org>2017-02-25 18:43:52 +0000
commit28f997a82cb509bf4775d4006b368e1bde8b7bdd (patch)
tree5fbcd96b2f6541b3132ae5d9249a1501f591ce8d
parent2421f381dc38a8a6d12477c08c2f74a35a0698f8 (diff)
parentca759f9e387db87e1719911f019bc60c74be9ed8 (diff)
downloadqemu-28f997a82cb509bf4775d4006b368e1bde8b7bdd.zip
qemu-28f997a82cb509bf4775d4006b368e1bde8b7bdd.tar.gz
qemu-28f997a82cb509bf4775d4006b368e1bde8b7bdd.tar.bz2
Merge remote-tracking branch 'remotes/stsquad/tags/pull-mttcg-240217-1' into staging
This is the MTTCG pull-request as posted yesterday. # gpg: Signature made Fri 24 Feb 2017 11:17:51 GMT # gpg: using RSA key 0xFBD0DB095A9E2A44 # gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" # Primary key fingerprint: 6685 AE99 E751 67BC AFC8 DF35 FBD0 DB09 5A9E 2A44 * remotes/stsquad/tags/pull-mttcg-240217-1: (24 commits) tcg: enable MTTCG by default for ARM on x86 hosts hw/misc/imx6_src: defer clearing of SRC_SCR reset bits target-arm: ensure all cross vCPUs TLB flushes complete target-arm: don't generate WFE/YIELD calls for MTTCG target-arm/powerctl: defer cpu reset work to CPU context cputlb: introduce tlb_flush_*_all_cpus[_synced] cputlb: atomically update tlb fields used by tlb_reset_dirty cputlb: add tlb_flush_by_mmuidx async routines cputlb and arm/sparc targets: convert mmuidx flushes from varg to bitmap cputlb: introduce tlb_flush_* async work. cputlb: tweak qemu_ram_addr_from_host_nofail reporting cputlb: add assert_cpu_is_self checks tcg: handle EXCP_ATOMIC exception for system emulation tcg: enable thread-per-vCPU tcg: enable tb_lock() for SoftMMU tcg: remove global exit_request tcg: drop global lock during TCG code execution tcg: rename tcg_current_cpu to tcg_current_rr_cpu tcg: add kick timer for single-threaded vCPU emulation tcg: add options for enabling MTTCG ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rwxr-xr-xconfigure6
-rw-r--r--cpu-exec-common.c3
-rw-r--r--cpu-exec.c89
-rw-r--r--cpus.c345
-rw-r--r--cputlb.c463
-rw-r--r--docs/multi-thread-tcg.txt350
-rw-r--r--exec.c12
-rw-r--r--hw/core/irq.c1
-rw-r--r--hw/i386/kvmvapic.c4
-rw-r--r--hw/intc/arm_gicv3_cpuif.c3
-rw-r--r--hw/misc/imx6_src.c58
-rw-r--r--hw/ppc/ppc.c16
-rw-r--r--hw/ppc/spapr.c3
-rw-r--r--include/exec/cputlb.h2
-rw-r--r--include/exec/exec-all.h132
-rw-r--r--include/qom/cpu.h16
-rw-r--r--include/sysemu/cpus.h2
-rw-r--r--memory.c2
-rw-r--r--qemu-options.hx20
-rw-r--r--qom/cpu.c10
-rw-r--r--target/arm/arm-powerctl.c202
-rw-r--r--target/arm/arm-powerctl.h2
-rw-r--r--target/arm/cpu.c4
-rw-r--r--target/arm/cpu.h18
-rw-r--r--target/arm/helper.c219
-rw-r--r--target/arm/kvm.c7
-rw-r--r--target/arm/machine.c41
-rw-r--r--target/arm/op_helper.c50
-rw-r--r--target/arm/psci.c4
-rw-r--r--target/arm/translate-a64.c8
-rw-r--r--target/arm/translate.c20
-rw-r--r--target/i386/smm_helper.c7
-rw-r--r--target/s390x/misc_helper.c5
-rw-r--r--target/sparc/ldst_helper.c8
-rw-r--r--tcg/i386/tcg-target.h11
-rw-r--r--tcg/tcg-mo.h48
-rw-r--r--tcg/tcg.h27
-rw-r--r--translate-all.c66
-rw-r--r--translate-common.c21
-rw-r--r--vl.c49
40 files changed, 1878 insertions, 476 deletions
diff --git a/configure b/configure
index 4b68861..44ecbe6 100755
--- a/configure
+++ b/configure
@@ -5879,6 +5879,7 @@ mkdir -p $target_dir
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
+mttcg="no"
interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
@@ -5897,11 +5898,13 @@ case "$target_name" in
arm|armeb)
TARGET_ARCH=arm
bflt="yes"
+ mttcg="yes"
gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
aarch64)
TARGET_BASE_ARCH=arm
bflt="yes"
+ mttcg="yes"
gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
;;
cris)
@@ -6066,6 +6069,9 @@ if test "$target_bigendian" = "yes" ; then
fi
if test "$target_softmmu" = "yes" ; then
echo "CONFIG_SOFTMMU=y" >> $config_target_mak
+ if test "$mttcg" = "yes" ; then
+ echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak
+ fi
fi
if test "$target_user_only" = "yes" ; then
echo "CONFIG_USER_ONLY=y" >> $config_target_mak
diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 767d9c6..0504a94 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -23,9 +23,6 @@
#include "exec/exec-all.h"
#include "exec/memory-internal.h"
-bool exit_request;
-CPUState *tcg_current_cpu;
-
/* exit the current TB, but without causing any exception to be raised */
void cpu_loop_exit_noexc(CPUState *cpu)
{
diff --git a/cpu-exec.c b/cpu-exec.c
index 142a586..1a5ad48 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
+#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -227,20 +228,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
static void cpu_exec_step(CPUState *cpu)
{
+ CPUClass *cc = CPU_GET_CLASS(cpu);
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- tb = tb_gen_code(cpu, pc, cs_base, flags,
- 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
- tb->orig_tb = NULL;
- /* execute the generated code */
- trace_exec_tb_nocache(tb, pc);
- cpu_tb_exec(cpu, tb);
- tb_phys_invalidate(tb, -1);
- tb_free(tb);
+ if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+ mmap_lock();
+ tb_lock();
+ tb = tb_gen_code(cpu, pc, cs_base, flags,
+ 1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
+ tb->orig_tb = NULL;
+ tb_unlock();
+ mmap_unlock();
+
+ cc->cpu_exec_enter(cpu);
+ /* execute the generated code */
+ trace_exec_tb_nocache(tb, pc);
+ cpu_tb_exec(cpu, tb);
+ cc->cpu_exec_exit(cpu);
+
+ tb_lock();
+ tb_phys_invalidate(tb, -1);
+ tb_free(tb);
+ tb_unlock();
+ } else {
+ /* We may have exited due to another problem here, so we need
+ * to reset any tb_locks we may have taken but didn't release.
+ * The mmap_lock is dropped by tb_gen_code if it runs out of
+ * memory.
+ */
+#ifndef CONFIG_SOFTMMU
+ tcg_debug_assert(!have_mmap_lock());
+#endif
+ tb_lock_reset();
+ }
}
void cpu_exec_step_atomic(CPUState *cpu)
@@ -384,12 +408,13 @@ static inline bool cpu_handle_halt(CPUState *cpu)
if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
&& replay_interrupt()) {
X86CPU *x86_cpu = X86_CPU(cpu);
+ qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+ qemu_mutex_unlock_iothread();
}
#endif
if (!cpu_has_work(cpu)) {
- current_cpu = NULL;
return true;
}
@@ -439,7 +464,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
if (replay_exception()) {
CPUClass *cc = CPU_GET_CLASS(cpu);
+ qemu_mutex_lock_iothread();
cc->do_interrupt(cpu);
+ qemu_mutex_unlock_iothread();
cpu->exception_index = -1;
} else if (!replay_has_interrupt()) {
/* give a chance to iothread in replay mode */
@@ -465,9 +492,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
- int interrupt_request = cpu->interrupt_request;
- if (unlikely(interrupt_request)) {
+ if (unlikely(atomic_read(&cpu->interrupt_request))) {
+ int interrupt_request;
+ qemu_mutex_lock_iothread();
+ interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -475,6 +504,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG;
+ qemu_mutex_unlock_iothread();
return true;
}
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@@ -484,6 +514,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
+ qemu_mutex_unlock_iothread();
return true;
}
#if defined(TARGET_I386)
@@ -494,12 +525,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED;
+ qemu_mutex_unlock_iothread();
return true;
}
#else
else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt();
cpu_reset(cpu);
+ qemu_mutex_unlock_iothread();
return true;
}
#endif
@@ -522,7 +555,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
the program flow was changed */
*last_tb = NULL;
}
+
+ /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
+ qemu_mutex_unlock_iothread();
}
+
+
if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
atomic_set(&cpu->exit_request, 0);
cpu->exception_index = EXCP_INTERRUPT;
@@ -548,15 +586,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
*tb_exit = ret & TB_EXIT_MASK;
switch (*tb_exit) {
case TB_EXIT_REQUESTED:
- /* Something asked us to stop executing
- * chained TBs; just continue round the main
- * loop. Whatever requested the exit will also
- * have set something else (eg exit_request or
- * interrupt_request) which we will handle
- * next time around the loop. But we need to
- * ensure the zeroing of tcg_exit_req (see cpu_tb_exec)
- * comes before the next read of cpu->exit_request
- * or cpu->interrupt_request.
+ /* Something asked us to stop executing chained TBs; just
+ * continue round the main loop. Whatever requested the exit
+ * will also have set something else (eg interrupt_request)
+ * which we will handle next time around the loop. But we
+ * need to ensure the tcg_exit_req read in generated code
+ * comes before the next read of cpu->exit_request or
+ * cpu->interrupt_request.
*/
smp_mb();
*last_tb = NULL;
@@ -608,13 +644,8 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED;
}
- atomic_mb_set(&tcg_current_cpu, cpu);
rcu_read_lock();
- if (unlikely(atomic_mb_read(&exit_request))) {
- cpu->exit_request = 1;
- }
-
cc->cpu_exec_enter(cpu);
/* Calculate difference between guest clock and host clock.
@@ -640,6 +671,9 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
+ if (qemu_mutex_iothread_locked()) {
+ qemu_mutex_unlock_iothread();
+ }
}
/* if an exception is pending, we execute it here */
@@ -659,10 +693,5 @@ int cpu_exec(CPUState *cpu)
cc->cpu_exec_exit(cpu);
rcu_read_unlock();
- /* fail safe : never use current_cpu outside cpu_exec() */
- current_cpu = NULL;
-
- /* Does not need atomic_mb_set because a spurious wakeup is okay. */
- atomic_set(&tcg_current_cpu, NULL);
return ret;
}
diff --git a/cpus.c b/cpus.c
index 0bcb5b5..8200ac6 100644
--- a/cpus.c
+++ b/cpus.c
@@ -25,6 +25,7 @@
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
+#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
@@ -45,6 +46,7 @@
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
+#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
@@ -150,6 +152,77 @@ typedef struct TimersState {
} TimersState;
static TimersState timers_state;
+bool mttcg_enabled;
+
+/*
+ * We default to false if we know other options have been enabled
+ * which are currently incompatible with MTTCG. Otherwise when each
+ * guest (target) has been updated to support:
+ * - atomic instructions
+ * - memory ordering primitives (barriers)
+ * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
+ *
+ * Once a guest architecture has been converted to the new primitives
+ * there are two remaining limitations to check.
+ *
+ * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
+ * - The host must have a stronger memory order than the guest
+ *
+ * It may be possible in future to support strong guests on weak hosts
+ * but that will require tagging all load/stores in a guest with their
+ * implicit memory order requirements which would likely slow things
+ * down a lot.
+ */
+
+static bool check_tcg_memory_orders_compatible(void)
+{
+#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
+ return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
+#else
+ return false;
+#endif
+}
+
+static bool default_mttcg_enabled(void)
+{
+ QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
+ const char *rr = qemu_opt_get(icount_opts, "rr");
+
+ if (rr || TCG_OVERSIZED_GUEST) {
+ return false;
+ } else {
+#ifdef TARGET_SUPPORTS_MTTCG
+ return check_tcg_memory_orders_compatible();
+#else
+ return false;
+#endif
+ }
+}
+
+void qemu_tcg_configure(QemuOpts *opts, Error **errp)
+{
+ const char *t = qemu_opt_get(opts, "thread");
+ if (t) {
+ if (strcmp(t, "multi") == 0) {
+ if (TCG_OVERSIZED_GUEST) {
+ error_setg(errp, "No MTTCG when guest word size > hosts");
+ } else {
+ if (!check_tcg_memory_orders_compatible()) {
+ error_report("Guest expects a stronger memory ordering "
+ "than the host provides");
+ error_printf("This may cause strange/hard to debug errors");
+ }
+ mttcg_enabled = true;
+ }
+ } else if (strcmp(t, "single") == 0) {
+ mttcg_enabled = false;
+ } else {
+ error_setg(errp, "Invalid 'thread' setting %s", t);
+ }
+ } else {
+ mttcg_enabled = default_mttcg_enabled();
+ }
+}
int64_t cpu_get_icount_raw(void)
{
@@ -695,6 +768,63 @@ void configure_icount(QemuOpts *opts, Error **errp)
}
/***********************************************************/
+/* TCG vCPU kick timer
+ *
+ * The kick timer is responsible for moving single threaded vCPU
+ * emulation on to the next vCPU. If more than one vCPU is running a
+ * timer event with force a cpu->exit so the next vCPU can get
+ * scheduled.
+ *
+ * The timer is removed if all vCPUs are idle and restarted again once
+ * idleness is complete.
+ */
+
+static QEMUTimer *tcg_kick_vcpu_timer;
+static CPUState *tcg_current_rr_cpu;
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+static inline int64_t qemu_tcg_next_kick(void)
+{
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
+}
+
+/* Kick the currently round-robin scheduled vCPU */
+static void qemu_cpu_kick_rr_cpu(void)
+{
+ CPUState *cpu;
+ do {
+ cpu = atomic_mb_read(&tcg_current_rr_cpu);
+ if (cpu) {
+ cpu_exit(cpu);
+ }
+ } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
+}
+
+static void kick_tcg_thread(void *opaque)
+{
+ timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+ qemu_cpu_kick_rr_cpu();
+}
+
+static void start_tcg_kick_timer(void)
+{
+ if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+ tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ kick_tcg_thread, NULL);
+ timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+ }
+}
+
+static void stop_tcg_kick_timer(void)
+{
+ if (tcg_kick_vcpu_timer) {
+ timer_del(tcg_kick_vcpu_timer);
+ tcg_kick_vcpu_timer = NULL;
+ }
+}
+
+/***********************************************************/
void hw_error(const char *fmt, ...)
{
va_list ap;
@@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
-static QemuCond qemu_io_proceeded_cond;
-static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
@@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void)
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
- qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
@@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
static void qemu_wait_io_event_common(CPUState *cpu)
{
+ atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
- cpu->thread_kicked = false;
+}
+
+static bool qemu_tcg_should_sleep(CPUState *cpu)
+{
+ if (mttcg_enabled) {
+ return cpu_thread_is_idle(cpu);
+ } else {
+ return all_cpu_threads_idle();
+ }
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
- while (all_cpu_threads_idle()) {
+ while (qemu_tcg_should_sleep(cpu)) {
+ stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
- while (iothread_requesting_mutex) {
- qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
- }
+ start_tcg_kick_timer();
- CPU_FOREACH(cpu) {
- qemu_wait_io_event_common(cpu);
- }
+ qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
+ current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
- current_cpu = cpu;
while (1) {
- current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
@@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
exit(1);
}
qemu_mutex_lock_iothread();
- current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
@@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu)
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
+ qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
+ qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
@@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void)
}
}
-static void *qemu_tcg_cpu_thread_fn(void *arg)
+/* Single-threaded TCG
+ *
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ */
+
+static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
@@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
+ current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
- /* process any pending work */
- atomic_mb_set(&exit_request, 1);
+ start_tcg_kick_timer();
cpu = first_cpu;
+ /* process any pending work */
+ cpu->exit_request = 1;
+
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
@@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
- for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
+ while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
+
+ atomic_mb_set(&tcg_current_rr_cpu, cpu);
+ current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
@@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
+ } else if (r == EXCP_ATOMIC) {
+ qemu_mutex_unlock_iothread();
+ cpu_exec_step_atomic(cpu);
+ qemu_mutex_lock_iothread();
+ break;
}
- } else if (cpu->stop || cpu->stopped) {
+ } else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
- } /* for cpu.. */
+ cpu = CPU_NEXT(cpu);
+ } /* while (cpu && !cpu->exit_request).. */
+
+ /* Does not need atomic_mb_set because a spurious wakeup is okay. */
+ atomic_set(&tcg_current_rr_cpu, NULL);
- /* Pairs with smp_wmb in qemu_cpu_kick. */
- atomic_mb_set(&exit_request, 0);
+ if (cpu && cpu->exit_request) {
+ atomic_mb_set(&cpu->exit_request, 0);
+ }
handle_icount_deadline();
- qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
+ qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
@@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused)
}
#endif
+/* Multi-threaded TCG
+ *
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+static void *qemu_tcg_cpu_thread_fn(void *arg)
+{
+ CPUState *cpu = arg;
+
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
+ qemu_thread_get_self(cpu->thread);
+
+ cpu->thread_id = qemu_get_thread_id();
+ cpu->created = true;
+ cpu->can_do_io = 1;
+ current_cpu = cpu;
+ qemu_cond_signal(&qemu_cpu_cond);
+
+ /* process any pending work */
+ cpu->exit_request = 1;
+
+ while (1) {
+ if (cpu_can_run(cpu)) {
+ int r;
+ r = tcg_cpu_exec(cpu);
+ switch (r) {
+ case EXCP_DEBUG:
+ cpu_handle_guest_debug(cpu);
+ break;
+ case EXCP_HALTED:
+ /* during start-up the vCPU is reset and the thread is
+ * kicked several times. If we don't ensure we go back
+ * to sleep in the halted state we won't cleanly
+ * start-up when the vCPU is enabled.
+ *
+ * cpu->halted should ensure we sleep in wait_io_event
+ */
+ g_assert(cpu->halted);
+ break;
+ case EXCP_ATOMIC:
+ qemu_mutex_unlock_iothread();
+ cpu_exec_step_atomic(cpu);
+ qemu_mutex_lock_iothread();
+ default:
+ /* Ignore everything else? */
+ break;
+ }
+ }
+
+ handle_icount_deadline();
+
+ atomic_mb_set(&cpu->exit_request, 0);
+ qemu_tcg_wait_io_event(cpu);
+ }
+
+ return NULL;
+}
+
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
@@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
#endif
}
-static void qemu_cpu_kick_no_halt(void)
-{
- CPUState *cpu;
- /* Ensure whatever caused the exit has reached the CPU threads before
- * writing exit_request.
- */
- atomic_mb_set(&exit_request, 1);
- cpu = atomic_mb_read(&tcg_current_cpu);
- if (cpu) {
- cpu_exit(cpu);
- }
-}
-
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
- qemu_cpu_kick_no_halt();
+ cpu_exit(cpu);
+ /* NOP unless doing single-thread RR */
+ qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
@@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void)
void qemu_mutex_lock_iothread(void)
{
- atomic_inc(&iothread_requesting_mutex);
- /* In the simple case there is no need to bump the VCPU thread out of
- * TCG code execution.
- */
- if (!tcg_enabled() || qemu_in_vcpu_thread() ||
- !first_cpu || !first_cpu->created) {
- qemu_mutex_lock(&qemu_global_mutex);
- atomic_dec(&iothread_requesting_mutex);
- } else {
- if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_no_halt();
- qemu_mutex_lock(&qemu_global_mutex);
- }
- atomic_dec(&iothread_requesting_mutex);
- qemu_cond_broadcast(&qemu_io_proceeded_cond);
- }
+ g_assert(!qemu_mutex_iothread_locked());
+ qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
+ g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
@@ -1392,13 +1588,6 @@ void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
- if (!kvm_enabled()) {
- CPU_FOREACH(cpu) {
- cpu->stop = false;
- cpu->stopped = true;
- }
- return;
- }
}
while (!all_vcpus_paused()) {
@@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu)
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
- static QemuCond *tcg_halt_cond;
- static QemuThread *tcg_cpu_thread;
+ static QemuCond *single_tcg_halt_cond;
+ static QemuThread *single_tcg_cpu_thread;
- /* share a single thread for all cpus with TCG */
- if (!tcg_cpu_thread) {
+ if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
- tcg_halt_cond = cpu->halt_cond;
- snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+
+ if (qemu_tcg_mttcg_enabled()) {
+ /* create a thread per vCPU with TCG (MTTCG) */
+ parallel_cpus = true;
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+
+ qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+ } else {
+ /* share a single thread for all cpus with TCG */
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+ qemu_thread_create(cpu->thread, thread_name,
+ qemu_tcg_rr_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+ single_tcg_halt_cond = cpu->halt_cond;
+ single_tcg_cpu_thread = cpu->thread;
+ }
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
- tcg_cpu_thread = cpu->thread;
} else {
- cpu->thread = tcg_cpu_thread;
- cpu->halt_cond = tcg_halt_cond;
+ /* For non-MTTCG cases we share the thread */
+ cpu->thread = single_tcg_cpu_thread;
+ cpu->halt_cond = single_tcg_halt_cond;
}
}
diff --git a/cputlb.c b/cputlb.c
index 6c39927..7fa7fef 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@@ -57,6 +58,40 @@
} \
} while (0)
+#define assert_cpu_is_self(this_cpu) do { \
+ if (DEBUG_TLB_GATE) { \
+ g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \
+ } \
+ } while (0)
+
+/* run_on_cpu_data.target_ptr should always be big enough for a
+ * target_ulong even on 32 bit builds */
+QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
+
+/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
+ */
+QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
+#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
+
+/* flush_all_helper: run fn across all cpus
+ *
+ * If the wait flag is set then the src cpu's helper will be queued as
+ * "safe" work and the loop exited creating a synchronisation point
+ * where all queued work will be finished before execution starts
+ * again.
+ */
+static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
+ run_on_cpu_data d)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu != src) {
+ async_run_on_cpu(cpu, fn, d);
+ }
+ }
+}
+
/* statistics */
int tlb_flush_count;
@@ -65,10 +100,22 @@ int tlb_flush_count;
* flushing more entries than required is only an efficiency issue,
* not a correctness issue.
*/
-void tlb_flush(CPUState *cpu)
+static void tlb_flush_nocheck(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
+ /* The QOM tests will trigger tlb_flushes without setting up TCG
+ * so we bug out here in that case.
+ */
+ if (!tcg_enabled()) {
+ return;
+ }
+
+ assert_cpu_is_self(cpu);
+ tlb_debug("(count: %d)\n", tlb_flush_count++);
+
+ tb_lock();
+
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
@@ -76,39 +123,117 @@ void tlb_flush(CPUState *cpu)
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
- tlb_flush_count++;
+
+ tb_unlock();
+
+ atomic_mb_set(&cpu->pending_tlb_flush, 0);
+}
+
+static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
+{
+ tlb_flush_nocheck(cpu);
+}
+
+void tlb_flush(CPUState *cpu)
+{
+ if (cpu->created && !qemu_cpu_is_self(cpu)) {
+ if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
+ atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
+ async_run_on_cpu(cpu, tlb_flush_global_async_work,
+ RUN_ON_CPU_NULL);
+ }
+ } else {
+ tlb_flush_nocheck(cpu);
+ }
+}
+
+void tlb_flush_all_cpus(CPUState *src_cpu)
+{
+ const run_on_cpu_func fn = tlb_flush_global_async_work;
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
+ fn(src_cpu, RUN_ON_CPU_NULL);
+}
+
+void tlb_flush_all_cpus_synced(CPUState *src_cpu)
+{
+ const run_on_cpu_func fn = tlb_flush_global_async_work;
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
+ async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
}
-static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
+static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
+ unsigned long mmu_idx_bitmask = data.host_int;
+ int mmu_idx;
- tlb_debug("start\n");
+ assert_cpu_is_self(cpu);
- for (;;) {
- int mmu_idx = va_arg(argp, int);
+ tb_lock();
- if (mmu_idx < 0) {
- break;
- }
+ tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
- tlb_debug("%d\n", mmu_idx);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+
+ if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
+ tlb_debug("%d\n", mmu_idx);
- memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
- memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
+ memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
+ memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
+ }
}
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+
+ tlb_debug("done\n");
+
+ tb_unlock();
}
-void tlb_flush_by_mmuidx(CPUState *cpu, ...)
+void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
- va_list argp;
- va_start(argp, cpu);
- v_tlb_flush_by_mmuidx(cpu, argp);
- va_end(argp);
+ tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
+
+ if (!qemu_cpu_is_self(cpu)) {
+ uint16_t pending_flushes = idxmap;
+ pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
+
+ if (pending_flushes) {
+ tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
+
+ atomic_or(&cpu->pending_tlb_flush, pending_flushes);
+ async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
+ RUN_ON_CPU_HOST_INT(pending_flushes));
+ }
+ } else {
+ tlb_flush_by_mmuidx_async_work(cpu,
+ RUN_ON_CPU_HOST_INT(idxmap));
+ }
+}
+
+void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
+{
+ const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
+
+ tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
+ fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}
+void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+ uint16_t idxmap)
+{
+ const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
+
+ tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
+
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
+ async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
+}
+
+
+
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
if (addr == (tlb_entry->addr_read &
@@ -121,12 +246,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
}
}
-void tlb_flush_page(CPUState *cpu, target_ulong addr)
+static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
+ target_ulong addr = (target_ulong) data.target_ptr;
int i;
int mmu_idx;
+ assert_cpu_is_self(cpu);
+
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
/* Check if we need to flush due to large pages. */
@@ -156,15 +284,62 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
tb_flush_jmp_cache(cpu, addr);
}
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
+void tlb_flush_page(CPUState *cpu, target_ulong addr)
+{
+ tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+
+ if (!qemu_cpu_is_self(cpu)) {
+ async_run_on_cpu(cpu, tlb_flush_page_async_work,
+ RUN_ON_CPU_TARGET_PTR(addr));
+ } else {
+ tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
+ }
+}
+
+/* As we are going to hijack the bottom bits of the page address for a
+ * mmuidx bit mask we need to fail to build if we can't do that
+ */
+QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
+
+static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
+ run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
- int i, k;
- va_list argp;
+ target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
+ target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
+ unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
+ int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ int mmu_idx;
+ int i;
+
+ assert_cpu_is_self(cpu);
- va_start(argp, addr);
+ tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
+ page, addr, mmu_idx_bitmap);
+
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
+ tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr);
+
+ /* check whether there are vltb entries that need to be flushed */
+ for (i = 0; i < CPU_VTLB_SIZE; i++) {
+ tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr);
+ }
+ }
+ }
+
+ tb_flush_jmp_cache(cpu, addr);
+}
+
+static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
+ run_on_cpu_data data)
+{
+ CPUArchState *env = cpu->env_ptr;
+ target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
+ target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
+ unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
- tlb_debug("addr "TARGET_FMT_lx"\n", addr);
+ tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
@@ -172,33 +347,80 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
- v_tlb_flush_by_mmuidx(cpu, argp);
- va_end(argp);
- return;
+ tlb_flush_by_mmuidx_async_work(cpu,
+ RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
+ } else {
+ tlb_flush_page_by_mmuidx_async_work(cpu, data);
}
+}
- addr &= TARGET_PAGE_MASK;
- i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
+{
+ target_ulong addr_and_mmu_idx;
- for (;;) {
- int mmu_idx = va_arg(argp, int);
+ tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
- if (mmu_idx < 0) {
- break;
- }
+ /* This should already be page aligned */
+ addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
+ addr_and_mmu_idx |= idxmap;
- tlb_debug("idx %d\n", mmu_idx);
+ if (!qemu_cpu_is_self(cpu)) {
+ async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
+ RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+ } else {
+ tlb_check_page_and_flush_by_mmuidx_async_work(
+ cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+ }
+}
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
+ uint16_t idxmap)
+{
+ const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
+ target_ulong addr_and_mmu_idx;
- /* check whether there are vltb entries that need to be flushed */
- for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
- }
- }
- va_end(argp);
+ tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
- tb_flush_jmp_cache(cpu, addr);
+ /* This should already be page aligned */
+ addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
+ addr_and_mmu_idx |= idxmap;
+
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+ fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+}
+
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
+ target_ulong addr,
+ uint16_t idxmap)
+{
+ const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
+ target_ulong addr_and_mmu_idx;
+
+ tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
+
+ /* This should already be page aligned */
+ addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
+ addr_and_mmu_idx |= idxmap;
+
+ flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+ async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+}
+
+void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
+{
+ const run_on_cpu_func fn = tlb_flush_page_async_work;
+
+ flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
+ fn(src, RUN_ON_CPU_TARGET_PTR(addr));
+}
+
+void tlb_flush_page_all_cpus_synced(CPUState *src,
+ target_ulong addr)
+{
+ const run_on_cpu_func fn = tlb_flush_page_async_work;
+
+ flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
+ async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
}
/* update the TLBs so that writes to code in the virtual page 'addr'
@@ -216,36 +438,84 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
-static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
-{
- return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
-}
-void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
+/*
+ * Dirty write flag handling
+ *
+ * When the TCG code writes to a location it looks up the address in
+ * the TLB and uses that data to compute the final address. If any of
+ * the lower bits of the address are set then the slow path is forced.
+ * There are a number of reasons to do this but for normal RAM the
+ * most usual is detecting writes to code regions which may invalidate
+ * generated code.
+ *
+ * Because we want other vCPUs to respond to changes straight away we
+ * update the te->addr_write field atomically. If the TLB entry has
+ * been changed by the vCPU in the mean time we skip the update.
+ *
+ * As this function uses atomic accesses we also need to ensure
+ * updates to tlb_entries follow the same access rules. We don't need
+ * to worry about this for oversized guests as MTTCG is disabled for
+ * them.
+ */
+
+static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
uintptr_t length)
{
- uintptr_t addr;
+#if TCG_OVERSIZED_GUEST
+ uintptr_t addr = tlb_entry->addr_write;
- if (tlb_is_dirty_ram(tlb_entry)) {
- addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+ if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
+ addr &= TARGET_PAGE_MASK;
+ addr += tlb_entry->addend;
if ((addr - start) < length) {
tlb_entry->addr_write |= TLB_NOTDIRTY;
}
}
+#else
+ /* paired with atomic_mb_set in tlb_set_page_with_attrs */
+ uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write);
+ uintptr_t addr = orig_addr;
+
+ if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) {
+ addr &= TARGET_PAGE_MASK;
+ addr += atomic_read(&tlb_entry->addend);
+ if ((addr - start) < length) {
+ uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY;
+ atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr);
+ }
+ }
+#endif
}
-static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+/* For atomic correctness when running MTTCG we need to use the right
+ * primitives when copying entries */
+static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s,
+ bool atomic_set)
{
- ram_addr_t ram_addr;
-
- ram_addr = qemu_ram_addr_from_host(ptr);
- if (ram_addr == RAM_ADDR_INVALID) {
- fprintf(stderr, "Bad ram pointer %p\n", ptr);
- abort();
+#if TCG_OVERSIZED_GUEST
+ *d = *s;
+#else
+ if (atomic_set) {
+ d->addr_read = s->addr_read;
+ d->addr_code = s->addr_code;
+ atomic_set(&d->addend, atomic_read(&s->addend));
+ /* Pairs with flag setting in tlb_reset_dirty_range */
+ atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write));
+ } else {
+ d->addr_read = s->addr_read;
+ d->addr_write = atomic_read(&s->addr_write);
+ d->addr_code = s->addr_code;
+ d->addend = atomic_read(&s->addend);
}
- return ram_addr;
+#endif
}
+/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
+ * the target vCPU). As such care needs to be taken that we don't
+ * dangerously race with another vCPU update. The only thing actually
+ * updated is the target TLB entry ->addr_write flags.
+ */
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
CPUArchState *env;
@@ -283,6 +553,8 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
int i;
int mmu_idx;
+ assert_cpu_is_self(cpu);
+
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
@@ -337,11 +609,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
target_ulong address;
target_ulong code_address;
uintptr_t addend;
- CPUTLBEntry *te;
+ CPUTLBEntry *te, *tv, tn;
hwaddr iotlb, xlat, sz;
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
int asidx = cpu_asidx_from_attrs(cpu, attrs);
+ assert_cpu_is_self(cpu);
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
@@ -371,41 +644,50 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
te = &env->tlb_table[mmu_idx][index];
-
/* do not discard the translation in te, evict it into a victim tlb */
- env->tlb_v_table[mmu_idx][vidx] = *te;
+ tv = &env->tlb_v_table[mmu_idx][vidx];
+
+ /* addr_write can race with tlb_reset_dirty_range */
+ copy_tlb_helper(tv, te, true);
+
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
- te->addend = addend - vaddr;
+
+ /* Now calculate the new entry */
+ tn.addend = addend - vaddr;
if (prot & PAGE_READ) {
- te->addr_read = address;
+ tn.addr_read = address;
} else {
- te->addr_read = -1;
+ tn.addr_read = -1;
}
if (prot & PAGE_EXEC) {
- te->addr_code = code_address;
+ tn.addr_code = code_address;
} else {
- te->addr_code = -1;
+ tn.addr_code = -1;
}
+
+ tn.addr_write = -1;
if (prot & PAGE_WRITE) {
if ((memory_region_is_ram(section->mr) && section->readonly)
|| memory_region_is_romd(section->mr)) {
/* Write access calls the I/O callback. */
- te->addr_write = address | TLB_MMIO;
+ tn.addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
&& cpu_physical_memory_is_clean(
memory_region_get_ram_addr(section->mr) + xlat)) {
- te->addr_write = address | TLB_NOTDIRTY;
+ tn.addr_write = address | TLB_NOTDIRTY;
} else {
- te->addr_write = address;
+ tn.addr_write = address;
}
- } else {
- te->addr_write = -1;
}
+
+ /* Pairs with flag setting in tlb_reset_dirty_range */
+ copy_tlb_helper(te, &tn, true);
+ /* atomic_mb_set(&te->addr_write, write_address); */
}
/* Add a new TLB entry, but without specifying the memory
@@ -452,6 +734,18 @@ static void report_bad_exec(CPUState *cpu, target_ulong addr)
log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
}
+static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+{
+ ram_addr_t ram_addr;
+
+ ram_addr = qemu_ram_addr_from_host(ptr);
+ if (ram_addr == RAM_ADDR_INVALID) {
+ error_report("Bad ram pointer %p", ptr);
+ abort();
+ }
+ return ram_addr;
+}
+
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
@@ -495,6 +789,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
uint64_t val;
+ bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
@@ -503,7 +798,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
cpu->mem_io_vaddr = addr;
+
+ if (mr->global_locking) {
+ qemu_mutex_lock_iothread();
+ locked = true;
+ }
memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
+ if (locked) {
+ qemu_mutex_unlock_iothread();
+ }
+
return val;
}
@@ -514,15 +818,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
CPUState *cpu = ENV_GET_CPU(env);
hwaddr physaddr = iotlbentry->addr;
MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
+ bool locked = false;
physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
-
cpu->mem_io_vaddr = addr;
cpu->mem_io_pc = retaddr;
+
+ if (mr->global_locking) {
+ qemu_mutex_lock_iothread();
+ locked = true;
+ }
memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
+ if (locked) {
+ qemu_mutex_unlock_iothread();
+ }
}
/* Return true if ADDR is present in the victim tlb, and has been copied
@@ -538,10 +850,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
+
+ copy_tlb_helper(&tmptlb, tlb, false);
+ copy_tlb_helper(tlb, vtlb, true);
+ copy_tlb_helper(vtlb, &tmptlb, true);
+
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
-
- tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
tmpio = *io; *io = *vio; *vio = tmpio;
return true;
}
diff --git a/docs/multi-thread-tcg.txt b/docs/multi-thread-tcg.txt
new file mode 100644
index 0000000..a99b456
--- /dev/null
+++ b/docs/multi-thread-tcg.txt
@@ -0,0 +1,350 @@
+Copyright (c) 2015-2016 Linaro Ltd.
+
+This work is licensed under the terms of the GNU GPL, version 2 or
+later. See the COPYING file in the top-level directory.
+
+Introduction
+============
+
+This document outlines the design for multi-threaded TCG system-mode
+emulation. The current user-mode emulation mirrors the thread
+structure of the translated executable. Some of the work will be
+applicable to both system and linux-user emulation.
+
+The original system-mode TCG implementation was single threaded and
+dealt with multiple CPUs with simple round-robin scheduling. This
+simplified a lot of things but became increasingly limited as systems
+being emulated gained additional cores and per-core performance gains
+for host systems started to level off.
+
+vCPU Scheduling
+===============
+
+We introduce a new running mode where each vCPU will run on its own
+user-space thread. This will be enabled by default for all FE/BE
+combinations that have had the required work done to support this
+safely.
+
+In the general case of running translated code there should be no
+inter-vCPU dependencies and all vCPUs should be able to run at full
+speed. Synchronisation will only be required while accessing internal
+shared data structures or when the emulated architecture requires a
+coherent representation of the emulated machine state.
+
+Shared Data Structures
+======================
+
+Main Run Loop
+-------------
+
+Even when there is no code being generated there are a number of
+structures associated with the hot-path through the main run-loop.
+These are associated with looking up the next translation block to
+execute. These include:
+
+ tb_jmp_cache (per-vCPU, cache of recent jumps)
+ tb_ctx.htable (global hash table, phys address->tb lookup)
+
+As TB linking only occurs when blocks are in the same page this code
+is critical to performance as looking up the next TB to execute is the
+most common reason to exit the generated code.
+
+DESIGN REQUIREMENT: Make access to lookup structures safe with
+multiple reader/writer threads. Minimise any lock contention to do it.
+
+The hot-path avoids using locks where possible. The tb_jmp_cache is
+updated with atomic accesses to ensure consistent results. The fall
+back QHT based hash table is also designed for lockless lookups. Locks
+are only taken when code generation is required or TranslationBlocks
+have their block-to-block jumps patched.
+
+Global TCG State
+----------------
+
+We need to protect the entire code generation cycle including any post
+generation patching of the translated code. This also implies a shared
+translation buffer which contains code running on all cores. Any
+execution path that comes to the main run loop will need to hold a
+mutex for code generation. This also includes times when we need flush
+code or entries from any shared lookups/caches. Structures held on a
+per-vCPU basis won't need locking unless other vCPUs will need to
+modify them.
+
+DESIGN REQUIREMENT: Add locking around all code generation and TB
+patching.
+
+(Current solution)
+
+Mainly as part of the linux-user work all code generation is
+serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the
+place of mmap_lock() in linux-user.
+
+Translation Blocks
+------------------
+
+Currently the whole system shares a single code generation buffer
+which when full will force a flush of all translations and start from
+scratch again. Some operations also force a full flush of translations
+including:
+
+ - debugging operations (breakpoint insertion/removal)
+ - some CPU helper functions
+
+This is done with the async_safe_run_on_cpu() mechanism to ensure all
+vCPUs are quiescent when changes are being made to shared global
+structures.
+
+More granular translation invalidation events are typically due
+to a change of the state of a physical page:
+
+ - code modification (self modify code, patching code)
+ - page changes (new page mapping in linux-user mode)
+
+While setting the invalid flag in a TranslationBlock will stop it
+being used when looked up in the hot-path there are a number of other
+book-keeping structures that need to be safely cleared.
+
+Any TranslationBlocks which have been patched to jump directly to the
+now invalid blocks need the jump patches reversing so they will return
+to the C code.
+
+There are a number of look-up caches that need to be properly updated
+including the:
+
+ - jump lookup cache
+ - the physical-to-tb lookup hash table
+ - the global page table
+
+The global page table (l1_map) which provides a multi-level look-up
+for PageDesc structures which contain pointers to the start of a
+linked list of all Translation Blocks in that page (see page_next).
+
+Both the jump patching and the page cache involve linked lists that
+the invalidated TranslationBlock needs to be removed from.
+
+DESIGN REQUIREMENT: Safely handle invalidation of TBs
+ - safely patch/revert direct jumps
+ - remove central PageDesc lookup entries
+ - ensure lookup caches/hashes are safely updated
+
+(Current solution)
+
+The direct jump themselves are updated atomically by the TCG
+tb_set_jmp_target() code. Modification to the linked lists that allow
+searching for linked pages are done under the protect of the
+tb_lock().
+
+The global page table is protected by the tb_lock() in system-mode and
+mmap_lock() in linux-user mode.
+
+The lookup caches are updated atomically and the lookup hash uses QHT
+which is designed for concurrent safe lookup.
+
+
+Memory maps and TLBs
+--------------------
+
+The memory handling code is fairly critical to the speed of memory
+access in the emulated system. The SoftMMU code is designed so the
+hot-path can be handled entirely within translated code. This is
+handled with a per-vCPU TLB structure which once populated will allow
+a series of accesses to the page to occur without exiting the
+translated code. It is possible to set flags in the TLB address which
+will ensure the slow-path is taken for each access. This can be done
+to support:
+
+ - Memory regions (dividing up access to PIO, MMIO and RAM)
+ - Dirty page tracking (for code gen, SMC detection, migration and display)
+ - Virtual TLB (for translating guest address->real address)
+
+When the TLB tables are updated by a vCPU thread other than their own
+we need to ensure it is done in a safe way so no inconsistent state is
+seen by the vCPU thread.
+
+Some operations require updating a number of vCPUs TLBs at the same
+time in a synchronised manner.
+
+DESIGN REQUIREMENTS:
+
+ - TLB Flush All/Page
+ - can be across-vCPUs
+ - cross vCPU TLB flush may need other vCPU brought to halt
+ - change may need to be visible to the calling vCPU immediately
+ - TLB Flag Update
+ - usually cross-vCPU
+ - want change to be visible as soon as possible
+ - TLB Update (update a CPUTLBEntry, via tlb_set_page_with_attrs)
+ - This is a per-vCPU table - by definition can't race
+ - updated by its own thread when the slow-path is forced
+
+(Current solution)
+
+We have updated cputlb.c to defer operations when a cross-vCPU
+operation with async_run_on_cpu() which ensures each vCPU sees a
+coherent state when it next runs its work (in a few instructions
+time).
+
+A new set up operations (tlb_flush_*_all_cpus) take an additional flag
+which when set will force synchronisation by setting the source vCPUs
+work as "safe work" and exiting the cpu run loop. This ensure by the
+time execution restarts all flush operations have completed.
+
+TLB flag updates are all done atomically and are also protected by the
+tb_lock() which is used by the functions that update the TLB in bulk.
+
+(Known limitation)
+
+Not really a limitation but the wait mechanism is overly strict for
+some architectures which only need flushes completed by a barrier
+instruction. This could be a future optimisation.
+
+Emulated hardware state
+-----------------------
+
+Currently thanks to KVM work any access to IO memory is automatically
+protected by the global iothread mutex, also known as the BQL (Big
+Qemu Lock). Any IO region that doesn't use global mutex is expected to
+do its own locking.
+
+However IO memory isn't the only way emulated hardware state can be
+modified. Some architectures have model specific registers that
+trigger hardware emulation features. Generally any translation helper
+that needs to update more than a single vCPUs of state should take the
+BQL.
+
+As the BQL, or global iothread mutex is shared across the system we
+push the use of the lock as far down into the TCG code as possible to
+minimise contention.
+
+(Current solution)
+
+MMIO access automatically serialises hardware emulation by way of the
+BQL. Currently ARM targets serialise all ARM_CP_IO register accesses
+and also defer the reset/startup of vCPUs to the vCPU context by way
+of async_run_on_cpu().
+
+Updates to interrupt state are also protected by the BQL as they can
+often be cross vCPU.
+
+Memory Consistency
+==================
+
+Between emulated guests and host systems there are a range of memory
+consistency models. Even emulating weakly ordered systems on strongly
+ordered hosts needs to ensure things like store-after-load re-ordering
+can be prevented when the guest wants to.
+
+Memory Barriers
+---------------
+
+Barriers (sometimes known as fences) provide a mechanism for software
+to enforce a particular ordering of memory operations from the point
+of view of external observers (e.g. another processor core). They can
+apply to any memory operations as well as just loads or stores.
+
+The Linux kernel has an excellent write-up on the various forms of
+memory barrier and the guarantees they can provide [1].
+
+Barriers are often wrapped around synchronisation primitives to
+provide explicit memory ordering semantics. However they can be used
+by themselves to provide safe lockless access by ensuring for example
+a change to a signal flag will only be visible once the changes to
+payload are.
+
+DESIGN REQUIREMENT: Add a new tcg_memory_barrier op
+
+This would enforce a strong load/store ordering so all loads/stores
+complete at the memory barrier. On single-core non-SMP strongly
+ordered backends this could become a NOP.
+
+Aside from explicit standalone memory barrier instructions there are
+also implicit memory ordering semantics which comes with each guest
+memory access instruction. For example all x86 load/stores come with
+fairly strong guarantees of sequential consistency where as ARM has
+special variants of load/store instructions that imply acquire/release
+semantics.
+
+In the case of a strongly ordered guest architecture being emulated on
+a weakly ordered host the scope for a heavy performance impact is
+quite high.
+
+DESIGN REQUIREMENTS: Be efficient with use of memory barriers
+ - host systems with stronger implied guarantees can skip some barriers
+ - merge consecutive barriers to the strongest one
+
+(Current solution)
+
+The system currently has a tcg_gen_mb() which will add memory barrier
+operations if code generation is being done in a parallel context. The
+tcg_optimize() function attempts to merge barriers up to their
+strongest form before any load/store operations. The solution was
+originally developed and tested for linux-user based systems. All
+backends have been converted to emit fences when required. So far the
+following front-ends have been updated to emit fences when required:
+
+ - target-i386
+ - target-arm
+ - target-aarch64
+ - target-alpha
+ - target-mips
+
+Memory Control and Maintenance
+------------------------------
+
+This includes a class of instructions for controlling system cache
+behaviour. While QEMU doesn't model cache behaviour these instructions
+are often seen when code modification has taken place to ensure the
+changes take effect.
+
+Synchronisation Primitives
+--------------------------
+
+There are two broad types of synchronisation primitives found in
+modern ISAs: atomic instructions and exclusive regions.
+
+The first type offer a simple atomic instruction which will guarantee
+some sort of test and conditional store will be truly atomic w.r.t.
+other cores sharing access to the memory. The classic example is the
+x86 cmpxchg instruction.
+
+The second type offer a pair of load/store instructions which offer a
+guarantee that an region of memory has not been touched between the
+load and store instructions. An example of this is ARM's ldrex/strex
+pair where the strex instruction will return a flag indicating a
+successful store only if no other CPU has accessed the memory region
+since the ldrex.
+
+Traditionally TCG has generated a series of operations that work
+because they are within the context of a single translation block so
+will have completed before another CPU is scheduled. However with
+the ability to have multiple threads running to emulate multiple CPUs
+we will need to explicitly expose these semantics.
+
+DESIGN REQUIREMENTS:
+ - Support classic atomic instructions
+ - Support load/store exclusive (or load link/store conditional) pairs
+ - Generic enough infrastructure to support all guest architectures
+CURRENT OPEN QUESTIONS:
+ - How problematic is the ABA problem in general?
+
+(Current solution)
+
+The TCG provides a number of atomic helpers (tcg_gen_atomic_*) which
+can be used directly or combined to emulate other instructions like
+ARM's ldrex/strex instructions. While they are susceptible to the ABA
+problem so far common guests have not implemented patterns where
+this may be a problem - typically presenting a locking ABI which
+assumes cmpxchg like semantics.
+
+The code also includes a fall-back for cases where multi-threaded TCG
+ops can't work (e.g. guest atomic width > host atomic width). In this
+case an EXCP_ATOMIC exit occurs and the instruction is emulated with
+an exclusive lock which ensures all emulation is serialised.
+
+While the atomic helpers look good enough for now there may be a need
+to look at solutions that can more closely model the guest
+architectures semantics.
+
+==========
+
+[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/Documentation/memory-barriers.txt
diff --git a/exec.c b/exec.c
index 865a1e8..3adf2b1 100644
--- a/exec.c
+++ b/exec.c
@@ -2134,9 +2134,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
}
cpu->watchpoint_hit = wp;
- /* The tb_lock will be reset when cpu_loop_exit or
- * cpu_loop_exit_noexc longjmp back into the cpu_exec
- * main loop.
+ /* Both tb_lock and iothread_mutex will be reset when
+ * cpu_loop_exit or cpu_loop_exit_noexc longjmp
+ * back into the cpu_exec main loop.
*/
tb_lock();
tb_check_watchpoint(cpu);
@@ -2371,8 +2371,14 @@ static void io_mem_init(void)
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
+
+ /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
+ * which can be called without the iothread mutex.
+ */
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
+ memory_region_clear_global_locking(&io_mem_notdirty);
+
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}
diff --git a/hw/core/irq.c b/hw/core/irq.c
index 49ff2e6..b98d1d6 100644
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
#include "qemu-common.h"
#include "hw/irq.h"
#include "qom/object.h"
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 7135633..82a4955 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -457,8 +457,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
resume_all_vcpus();
if (!kvm_enabled()) {
- /* tb_lock will be reset when cpu_loop_exit_noexc longjmps
- * back into the cpu_exec loop. */
+ /* Both tb_lock and iothread_mutex will be reset when
+ * longjmps back into the cpu_exec loop. */
tb_lock();
tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
cpu_loop_exit_noexc(cs);
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index c25ee03..f775aba 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qemu/bitops.h"
+#include "qemu/main-loop.h"
#include "trace.h"
#include "gicv3_internal.h"
#include "cpu.h"
@@ -733,6 +734,8 @@ void gicv3_cpuif_update(GICv3CPUState *cs)
ARMCPU *cpu = ARM_CPU(cs->cpu);
CPUARMState *env = &cpu->env;
+ g_assert(qemu_mutex_iothread_locked());
+
trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq,
cs->hppi.grp, cs->hppi.prio);
diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c
index 55b817b..edbb756 100644
--- a/hw/misc/imx6_src.c
+++ b/hw/misc/imx6_src.c
@@ -14,6 +14,7 @@
#include "qemu/bitops.h"
#include "qemu/log.h"
#include "arm-powerctl.h"
+#include "qom/cpu.h"
#ifndef DEBUG_IMX6_SRC
#define DEBUG_IMX6_SRC 0
@@ -113,6 +114,45 @@ static uint64_t imx6_src_read(void *opaque, hwaddr offset, unsigned size)
return value;
}
+
+/* The reset is asynchronous so we need to defer clearing the reset
+ * bit until the work is completed.
+ */
+
+struct SRCSCRResetInfo {
+ IMX6SRCState *s;
+ int reset_bit;
+};
+
+static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data)
+{
+ struct SRCSCRResetInfo *ri = data.host_ptr;
+ IMX6SRCState *s = ri->s;
+
+ assert(qemu_mutex_iothread_locked());
+
+ s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0);
+ DPRINTF("reg[%s] <= 0x%" PRIx32 "\n",
+ imx6_src_reg_name(SRC_SCR), s->regs[SRC_SCR]);
+
+ g_free(ri);
+}
+
+static void imx6_defer_clear_reset_bit(int cpuid,
+ IMX6SRCState *s,
+ unsigned long reset_shift)
+{
+ struct SRCSCRResetInfo *ri;
+
+ ri = g_malloc(sizeof(struct SRCSCRResetInfo));
+ ri->s = s;
+ ri->reset_bit = reset_shift;
+
+ async_run_on_cpu(arm_get_cpu_by_id(cpuid), imx6_clear_reset_bit,
+ RUN_ON_CPU_HOST_PTR(ri));
+}
+
+
static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
@@ -153,7 +193,7 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_off(3);
}
/* We clear the reset bits as the processor changed state */
- clear_bit(CORE3_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
clear_bit(CORE3_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE2_ENABLE)) {
@@ -162,11 +202,11 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_on(2, s->regs[SRC_GPR5], s->regs[SRC_GPR6],
3, false);
} else {
- /* CORE 3 is shut down */
+ /* CORE 2 is shut down */
arm_set_cpu_off(2);
}
/* We clear the reset bits as the processor changed state */
- clear_bit(CORE2_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
clear_bit(CORE2_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE1_ENABLE)) {
@@ -175,28 +215,28 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value,
arm_set_cpu_on(1, s->regs[SRC_GPR3], s->regs[SRC_GPR4],
3, false);
} else {
- /* CORE 3 is shut down */
+ /* CORE 1 is shut down */
arm_set_cpu_off(1);
}
/* We clear the reset bits as the processor changed state */
- clear_bit(CORE1_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
clear_bit(CORE1_RST_SHIFT, &change_mask);
}
if (EXTRACT(change_mask, CORE0_RST)) {
arm_reset_cpu(0);
- clear_bit(CORE0_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(0, s, CORE0_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE1_RST)) {
arm_reset_cpu(1);
- clear_bit(CORE1_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE2_RST)) {
arm_reset_cpu(2);
- clear_bit(CORE2_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT);
}
if (EXTRACT(change_mask, CORE3_RST)) {
arm_reset_cpu(3);
- clear_bit(CORE3_RST_SHIFT, &current_value);
+ imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT);
}
if (EXTRACT(change_mask, SW_IPU2_RST)) {
/* We pretend the IPU2 is reset */
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index d171e60..5f93083 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -62,7 +62,16 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- unsigned int old_pending = env->pending_interrupts;
+ unsigned int old_pending;
+ bool locked = false;
+
+ /* We may already have the BQL if coming from the reset path */
+ if (!qemu_mutex_iothread_locked()) {
+ locked = true;
+ qemu_mutex_lock_iothread();
+ }
+
+ old_pending = env->pending_interrupts;
if (level) {
env->pending_interrupts |= 1 << n_IRQ;
@@ -80,9 +89,14 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
#endif
}
+
LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32
"req %08x\n", __func__, env, n_IRQ, level,
env->pending_interrupts, CPU(cpu)->interrupt_request);
+
+ if (locked) {
+ qemu_mutex_unlock_iothread();
+ }
}
/* PowerPC 6xx / 7xx internal IRQ controller */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5904e64..87d8366 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1010,6 +1010,9 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
{
CPUPPCState *env = &cpu->env;
+ /* The TCG path should also be holding the BQL at this point */
+ g_assert(qemu_mutex_iothread_locked());
+
if (msr_pr) {
hcall_dprintf("Hypercall made with MSR[PR]=1\n");
env->gpr[3] = H_PRIVILEGE;
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index d454c00..3f94178 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -23,8 +23,6 @@
/* cputlb.c */
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code(ram_addr_t ram_addr);
-void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
- uintptr_t length);
extern int tlb_flush_count;
#endif
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 21ab7bf..bcde1e6 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -93,6 +93,27 @@ void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx);
*/
void tlb_flush_page(CPUState *cpu, target_ulong addr);
/**
+ * tlb_flush_page_all_cpus:
+ * @cpu: src CPU of the flush
+ * @addr: virtual address of page to be flushed
+ *
+ * Flush one page from the TLB of the specified CPU, for all
+ * MMU indexes.
+ */
+void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr);
+/**
+ * tlb_flush_page_all_cpus_synced:
+ * @cpu: src CPU of the flush
+ * @addr: virtual address of page to be flushed
+ *
+ * Flush one page from the TLB of the specified CPU, for all MMU
+ * indexes like tlb_flush_page_all_cpus except the source vCPUs work
+ * is scheduled as safe work meaning all flushes will be complete once
+ * the source vCPUs safe work is complete. This will depend on when
+ * the guests translation ends the TB.
+ */
+void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr);
+/**
* tlb_flush:
* @cpu: CPU whose TLB should be flushed
*
@@ -103,24 +124,87 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
*/
void tlb_flush(CPUState *cpu);
/**
+ * tlb_flush_all_cpus:
+ * @cpu: src CPU of the flush
+ */
+void tlb_flush_all_cpus(CPUState *src_cpu);
+/**
+ * tlb_flush_all_cpus_synced:
+ * @cpu: src CPU of the flush
+ *
+ * Like tlb_flush_all_cpus except this except the source vCPUs work is
+ * scheduled as safe work meaning all flushes will be complete once
+ * the source vCPUs safe work is complete. This will depend on when
+ * the guests translation ends the TB.
+ */
+void tlb_flush_all_cpus_synced(CPUState *src_cpu);
+/**
* tlb_flush_page_by_mmuidx:
* @cpu: CPU whose TLB should be flushed
* @addr: virtual address of page to be flushed
- * @...: list of MMU indexes to flush, terminated by a negative value
+ * @idxmap: bitmap of MMU indexes to flush
*
* Flush one page from the TLB of the specified CPU, for the specified
* MMU indexes.
*/
-void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...);
+void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap);
+/**
+ * tlb_flush_page_by_mmuidx_all_cpus:
+ * @cpu: Originating CPU of the flush
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush one page from the TLB of all CPUs, for the specified
+ * MMU indexes.
+ */
+void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap);
+/**
+ * tlb_flush_page_by_mmuidx_all_cpus_synced:
+ * @cpu: Originating CPU of the flush
+ * @addr: virtual address of page to be flushed
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush one page from the TLB of all CPUs, for the specified MMU
+ * indexes like tlb_flush_page_by_mmuidx_all_cpus except the source
+ * vCPUs work is scheduled as safe work meaning all flushes will be
+ * complete once the source vCPUs safe work is complete. This will
+ * depend on when the guests translation ends the TB.
+ */
+void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr,
+ uint16_t idxmap);
/**
* tlb_flush_by_mmuidx:
* @cpu: CPU whose TLB should be flushed
- * @...: list of MMU indexes to flush, terminated by a negative value
+ * @wait: If true ensure synchronisation by exiting the cpu_loop
+ * @idxmap: bitmap of MMU indexes to flush
*
* Flush all entries from the TLB of the specified CPU, for the specified
* MMU indexes.
*/
-void tlb_flush_by_mmuidx(CPUState *cpu, ...);
+void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap);
+/**
+ * tlb_flush_by_mmuidx_all_cpus:
+ * @cpu: Originating CPU of the flush
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush all entries from all TLBs of all CPUs, for the specified
+ * MMU indexes.
+ */
+void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap);
+/**
+ * tlb_flush_by_mmuidx_all_cpus_synced:
+ * @cpu: Originating CPU of the flush
+ * @idxmap: bitmap of MMU indexes to flush
+ *
+ * Flush all entries from all TLBs of all CPUs, for the specified
+ * MMU indexes like tlb_flush_by_mmuidx_all_cpus except except the source
+ * vCPUs work is scheduled as safe work meaning all flushes will be
+ * complete once the source vCPUs safe work is complete. This will
+ * depend on when the guests translation ends the TB.
+ */
+void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
/**
* tlb_set_page_with_attrs:
* @cpu: CPU to add this TLB entry for
@@ -162,17 +246,45 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
}
-
+static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
+{
+}
+static inline void tlb_flush_page_all_cpus_synced(CPUState *src,
+ target_ulong addr)
+{
+}
static inline void tlb_flush(CPUState *cpu)
{
}
-
+static inline void tlb_flush_all_cpus(CPUState *src_cpu)
+{
+}
+static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
+{
+}
static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
- target_ulong addr, ...)
+ target_ulong addr, uint16_t idxmap)
{
}
-static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
+static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
+{
+}
+static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
+ target_ulong addr,
+ uint16_t idxmap)
+{
+}
+static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
+ target_ulong addr,
+ uint16_t idxmap)
+{
+}
+static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap)
+{
+}
+static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
+ uint16_t idxmap)
{
}
#endif
@@ -404,8 +516,4 @@ bool memory_region_is_unassigned(MemoryRegion *mr);
/* vl.c */
extern int singlestep;
-/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
-extern CPUState *tcg_current_cpu;
-extern bool exit_request;
-
#endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index f69b240..3e61c88 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -329,6 +329,7 @@ struct CPUState {
bool unplug;
bool crash_occurred;
bool exit_request;
+ /* updates protected by BQL */
uint32_t interrupt_request;
int singlestep_enabled;
int64_t icount_extra;
@@ -401,6 +402,12 @@ struct CPUState {
bool hax_vcpu_dirty;
struct hax_vcpu_state *hax_vcpu;
+
+ /* The pending_tlb_flush flag is set and cleared atomically to
+ * avoid potential races. The aim of the flag is to avoid
+ * unnecessary flushes.
+ */
+ uint16_t pending_tlb_flush;
};
QTAILQ_HEAD(CPUTailQ, CPUState);
@@ -416,6 +423,15 @@ extern struct CPUTailQ cpus;
extern __thread CPUState *current_cpu;
/**
+ * qemu_tcg_mttcg_enabled:
+ * Check whether we are running MultiThread TCG or not.
+ *
+ * Returns: %true if we are in MTTCG mode %false otherwise.
+ */
+extern bool mttcg_enabled;
+#define qemu_tcg_mttcg_enabled() (mttcg_enabled)
+
+/**
* cpu_paging_enabled:
* @cpu: The CPU whose state is to be inspected.
*
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 3728a1e..a73b5d4 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -36,4 +36,6 @@ extern int smp_threads;
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg);
+void qemu_tcg_configure(QemuOpts *opts, Error **errp);
+
#endif
diff --git a/memory.c b/memory.c
index ed8b5aa..d61caee 100644
--- a/memory.c
+++ b/memory.c
@@ -917,6 +917,8 @@ void memory_region_transaction_commit(void)
AddressSpace *as;
assert(memory_region_transaction_depth);
+ assert(qemu_mutex_iothread_locked());
+
--memory_region_transaction_depth;
if (!memory_region_transaction_depth) {
if (memory_region_update_pending) {
diff --git a/qemu-options.hx b/qemu-options.hx
index 9936cf3..bf458f8 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -95,6 +95,26 @@ STEXI
Select CPU model (@code{-cpu help} for list and additional feature selection)
ETEXI
+DEF("accel", HAS_ARG, QEMU_OPTION_accel,
+ "-accel [accel=]accelerator[,thread=single|multi]\n"
+ " select accelerator ('-accel help for list')\n"
+ " thread=single|multi (enable multi-threaded TCG)", QEMU_ARCH_ALL)
+STEXI
+@item -accel @var{name}[,prop=@var{value}[,...]]
+@findex -accel
+This is used to enable an accelerator. Depending on the target architecture,
+kvm, xen, or tcg can be available. By default, tcg is used. If there is more
+than one accelerator specified, the next one is used if the previous one fails
+to initialize.
+@table @option
+@item thread=single|multi
+Controls number of TCG threads. When the TCG is multi-threaded there will be one
+thread per vCPU therefor taking advantage of additional host cores. The default
+is to enable multi-threading where both the back-end and front-ends support it and
+no incompatible TCG features have been enabled (e.g. icount/replay).
+@end table
+ETEXI
+
DEF("smp", HAS_ARG, QEMU_OPTION_smp,
"-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]\n"
" set the number of CPUs to 'n' [default=1]\n"
diff --git a/qom/cpu.c b/qom/cpu.c
index ed87c50..58784bc 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -113,9 +113,19 @@ static void cpu_common_get_memory_mapping(CPUState *cpu,
error_setg(errp, "Obtaining memory mappings is unsupported on this CPU.");
}
+/* Resetting the IRQ comes from across the code base so we take the
+ * BQL here if we need to. cpu_interrupt assumes it is held.*/
void cpu_reset_interrupt(CPUState *cpu, int mask)
{
+ bool need_lock = !qemu_mutex_iothread_locked();
+
+ if (need_lock) {
+ qemu_mutex_lock_iothread();
+ }
cpu->interrupt_request &= ~mask;
+ if (need_lock) {
+ qemu_mutex_unlock_iothread();
+ }
}
void cpu_exit(CPUState *cpu)
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index fbb7a15..25207cb 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -14,6 +14,7 @@
#include "internals.h"
#include "arm-powerctl.h"
#include "qemu/log.h"
+#include "qemu/main-loop.h"
#include "exec/exec-all.h"
#ifndef DEBUG_ARM_POWERCTL
@@ -48,11 +49,93 @@ CPUState *arm_get_cpu_by_id(uint64_t id)
return NULL;
}
+struct CpuOnInfo {
+ uint64_t entry;
+ uint64_t context_id;
+ uint32_t target_el;
+ bool target_aa64;
+};
+
+
+static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
+ run_on_cpu_data data)
+{
+ ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
+ struct CpuOnInfo *info = (struct CpuOnInfo *) data.host_ptr;
+
+ /* Initialize the cpu we are turning on */
+ cpu_reset(target_cpu_state);
+ target_cpu_state->halted = 0;
+
+ if (info->target_aa64) {
+ if ((info->target_el < 3) && arm_feature(&target_cpu->env,
+ ARM_FEATURE_EL3)) {
+ /*
+ * As target mode is AArch64, we need to set lower
+ * exception level (the requested level 2) to AArch64
+ */
+ target_cpu->env.cp15.scr_el3 |= SCR_RW;
+ }
+
+ if ((info->target_el < 2) && arm_feature(&target_cpu->env,
+ ARM_FEATURE_EL2)) {
+ /*
+ * As target mode is AArch64, we need to set lower
+ * exception level (the requested level 1) to AArch64
+ */
+ target_cpu->env.cp15.hcr_el2 |= HCR_RW;
+ }
+
+ target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true);
+ } else {
+ /* We are requested to boot in AArch32 mode */
+ static const uint32_t mode_for_el[] = { 0,
+ ARM_CPU_MODE_SVC,
+ ARM_CPU_MODE_HYP,
+ ARM_CPU_MODE_SVC };
+
+ cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M,
+ CPSRWriteRaw);
+ }
+
+ if (info->target_el == 3) {
+ /* Processor is in secure mode */
+ target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
+ } else {
+ /* Processor is not in secure mode */
+ target_cpu->env.cp15.scr_el3 |= SCR_NS;
+ }
+
+ /* We check if the started CPU is now at the correct level */
+ assert(info->target_el == arm_current_el(&target_cpu->env));
+
+ if (info->target_aa64) {
+ target_cpu->env.xregs[0] = info->context_id;
+ target_cpu->env.thumb = false;
+ } else {
+ target_cpu->env.regs[0] = info->context_id;
+ target_cpu->env.thumb = info->entry & 1;
+ info->entry &= 0xfffffffe;
+ }
+
+ /* Start the new CPU at the requested address */
+ cpu_set_pc(target_cpu_state, info->entry);
+
+ g_free(info);
+
+ /* Finally set the power status */
+ assert(qemu_mutex_iothread_locked());
+ target_cpu->power_state = PSCI_ON;
+}
+
int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
uint32_t target_el, bool target_aa64)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
+ struct CpuOnInfo *info;
+
+ assert(qemu_mutex_iothread_locked());
DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
"\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry,
@@ -77,7 +160,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
}
target_cpu = ARM_CPU(target_cpu_state);
- if (!target_cpu->powered_off) {
+ if (target_cpu->power_state == PSCI_ON) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is already on\n",
__func__, cpuid);
@@ -109,74 +192,54 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
- /* Initialize the cpu we are turning on */
- cpu_reset(target_cpu_state);
- target_cpu->powered_off = false;
- target_cpu_state->halted = 0;
-
- if (target_aa64) {
- if ((target_el < 3) && arm_feature(&target_cpu->env, ARM_FEATURE_EL3)) {
- /*
- * As target mode is AArch64, we need to set lower
- * exception level (the requested level 2) to AArch64
- */
- target_cpu->env.cp15.scr_el3 |= SCR_RW;
- }
-
- if ((target_el < 2) && arm_feature(&target_cpu->env, ARM_FEATURE_EL2)) {
- /*
- * As target mode is AArch64, we need to set lower
- * exception level (the requested level 1) to AArch64
- */
- target_cpu->env.cp15.hcr_el2 |= HCR_RW;
- }
-
- target_cpu->env.pstate = aarch64_pstate_mode(target_el, true);
- } else {
- /* We are requested to boot in AArch32 mode */
- static uint32_t mode_for_el[] = { 0,
- ARM_CPU_MODE_SVC,
- ARM_CPU_MODE_HYP,
- ARM_CPU_MODE_SVC };
-
- cpsr_write(&target_cpu->env, mode_for_el[target_el], CPSR_M,
- CPSRWriteRaw);
- }
-
- if (target_el == 3) {
- /* Processor is in secure mode */
- target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
- } else {
- /* Processor is not in secure mode */
- target_cpu->env.cp15.scr_el3 |= SCR_NS;
- }
-
- /* We check if the started CPU is now at the correct level */
- assert(target_el == arm_current_el(&target_cpu->env));
-
- if (target_aa64) {
- target_cpu->env.xregs[0] = context_id;
- target_cpu->env.thumb = false;
- } else {
- target_cpu->env.regs[0] = context_id;
- target_cpu->env.thumb = entry & 1;
- entry &= 0xfffffffe;
+ /*
+ * If another CPU has powered the target on we are in the state
+ * ON_PENDING and additional attempts to power on the CPU should
+ * fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI
+ * spec)
+ */
+ if (target_cpu->power_state == PSCI_ON_PENDING) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: CPU %" PRId64 " is already powering on\n",
+ __func__, cpuid);
+ return QEMU_ARM_POWERCTL_ON_PENDING;
}
- /* Start the new CPU at the requested address */
- cpu_set_pc(target_cpu_state, entry);
+ /* To avoid racing with a CPU we are just kicking off we do the
+ * final bit of preparation for the work in the target CPUs
+ * context.
+ */
+ info = g_new(struct CpuOnInfo, 1);
+ info->entry = entry;
+ info->context_id = context_id;
+ info->target_el = target_el;
+ info->target_aa64 = target_aa64;
- qemu_cpu_kick(target_cpu_state);
+ async_run_on_cpu(target_cpu_state, arm_set_cpu_on_async_work,
+ RUN_ON_CPU_HOST_PTR(info));
/* We are good to go */
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
+static void arm_set_cpu_off_async_work(CPUState *target_cpu_state,
+ run_on_cpu_data data)
+{
+ ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
+
+ assert(qemu_mutex_iothread_locked());
+ target_cpu->power_state = PSCI_OFF;
+ target_cpu_state->halted = 1;
+ target_cpu_state->exception_index = EXCP_HLT;
+}
+
int arm_set_cpu_off(uint64_t cpuid)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
+ assert(qemu_mutex_iothread_locked());
+
DPRINTF("cpu %" PRId64 "\n", cpuid);
/* change to the cpu we are powering up */
@@ -185,27 +248,34 @@ int arm_set_cpu_off(uint64_t cpuid)
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
target_cpu = ARM_CPU(target_cpu_state);
- if (target_cpu->powered_off) {
+ if (target_cpu->power_state == PSCI_OFF) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is already off\n",
__func__, cpuid);
return QEMU_ARM_POWERCTL_IS_OFF;
}
- target_cpu->powered_off = true;
- target_cpu_state->halted = 1;
- target_cpu_state->exception_index = EXCP_HLT;
- cpu_loop_exit(target_cpu_state);
- /* notreached */
+ /* Queue work to run under the target vCPUs context */
+ async_run_on_cpu(target_cpu_state, arm_set_cpu_off_async_work,
+ RUN_ON_CPU_NULL);
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
+static void arm_reset_cpu_async_work(CPUState *target_cpu_state,
+ run_on_cpu_data data)
+{
+ /* Reset the cpu */
+ cpu_reset(target_cpu_state);
+}
+
int arm_reset_cpu(uint64_t cpuid)
{
CPUState *target_cpu_state;
ARMCPU *target_cpu;
+ assert(qemu_mutex_iothread_locked());
+
DPRINTF("cpu %" PRId64 "\n", cpuid);
/* change to the cpu we are resetting */
@@ -214,15 +284,17 @@ int arm_reset_cpu(uint64_t cpuid)
return QEMU_ARM_POWERCTL_INVALID_PARAM;
}
target_cpu = ARM_CPU(target_cpu_state);
- if (target_cpu->powered_off) {
+
+ if (target_cpu->power_state == PSCI_OFF) {
qemu_log_mask(LOG_GUEST_ERROR,
"[ARM]%s: CPU %" PRId64 " is off\n",
__func__, cpuid);
return QEMU_ARM_POWERCTL_IS_OFF;
}
- /* Reset the cpu */
- cpu_reset(target_cpu_state);
+ /* Queue work to run under the target vCPUs context */
+ async_run_on_cpu(target_cpu_state, arm_reset_cpu_async_work,
+ RUN_ON_CPU_NULL);
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
diff --git a/target/arm/arm-powerctl.h b/target/arm/arm-powerctl.h
index 98ee049..0435392 100644
--- a/target/arm/arm-powerctl.h
+++ b/target/arm/arm-powerctl.h
@@ -17,6 +17,7 @@
#define QEMU_ARM_POWERCTL_INVALID_PARAM QEMU_PSCI_RET_INVALID_PARAMS
#define QEMU_ARM_POWERCTL_ALREADY_ON QEMU_PSCI_RET_ALREADY_ON
#define QEMU_ARM_POWERCTL_IS_OFF QEMU_PSCI_RET_DENIED
+#define QEMU_ARM_POWERCTL_ON_PENDING QEMU_PSCI_RET_ON_PENDING
/*
* arm_get_cpu_by_id:
@@ -43,6 +44,7 @@ CPUState *arm_get_cpu_by_id(uint64_t cpuid);
* Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success.
* QEMU_ARM_POWERCTL_INVALID_PARAM if bad parameters are provided.
* QEMU_ARM_POWERCTL_ALREADY_ON if the CPU was already started.
+ * QEMU_ARM_POWERCTL_ON_PENDING if the CPU is still powering up
*/
int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
uint32_t target_el, bool target_aa64);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 4a069f6..f7157dc 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -45,7 +45,7 @@ static bool arm_cpu_has_work(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
- return !cpu->powered_off
+ return (cpu->power_state != PSCI_OFF)
&& cs->interrupt_request &
(CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD
| CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ
@@ -132,7 +132,7 @@ static void arm_cpu_reset(CPUState *s)
env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
- cpu->powered_off = cpu->start_powered_off;
+ cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
s->halted = cpu->start_powered_off;
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0956a54..38a8e00 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -30,6 +30,9 @@
# define TARGET_LONG_BITS 32
#endif
+/* ARM processors have a weak memory model */
+#define TCG_GUEST_DEFAULT_MO (0)
+
#define CPUArchState struct CPUARMState
#include "qemu-common.h"
@@ -526,6 +529,15 @@ typedef struct CPUARMState {
*/
typedef void ARMELChangeHook(ARMCPU *cpu, void *opaque);
+
+/* These values map onto the return values for
+ * QEMU_PSCI_0_2_FN_AFFINITY_INFO */
+typedef enum ARMPSCIState {
+ PSCI_OFF = 0,
+ PSCI_ON = 1,
+ PSCI_ON_PENDING = 2
+} ARMPSCIState;
+
/**
* ARMCPU:
* @env: #CPUARMState
@@ -582,8 +594,10 @@ struct ARMCPU {
/* Should CPU start in PSCI powered-off state? */
bool start_powered_off;
- /* CPU currently in PSCI powered-off state */
- bool powered_off;
+
+ /* Current power state, access guarded by BQL */
+ ARMPSCIState power_state;
+
/* CPU has virtualization extension */
bool has_el2;
/* CPU has security extension */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 47250bc..bcedb4a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -536,41 +536,33 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush(other_cs);
- }
+ tlb_flush_all_cpus_synced(cs);
}
static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush(other_cs);
- }
+ tlb_flush_all_cpus_synced(cs);
}
static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
- }
+ tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
- }
+ tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -578,19 +570,21 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
CPUState *cs = ENV_GET_CPU(env);
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
- ARMMMUIdx_S2NS, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0) |
+ (1 << ARMMMUIdx_S2NS));
}
static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1);
- }
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0) |
+ (1 << ARMMMUIdx_S2NS));
}
static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -611,13 +605,13 @@ static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 40);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS));
}
static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr;
if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@@ -626,9 +620,8 @@ static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 40);
- CPU_FOREACH(other_cs) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1);
- }
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S2NS));
}
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -636,17 +629,15 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
CPUState *cs = ENV_GET_CPU(env);
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1);
+ tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1);
- }
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -655,18 +646,17 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2));
}
static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
- CPU_FOREACH(other_cs) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1);
- }
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S1E2));
}
static const ARMCPRegInfo cp_reginfo[] = {
@@ -2542,8 +2532,10 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* Accesses to VTTBR may change the VMID so we must flush the TLB. */
if (raw_read(env, ri) != value) {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
- ARMMMUIdx_S2NS, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0) |
+ (1 << ARMMMUIdx_S2NS));
raw_write(env, ri, value);
}
}
@@ -2898,29 +2890,33 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
+ CPUState *cs = ENV_GET_CPU(env);
if (arm_is_secure_below_el3(env)) {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
} else {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+ CPUState *cs = ENV_GET_CPU(env);
bool sec = arm_is_secure_below_el3(env);
- CPUState *other_cs;
- CPU_FOREACH(other_cs) {
- if (sec) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
- } else {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, -1);
- }
+ if (sec) {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
+ } else {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
@@ -2935,13 +2931,19 @@ static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
if (arm_is_secure_below_el3(env)) {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
} else {
if (arm_feature(env, ARM_FEATURE_EL2)) {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
- ARMMMUIdx_S2NS, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0) |
+ (1 << ARMMMUIdx_S2NS));
} else {
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1);
+ tlb_flush_by_mmuidx(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
}
@@ -2952,7 +2954,7 @@ static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1);
+ tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2961,7 +2963,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
- tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E3, -1);
+ tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2971,41 +2973,40 @@ static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
* stage 2 translations, whereas most other scopes only invalidate
* stage 1 translations.
*/
+ CPUState *cs = ENV_GET_CPU(env);
bool sec = arm_is_secure_below_el3(env);
bool has_el2 = arm_feature(env, ARM_FEATURE_EL2);
- CPUState *other_cs;
-
- CPU_FOREACH(other_cs) {
- if (sec) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1);
- } else if (has_el2) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1);
- } else {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, -1);
- }
+
+ if (sec) {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
+ } else if (has_el2) {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0) |
+ (1 << ARMMMUIdx_S2NS));
+ } else {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1);
- }
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
- CPU_FOREACH(other_cs) {
- tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E3, -1);
- }
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3021,11 +3022,13 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t pageaddr = sextract64(value << 12, 0, 56);
if (arm_is_secure_below_el3(env)) {
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1SE1,
- ARMMMUIdx_S1SE0, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
} else {
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
@@ -3040,7 +3043,7 @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3054,47 +3057,46 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
CPUState *cs = CPU(cpu);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E3, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+ ARMCPU *cpu = arm_env_get_cpu(env);
+ CPUState *cs = CPU(cpu);
bool sec = arm_is_secure_below_el3(env);
- CPUState *other_cs;
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- CPU_FOREACH(other_cs) {
- if (sec) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1SE1,
- ARMMMUIdx_S1SE0, -1);
- } else {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S12NSE1,
- ARMMMUIdx_S12NSE0, -1);
- }
+ if (sec) {
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S1SE1) |
+ (1 << ARMMMUIdx_S1SE0));
+ } else {
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S12NSE1) |
+ (1 << ARMMMUIdx_S12NSE0));
}
}
static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- CPU_FOREACH(other_cs) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1);
- }
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S1E2));
}
static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- CPU_FOREACH(other_cs) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E3, -1);
- }
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S1E3));
}
static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3116,13 +3118,13 @@ static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 48);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS));
}
static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *other_cs;
+ CPUState *cs = ENV_GET_CPU(env);
uint64_t pageaddr;
if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@@ -3131,9 +3133,8 @@ static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
pageaddr = sextract64(value << 12, 0, 48);
- CPU_FOREACH(other_cs) {
- tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1);
- }
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ (1 << ARMMMUIdx_S2NS));
}
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -6769,6 +6770,12 @@ void arm_cpu_do_interrupt(CPUState *cs)
arm_cpu_do_interrupt_aarch32(cs);
}
+ /* Hooks may change global state so BQL should be held, also the
+ * BQL needs to be held for any modification of
+ * cs->interrupt_request.
+ */
+ g_assert(qemu_mutex_iothread_locked());
+
arm_call_el_change_hook(cpu);
if (!kvm_enabled()) {
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index c00b94e..395e986 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -488,8 +488,8 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
if (cap_has_mp_state) {
struct kvm_mp_state mp_state = {
- .mp_state =
- cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
+ .mp_state = (cpu->power_state == PSCI_OFF) ?
+ KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
};
int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
if (ret) {
@@ -515,7 +515,8 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
__func__, ret, strerror(-ret));
abort();
}
- cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED);
+ cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
+ PSCI_OFF : PSCI_ON;
}
return 0;
diff --git a/target/arm/machine.c b/target/arm/machine.c
index fa5ec76..d8094a8 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -211,6 +211,38 @@ static const VMStateInfo vmstate_cpsr = {
.put = put_cpsr,
};
+static int get_power(QEMUFile *f, void *opaque, size_t size,
+ VMStateField *field)
+{
+ ARMCPU *cpu = opaque;
+ bool powered_off = qemu_get_byte(f);
+ cpu->power_state = powered_off ? PSCI_OFF : PSCI_ON;
+ return 0;
+}
+
+static int put_power(QEMUFile *f, void *opaque, size_t size,
+ VMStateField *field, QJSON *vmdesc)
+{
+ ARMCPU *cpu = opaque;
+
+ /* Migration should never happen while we transition power states */
+
+ if (cpu->power_state == PSCI_ON ||
+ cpu->power_state == PSCI_OFF) {
+ bool powered_off = (cpu->power_state == PSCI_OFF) ? true : false;
+ qemu_put_byte(f, powered_off);
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+static const VMStateInfo vmstate_powered_off = {
+ .name = "powered_off",
+ .get = get_power,
+ .put = put_power,
+};
+
static void cpu_pre_save(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -329,7 +361,14 @@ const VMStateDescription vmstate_arm_cpu = {
VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU),
VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU),
- VMSTATE_BOOL(powered_off, ARMCPU),
+ {
+ .name = "power_state",
+ .version_id = 0,
+ .size = sizeof(bool),
+ .info = &vmstate_powered_off,
+ .flags = VMS_SINGLE,
+ .offset = 0,
+ },
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index fb366fd..d64c867 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
+#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "internals.h"
@@ -435,6 +436,13 @@ void HELPER(yield)(CPUARMState *env)
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
+ /* When running in MTTCG we don't generate jumps to the yield and
+ * WFE helpers as it won't affect the scheduling of other vCPUs.
+ * If we wanted to more completely model WFE/SEV so we don't busy
+ * spin unnecessarily we would need to do something more involved.
+ */
+ g_assert(!parallel_cpus);
+
/* This is a non-trappable hint instruction that generally indicates
* that the guest is currently busy-looping. Yield control back to the
* top level loop so that a more deserving VCPU has a chance to run.
@@ -487,7 +495,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
*/
env->regs[15] &= (env->thumb ? ~1 : ~3);
+ qemu_mutex_lock_iothread();
arm_call_el_change_hook(arm_env_get_cpu(env));
+ qemu_mutex_unlock_iothread();
}
/* Access to user mode registers from privileged modes. */
@@ -735,28 +745,58 @@ void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
{
const ARMCPRegInfo *ri = rip;
- ri->writefn(env, ri, value);
+ if (ri->type & ARM_CP_IO) {
+ qemu_mutex_lock_iothread();
+ ri->writefn(env, ri, value);
+ qemu_mutex_unlock_iothread();
+ } else {
+ ri->writefn(env, ri, value);
+ }
}
uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip)
{
const ARMCPRegInfo *ri = rip;
+ uint32_t res;
+
+ if (ri->type & ARM_CP_IO) {
+ qemu_mutex_lock_iothread();
+ res = ri->readfn(env, ri);
+ qemu_mutex_unlock_iothread();
+ } else {
+ res = ri->readfn(env, ri);
+ }
- return ri->readfn(env, ri);
+ return res;
}
void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value)
{
const ARMCPRegInfo *ri = rip;
- ri->writefn(env, ri, value);
+ if (ri->type & ARM_CP_IO) {
+ qemu_mutex_lock_iothread();
+ ri->writefn(env, ri, value);
+ qemu_mutex_unlock_iothread();
+ } else {
+ ri->writefn(env, ri, value);
+ }
}
uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
{
const ARMCPRegInfo *ri = rip;
+ uint64_t res;
- return ri->readfn(env, ri);
+ if (ri->type & ARM_CP_IO) {
+ qemu_mutex_lock_iothread();
+ res = ri->readfn(env, ri);
+ qemu_mutex_unlock_iothread();
+ } else {
+ res = ri->readfn(env, ri);
+ }
+
+ return res;
}
void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
@@ -989,7 +1029,9 @@ void HELPER(exception_return)(CPUARMState *env)
cur_el, new_el, env->pc);
}
+ qemu_mutex_lock_iothread();
arm_call_el_change_hook(arm_env_get_cpu(env));
+ qemu_mutex_unlock_iothread();
return;
diff --git a/target/arm/psci.c b/target/arm/psci.c
index 64bf82e..ade9fe2 100644
--- a/target/arm/psci.c
+++ b/target/arm/psci.c
@@ -127,7 +127,9 @@ void arm_handle_psci_call(ARMCPU *cpu)
break;
}
target_cpu = ARM_CPU(target_cpu_state);
- ret = target_cpu->powered_off ? 1 : 0;
+
+ g_assert(qemu_mutex_iothread_locked());
+ ret = target_cpu->power_state;
break;
default:
/* Everything above affinity level 0 is always on. */
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e61bbd6..e15eae6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1328,10 +1328,14 @@ static void handle_hint(DisasContext *s, uint32_t insn,
s->is_jmp = DISAS_WFI;
return;
case 1: /* YIELD */
- s->is_jmp = DISAS_YIELD;
+ if (!parallel_cpus) {
+ s->is_jmp = DISAS_YIELD;
+ }
return;
case 2: /* WFE */
- s->is_jmp = DISAS_WFE;
+ if (!parallel_cpus) {
+ s->is_jmp = DISAS_WFE;
+ }
return;
case 4: /* SEV */
case 5: /* SEVL */
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 4436d8f..abc1f77 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4404,20 +4404,32 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
gen_rfe(s, pc, load_cpu_field(spsr));
}
+/*
+ * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
+ * only call the helper when running single threaded TCG code to ensure
+ * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
+ * just skip this instruction. Currently the SEV/SEVL instructions
+ * which are *one* of many ways to wake the CPU from WFE are not
+ * implemented so we can't sleep like WFI does.
+ */
static void gen_nop_hint(DisasContext *s, int val)
{
switch (val) {
case 1: /* yield */
- gen_set_pc_im(s, s->pc);
- s->is_jmp = DISAS_YIELD;
+ if (!parallel_cpus) {
+ gen_set_pc_im(s, s->pc);
+ s->is_jmp = DISAS_YIELD;
+ }
break;
case 3: /* wfi */
gen_set_pc_im(s, s->pc);
s->is_jmp = DISAS_WFI;
break;
case 2: /* wfe */
- gen_set_pc_im(s, s->pc);
- s->is_jmp = DISAS_WFE;
+ if (!parallel_cpus) {
+ gen_set_pc_im(s, s->pc);
+ s->is_jmp = DISAS_WFE;
+ }
break;
case 4: /* sev */
case 5: /* sevl */
diff --git a/target/i386/smm_helper.c b/target/i386/smm_helper.c
index 4dd6a2c..f051a77 100644
--- a/target/i386/smm_helper.c
+++ b/target/i386/smm_helper.c
@@ -18,6 +18,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/log.h"
@@ -42,11 +43,14 @@ void helper_rsm(CPUX86State *env)
#define SMM_REVISION_ID 0x00020000
#endif
+/* Called with iothread lock taken */
void cpu_smm_update(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
bool smm_enabled = (env->hflags & HF_SMM_MASK);
+ g_assert(qemu_mutex_iothread_locked());
+
if (cpu->smram) {
memory_region_set_enabled(cpu->smram, smm_enabled);
}
@@ -333,7 +337,10 @@ void helper_rsm(CPUX86State *env)
}
env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
env->hflags &= ~HF_SMM_MASK;
+
+ qemu_mutex_lock_iothread();
cpu_smm_update(cpu);
+ qemu_mutex_unlock_iothread();
qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP);
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index c9604ea..3cb942e 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -25,6 +25,7 @@
#include "exec/helper-proto.h"
#include "sysemu/kvm.h"
#include "qemu/timer.h"
+#include "qemu/main-loop.h"
#include "exec/address-spaces.h"
#ifdef CONFIG_KVM
#include <linux/kvm.h>
@@ -109,11 +110,13 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen)
/* SCLP service call */
uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
+ qemu_mutex_lock_iothread();
int r = sclp_service_call(env, r1, r2);
if (r < 0) {
program_interrupt(env, -r, 4);
- return 0;
+ r = 0;
}
+ qemu_mutex_unlock_iothread();
return r;
}
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index 2c05d6a..57968d9 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -1768,13 +1768,15 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val,
case 1:
env->dmmu.mmu_primary_context = val;
env->immu.mmu_primary_context = val;
- tlb_flush_by_mmuidx(CPU(cpu), MMU_USER_IDX, MMU_KERNEL_IDX, -1);
+ tlb_flush_by_mmuidx(CPU(cpu),
+ (1 << MMU_USER_IDX) | (1 << MMU_KERNEL_IDX));
break;
case 2:
env->dmmu.mmu_secondary_context = val;
env->immu.mmu_secondary_context = val;
- tlb_flush_by_mmuidx(CPU(cpu), MMU_USER_SECONDARY_IDX,
- MMU_KERNEL_SECONDARY_IDX, -1);
+ tlb_flush_by_mmuidx(CPU(cpu),
+ (1 << MMU_USER_SECONDARY_IDX) |
+ (1 << MMU_KERNEL_SECONDARY_IDX));
break;
default:
cpu_unassigned_access(cs, addr, true, false, 1, size);
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 21d96ec..4275787 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -165,4 +165,15 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
}
+/* This defines the natural memory order supported by this
+ * architecture before guarantees made by various barrier
+ * instructions.
+ *
+ * The x86 has a pretty strong memory ordering which only really
+ * allows for some stores to be re-ordered after loads.
+ */
+#include "tcg-mo.h"
+
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
#endif
diff --git a/tcg/tcg-mo.h b/tcg/tcg-mo.h
new file mode 100644
index 0000000..c2c5570
--- /dev/null
+++ b/tcg/tcg-mo.h
@@ -0,0 +1,48 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_MO_H
+#define TCG_MO_H
+
+typedef enum {
+ /* Used to indicate the type of accesses on which ordering
+ is to be ensured. Modeled after SPARC barriers.
+
+ This is of the form TCG_MO_A_B where A is before B in program order.
+ */
+ TCG_MO_LD_LD = 0x01,
+ TCG_MO_ST_LD = 0x02,
+ TCG_MO_LD_ST = 0x04,
+ TCG_MO_ST_ST = 0x08,
+ TCG_MO_ALL = 0x0F, /* OR of the above */
+
+ /* Used to indicate the kind of ordering which is to be ensured by the
+ instruction. These types are derived from x86/aarch64 instructions.
+ It should be noted that these are different from C11 semantics. */
+ TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */
+ TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */
+ TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */
+} TCGBar;
+
+#endif /* TCG_MO_H */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 631c6f6..4c7f258 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -29,6 +29,7 @@
#include "cpu.h"
#include "exec/tb-context.h"
#include "qemu/bitops.h"
+#include "tcg-mo.h"
#include "tcg-target.h"
/* XXX: make safe guess about sizes */
@@ -79,6 +80,15 @@ typedef uint64_t tcg_target_ulong;
#error unsupported
#endif
+/* Oversized TCG guests make things like MTTCG hard
+ * as we can't use atomics for cputlb updates.
+ */
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+#define TCG_OVERSIZED_GUEST 1
+#else
+#define TCG_OVERSIZED_GUEST 0
+#endif
+
#if TCG_TARGET_NB_REGS <= 32
typedef uint32_t TCGRegSet;
#elif TCG_TARGET_NB_REGS <= 64
@@ -498,23 +508,6 @@ static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t)
#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
-typedef enum {
- /* Used to indicate the type of accesses on which ordering
- is to be ensured. Modeled after SPARC barriers. */
- TCG_MO_LD_LD = 0x01,
- TCG_MO_ST_LD = 0x02,
- TCG_MO_LD_ST = 0x04,
- TCG_MO_ST_ST = 0x08,
- TCG_MO_ALL = 0x0F, /* OR of the above */
-
- /* Used to indicate the kind of ordering which is to be ensured by the
- instruction. These types are derived from x86/aarch64 instructions.
- It should be noted that these are different from C11 semantics. */
- TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */
- TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */
- TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */
-} TCGBar;
-
/* Conditions. Note that these are laid out for easy manipulation by
the functions below:
bit 0 is used for inverting;
diff --git a/translate-all.c b/translate-all.c
index 5f44ec8..9bac061 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -55,11 +55,11 @@
#include "translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
+#include "qemu/main-loop.h"
#include "exec/log.h"
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
-/* #define DEBUG_LOCKING */
/* make various TB consistency checks */
/* #define DEBUG_TB_CHECK */
@@ -74,20 +74,10 @@
* access to the memory related structures are protected with the
* mmap_lock.
*/
-#ifdef DEBUG_LOCKING
-#define DEBUG_MEM_LOCKS 1
-#else
-#define DEBUG_MEM_LOCKS 0
-#endif
-
#ifdef CONFIG_SOFTMMU
-#define assert_memory_lock() do { /* nothing */ } while (0)
+#define assert_memory_lock() tcg_debug_assert(have_tb_lock)
#else
-#define assert_memory_lock() do { \
- if (DEBUG_MEM_LOCKS) { \
- g_assert(have_mmap_lock()); \
- } \
- } while (0)
+#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#endif
#define SMC_BITMAP_USE_THRESHOLD 10
@@ -145,9 +135,7 @@ TCGContext tcg_ctx;
bool parallel_cpus;
/* translation block context */
-#ifdef CONFIG_USER_ONLY
__thread int have_tb_lock;
-#endif
static void page_table_config_init(void)
{
@@ -169,51 +157,31 @@ static void page_table_config_init(void)
assert(v_l2_levels >= 0);
}
+#define assert_tb_locked() tcg_debug_assert(have_tb_lock)
+#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock)
+
void tb_lock(void)
{
-#ifdef CONFIG_USER_ONLY
- assert(!have_tb_lock);
+ assert_tb_unlocked();
qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock++;
-#endif
}
void tb_unlock(void)
{
-#ifdef CONFIG_USER_ONLY
- assert(have_tb_lock);
+ assert_tb_locked();
have_tb_lock--;
qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
-#endif
}
void tb_lock_reset(void)
{
-#ifdef CONFIG_USER_ONLY
if (have_tb_lock) {
qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
have_tb_lock = 0;
}
-#endif
}
-#ifdef DEBUG_LOCKING
-#define DEBUG_TB_LOCKS 1
-#else
-#define DEBUG_TB_LOCKS 0
-#endif
-
-#ifdef CONFIG_SOFTMMU
-#define assert_tb_lock() do { /* nothing */ } while (0)
-#else
-#define assert_tb_lock() do { \
- if (DEBUG_TB_LOCKS) { \
- g_assert(have_tb_lock); \
- } \
- } while (0)
-#endif
-
-
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
void cpu_gen_init(void)
@@ -847,7 +815,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
{
TranslationBlock *tb;
- assert_tb_lock();
+ assert_tb_locked();
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
return NULL;
@@ -862,7 +830,7 @@ static TranslationBlock *tb_alloc(target_ulong pc)
/* Called with tb_lock held. */
void tb_free(TranslationBlock *tb)
{
- assert_tb_lock();
+ assert_tb_locked();
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
@@ -1104,7 +1072,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
uint32_t h;
tb_page_addr_t phys_pc;
- assert_tb_lock();
+ assert_tb_locked();
atomic_set(&tb->invalid, true);
@@ -1421,7 +1389,7 @@ static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
#ifdef CONFIG_SOFTMMU
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
{
- assert_tb_lock();
+ assert_tb_locked();
tb_invalidate_phys_range_1(start, end);
}
#else
@@ -1464,7 +1432,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#endif /* TARGET_HAS_PRECISE_SMC */
assert_memory_lock();
- assert_tb_lock();
+ assert_tb_locked();
p = page_find(start >> TARGET_PAGE_BITS);
if (!p) {
@@ -1543,7 +1511,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#ifdef CONFIG_SOFTMMU
/* len must be <= 8 and start must be a multiple of len.
* Called via softmmu_template.h when code areas are written to with
- * tb_lock held.
+ * iothread mutex not held.
*/
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
@@ -1745,7 +1713,10 @@ void tb_check_watchpoint(CPUState *cpu)
#ifndef CONFIG_USER_ONLY
/* in deterministic execution mode, instructions doing device I/Os
- must be at the end of the TB */
+ * must be at the end of the TB.
+ *
+ * Called by softmmu_template.h, with iothread mutex not held.
+ */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
#if defined(TARGET_MIPS) || defined(TARGET_SH4)
@@ -1957,6 +1928,7 @@ void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
void cpu_interrupt(CPUState *cpu, int mask)
{
+ g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask;
cpu->tcg_exit_req = 1;
}
diff --git a/translate-common.c b/translate-common.c
index 5e989cd..d504dd0 100644
--- a/translate-common.c
+++ b/translate-common.c
@@ -21,6 +21,7 @@
#include "qemu-common.h"
#include "qom/cpu.h"
#include "sysemu/cpus.h"
+#include "qemu/main-loop.h"
uintptr_t qemu_real_host_page_size;
intptr_t qemu_real_host_page_mask;
@@ -30,6 +31,7 @@ intptr_t qemu_real_host_page_mask;
static void tcg_handle_interrupt(CPUState *cpu, int mask)
{
int old_mask;
+ g_assert(qemu_mutex_iothread_locked());
old_mask = cpu->interrupt_request;
cpu->interrupt_request |= mask;
@@ -40,17 +42,16 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
*/
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
- return;
- }
-
- if (use_icount) {
- cpu->icount_decr.u16.high = 0xffff;
- if (!cpu->can_do_io
- && (mask & ~old_mask) != 0) {
- cpu_abort(cpu, "Raised interrupt while not in I/O function");
- }
} else {
- cpu->tcg_exit_req = 1;
+ if (use_icount) {
+ cpu->icount_decr.u16.high = 0xffff;
+ if (!cpu->can_do_io
+ && (mask & ~old_mask) != 0) {
+ cpu_abort(cpu, "Raised interrupt while not in I/O function");
+ }
+ } else {
+ cpu->tcg_exit_req = 1;
+ }
}
}
diff --git a/vl.c b/vl.c
index 904e34b..e10a27b 100644
--- a/vl.c
+++ b/vl.c
@@ -300,6 +300,26 @@ static QemuOptsList qemu_machine_opts = {
},
};
+static QemuOptsList qemu_accel_opts = {
+ .name = "accel",
+ .implied_opt_name = "accel",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head),
+ .merge_lists = true,
+ .desc = {
+ {
+ .name = "accel",
+ .type = QEMU_OPT_STRING,
+ .help = "Select the type of accelerator",
+ },
+ {
+ .name = "thread",
+ .type = QEMU_OPT_STRING,
+ .help = "Enable/disable multi-threaded TCG",
+ },
+ { /* end of list */ }
+ },
+};
+
static QemuOptsList qemu_boot_opts = {
.name = "boot-opts",
.implied_opt_name = "order",
@@ -2928,7 +2948,8 @@ int main(int argc, char **argv, char **envp)
const char *boot_once = NULL;
DisplayState *ds;
int cyls, heads, secs, translation;
- QemuOpts *hda_opts = NULL, *opts, *machine_opts, *icount_opts = NULL;
+ QemuOpts *opts, *machine_opts;
+ QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
QemuOptsList *olist;
int optind;
const char *optarg;
@@ -2983,6 +3004,7 @@ int main(int argc, char **argv, char **envp)
qemu_add_opts(&qemu_trace_opts);
qemu_add_opts(&qemu_option_rom_opts);
qemu_add_opts(&qemu_machine_opts);
+ qemu_add_opts(&qemu_accel_opts);
qemu_add_opts(&qemu_mem_opts);
qemu_add_opts(&qemu_smp_opts);
qemu_add_opts(&qemu_boot_opts);
@@ -3675,6 +3697,26 @@ int main(int argc, char **argv, char **envp)
qdev_prop_register_global(&kvm_pit_lost_tick_policy);
break;
}
+ case QEMU_OPTION_accel:
+ accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
+ optarg, true);
+ optarg = qemu_opt_get(accel_opts, "accel");
+
+ olist = qemu_find_opts("machine");
+ if (strcmp("kvm", optarg) == 0) {
+ qemu_opts_parse_noisily(olist, "accel=kvm", false);
+ } else if (strcmp("xen", optarg) == 0) {
+ qemu_opts_parse_noisily(olist, "accel=xen", false);
+ } else if (strcmp("tcg", optarg) == 0) {
+ qemu_opts_parse_noisily(olist, "accel=tcg", false);
+ } else {
+ if (!is_help_option(optarg)) {
+ error_printf("Unknown accelerator: %s", optarg);
+ }
+ error_printf("Supported accelerators: kvm, xen, tcg\n");
+ exit(1);
+ }
+ break;
case QEMU_OPTION_usb:
olist = qemu_find_opts("machine");
qemu_opts_parse_noisily(olist, "usb=on", false);
@@ -3983,6 +4025,8 @@ int main(int argc, char **argv, char **envp)
replay_configure(icount_opts);
+ qemu_tcg_configure(accel_opts, &error_fatal);
+
machine_class = select_machine();
set_memory_options(&ram_slots, &maxram_size, machine_class);
@@ -4349,6 +4393,9 @@ int main(int argc, char **argv, char **envp)
if (!tcg_enabled()) {
error_report("-icount is not allowed with hardware virtualization");
exit(1);
+ } else if (qemu_tcg_mttcg_enabled()) {
+ error_report("-icount does not currently work with MTTCG");
+ exit(1);
}
configure_icount(icount_opts, &error_abort);
qemu_opts_del(icount_opts);