From 9373e63297c43752f9cf085feb7f5aed57d959f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Aug 2015 06:24:34 -0700 Subject: tcg: introduce tcg_current_cpu This is already useful on Windows in order to remove tls.h, because accesses to current_cpu are done from a different thread on that platform. It will be used on POSIX platforms as soon TCG stops using signals to interrupt the execution of translated code. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 713540f..5153f1b 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -342,6 +342,7 @@ static void cpu_handle_debug_exception(CPUState *cpu) /* main execution loop */ volatile sig_atomic_t exit_request; +CPUState *tcg_current_cpu; int cpu_exec(CPUState *cpu) { @@ -368,15 +369,7 @@ int cpu_exec(CPUState *cpu) } current_cpu = cpu; - - /* As long as current_cpu is null, up to the assignment just above, - * requests by other threads to exit the execution loop are expected to - * be issued using the exit_request global. We must make sure that our - * evaluation of the global value is performed past the current_cpu - * value transition point, which requires a memory barrier as well as - * an instruction scheduling constraint on modern architectures. */ - smp_mb(); - + atomic_mb_set(&tcg_current_cpu, cpu); rcu_read_lock(); if (unlikely(exit_request)) { @@ -579,5 +572,8 @@ int cpu_exec(CPUState *cpu) /* fail safe : never use current_cpu outside cpu_exec() */ current_cpu = NULL; + + /* Does not need atomic_mb_set because a spurious wakeup is okay. */ + atomic_set(&tcg_current_cpu, NULL); return ret; } -- cgit v1.1 From b0a46fa796504c7334202877a68c857e49f7c96c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Aug 2015 06:32:02 -0700 Subject: tcg: assign cpu->current_tb in a simpler place TCG has not been reading cpu->current_tb from signal handlers for years. The code that synchronized cpu_exec with the signal handler is not needed anymore. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 5153f1b..567ae8b 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -493,18 +493,13 @@ int cpu_exec(CPUState *cpu) } have_tb_lock = false; spin_unlock(&tcg_ctx.tb_ctx.tb_lock); - - /* cpu_interrupt might be called while translating the - TB, but before it is linked into a potentially - infinite loop and becomes env->current_tb. Avoid - starting execution if there is a pending interrupt. */ - cpu->current_tb = tb; - barrier(); if (likely(!cpu->exit_request)) { trace_exec_tb(tb, tb->pc); tc_ptr = tb->tc_ptr; /* execute the generated code */ + cpu->current_tb = tb; next_tb = cpu_tb_exec(cpu, tc_ptr); + cpu->current_tb = NULL; switch (next_tb & TB_EXIT_MASK) { case TB_EXIT_REQUESTED: /* Something asked us to stop executing @@ -543,7 +538,6 @@ int cpu_exec(CPUState *cpu) break; } } - cpu->current_tb = NULL; /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu); -- cgit v1.1 From ab096a75cd626dcd4ad34b2a11652df0269bee0d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Aug 2015 06:34:19 -0700 Subject: tcg: synchronize cpu->exit_request and cpu->tcg_exit_req accesses Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 567ae8b..e24c640 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -507,8 +507,12 @@ int cpu_exec(CPUState *cpu) * loop. Whatever requested the exit will also * have set something else (eg exit_request or * interrupt_request) which we will handle - * next time around the loop. + * next time around the loop. But we need to + * ensure the tcg_exit_req read in generated code + * comes before the next read of cpu->exit_request + * or cpu->interrupt_request. */ + smp_rmb(); next_tb = 0; break; case TB_EXIT_ICOUNT_EXPIRED: -- cgit v1.1 From aed807c8e2bf009b2c6a35490d4fd4383887221d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 18 Aug 2015 06:43:15 -0700 Subject: tcg: synchronize exit_request and tcg_current_cpu accesses Synchronize the remaining pair of accesses in cpu_signal. These should be necessary on Windows as well, at least in theory. Probably SuspendProcess and ResumeProcess introduce some implicit memory barrier. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index e24c640..ef9d745 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -372,7 +372,7 @@ int cpu_exec(CPUState *cpu) atomic_mb_set(&tcg_current_cpu, cpu); rcu_read_lock(); - if (unlikely(exit_request)) { + if (unlikely(atomic_mb_read(&exit_request))) { cpu->exit_request = 1; } -- cgit v1.1 From e0c382113f768cc375a0d61b7cb3692f1b4bba58 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Aug 2015 00:19:19 +0200 Subject: tcg: signal-free qemu_cpu_kick Signals are slow and do not exist on Win32. The previous patches have done most of the legwork to introduce memory barriers (some of them were even there already for the sake of Windows!) and we can now set the flags directly in the iothread. qemu_cpu_kick_thread is not used anymore on TCG, since the TCG thread is never outside usermode while the CPU is running (not halted). Instead run the content of the signal handler (now in qemu_cpu_kick_no_halt) directly. qemu_cpu_kick_no_halt is also used in qemu_mutex_lock_iothread to avoid the overhead of qemu_cond_broadcast. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- cpu-exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index ef9d745..6a30261 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -341,7 +341,7 @@ static void cpu_handle_debug_exception(CPUState *cpu) /* main execution loop */ -volatile sig_atomic_t exit_request; +bool exit_request; CPUState *tcg_current_cpu; int cpu_exec(CPUState *cpu) -- cgit v1.1 From 677ef6230b603571ae05125db469f7b4c8912a77 Mon Sep 17 00:00:00 2001 From: KONRAD Frederic Date: Mon, 10 Aug 2015 17:27:02 +0200 Subject: replace spinlock by QemuMutex. spinlock is only used in two cases: * cpu-exec.c: to protect TranslationBlock * mem_helper.c: for lock helper in target-i386 (which seems broken). It's a pthread_mutex_t in user-mode, so we can use QemuMutex directly, with an #ifdef. The #ifdef will be removed when multithreaded TCG will need the mutex as well. Signed-off-by: KONRAD Frederic Message-Id: <1439220437-23957-5-git-send-email-fred.konrad@greensocs.com> Signed-off-by: Emilio G. Cota [Merge Emilio G. Cota's patch to remove volatile. - Paolo] Signed-off-by: Paolo Bonzini --- cpu-exec.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 6a30261..2c0a6f6 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -357,9 +357,6 @@ int cpu_exec(CPUState *cpu) uintptr_t next_tb; SyncClocks sc; - /* This must be volatile so it is not trashed by longjmp() */ - volatile bool have_tb_lock = false; - if (cpu->halted) { if (!cpu_has_work(cpu)) { return EXCP_HALTED; @@ -468,8 +465,7 @@ int cpu_exec(CPUState *cpu) cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } - spin_lock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = true; + tb_lock(); tb = tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ @@ -491,8 +487,7 @@ int cpu_exec(CPUState *cpu) tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK, tb); } - have_tb_lock = false; - spin_unlock(&tcg_ctx.tb_ctx.tb_lock); + tb_unlock(); if (likely(!cpu->exit_request)) { trace_exec_tb(tb, tb->pc); tc_ptr = tb->tc_ptr; @@ -558,10 +553,7 @@ int cpu_exec(CPUState *cpu) x86_cpu = X86_CPU(cpu); env = &x86_cpu->env; #endif - if (have_tb_lock) { - spin_unlock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = false; - } + tb_lock_reset(); } } /* for(;;) */ -- cgit v1.1 From 9fd1a94888cd6a559f95c3596ec1ac28b74838c1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 11 Aug 2015 11:33:24 +0200 Subject: cpu-exec: fix lock hierarchy for user-mode emulation tb_lock has to be taken inside the mmap_lock (example: tb_invalidate_phys_range is called by target_mmap), but tb_link_page is taking the mmap_lock and it is called with the tb_lock held. To fix this, take the mmap_lock in tb_find_slow, not in tb_link_page. Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini --- cpu-exec.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 18 deletions(-) (limited to 'cpu-exec.c') diff --git a/cpu-exec.c b/cpu-exec.c index 2c0a6f6..cb0c75d 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -249,10 +249,10 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, tb_free(tb); } -static TranslationBlock *tb_find_slow(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint64_t flags) +static TranslationBlock *tb_find_physical(CPUState *cpu, + target_ulong pc, + target_ulong cs_base, + uint64_t flags) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb, **ptb1; @@ -269,8 +269,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h]; for(;;) { tb = *ptb1; - if (!tb) - goto not_found; + if (!tb) { + return NULL; + } if (tb->pc == pc && tb->page_addr[0] == phys_page1 && tb->cs_base == cs_base && @@ -282,25 +283,59 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, virt_page2 = (pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; phys_page2 = get_page_addr_code(env, virt_page2); - if (tb->page_addr[1] == phys_page2) - goto found; + if (tb->page_addr[1] == phys_page2) { + break; + } } else { - goto found; + break; } } ptb1 = &tb->phys_hash_next; } - not_found: - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); - found: - /* Move the last found TB to the head of the list */ - if (likely(*ptb1)) { - *ptb1 = tb->phys_hash_next; - tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h]; - tcg_ctx.tb_ctx.tb_phys_hash[h] = tb; + /* Move the TB to the head of the list */ + *ptb1 = tb->phys_hash_next; + tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h]; + tcg_ctx.tb_ctx.tb_phys_hash[h] = tb; + return tb; +} + +static TranslationBlock *tb_find_slow(CPUState *cpu, + target_ulong pc, + target_ulong cs_base, + uint64_t flags) +{ + TranslationBlock *tb; + + tb = tb_find_physical(cpu, pc, cs_base, flags); + if (tb) { + goto found; + } + +#ifdef CONFIG_USER_ONLY + /* mmap_lock is needed by tb_gen_code, and mmap_lock must be + * taken outside tb_lock. Since we're momentarily dropping + * tb_lock, there's a chance that our desired tb has been + * translated. + */ + tb_unlock(); + mmap_lock(); + tb_lock(); + tb = tb_find_physical(cpu, pc, cs_base, flags); + if (tb) { + mmap_unlock(); + goto found; } +#endif + + /* if no translated code available, then translate it now */ + tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + +#ifdef CONFIG_USER_ONLY + mmap_unlock(); +#endif + +found: /* we add the TB in the virtual pc hash table */ cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; return tb; -- cgit v1.1