From 6f04cb1c8f481cf02fbc4657fefba985a1fe725f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?=
Date: Wed, 24 Feb 2021 16:58:07 +0000
Subject: accel/tcg: rename tb_lookup__cpu_state and hoist state extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Having a function return both a valid TB and some system state seems
excessive. It will make the subsequent refactoring easier if we look up
the current state where we need it.

Signed-off-by: Alex Bennée
Message-Id: <20210224165811.11567-2-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c    | 10 ++++++++--
 accel/tcg/tcg-runtime.c |  4 +++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'accel')

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 16e4fe3..ef96b31 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -245,6 +245,7 @@ static void cpu_exec_exit(CPUState *cpu)
 
 void cpu_exec_step_atomic(CPUState *cpu)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
@@ -258,7 +259,9 @@ void cpu_exec_step_atomic(CPUState *cpu)
         g_assert(!cpu->running);
         cpu->running = true;
 
-        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+        tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask);
+
         if (tb == NULL) {
             mmap_lock();
             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
@@ -418,11 +421,14 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
                                         int tb_exit, uint32_t cf_mask)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask);
     if (tb == NULL) {
         mmap_lock();
         tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index d736f4f..05e3d52 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags());
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
--
cgit v1.1


From c0ae396a81e13e5a09846f86a702bc61733a8885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?=
Date: Wed, 24 Feb 2021 16:58:08 +0000
Subject: accel/tcg: move CF_CLUSTER calculation to curr_cflags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is nothing special about this compile flag that stops us from
simply computing it in curr_cflags(), which is what we should be using
whenever we build a new set of cflags.
Signed-off-by: Alex Bennée
Message-Id: <20210224165811.11567-3-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c      | 9 ++++-----
 accel/tcg/tcg-runtime.c   | 2 +-
 accel/tcg/translate-all.c | 6 +++---
 3 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'accel')

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index ef96b31..45286dc 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -249,8 +249,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    uint32_t cflags = 1;
-    uint32_t cf_mask = cflags & CF_HASH_MASK;
+    uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
     int tb_exit;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -260,7 +259,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
         cpu->running = true;
 
         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-        tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask);
+        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
 
         if (tb == NULL) {
             mmap_lock();
@@ -497,7 +496,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
     if (replay_has_exception()
         && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
         /* Execute just one insn to trigger exception pending in the log */
-        cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
+        cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
     }
 #endif
     return false;
@@ -794,7 +793,7 @@ int cpu_exec(CPUState *cpu)
            have CF_INVALID set, -1 is a convenient invalid value that
            does not require tcg headers for cpu_common_reset. */
         if (cflags == -1) {
-            cflags = curr_cflags();
+            cflags = curr_cflags(cpu);
         } else {
             cpu->cflags_next_tb = -1;
         }
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 05e3d52..99403e3 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -154,7 +154,7 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
 
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 
-    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags());
+    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bbd919a..f29b47f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2194,7 +2194,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
     if (current_tb_modified) {
         page_collection_unlock(pages);
         /* Force execution of one insn next time. */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         mmap_unlock();
         cpu_loop_exit_noexc(cpu);
     }
@@ -2362,7 +2362,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
         /* Force execution of one insn next time. */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         return true;
     }
 #endif
@@ -2438,7 +2438,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      * operations only (which execute after completion) so we don't
      * double instrument the instruction.
      */
-    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
+    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "cpu_io_recompile: rewound execution of TB to "
--
cgit v1.1


From bf253ac606de4a934e41ba178bf4f1338c554cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?=
Date: Wed, 24 Feb 2021 16:58:09 +0000
Subject: accel/tcg: drop the use of CF_HASH_MASK and rename params
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't really deal in cf_mask most of the time. The one time it's
relevant is when we want to remove an invalidated TB from the QHT
lookup. Everywhere else we should be looking up things without
CF_INVALID set.

Signed-off-by: Alex Bennée
Message-Id: <20210224165811.11567-4-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c      | 16 ++++++++--------
 accel/tcg/tcg-runtime.c   |  2 +-
 accel/tcg/translate-all.c |  8 +++++---
 3 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'accel')

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 45286dc..931da96 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -307,7 +307,7 @@ struct tb_desc {
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
-    uint32_t cf_mask;
+    uint32_t cflags;
     uint32_t trace_vcpu_dstate;
 };
 
@@ -321,7 +321,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
+        tb_cflags(tb) == desc->cflags) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -341,7 +341,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask)
+                                   uint32_t cflags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -350,7 +350,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
-    desc.cf_mask = cf_mask;
+    desc.cflags = cflags;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
@@ -358,7 +358,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
         return NULL;
     }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
 
@@ -418,7 +418,7 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
 
 static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
-                                        int tb_exit, uint32_t cf_mask)
+                                        int tb_exit, uint32_t cflags)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
@@ -427,10 +427,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
 
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 
-    tb = tb_lookup(cpu, pc, cs_base, flags, cf_mask);
+    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         mmap_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
+        tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
         mmap_unlock();
         /* We add the TB in the virtual pc hash table for the fast lookup */
         qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 99403e3..49f5de3 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,10 +27,10 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
-#include "exec/tb-lookup.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
+#include "exec/tb-lookup.h"
 
 /* 32-bit helpers */
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f29b47f..0b0bfd3 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1311,7 +1311,7 @@ static bool tb_cmp(const void *ap, const void *bp)
     return a->pc == b->pc &&
         a->cs_base == b->cs_base &&
         a->flags == b->flags &&
-        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
+        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
         a->page_addr[0] == b->page_addr[0] &&
         a->page_addr[1] == b->page_addr[1];
@@ -1616,6 +1616,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
+    uint32_t orig_cflags = tb_cflags(tb);
 
     assert_memory_lock();
 
@@ -1626,7 +1627,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
@@ -1793,6 +1794,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     uint32_t h;
 
     assert_memory_lock();
+    tcg_debug_assert(!(tb->cflags & CF_INVALID));
 
     /*
      * Add the TB to the page list, acquiring first the pages's locks.
@@ -1811,7 +1813,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                      tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
--
cgit v1.1


From 6cc9d67c6f682cf04eea2d6e64a252b63a7eccdf Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 1 Mar 2021 19:21:08 -0800
Subject: accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The primary motivation is to remove a dozen insns along the fast-path
in tb_lookup. As a byproduct, this allows us to completely remove
parallel_cpus.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c            | 3 ---
 accel/tcg/tcg-accel-ops-mttcg.c | 3 +--
 accel/tcg/tcg-accel-ops-rr.c    | 2 +-
 accel/tcg/tcg-accel-ops.c       | 8 ++++++++
 accel/tcg/tcg-accel-ops.h       | 1 +
 accel/tcg/translate-all.c       | 4 ----
 6 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'accel')

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 931da96..bdfa036 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -267,8 +267,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true. */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -296,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 42973fb..847d207 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 4a66055..018b54c 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_cpu_init_cflags(cpu, false);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 6144d9d..6cdcaa2 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,14 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
+{
+    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
+    cflags |= parallel ? CF_PARALLEL : 0;
+    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
+    cpu->tcg_cflags = cflags;
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 4813000..6a5fcef 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 0b0bfd3..f32df8b 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
@@ -1867,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
-    cflags &= ~CF_CLUSTER_MASK;
-    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     max_insns = cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
--
cgit v1.1
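
For context: the diffs above are shown "limited to 'accel'", so the matching change to curr_cflags() itself, which lives outside accel/, does not appear in this view. A rough sketch of the end state the series converges on follows; tcg_cpu_init_cflags() is taken from the tcg-accel-ops.c hunk above, while the curr_cflags() definition shown here is an assumption based on the commit description rather than a quoted hunk, and QEMU's internal headers (CPUState, the CF_* flags, icount_enabled()) are assumed to be available.

/* Precompute the per-vCPU cflags once, when the vCPU thread is started. */
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
{
    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
    cflags |= parallel ? CF_PARALLEL : 0;
    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
    cpu->tcg_cflags = cflags;
}

/*
 * The hot path (tb_lookup and its callers) then just reads the precomputed
 * value instead of rebuilding CF_CLUSTER/CF_PARALLEL/CF_USE_ICOUNT on every
 * translation-block lookup (sketch; the real definition is outside accel/).
 */
static inline uint32_t curr_cflags(CPUState *cpu)
{
    return cpu->tcg_cflags;
}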