Diffstat (limited to 'accel/tcg')
-rw-r--r-- | accel/tcg/cpu-exec.c            |   1
-rw-r--r-- | accel/tcg/cputlb.c              |  37
-rw-r--r-- | accel/tcg/internal-common.h     |   2
-rw-r--r-- | accel/tcg/meson.build           |  11
-rw-r--r-- | accel/tcg/monitor.c             | 209
-rw-r--r-- | accel/tcg/plugin-gen.c          |  30
-rw-r--r-- | accel/tcg/tcg-accel-ops-mttcg.c |   1
-rw-r--r-- | accel/tcg/tcg-accel-ops.c       |  12
-rw-r--r-- | accel/tcg/tcg-all.c             |   9
-rw-r--r-- | accel/tcg/tcg-stats.c           | 219
-rw-r--r-- | accel/tcg/translate-all.c       |   6
11 files changed, 303 insertions, 234 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index cc5f362..713bdb2 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1039,6 +1039,7 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
     assert(tcg_ops->cpu_exec_halt);
     assert(tcg_ops->cpu_exec_interrupt);
     assert(tcg_ops->cpu_exec_reset);
+    assert(tcg_ops->pointer_wrap);
 #endif /* !CONFIG_USER_ONLY */
     assert(tcg_ops->translate_code);
     assert(tcg_ops->get_tb_cpu_state);
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5f6d7c6..87e14bd 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1773,6 +1773,9 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
         l->page[1].size = l->page[0].size - size0;
         l->page[0].size = size0;
 
+        l->page[1].addr = cpu->cc->tcg_ops->pointer_wrap(cpu, l->mmu_idx,
+                                                         l->page[1].addr, addr);
+
         /*
          * Lookup both pages, recognizing exceptions from either.  If the
          * second lookup potentially resized, refresh first CPUTLBEntryFull.
@@ -1871,8 +1874,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
         goto stop_the_world;
     }
 
-    /* Collect tlb flags for read. */
+    /* Finish collecting tlb flags for both read and write. */
+    full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
     tlb_addr |= tlbe->addr_read;
+    tlb_addr &= TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
+    tlb_addr |= full->slow_flags[MMU_DATA_STORE];
+    tlb_addr |= full->slow_flags[MMU_DATA_LOAD];
 
     /* Notice an IO access or a needs-MMU-lookup access */
     if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
@@ -1882,13 +1889,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
     }
 
     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
-    full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
 
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
         notdirty_write(cpu, addr, size, full, retaddr);
     }
 
-    if (unlikely(tlb_addr & TLB_FORCE_SLOW)) {
+    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
         int wp_flags = 0;
 
         if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
@@ -1897,10 +1903,8 @@
         if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
             wp_flags |= BP_MEM_READ;
         }
-        if (wp_flags) {
-            cpu_check_watchpoint(cpu, addr, size,
-                                 full->attrs, wp_flags, retaddr);
-        }
+        cpu_check_watchpoint(cpu, addr, size,
+                             full->attrs, wp_flags, retaddr);
     }
 
     return hostaddr;
@@ -2926,3 +2930,22 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
 {
     return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
 }
+
+/*
+ * Common pointer_wrap implementations.
+ */
+
+/*
+ * To be used for strict alignment targets.
+ * Because no accesses are unaligned, no accesses wrap either.
+ */
+vaddr cpu_pointer_wrap_notreached(CPUState *cs, int idx, vaddr res, vaddr base)
+{
+    g_assert_not_reached();
+}
+
+/* To be used for strict 32-bit targets. */
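
The assert added to tcg_exec_realizefn() makes the new pointer_wrap hook mandatory for system-mode targets. A minimal wiring sketch for a hypothetical target with strict 32-bit virtual addresses, reusing the common helper introduced above ("my_target" is illustrative, and the declaring header is an assumption):

#include "accel/tcg/cpu-ops.h"  /* assumed to declare TCGCPUOps and the helpers */

static const TCGCPUOps my_target_tcg_ops = {
    /* ... the target's existing callbacks (translate_code, etc.) ... */
#ifndef CONFIG_USER_ONLY
    /* An unaligned access that crosses 2^32 wraps back to address 0. */
    .pointer_wrap = cpu_pointer_wrap_uint32,
#endif
};

A target whose ISA forbids unaligned accesses would instead install cpu_pointer_wrap_notreached, since a wrapping second page can never occur there.
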
+vaddr cpu_pointer_wrap_uint32(CPUState *cs, int idx, vaddr res, vaddr base)
+{
+    return (uint32_t)res;
+}
diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h
index 1dbc45d..6adfeef 100644
--- a/accel/tcg/internal-common.h
+++ b/accel/tcg/internal-common.h
@@ -139,4 +139,6 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
+void tcg_get_stats(AccelState *accel, GString *buf);
+
 #endif
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index d6f533f..002aa8f 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -1,4 +1,4 @@
-if not get_option('tcg').allowed()
+if not have_tcg
   subdir_done()
 endif
 
@@ -11,6 +11,7 @@ tcg_ss.add(files(
   'tcg-runtime-gvec.c',
   'tb-maint.c',
   'tcg-all.c',
+  'tcg-stats.c',
   'translate-all.c',
   'translator.c',
 ))
@@ -18,15 +19,15 @@ if get_option('plugins')
   tcg_ss.add(files('plugin-gen.c'))
 endif
 
-libuser_ss.add_all(tcg_ss)
-libsystem_ss.add_all(tcg_ss)
+user_ss.add_all(tcg_ss)
+system_ss.add_all(tcg_ss)
 
-libuser_ss.add(files(
+user_ss.add(files(
   'user-exec.c',
   'user-exec-stub.c',
 ))
 
-libsystem_ss.add(files(
+system_ss.add(files(
   'cputlb.c',
   'icount-common.c',
   'monitor.c',
" - "Histogram: %s\n", - qdist_avg(&hst.occupancy) * 100, hgram); - g_free(hgram); - - hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; - hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); - if (hgram_bins > 10) { - hgram_bins = 10; - } else { - hgram_bins = 0; - hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; - } - hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); - g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " - "Histogram: %s\n", - qdist_avg(&hst.chain), hgram); - g_free(hgram); -} - -struct tb_tree_stats { - size_t nb_tbs; - size_t host_size; - size_t target_size; - size_t max_target_size; - size_t direct_jmp_count; - size_t direct_jmp2_count; - size_t cross_page; -}; - -static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) -{ - const TranslationBlock *tb = value; - struct tb_tree_stats *tst = data; - - tst->nb_tbs++; - tst->host_size += tb->tc.size; - tst->target_size += tb->size; - if (tb->size > tst->max_target_size) { - tst->max_target_size = tb->size; - } - if (tb->page_addr[1] != -1) { - tst->cross_page++; - } - if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { - tst->direct_jmp2_count++; - } - } - return false; -} - -static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) -{ - CPUState *cpu; - size_t full = 0, part = 0, elide = 0; - - CPU_FOREACH(cpu) { - full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); - part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); - elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); - } - *pfull = full; - *ppart = part; - *pelide = elide; -} - -static void tcg_dump_info(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); -} - -static void dump_exec_info(GString *buf) -{ - struct tb_tree_stats tst = {}; - struct qht_stats hst; - size_t nb_tbs, flush_full, flush_part, flush_elide; - - tcg_tb_foreach(tb_tree_stats_iter, &tst); - nb_tbs = tst.nb_tbs; - /* XXX: avoid using doubles ? */ - g_string_append_printf(buf, "Translation buffer state:\n"); - /* - * Report total code size including the padding and TB structs; - * otherwise users might think "-accel tcg,tb-size" is not honoured. - * For avg host size we use the precise numbers from tb_tree_stats though. - */ - g_string_append_printf(buf, "gen code size %zu/%zu\n", - tcg_code_size(), tcg_code_capacity()); - g_string_append_printf(buf, "TB count %zu\n", nb_tbs); - g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", - nb_tbs ? tst.target_size / nb_tbs : 0, - tst.max_target_size); - g_string_append_printf(buf, "TB avg host size %zu bytes " - "(expansion ratio: %0.1f)\n", - nb_tbs ? tst.host_size / nb_tbs : 0, - tst.target_size ? - (double)tst.host_size / tst.target_size : 0); - g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", - tst.cross_page, - nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); - g_string_append_printf(buf, "direct jump count %zu (%zu%%) " - "(2 jumps=%zu %zu%%)\n", - tst.direct_jmp_count, - nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, - tst.direct_jmp2_count, - nb_tbs ? 
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index c1da753..9920381 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -117,10 +117,20 @@ static TCGv_i32 gen_cpu_index(void)
 static void gen_udata_cb(struct qemu_plugin_regular_cb *cb)
 {
     TCGv_i32 cpu_index = gen_cpu_index();
+    enum qemu_plugin_cb_flags cb_flags =
+        tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+    TCGv_i32 flags = tcg_constant_i32(cb_flags);
+    TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+    tcg_gen_st_i32(flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
                   tcgv_i32_temp(cpu_index),
                   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+    tcg_gen_st_i32(clear_flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_temp_free_i32(cpu_index);
+    tcg_temp_free_i32(flags);
+    tcg_temp_free_i32(clear_flags);
 }
 
 static TCGv_ptr gen_plugin_u64_ptr(qemu_plugin_u64 entry)
@@ -173,10 +183,20 @@ static void gen_udata_cond_cb(struct qemu_plugin_conditional_cb *cb)
     tcg_gen_ld_i64(val, ptr, 0);
     tcg_gen_brcondi_i64(cond, val, cb->imm, after_cb);
     TCGv_i32 cpu_index = gen_cpu_index();
+    enum qemu_plugin_cb_flags cb_flags =
+        tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+    TCGv_i32 flags = tcg_constant_i32(cb_flags);
+    TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+    tcg_gen_st_i32(flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
                   tcgv_i32_temp(cpu_index),
                   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+    tcg_gen_st_i32(clear_flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_temp_free_i32(cpu_index);
+    tcg_temp_free_i32(flags);
+    tcg_temp_free_i32(clear_flags);
     gen_set_label(after_cb);
 
     tcg_temp_free_i64(val);
@@ -210,12 +230,22 @@ static void gen_mem_cb(struct qemu_plugin_regular_cb *cb,
                        qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
 {
     TCGv_i32 cpu_index = gen_cpu_index();
+    enum qemu_plugin_cb_flags cb_flags =
+        tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+    TCGv_i32 flags = tcg_constant_i32(cb_flags);
+    TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+    tcg_gen_st_i32(flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_gen_call4(cb->f.vcpu_mem, cb->info, NULL,
                   tcgv_i32_temp(cpu_index),
                   tcgv_i32_temp(tcg_constant_i32(meminfo)),
                   tcgv_i64_temp(addr),
                   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+    tcg_gen_st_i32(clear_flags, tcg_env,
+                   offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
     tcg_temp_free_i32(cpu_index);
+    tcg_temp_free_i32(flags);
+    tcg_temp_free_i32(clear_flags);
 }
 
 static void inject_cb(struct qemu_plugin_dyn_cb *cb)
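
All three callback generators above store the callback's register-access flags through tcg_env at a negative offset. A short sketch of why that addressing reaches cpu->neg (our illustration; it relies on QEMU's layout rule that the CPUArchState immediately follows CPUState, which this patch does not restate):

#include "qemu/osdep.h"
#include "hw/core/cpu.h"

/* Verify that a negative offset from the arch env lands on CPUState.neg. */
static void check_plugin_cb_flags_offset(CPUState *cpu, void *env)
{
    ptrdiff_t off = offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState);

    /* Assumption: env is the CPUArchState placed right after CPUState. */
    g_assert((char *)env == (char *)cpu + sizeof(CPUState));
    g_assert((char *)env + off == (char *)&cpu->neg.plugin_cb_flags);
}
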
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index dfcee30..337b993 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -113,7 +113,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
             }
         }
 
-        qatomic_set_mb(&cpu->exit_request, 0);
         qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index b24d6a7..3b0d7d2 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -26,7 +26,8 @@
  */
 
 #include "qemu/osdep.h"
-#include "system/accel-ops.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
 #include "system/tcg.h"
 #include "system/replay.h"
 #include "exec/icount.h"
@@ -80,6 +81,9 @@ int tcg_cpu_exec(CPUState *cpu)
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
+
+    qatomic_set_mb(&cpu->exit_request, 0);
+
     return ret;
 }
 
@@ -93,8 +97,6 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
 /* mask must never be zero, except for A20 change call */
 void tcg_handle_interrupt(CPUState *cpu, int mask)
 {
-    g_assert(bql_locked());
-
     cpu->interrupt_request |= mask;
 
     /*
@@ -198,8 +200,10 @@ static inline void tcg_remove_all_breakpoints(CPUState *cpu)
     cpu_watchpoint_remove_all(cpu, BP_GDB);
 }
 
-static void tcg_accel_ops_init(AccelOpsClass *ops)
+static void tcg_accel_ops_init(AccelClass *ac)
 {
+    AccelOpsClass *ops = ac->ops;
+
     if (qemu_tcg_mttcg_enabled()) {
         ops->create_vcpu_thread = mttcg_start_vcpu_thread;
         ops->kick_vcpu_thread = mttcg_kick_vcpu_thread;
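
Together with the mttcg hunk above, this moves the clearing of exit_request out of the MTTCG thread loop and into tcg_cpu_exec(), so the MTTCG and round-robin paths reset the flag at one common point. A condensed view of the resulting function, with the ordering rationale spelled out in comments (the rationale is our reading, not text from the patch):

int tcg_cpu_exec(CPUState *cpu)
{
    int ret;

    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);

    /*
     * qatomic_set_mb() is a store followed by a full barrier: a kick
     * that arrives after this point stays pending for the next call
     * rather than being silently discarded.
     */
    qatomic_set_mb(&cpu->exit_request, 0);

    return ret;
}
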
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 6e5dc33..5125e1a 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -39,6 +39,8 @@
 #ifndef CONFIG_USER_ONLY
 #include "hw/boards.h"
 #endif
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
 #include "accel/tcg/cpu-ops.h"
 #include "internal-common.h"
 
@@ -80,9 +82,9 @@ static void tcg_accel_instance_init(Object *obj)
 
 bool one_insn_per_tb;
 
-static int tcg_init_machine(MachineState *ms)
+static int tcg_init_machine(AccelState *as, MachineState *ms)
 {
-    TCGState *s = TCG_STATE(current_accel());
+    TCGState *s = TCG_STATE(as);
     unsigned max_threads = 1;
 
 #ifndef CONFIG_USER_ONLY
@@ -219,7 +221,7 @@ static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp)
     qatomic_set(&one_insn_per_tb, value);
 }
 
-static int tcg_gdbstub_supported_sstep_flags(void)
+static int tcg_gdbstub_supported_sstep_flags(AccelState *as)
 {
     /*
      * In replay mode all events will come from the log and can't be
@@ -241,6 +243,7 @@ static void tcg_accel_class_init(ObjectClass *oc, const void *data)
     ac->init_machine = tcg_init_machine;
     ac->cpu_common_realize = tcg_exec_realizefn;
     ac->cpu_common_unrealize = tcg_exec_unrealizefn;
+    ac->get_stats = tcg_get_stats;
     ac->allowed = &tcg_allowed;
     ac->gdbstub_supported_sstep_flags = tcg_gdbstub_supported_sstep_flags;
 
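
The new get_stats class hook lets generic code request an accelerator report without knowing it is talking to TCG. The dispatch site lives outside this diff; a plausible shape, under that assumption:

#include "qemu/accel.h"

static void accel_dump_stats(AccelState *accel, GString *buf)
{
    AccelClass *ac = ACCEL_GET_CLASS(accel);

    /* Accelerators that implement no statistics simply skip the hook. */
    if (ac->get_stats) {
        ac->get_stats(accel, buf);
    }
}
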
" + "Histogram: %s\n", + avg, hgram); + } + g_free(hgram); +} + +struct tb_tree_stats { + size_t nb_tbs; + size_t host_size; + size_t target_size; + size_t max_target_size; + size_t direct_jmp_count; + size_t direct_jmp2_count; + size_t cross_page; +}; + +static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) +{ + const TranslationBlock *tb = value; + struct tb_tree_stats *tst = data; + + tst->nb_tbs++; + tst->host_size += tb->tc.size; + tst->target_size += tb->size; + if (tb->size > tst->max_target_size) { + tst->max_target_size = tb->size; + } +#ifndef CONFIG_USER_ONLY + if (tb->page_addr[1] != -1) { + tst->cross_page++; + } +#endif + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp_count++; + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { + tst->direct_jmp2_count++; + } + } + return false; +} + +static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) +{ + CPUState *cpu; + size_t full = 0, part = 0, elide = 0; + + CPU_FOREACH(cpu) { + full += qatomic_read(&cpu->neg.tlb.c.full_flush_count); + part += qatomic_read(&cpu->neg.tlb.c.part_flush_count); + elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count); + } + *pfull = full; + *ppart = part; + *pelide = elide; +} + +static void tcg_dump_flush_info(GString *buf) +{ + size_t flush_full, flush_part, flush_elide; + + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); + + tlb_flush_counts(&flush_full, &flush_part, &flush_elide); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); +} + +static void dump_exec_info(GString *buf) +{ + struct tb_tree_stats tst = {}; + struct qht_stats hst; + size_t nb_tbs; + + tcg_tb_foreach(tb_tree_stats_iter, &tst); + nb_tbs = tst.nb_tbs; + /* XXX: avoid using doubles ? */ + g_string_append_printf(buf, "Translation buffer state:\n"); + /* + * Report total code size including the padding and TB structs; + * otherwise users might think "-accel tcg,tb-size" is not honoured. + * For avg host size we use the precise numbers from tb_tree_stats though. + */ + g_string_append_printf(buf, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + g_string_append_printf(buf, "TB count %zu\n", nb_tbs); + g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? tst.target_size / nb_tbs : 0, + tst.max_target_size); + g_string_append_printf(buf, "TB avg host size %zu bytes " + "(expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? + (double)tst.host_size / tst.target_size : 0); + g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", + tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + g_string_append_printf(buf, "direct jump count %zu (%zu%%) " + "(2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 451b383..d468667 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -24,7 +24,6 @@
 #include "tcg/tcg.h"
 #include "exec/mmap-lock.h"
 #include "tb-internal.h"
-#include "tlb-bounds.h"
 #include "exec/tb-flush.h"
 #include "qemu/cacheinfo.h"
 #include "qemu/target-info.h"
@@ -313,11 +312,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s)
     tcg_ctx->gen_tb = tb;
     tcg_ctx->addr_type = target_long_bits() == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
-#ifdef CONFIG_SOFTMMU
-    tcg_ctx->page_bits = TARGET_PAGE_BITS;
-    tcg_ctx->page_mask = TARGET_PAGE_MASK;
-    tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
-#endif
     tcg_ctx->guest_mo = cpu->cc->tcg_ops->guest_default_memory_order;
 
 restart_translate: