diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-10-25 16:38:57 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-10-25 16:38:57 +0100 |
commit | ae49fbbcd8e4e9d8bf7131add34773f579e1aff7 (patch) | |
tree | 5981acc5f85f69062f1e67bef90465295dac25c7 /include/exec | |
parent | 4e1b31dba8f66e337fbaf0166b7b8c440be78529 (diff) | |
parent | cc689485ee3e9dca05765326ee8fd619a6ec48f0 (diff) | |
download | qemu-ae49fbbcd8e4e9d8bf7131add34773f579e1aff7.zip qemu-ae49fbbcd8e4e9d8bf7131add34773f579e1aff7.tar.gz qemu-ae49fbbcd8e4e9d8bf7131add34773f579e1aff7.tar.bz2 |
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20171025' into staging
TCG patch queue
# gpg: Signature made Wed 25 Oct 2017 10:30:18 BST
# gpg: using RSA key 0x64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth/tags/pull-tcg-20171025: (51 commits)
translate-all: exit from tb_phys_invalidate if qht_remove fails
tcg: Initialize cpu_env generically
tcg: enable multiple TCG contexts in softmmu
tcg: introduce regions to split code_gen_buffer
translate-all: use qemu_protect_rwx/none helpers
osdep: introduce qemu_mprotect_rwx/none
tcg: allocate optimizer temps with tcg_malloc
tcg: distribute profiling counters across TCGContext's
tcg: introduce **tcg_ctxs to keep track of all TCGContext's
gen-icount: fold exitreq_label into TCGContext
tcg: define tcg_init_ctx and make tcg_ctx a pointer
tcg: take tb_ctx out of TCGContext
translate-all: report correct avg host TB size
exec-all: rename tb_free to tb_remove
translate-all: use a binary search tree to track TBs in TBContext
tcg: Remove CF_IGNORE_ICOUNT
tcg: Add CF_LAST_IO + CF_USE_ICOUNT to CF_HASH_MASK
cpu-exec: lookup/generate TB outside exclusive region during step_atomic
tcg: check CF_PARALLEL instead of parallel_cpus
target/sparc: check CF_PARALLEL instead of parallel_cpus
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'include/exec')
-rw-r--r-- | include/exec/exec-all.h | 42 | ||||
-rw-r--r-- | include/exec/gen-icount.h | 27 | ||||
-rw-r--r-- | include/exec/helper-gen.h | 22 | ||||
-rw-r--r-- | include/exec/helper-head.h | 16 | ||||
-rw-r--r-- | include/exec/tb-context.h | 6 | ||||
-rw-r--r-- | include/exec/tb-hash-xx.h | 9 | ||||
-rw-r--r-- | include/exec/tb-hash.h | 4 | ||||
-rw-r--r-- | include/exec/tb-lookup.h | 6 |
8 files changed, 76 insertions, 56 deletions
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 53f1835..923ece3 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -22,6 +22,7 @@ #include "qemu-common.h" #include "exec/tb-context.h" +#include "sysemu/cpus.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS @@ -305,10 +306,14 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) /* * Translation Cache-related fields of a TB. + * This struct exists just for convenience; we keep track of TB's in a binary + * search tree, and the only fields needed to compare TB's in the tree are + * @ptr and @size. + * Note: the address of search data can be obtained by adding @size to @ptr. */ struct tb_tc { void *ptr; /* pointer to the translated code */ - uint8_t *search; /* pointer to search data */ + size_t size; }; struct TranslationBlock { @@ -319,12 +324,15 @@ struct TranslationBlock { size <= TARGET_PAGE_SIZE) */ uint16_t icount; uint32_t cflags; /* compile flags */ -#define CF_COUNT_MASK 0x7fff -#define CF_LAST_IO 0x8000 /* Last insn may be an IO access. */ -#define CF_NOCACHE 0x10000 /* To be freed after execution */ -#define CF_USE_ICOUNT 0x20000 -#define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ -#define CF_INVALID 0x80000 /* TB is stale. Setters must acquire tb_lock */ +#define CF_COUNT_MASK 0x00007fff +#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ +#define CF_NOCACHE 0x00010000 /* To be freed after execution */ +#define CF_USE_ICOUNT 0x00020000 +#define CF_INVALID 0x00040000 /* TB is stale. Setters need tb_lock */ +#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ +/* cflags' mask for hashing/comparison */ +#define CF_HASH_MASK \ + (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL) /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; @@ -365,11 +373,27 @@ struct TranslationBlock { uintptr_t jmp_list_first; }; -void tb_free(TranslationBlock *tb); +extern bool parallel_cpus; + +/* Hide the atomic_read to make code a little easier on the eyes */ +static inline uint32_t tb_cflags(const TranslationBlock *tb) +{ + return atomic_read(&tb->cflags); +} + +/* current cflags for hashing/comparison */ +static inline uint32_t curr_cflags(void) +{ + return (parallel_cpus ? CF_PARALLEL : 0) + | (use_icount ? CF_USE_ICOUNT : 0); +} + +void tb_remove(TranslationBlock *tb); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags); + target_ulong cs_base, uint32_t flags, + uint32_t cf_mask); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. */ diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 9b3cb14..049bba8 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -6,23 +6,22 @@ /* Helpers for instruction counting code generation. */ static int icount_start_insn_idx; -static TCGLabel *exitreq_label; static inline void gen_tb_start(TranslationBlock *tb) { TCGv_i32 count, imm; - exitreq_label = gen_new_label(); - if (tb->cflags & CF_USE_ICOUNT) { + tcg_ctx->exitreq_label = gen_new_label(); + if (tb_cflags(tb) & CF_USE_ICOUNT) { count = tcg_temp_local_new_i32(); } else { count = tcg_temp_new_i32(); } - tcg_gen_ld_i32(count, tcg_ctx.tcg_env, + tcg_gen_ld_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); - if (tb->cflags & CF_USE_ICOUNT) { + if (tb_cflags(tb) & CF_USE_ICOUNT) { imm = tcg_temp_new_i32(); /* We emit a movi with a dummy immediate argument. Keep the insn index * of the movi so that we later (when we know the actual insn count) @@ -34,10 +33,10 @@ static inline void gen_tb_start(TranslationBlock *tb) tcg_temp_free_i32(imm); } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label); + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); - if (tb->cflags & CF_USE_ICOUNT) { - tcg_gen_st16_i32(count, tcg_ctx.tcg_env, + if (tb_cflags(tb) & CF_USE_ICOUNT) { + tcg_gen_st16_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); } @@ -46,32 +45,30 @@ static inline void gen_tb_start(TranslationBlock *tb) static inline void gen_tb_end(TranslationBlock *tb, int num_insns) { - if (tb->cflags & CF_USE_ICOUNT) { + if (tb_cflags(tb) & CF_USE_ICOUNT) { /* Update the num_insn immediate parameter now that we know * the actual insn count. */ tcg_set_insn_param(icount_start_insn_idx, 1, num_insns); } - gen_set_label(exitreq_label); + gen_set_label(tcg_ctx->exitreq_label); tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); /* Terminate the linked list. */ - tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0; + tcg_ctx->gen_op_buf[tcg_ctx->gen_op_buf[0].prev].next = 0; } static inline void gen_io_start(void) { TCGv_i32 tmp = tcg_const_i32(1); - tcg_gen_st_i32(tmp, tcg_ctx.tcg_env, - -ENV_OFFSET + offsetof(CPUState, can_do_io)); + tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); tcg_temp_free_i32(tmp); } static inline void gen_io_end(void) { TCGv_i32 tmp = tcg_const_i32(0); - tcg_gen_st_i32(tmp, tcg_ctx.tcg_env, - -ENV_OFFSET + offsetof(CPUState, can_do_io)); + tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); tcg_temp_free_i32(tmp); } diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h index 8239ffc..15204ab 100644 --- a/include/exec/helper-gen.h +++ b/include/exec/helper-gen.h @@ -9,31 +9,31 @@ #define DEF_HELPER_FLAGS_0(name, flags, ret) \ static inline void glue(gen_helper_, name)(dh_retvar_decl0(ret)) \ { \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 0, NULL); \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 0, NULL); \ } #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1)) \ { \ - TCGArg args[1] = { dh_arg(t1, 1) }; \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 1, args); \ + TCGTemp *args[1] = { dh_arg(t1, 1) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 1, args); \ } #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2)) \ { \ - TCGArg args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) }; \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 2, args); \ + TCGTemp *args[2] = { dh_arg(t1, 1), dh_arg(t2, 2) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 2, args); \ } #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \ { \ - TCGArg args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) }; \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 3, args); \ + TCGTemp *args[3] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3) }; \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 3, args); \ } #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ @@ -41,9 +41,9 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), \ dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \ { \ - TCGArg args[4] = { dh_arg(t1, 1), dh_arg(t2, 2), \ + TCGTemp *args[4] = { dh_arg(t1, 1), dh_arg(t2, 2), \ dh_arg(t3, 3), dh_arg(t4, 4) }; \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 4, args); \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 4, args); \ } #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \ @@ -51,9 +51,9 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ dh_arg_decl(t4, 4), dh_arg_decl(t5, 5)) \ { \ - TCGArg args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ + TCGTemp *args[5] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ dh_arg(t4, 4), dh_arg(t5, 5) }; \ - tcg_gen_callN(&tcg_ctx, HELPER(name), dh_retvar(ret), 5, args); \ + tcg_gen_callN(HELPER(name), dh_retvar(ret), 5, args); \ } #include "helper.h" diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index 1cfc43b..639eefd 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -20,10 +20,6 @@ #define HELPER(name) glue(helper_, name) -#define GET_TCGV_i32 GET_TCGV_I32 -#define GET_TCGV_i64 GET_TCGV_I64 -#define GET_TCGV_ptr GET_TCGV_PTR - /* Some types that make sense in C, but not for TCG. */ #define dh_alias_i32 i32 #define dh_alias_s32 i32 @@ -78,11 +74,11 @@ #define dh_retvar_decl_ptr TCGv_ptr retval, #define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) -#define dh_retvar_void TCG_CALL_DUMMY_ARG -#define dh_retvar_noreturn TCG_CALL_DUMMY_ARG -#define dh_retvar_i32 GET_TCGV_i32(retval) -#define dh_retvar_i64 GET_TCGV_i64(retval) -#define dh_retvar_ptr GET_TCGV_ptr(retval) +#define dh_retvar_void NULL +#define dh_retvar_noreturn NULL +#define dh_retvar_i32 tcgv_i32_temp(retval) +#define dh_retvar_i64 tcgv_i64_temp(retval) +#define dh_retvar_ptr tcgv_ptr_temp(retval) #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) #define dh_is_64bit_void 0 @@ -113,7 +109,7 @@ ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) #define dh_arg(t, n) \ - glue(GET_TCGV_, dh_alias(t))(glue(arg, n)) + glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) #define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n) diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index 25c2afe..1d41202 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -31,10 +31,8 @@ typedef struct TBContext TBContext; struct TBContext { - TranslationBlock **tbs; + GTree *tb_tree; struct qht htable; - size_t tbs_size; - int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; @@ -43,4 +41,6 @@ struct TBContext { int tb_phys_invalidate_count; }; +extern TBContext tb_ctx; + #endif diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h index 6cd3022..747a9a6 100644 --- a/include/exec/tb-hash-xx.h +++ b/include/exec/tb-hash-xx.h @@ -48,8 +48,8 @@ * xxhash32, customized for input variables that are not guaranteed to be * contiguous in memory. */ -static inline -uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f) +static inline uint32_t +tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g) { uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2; uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2; @@ -78,7 +78,7 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f) v4 *= PRIME32_1; h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18); - h32 += 24; + h32 += 28; h32 += e * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4; @@ -86,6 +86,9 @@ uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f) h32 += f * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4; + h32 += g * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 17b5ee0..0526c4f 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -59,9 +59,9 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) static inline uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, - uint32_t trace_vcpu_dstate) + uint32_t cf_mask, uint32_t trace_vcpu_dstate) { - return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate); + return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); } #endif diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h index 436b6d5..2961385 100644 --- a/include/exec/tb-lookup.h +++ b/include/exec/tb-lookup.h @@ -21,7 +21,7 @@ /* Might cause an exception, so have a longjmp destination ready */ static inline TranslationBlock * tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, - uint32_t *flags) + uint32_t *flags, uint32_t cf_mask) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; @@ -35,10 +35,10 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, tb->cs_base == *cs_base && tb->flags == *flags && tb->trace_vcpu_dstate == *cpu->trace_dstate && - !(atomic_read(&tb->cflags) & CF_INVALID))) { + (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { return tb; } - tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags); + tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask); if (tb == NULL) { return NULL; } |