diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2023-01-19 11:56:50 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2023-01-19 11:56:51 +0000 |
commit | ef4f031fab7b070816454949a1b6b6c7aa3cf503 (patch) | |
tree | e120d446527f8c91b9f3020a90338e52e1b2de59 | |
parent | 701ed34833f53880ba38bde09b0846d01fc16d66 (diff) | |
parent | 493c9b19a7fb7f387c4fcf57d3836504d5242bf5 (diff) | |
download | qemu-ef4f031fab7b070816454949a1b6b6c7aa3cf503.zip qemu-ef4f031fab7b070816454949a1b6b6c7aa3cf503.tar.gz qemu-ef4f031fab7b070816454949a1b6b6c7aa3cf503.tar.bz2 |
Merge tag 'pull-tcg-20230117' of https://gitlab.com/rth7680/qemu into staging
tcg: Fix race conditions in (most) goto_tb implementations
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmPHKmEdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+W+gf/cPFM+cY5QrC/ziVI
# Dbqc9Z+B4QVNoQjA+Qv9uL+ErjJ1zXuaS512NPHaL5nkjqzzbU4rYiIk1UET/vDJ
# 4RDw7o4pia3umt68zf9BfxPhkCqB+Zfdo/iolszL9OBpufPGD9NRjJn1qfM6Mpbd
# 38s6InRrmCWlGT3fsJq2cGzmT64dGDIovbHbJ5Y0EbtWBIcP99w2YfucclMB5AFI
# j6gX8jOKCfuN1Hru8waH77X2B0Jv36xlfRV9GLrwo/FQ/aiCyJLl5Hxuwm0adSql
# PBhDSdTXOTiNFmtnBUhaV1orgYKGGwWWfrGmzd0cGg/UcgHHDuJaoeuZHGNJlYai
# PFLW/Q==
# =fMV8
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 17 Jan 2023 23:08:17 GMT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* tag 'pull-tcg-20230117' of https://gitlab.com/rth7680/qemu: (22 commits)
tcg/riscv: Implement direct branch for goto_tb
tcg/riscv: Introduce OPC_NOP
tcg/arm: Implement direct branch for goto_tb
tcg/sparc64: Reorg goto_tb implementation
tcg/sparc64: Remove USE_REG_TB
tcg/ppc: Reorg goto_tb implementation
tcg/aarch64: Reorg goto_tb implementation
tcg: Remove TCG_TARGET_HAS_direct_jump
tcg: Always define tb_target_set_jmp_target
tcg: Move tb_target_set_jmp_target declaration to tcg.h
tcg: Change tb_target_set_jmp_target arguments
tcg: Add TranslationBlock.jmp_insn_offset
tcg: Add gen_tb to TCGContext
tcg: Rename TB_JMP_RESET_OFFSET_INVALID to TB_JMP_OFFSET_INVALID
tcg: Split out tcg_out_goto_tb
tcg: Introduce get_jmp_target_addr
tcg: Introduce set_jmp_insn_offset
tcg: Replace asserts on tcg_jmp_insn_offset
tcg/sparc64: Remove unused goto_tb code for indirect jump
tcg/ppc: Remove unused goto_tb code for indirect jump
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | accel/tcg/cpu-exec.c | 21 | ||||
-rw-r--r-- | accel/tcg/translate-all.c | 10 | ||||
-rw-r--r-- | include/exec/exec-all.h | 5 | ||||
-rw-r--r-- | include/tcg/tcg.h | 14 | ||||
-rw-r--r-- | tcg/aarch64/tcg-target.c.inc | 106 | ||||
-rw-r--r-- | tcg/aarch64/tcg-target.h | 6 | ||||
-rw-r--r-- | tcg/arm/tcg-target.c.inc | 89 | ||||
-rw-r--r-- | tcg/arm/tcg-target.h | 5 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 68 | ||||
-rw-r--r-- | tcg/i386/tcg-target.h | 9 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target.c.inc | 66 | ||||
-rw-r--r-- | tcg/loongarch64/tcg-target.h | 3 | ||||
-rw-r--r-- | tcg/mips/tcg-target.c.inc | 59 | ||||
-rw-r--r-- | tcg/mips/tcg-target.h | 5 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c.inc | 193 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.h | 7 | ||||
-rw-r--r-- | tcg/riscv/tcg-target.c.inc | 65 | ||||
-rw-r--r-- | tcg/riscv/tcg-target.h | 4 | ||||
-rw-r--r-- | tcg/s390x/tcg-target.c.inc | 67 | ||||
-rw-r--r-- | tcg/s390x/tcg-target.h | 11 | ||||
-rw-r--r-- | tcg/sparc64/tcg-target.c.inc | 201 | ||||
-rw-r--r-- | tcg/sparc64/tcg-target.h | 4 | ||||
-rw-r--r-- | tcg/tcg-op.c | 14 | ||||
-rw-r--r-- | tcg/tcg.c | 42 | ||||
-rw-r--r-- | tcg/tci/tcg-target.c.inc | 31 | ||||
-rw-r--r-- | tcg/tci/tcg-target.h | 4 |
26 files changed, 528 insertions, 581 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 8927092..04cd1f3 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -572,15 +572,18 @@ void cpu_exec_step_atomic(CPUState *cpu) void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { - if (TCG_TARGET_HAS_direct_jump) { - uintptr_t offset = tb->jmp_target_arg[n]; - uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; - uintptr_t jmp_rx = tc_ptr + offset; - uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; - tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr); - } else { - tb->jmp_target_arg[n] = addr; - } + /* + * Get the rx view of the structure, from which we find the + * executable code address, and tb_target_set_jmp_target can + * produce a pc-relative displacement to jmp_target_addr[n]. + */ + const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb); + uintptr_t offset = tb->jmp_insn_offset[n]; + uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset; + uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; + + tb->jmp_target_addr[n] = addr; + tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw); } static inline void tb_add_jump(TranslationBlock *tb, int n, diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 979f8e1..9e925c1 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -350,7 +350,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->trace_vcpu_dstate = *cpu->trace_dstate; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); - tcg_ctx->tb_cflags = cflags; + tcg_ctx->gen_tb = tb; tb_overflow: #ifdef CONFIG_PROFILER @@ -508,10 +508,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->jmp_dest[1] = (uintptr_t)NULL; /* init original jump addresses which have been set during tcg_gen_code() */ - if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 0); } - if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 1); } @@ -693,9 +693,9 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) if (tb_page_addr1(tb) != -1) { tst->cross_page++; } - if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp2_count++; } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 25e11b0..54585a9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -585,9 +585,10 @@ struct TranslationBlock { * setting one of the jump targets (or patching the jump instruction). Only * two of such jumps are supported. */ +#define TB_JMP_OFFSET_INVALID 0xffff /* indicates no jump generated */ uint16_t jmp_reset_offset[2]; /* offset of original jump target */ -#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */ - uintptr_t jmp_target_arg[2]; /* target address or offset */ + uint16_t jmp_insn_offset[2]; /* offset of direct jump insn */ + uintptr_t jmp_target_addr[2]; /* target address */ /* * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps. diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index b949d75..6f49717 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -552,20 +552,15 @@ struct TCGContext { int nb_indirects; int nb_ops; - /* goto_tb support */ - tcg_insn_unit *code_buf; - uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */ - uintptr_t *tb_jmp_insn_offset; /* tb->jmp_target_arg if direct_jump */ - uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_arg if !direct_jump */ - TCGRegSet reserved_regs; - uint32_t tb_cflags; /* cflags of the current TB */ intptr_t current_frame_offset; intptr_t frame_start; intptr_t frame_end; TCGTemp *frame_temp; - tcg_insn_unit *code_ptr; + TranslationBlock *gen_tb; /* tb for which code is being generated */ + tcg_insn_unit *code_buf; /* pointer for start of tb */ + tcg_insn_unit *code_ptr; /* pointer for running end of tb */ #ifdef CONFIG_PROFILER TCGProfile prof; @@ -838,6 +833,9 @@ void tcg_func_start(TCGContext *s); int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start); +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); + void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index ad1816e..330d26b 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1353,32 +1353,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, tcg_out_call_int(s, target); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - tcg_insn_unit i1, i2; - TCGType rt = TCG_TYPE_I64; - TCGReg rd = TCG_REG_TMP; - uint64_t pair; - - ptrdiff_t offset = addr - jmp_rx; - - if (offset == sextract64(offset, 0, 26)) { - i1 = I3206_B | ((offset >> 2) & 0x3ffffff); - i2 = NOP; - } else { - offset = (addr >> 12) - (jmp_rx >> 12); - - /* patch ADRP */ - i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; - /* patch ADDI */ - i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; - } - pair = (uint64_t)i2 << 32 | i1; - qatomic_set((uint64_t *)jmp_rw, pair); - flush_idcache_range(jmp_rx, jmp_rw, 8); -} - static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) { if (!l->has_value) { @@ -1887,6 +1861,54 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_goto_long(s, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto_long(s, tb_ret_addr); + } +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Direct branch, or indirect address load, will be patched + * by tb_target_set_jmp_target. Assert indirect load offset + * in range early, regardless of direct branch distance. + */ + intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); + tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); + + set_jmp_insn_offset(s, which); + tcg_out32(s, I3206_B); + tcg_out_insn(s, 3207, BR, TCG_REG_TMP); + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t d_addr = tb->jmp_target_addr[n]; + ptrdiff_t d_offset = d_addr - jmp_rx; + tcg_insn_unit insn; + + /* Either directly branch, or indirect branch load. */ + if (d_offset == sextract64(d_offset, 0, 28)) { + insn = deposit32(I3206_B, 0, 26, d_offset >> 2); + } else { + uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; + ptrdiff_t i_offset = i_addr - jmp_rx; + + /* Note that we asserted this in range in tcg_out_goto_tb. */ + insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2); + } + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1906,36 +1928,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_goto_long(s, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto_long(s, tb_ret_addr); - } - break; - - case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset != NULL); - /* - * Ensure that ADRP+ADD are 8-byte aligned so that an atomic - * write can be used to patch the target address. - */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); - } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: tcg_out_insn(s, 3207, BR, a0); break; @@ -2305,6 +2297,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 413a541..8d24429 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -15,7 +15,7 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 -#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) typedef enum { TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, @@ -123,7 +123,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_v64 1 #define TCG_TARGET_HAS_v128 1 @@ -151,9 +150,6 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 9245ea8..6abe941 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -135,6 +135,8 @@ typedef enum { ARITH_BIC = 0xe << 21, ARITH_MVN = 0xf << 21, + INSN_B = 0x0a000000, + INSN_CLZ = 0x016f0f10, INSN_RBIT = 0x06ff0f30, @@ -546,7 +548,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset) { - tcg_out32(s, (cond << 28) | 0x0a000000 | + tcg_out32(s, (cond << 28) | INSN_B | (((offset - 8) >> 2) & 0x00ffffff)); } @@ -1933,6 +1935,62 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) static void tcg_out_epilogue(TCGContext *s); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, arg); + tcg_out_epilogue(s); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + uintptr_t i_addr; + intptr_t i_disp; + + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, INSN_NOP); + + /* When branch is out of range, fall through to indirect. */ + i_addr = get_jmp_target_addr(s, which); + i_disp = tcg_pcrel_diff(s, (void *)i_addr) - 8; + tcg_debug_assert(i_disp < 0); + if (i_disp >= -0xfff) { + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, i_disp); + } else { + /* + * The TB is close, but outside the 12 bits addressable by + * the load. We can extend this to 20 bits with a sub of a + * shifted immediate from pc. + */ + int h = -i_disp; + int l = h & 0xfff; + + h = encode_imm_nofail(h - l); + tcg_out_dat_imm(s, COND_AL, ARITH_SUB, TCG_REG_R0, TCG_REG_PC, h); + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, l); + } + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t addr = tb->jmp_target_addr[n]; + ptrdiff_t offset = addr - (jmp_rx + 8); + tcg_insn_unit insn; + + /* Either directly branch, or fall through to indirect branch. */ + if (offset == sextract64(offset, 0, 26)) { + /* B <addr> */ + insn = deposit32((COND_AL << 28) | INSN_B, 0, 24, offset >> 2); + } else { + insn = INSN_NOP; + } + + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1941,33 +1999,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]); - tcg_out_epilogue(s); - break; - case INDEX_op_goto_tb: - { - /* Indirect jump method */ - intptr_t ptr, dif, dil; - TCGReg base = TCG_REG_PC; - - tcg_debug_assert(s->tb_jmp_insn_offset == 0); - ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); - dif = tcg_pcrel_diff(s, (void *)ptr) - 8; - dil = sextract32(dif, 0, 12); - if (dif != dil) { - /* The TB is close, but outside the 12 bits addressable by - the load. We can extend this to 20 bits with a sub of a - shifted immediate from pc. In the vastly unlikely event - the code requires more than 1MB, we'll use 2 insns and - be no worse off. */ - base = TCG_REG_R0; - tcg_out_movi32(s, COND_AL, base, ptr - dil); - } - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil); - set_jmp_reset_offset(s, args[0]); - } - break; case INDEX_op_goto_ptr: tcg_out_b_reg(s, COND_AL, args[0]); break; @@ -2256,6 +2287,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index b7843d2..91b8954 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -121,7 +121,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_v64 use_neon_instructions @@ -150,10 +149,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 58bd587..c71c3e6 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2347,6 +2347,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_jmp(s, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); + tcg_out_jmp(s, tb_ret_addr); + } +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Jump displacement must be aligned for atomic patching; + * see if we need to add extra nops before jump + */ + int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; + if (gap != 1) { + tcg_out_nopn(s, gap - 1); + } + tcg_out8(s, OPC_JMP_long); /* jmp im */ + set_jmp_insn_offset(s, which); + tcg_out32(s, 0); + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* patch the branch destination */ + uintptr_t addr = tb->jmp_target_addr[n]; + qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); + /* no need to flush icache explicitly */ +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2371,36 +2407,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const_a2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_jmp(s, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); - tcg_out_jmp(s, tb_ret_addr); - } - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* direct jump method */ - int gap; - /* jump displacement must be aligned for atomic patching; - * see if we need to add extra nops before jump - */ - gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; - if (gap != 1) { - tcg_out_nopn(s, gap - 1); - } - tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out32(s, 0); - } else { - /* indirect jump method */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_jmp_target_addr + a0)); - } - set_jmp_reset_offset(s, a0); - break; case INDEX_op_goto_ptr: /* jmp to the given host address (could be epilogue) */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); @@ -2794,6 +2800,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7edb7f1..5797a55 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -141,7 +141,6 @@ extern bool have_movbe; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #if TCG_TARGET_REG_BITS == 64 /* Keep target addresses zero-extended in a register. */ @@ -220,14 +219,6 @@ extern bool have_movbe; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - /* patch the branch destination */ - qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); - /* no need to flush icache explicitly */ -} - /* This defines the natural memory order supported by this * architecture before guarantees made by various barrier * instructions. diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c9e99e8..3174557 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1039,11 +1039,12 @@ static void tcg_out_nop(TCGContext *s) tcg_out32(s, NOP); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) { tcg_insn_unit i1, i2; ptrdiff_t upper, lower; + uintptr_t addr = tb->jmp_target_addr[n]; ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2; if (offset == sextreg(offset, 0, 26)) { @@ -1068,6 +1069,36 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, tcg_code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, tb_ret_addr, true); + } +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Ensure that patch area is 8-byte aligned so that an + * atomic write can be used to patch the target address. + */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out_nop(s); + } + set_jmp_insn_offset(s, which); + /* + * actual branch destination will be patched by + * tb_target_set_jmp_target later + */ + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1078,35 +1109,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_call_int(s, tcg_code_gen_epilogue, true); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); - tcg_out_call_int(s, tb_ret_addr, true); - } - break; - - case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset != NULL); - /* - * Ensure that patch area is 8-byte aligned so that an - * atomic write can be used to patch the target address. - */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out_nop(s); - } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); - tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -1500,6 +1502,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index e5f7a1f..1c3e48d 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -128,7 +128,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_brcond2 0 #define TCG_TARGET_HAS_setcond2 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -171,8 +170,6 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 292e490..6e000d8 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1951,6 +1951,37 @@ static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6, } } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + get_jmp_target_addr(s, which)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + tcg_out_nop(s); + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1970,32 +2001,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - { - TCGReg b0 = TCG_REG_ZERO; - - a0 = (intptr_t)a0; - if (a0 & ~0xffff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); - b0 = TCG_REG_V0; - } - if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, - (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); - } - break; - case INDEX_op_goto_tb: - /* indirect jump method */ - tcg_debug_assert(s->tb_jmp_insn_offset == 0); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - tcg_out_nop(s); - set_jmp_reset_offset(s, a0); - break; case INDEX_op_goto_ptr: /* jmp to the given host address (could be epilogue) */ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); @@ -2403,6 +2408,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 15721c3..7bc8e15 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -134,7 +134,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_direct_jump 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 @@ -205,10 +204,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t) - QEMU_ERROR("code path is reachable"); - #define TCG_TARGET_NEED_LDST_LABELS #endif diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e062146..8d6899c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1854,103 +1854,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, insn); } -static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2) -{ - if (HOST_BIG_ENDIAN) { - return (uint64_t)i1 << 32 | i2; - } - return (uint64_t)i2 << 32 | i1; -} - -static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1) -{ -#if TCG_TARGET_REG_BITS == 64 - qatomic_set((uint64_t *)rw, make_pair(i0, i1)); - flush_idcache_range(rx, rw, 8); -#else - qemu_build_not_reached(); -#endif -} - -static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1, - tcg_insn_unit i2, tcg_insn_unit i3) -{ - uint64_t p[2]; - - p[!HOST_BIG_ENDIAN] = make_pair(i0, i1); - p[HOST_BIG_ENDIAN] = make_pair(i2, i3); - - /* - * There's no convenient way to get the compiler to allocate a pair - * of registers at an even index, so copy into r6/r7 and clobber. - */ - asm("mr %%r6, %1\n\t" - "mr %%r7, %2\n\t" - "stq %%r6, %0" - : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7"); - flush_idcache_range(rx, rw, 16); -} - -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - tcg_insn_unit i0, i1, i2, i3; - intptr_t tb_diff = addr - tc_ptr; - intptr_t br_diff = addr - (jmp_rx + 4); - intptr_t lo, hi; - - if (TCG_TARGET_REG_BITS == 32) { - intptr_t diff = addr - jmp_rx; - tcg_debug_assert(in_range_b(diff)); - qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc)); - flush_idcache_range(jmp_rx, jmp_rw, 4); - return; - } - - /* - * For 16-bit displacements, we can use a single add + branch. - * This happens quite often. - */ - if (tb_diff == (int16_t)tb_diff) { - i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff); - i1 = B | (br_diff & 0x3fffffc); - ppc64_replace2(jmp_rx, jmp_rw, i0, i1); - return; - } - - lo = (int16_t)tb_diff; - hi = (int32_t)(tb_diff - lo); - assert(tb_diff == hi + lo); - i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16); - i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo); - - /* - * Without stq from 2.07, we can only update two insns, - * and those must be the ones that load the target address. - */ - if (!have_isa_2_07) { - ppc64_replace2(jmp_rx, jmp_rw, i0, i1); - return; - } - - /* - * For 26-bit displacements, we can use a direct branch. - * Otherwise we still need the indirect branch, which we - * must restore after a potential direct branch write. - */ - br_diff -= 4; - if (in_range_b(br_diff)) { - i2 = B | (br_diff & 0x3fffffc); - i3 = NOP; - } else { - i2 = MTSPR | RS(TCG_REG_TB) | CTR; - i3 = BCCTR | BO_ALWAYS; - } - ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3); -} - static void tcg_out_call_int(TCGContext *s, int lk, const tcg_insn_unit *target) { @@ -2616,6 +2519,64 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); + tcg_out_b(s, 0, tcg_code_gen_epilogue); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + uintptr_t ptr = get_jmp_target_addr(s, which); + + if (USE_REG_TB) { + ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); + tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); + + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); + + /* When branch is out of range, fall through to indirect. */ + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* For the unlinked case, need to reset TCG_REG_TB. */ + set_jmp_reset_offset(s, which); + tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, + -tcg_current_code_size(s)); + } else { + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, NOP); + + /* When branch is out of range, fall through to indirect. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr); + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + set_jmp_reset_offset(s, which); + } +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t diff = addr - jmp_rx; + tcg_insn_unit insn; + + if (in_range_b(diff)) { + insn = B | (diff & 0x3fffffc); + } else if (USE_REG_TB) { + insn = MTSPR | RS(TCG_REG_TB) | CTR; + } else { + insn = NOP; + } + + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2623,42 +2584,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, tcg_code_gen_epilogue); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* Direct jump. */ - if (TCG_TARGET_REG_BITS == 64) { - /* Ensure the next insns are 8 or 16-byte aligned. */ - while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { - tcg_out32(s, NOP); - } - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - } else { - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, B); - s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); - break; - } - } else { - /* Indirect jump. */ - tcg_debug_assert(s->tb_jmp_insn_offset == NULL); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0, - (intptr_t)(s->tb_jmp_insn_offset + args[0])); - } - tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS); - set_jmp_reset_offset(s, args[0]); - if (USE_REG_TB) { - /* For the unlinked case, need to reset TCG_REG_TB. */ - tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, - -tcg_current_code_size(s)); - } - break; case INDEX_op_goto_ptr: tcg_out32(s, MTSPR | RS(args[0]) | CTR); if (USE_REG_TB) { @@ -3185,6 +3110,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b5cd225..af81c5a 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -27,11 +27,10 @@ #ifdef _ARCH_PPC64 # define TCG_TARGET_REG_BITS 64 -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) #else # define TCG_TARGET_REG_BITS 32 -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) #endif +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #define TCG_TARGET_NB_REGS 64 #define TCG_TARGET_INSN_UNIT_SIZE 4 @@ -108,7 +107,6 @@ extern bool have_vsx; #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -180,11 +178,8 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f741e05..fc0edd8 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -267,6 +267,7 @@ typedef enum { #endif OPC_FENCE = 0x0000000f, + OPC_NOP = OPC_ADDI, /* nop = addi r0,r0,0 */ } RISCVInsn; /* @@ -403,7 +404,7 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; for (i = 0; i < count; ++i) { - p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0); + p[i] = OPC_NOP; } } @@ -1289,6 +1290,47 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, tcg_code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, tb_ret_addr, true); + } +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, OPC_JAL); + + /* When branch is out of range, fall through to indirect. */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + get_jmp_target_addr(s, which)); + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t addr = tb->jmp_target_addr[n]; + ptrdiff_t offset = addr - jmp_rx; + tcg_insn_unit insn; + + /* Either directly branch, or fall through to indirect branch. */ + if (offset == sextreg(offset, 0, 20)) { + insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset); + } else { + insn = OPC_NOP; + } + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1299,25 +1341,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_call_int(s, tcg_code_gen_epilogue, true); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); - tcg_out_call_int(s, tb_ret_addr, true); - } - break; - - case INDEX_op_goto_tb: - assert(s->tb_jmp_insn_offset == 0); - /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); - tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); break; @@ -1617,6 +1640,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 232537c..1337bc1 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -121,7 +121,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -165,9 +164,6 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i64 1 #endif -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2b38fd9..218318f 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1944,6 +1944,45 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #endif } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + } +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Branch displacement must be aligned for atomic patching; + * see if we need to add extra nop before branch + */ + if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { + tcg_out16(s, NOP); + } + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + set_jmp_insn_offset(s, which); + s->code_ptr += 2; + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + if (!HAVE_FACILITY(GEN_INST_EXT)) { + return; + } + /* patch the branch destination */ + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t disp = addr - (jmp_rx - 2); + qatomic_set((int32_t *)jmp_rw, disp / 2); + /* no need to flush icache explicitly */ +} + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) @@ -1956,32 +1995,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - a0 = args[0]; - if (a0 == 0) { - tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); - } - break; - - case INDEX_op_goto_tb: - a0 = args[0]; - /* - * branch displacement must be aligned for atomic patching; - * see if we need to add extra nop before branch - */ - if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { - tcg_out16(s, NOP); - } - tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - s->code_ptr += 2; - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: a0 = args[0]; tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); @@ -2619,6 +2632,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 68dcbc6..e597e47 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -105,7 +105,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 @@ -174,16 +173,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_MEMORY_BSWAP 1 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) - -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - /* patch the branch destination */ - intptr_t disp = addr - (jmp_rx - 2); - qatomic_set((int32_t *)jmp_rw, disp / 2); - /* no need to flush icache explicitly */ -} - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index eb913f3..dd406bc 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -92,7 +92,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif #define TCG_REG_TB TCG_REG_I1 -#define USE_REG_TB (sizeof(void *) > 4) static const int tcg_target_reg_alloc_order[] = { TCG_REG_L0, @@ -439,7 +438,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* A 13-bit constant relative to the TB. */ - if (!in_prologue && USE_REG_TB) { + if (!in_prologue) { test = tcg_tbrel_diff(s, (void *)arg); if (check_fit_ptr(test, 13)) { tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); @@ -468,7 +467,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* Use the constant pool, if possible. */ - if (!in_prologue && USE_REG_TB) { + if (!in_prologue) { new_pool_label(s, arg, R_SPARC_13, s->code_ptr, tcg_tbrel_diff(s, NULL)); tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); @@ -537,17 +536,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) -{ - intptr_t diff = tcg_tbrel_diff(s, arg); - if (USE_REG_TB && check_fit_ptr(diff, 13)) { - tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); - return; - } - tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); -} - static void tcg_out_sety(TCGContext *s, TCGReg rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); @@ -1026,10 +1014,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #endif /* We choose TCG_REG_TB such that no move is required. */ - if (USE_REG_TB) { - QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); - } + QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); /* delay slot */ @@ -1428,6 +1414,78 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, #endif /* CONFIG_SOFTMMU */ } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + return; + } else { + intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); + if (check_fit_ptr(tb_diff, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* Note that TCG_REG_TB has been unwound to O1. */ + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); + return; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + ptrdiff_t off = tcg_tbrel_diff(s, (void *)get_jmp_target_addr(s, which)); + + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, CALL); + /* delay slot */ + tcg_debug_assert(check_fit_ptr(off, 13)); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, TCG_REG_TB, off); + set_jmp_reset_offset(s, which); + + /* + * For the unlinked path of goto_tb, we need to reset TCG_REG_TB + * to the beginning of this TB. + */ + off = -tcg_current_code_size(s); + if (check_fit_i32(off, 13)) { + tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, off, ARITH_ADD); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, off); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); + } +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t br_disp = (intptr_t)(addr - jmp_rx) >> 2; + tcg_insn_unit insn; + + br_disp >>= 2; + if (check_fit_ptr(br_disp, 19)) { + /* ba,pt %icc, addr */ + insn = deposit32(INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) + | BPCC_ICC | BPCC_PT, 0, 19, br_disp); + } else if (check_fit_ptr(br_disp, 22)) { + /* ba addr */ + insn = deposit32(INSN_OP(0) | INSN_OP2(2) | INSN_COND(COND_A), + 0, 22, br_disp); + } else { + /* The code_gen_buffer can't be larger than 2GB. */ + tcg_debug_assert(check_fit_ptr(br_disp, 30)); + /* call addr */ + insn = deposit32(CALL, 0, 30, br_disp); + } + + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1442,70 +1500,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - if (check_fit_ptr(a0, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_movi_imm13(s, TCG_REG_O0, a0); - break; - } else if (USE_REG_TB) { - intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); - if (check_fit_ptr(tb_diff, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - /* Note that TCG_REG_TB has been unwound to O1. */ - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); - break; - } - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* direct jump method */ - if (USE_REG_TB) { - /* make sure the patch is 8-byte aligned. */ - if ((intptr_t)s->code_ptr & 4) { - tcg_out_nop(s); - } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out_sethi(s, TCG_REG_T1, 0); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); - tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } else { - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out32(s, CALL); - tcg_out_nop(s); - } - } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); - tcg_out_nop(s); - } - set_jmp_reset_offset(s, a0); - - /* For the unlinked path of goto_tb, we need to reset - TCG_REG_TB to the beginning of this TB. */ - if (USE_REG_TB) { - c = -tcg_current_code_size(s); - if (check_fit_i32(c, 13)) { - tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, - TCG_REG_T1, ARITH_ADD); - } - } - break; case INDEX_op_goto_ptr: tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); - if (USE_REG_TB) { - tcg_out_mov_delay(s, TCG_REG_TB, a0); - } else { - tcg_out_nop(s); - } + tcg_out_mov_delay(s, TCG_REG_TB, a0); break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); @@ -1716,6 +1713,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } @@ -1895,45 +1894,3 @@ void tcg_register_jit(const void *buf, size_t buf_size) { tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } - -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - intptr_t tb_disp = addr - tc_ptr; - intptr_t br_disp = addr - jmp_rx; - tcg_insn_unit i1, i2; - - /* We can reach the entire address space for ILP32. - For LP64, the code_gen_buffer can't be larger than 2GB. */ - tcg_debug_assert(tb_disp == (int32_t)tb_disp); - tcg_debug_assert(br_disp == (int32_t)br_disp); - - if (!USE_REG_TB) { - qatomic_set((uint32_t *)jmp_rw, - deposit32(CALL, 0, 30, br_disp >> 2)); - flush_idcache_range(jmp_rx, jmp_rw, 4); - return; - } - - /* This does not exercise the range of the branch, but we do - still need to be able to load the new value of TCG_REG_TB. - But this does still happen quite often. */ - if (check_fit_ptr(tb_disp, 13)) { - /* ba,pt %icc, addr */ - i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) - | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp)); - i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB) - | INSN_IMM13(tb_disp)); - } else if (tb_disp >= 0) { - i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10); - i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) - | INSN_IMM13(tb_disp & 0x3ff)); - } else { - i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10); - i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) - | INSN_IMM13((tb_disp & 0x3ff) | -0x400)); - } - - qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1)); - flush_idcache_range(jmp_rx, jmp_rw, 8); -} diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 0044ac8..1d6a5c8 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -111,7 +111,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 @@ -154,9 +153,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 - -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_POOL_LABELS #endif diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index cd1cd4e..9fa9f1b 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -86,7 +86,7 @@ void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, void tcg_gen_mb(TCGBar mb_type) { - if (tcg_ctx->tb_cflags & CF_PARALLEL) { + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { tcg_gen_op1(INDEX_op_mb, mb_type); } } @@ -2782,7 +2782,7 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) void tcg_gen_goto_tb(unsigned idx) { /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */ - tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB)); + tcg_debug_assert(!(tcg_ctx->gen_tb->cflags & CF_NO_GOTO_TB)); /* We only support two chained exits. */ tcg_debug_assert(idx <= TB_EXIT_IDXMAX); #ifdef CONFIG_DEBUG_TCG @@ -2798,7 +2798,7 @@ void tcg_gen_lookup_and_goto_ptr(void) { TCGv_ptr ptr; - if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) { + if (tcg_ctx->gen_tb->cflags & CF_NO_GOTO_PTR) { tcg_gen_exit_tb(NULL, 0); return; } @@ -3165,7 +3165,7 @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, { memop = tcg_canonicalize_memop(memop, 0, 0); - if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); @@ -3203,7 +3203,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, { memop = tcg_canonicalize_memop(memop, 1, 0); - if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -3364,7 +3364,7 @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \ void tcg_gen_atomic_##NAME##_i32 \ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \ { \ - if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ } else { \ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \ @@ -3374,7 +3374,7 @@ void tcg_gen_atomic_##NAME##_i32 \ void tcg_gen_atomic_##NAME##_i64 \ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \ { \ - if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ } else { \ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \ @@ -104,6 +104,8 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); +static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -309,7 +311,25 @@ static void set_jmp_reset_offset(TCGContext *s, int which) * We will check for overflow at the end of the opcode loop in * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. */ - s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); + s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); +} + +static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) +{ + /* + * We will check for overflow at the end of the opcode loop in + * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. + */ + s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); +} + +static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) +{ + /* + * Return the read-execute version of the pointer, for the benefit + * of any pc-relative addressing mode. + */ + return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); } /* Signal overflow, starting over with fewer guest insns. */ @@ -4645,16 +4665,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) #endif /* Initialize goto_tb jump offsets. */ - tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; - tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID; - tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; - if (TCG_TARGET_HAS_direct_jump) { - tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; - tcg_ctx->tb_jmp_target_addr = NULL; - } else { - tcg_ctx->tb_jmp_insn_offset = NULL; - tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; - } + tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; + tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; + tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; + tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; tcg_reg_alloc_start(s); @@ -4718,6 +4732,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) case INDEX_op_call: tcg_reg_alloc_call(s, op); break; + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, op->args[0]); + break; + case INDEX_op_goto_tb: + tcg_out_goto_tb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d36a7eb..bc45200 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -590,6 +590,24 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, # define CASE_64(x) #endif +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* indirect jump method. */ + tcg_out_op_p(s, INDEX_op_goto_tb, (void *)get_jmp_target_addr(s, which)); + set_jmp_reset_offset(s, which); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -597,17 +615,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGOpcode exts; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_op_p(s, opc, (void *)args[0]); - break; - - case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset == 0); - /* indirect jump method. */ - tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]); - set_jmp_reset_offset(s, args[0]); - break; - case INDEX_op_goto_ptr: tcg_out_op_r(s, opc, args[0]); break; @@ -779,6 +786,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 94ec541..1414ab4 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -82,7 +82,6 @@ #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 @@ -176,7 +175,4 @@ typedef enum { #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - #endif /* TCG_TARGET_H */ |