diff options
| | | |
|---|---|---|
| author | Peter Maydell <peter.maydell@linaro.org> | 2020-10-30 19:47:54 +0000 |
| committer | Peter Maydell <peter.maydell@linaro.org> | 2020-10-30 19:47:54 +0000 |
| commit | c3dee4de9252ec6f675603e460f9a668f1b5ec1b (patch) | |
| tree | ad971590dfbb61efbc69b9abf4e034a6773363c0 | |
| parent | 9a2ea4f4a7230fe224dee91d9adf2ef872c3d226 (diff) | |
| parent | 1d705e8a5bbfe36294081baa45ab68a9ad987f33 (diff) | |
| download | qemu-c3dee4de9252ec6f675603e460f9a668f1b5ec1b.zip qemu-c3dee4de9252ec6f675603e460f9a668f1b5ec1b.tar.gz qemu-c3dee4de9252ec6f675603e460f9a668f1b5ec1b.tar.bz2 | |
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20201027' into staging
Optimize across branches.
Add logging for cpu_io_recompile.
# gpg: Signature made Tue 27 Oct 2020 16:48:50 GMT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth-gitlab/tags/pull-tcg-20201027:
accel/tcg: Add CPU_LOG_EXEC tracing for cpu_io_recompile()
tcg/optimize: Flush data at labels not TCG_OPF_BB_END
tcg: Do not kill globals at conditional branches
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | accel/tcg/translate-all.c | 4 | ||||
-rw-r--r-- | include/tcg/tcg-opc.h | 7 | ||||
-rw-r--r-- | include/tcg/tcg.h | 4 | ||||
-rw-r--r-- | tcg/optimize.c | 35 | ||||
-rw-r--r-- | tcg/tcg.c | 55 |
5 files changed, 82 insertions, 23 deletions
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index d760972..4572b49 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2267,6 +2267,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) tb_destroy(tb); } + qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, + "cpu_io_recompile: rewound execution of TB to " + TARGET_FMT_lx "\n", tb->pc); + /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not * the first in the TB) then we end up generating a whole new TB and * repeating the fault, which is horribly inefficient. diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index e3929b8..67092e8 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -81,7 +81,7 @@ DEF(extract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_extract_i32)) DEF(sextract_i32, 1, 1, 2, IMPL(TCG_TARGET_HAS_sextract_i32)) DEF(extract2_i32, 1, 2, 1, IMPL(TCG_TARGET_HAS_extract2_i32)) -DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) @@ -89,7 +89,8 @@ DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) -DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(brcond2_i32, 0, 4, 2, + TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) @@ -159,7 +160,7 @@ DEF(extrh_i64_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? 
TCG_OPF_NOT_PRESENT : 0)) -DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | IMPL64) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 8804a8c..8ff9dad 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -990,7 +990,7 @@ typedef struct TCGArgConstraint { #define TCG_MAX_OP_ARGS 16 -/* Bits for TCGOpDef->flags, 8 bits available. */ +/* Bits for TCGOpDef->flags, 8 bits available, all used. */ enum { /* Instruction exits the translation block. */ TCG_OPF_BB_EXIT = 0x01, @@ -1008,6 +1008,8 @@ enum { TCG_OPF_NOT_PRESENT = 0x20, /* Instruction operands are vectors. */ TCG_OPF_VECTOR = 0x40, + /* Instruction is a conditional branch. */ + TCG_OPF_COND_BRANCH = 0x80 }; typedef struct TCGOpDef { diff --git a/tcg/optimize.c b/tcg/optimize.c index 220f460..9952c28 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1484,29 +1484,30 @@ void tcg_optimize(TCGContext *s) } } } - goto do_reset_output; + /* fall through */ default: do_default: - /* Default case: we know nothing about operation (or were unable - to compute the operation result) so no propagation is done. - We trash everything if the operation is the end of a basic - block, otherwise we only trash the output args. "mask" is - the non-zero bits mask for the first output arg. */ - if (def->flags & TCG_OPF_BB_END) { - bitmap_zero(temps_used.l, nb_temps); - } else { - do_reset_output: - for (i = 0; i < nb_oargs; i++) { - reset_temp(op->args[i]); - /* Save the corresponding known-zero bits mask for the - first output argument (only one supported so far). */ - if (i == 0) { - arg_info(op->args[i])->mask = mask; - } + /* + * Default case: we know nothing about operation (or were unable + * to compute the operation result) so no propagation is done. 
+ */ + for (i = 0; i < nb_oargs; i++) { + reset_temp(op->args[i]); + /* + * Save the corresponding known-zero bits mask for the + * first output argument (only one supported so far). + */ + if (i == 0) { + arg_info(op->args[i])->mask = mask; } } break; + + case INDEX_op_set_label: + /* Trash everything at the start of a new extended bb. */ + bitmap_zero(temps_used.l, nb_temps); + break; } /* Eliminate duplicate and redundant fence instructions. */ @@ -2519,6 +2519,28 @@ static void la_global_sync(TCGContext *s, int ng) } } +/* + * liveness analysis: conditional branch: all temps are dead, + * globals and local temps should be synced. + */ +static void la_bb_sync(TCGContext *s, int ng, int nt) +{ + la_global_sync(s, ng); + + for (int i = ng; i < nt; ++i) { + if (s->temps[i].temp_local) { + int state = s->temps[i].state; + s->temps[i].state = state | TS_MEM; + if (state != TS_DEAD) { + continue; + } + } else { + s->temps[i].state = TS_DEAD; + } + la_reset_pref(&s->temps[i]); + } +} + /* liveness analysis: sync globals back to memory and kill. */ static void la_global_kill(TCGContext *s, int ng) { @@ -2795,6 +2817,8 @@ static void liveness_pass_1(TCGContext *s) /* If end of basic block, update. */ if (def->flags & TCG_OPF_BB_EXIT) { la_func_end(s, nb_globals, nb_temps); + } else if (def->flags & TCG_OPF_COND_BRANCH) { + la_bb_sync(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_BB_END) { la_bb_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { @@ -2907,7 +2931,10 @@ static bool liveness_pass_2(TCGContext *s) nb_oargs = def->nb_oargs; /* Set flags similar to how calls require. 
*/ - if (def->flags & TCG_OPF_BB_END) { + if (def->flags & TCG_OPF_COND_BRANCH) { + /* Like reading globals: sync_globals */ + call_flags = TCG_CALL_NO_WRITE_GLOBALS; + } else if (def->flags & TCG_OPF_BB_END) { /* Like writing globals: save_globals */ call_flags = 0; } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { @@ -3380,6 +3407,28 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) } /* + * At a conditional branch, we assume all temporaries are dead and + * all globals and local temps are synced to their location. + */ +static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) +{ + sync_globals(s, allocated_regs); + + for (int i = s->nb_globals; i < s->nb_temps; i++) { + TCGTemp *ts = &s->temps[i]; + /* + * The liveness analysis already ensures that temps are dead. + * Keep tcg_debug_asserts for safety. + */ + if (ts->temp_local) { + tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent); + } else { + tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD); + } + } +} + +/* * Specialized code generation for INDEX_op_movi_*. */ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, @@ -3730,7 +3779,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } } - if (def->flags & TCG_OPF_BB_END) { + if (def->flags & TCG_OPF_COND_BRANCH) { + tcg_reg_alloc_cbranch(s, i_allocated_regs); + } else if (def->flags & TCG_OPF_BB_END) { tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { |