aboutsummaryrefslogtreecommitdiff
path: root/target/xtensa/cpu.h
diff options
context:
space:
mode:
authorMax Filippov <jcmvbkbc@gmail.com>2018-10-03 15:59:11 -0700
committerMax Filippov <jcmvbkbc@gmail.com>2019-01-11 20:52:24 -0800
commit5d630cef4f91fd842df3b0b98ae8d3ac6eb2ce4a (patch)
tree862bf51c3daaa92444b0939d89abda4791406de4 /target/xtensa/cpu.h
parent32a1a94dd324d33578dca1dc96d7896a0244d768 (diff)
downloadqemu-5d630cef4f91fd842df3b0b98ae8d3ac6eb2ce4a.zip
qemu-5d630cef4f91fd842df3b0b98ae8d3ac6eb2ce4a.tar.gz
qemu-5d630cef4f91fd842df3b0b98ae8d3ac6eb2ce4a.tar.bz2
target/xtensa: rework zero overhead loops implementation
Don't invalidate TB with the end of zero overhead loop when LBEG or LEND change. Instead encode the distance from the start of the page where the TB starts to the LEND in the TB cs_base and generate loopback code when the next PC matches encoded LEND. Distance to a destination within the same page and up to a maximum instruction length into the next page is encoded literally, otherwise it's zero. The distance from LEND to LBEG is also encoded in the cs_base: it's encoded literally when less than 256 or as 0 otherwise. This allows for TB chaining for the loopback branch at the end of a loop for the most common loop sizes. With this change the resulting emulation speed is about 10% higher in softmmu mode on uClibc-ng and LTP tests. Emulation speed in linux user mode is a few percent lower because there's no direct TB chaining between different memory pages. Testing with lower limit on direct TB chaining range shows gradual slowdown to ~15% for the block size of 64 bytes and ~50% for the block size of 32 bytes. Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/xtensa/cpu.h')
-rw-r--r--target/xtensa/cpu.h32
1 files changed, 32 insertions, 0 deletions
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 34e5ccd..bf6f9a0 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -400,6 +400,7 @@ struct XtensaConfig {
int excm_level;
int ndepc;
unsigned inst_fetch_width;
+ unsigned max_insn_size;
uint32_t vecbase;
uint32_t exception_vector[EXC_MAX];
unsigned ninterrupt;
@@ -695,6 +696,11 @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch)
#define XTENSA_TBFLAG_CALLINC_MASK 0x180000
#define XTENSA_TBFLAG_CALLINC_SHIFT 19
+#define XTENSA_CSBASE_LEND_MASK 0x0000ffff
+#define XTENSA_CSBASE_LEND_SHIFT 0
+#define XTENSA_CSBASE_LBEG_OFF_MASK 0x00ff0000
+#define XTENSA_CSBASE_LBEG_OFF_SHIFT 16
+
static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *flags)
{
@@ -706,6 +712,32 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
*flags |= xtensa_get_ring(env);
if (env->sregs[PS] & PS_EXCM) {
*flags |= XTENSA_TBFLAG_EXCM;
+ } else if (xtensa_option_enabled(env->config, XTENSA_OPTION_LOOP)) {
+ target_ulong lend_dist =
+ env->sregs[LEND] - (env->pc & -(1u << TARGET_PAGE_BITS));
+
+ /*
+ * 0 in the csbase_lend field means that there may not be a loopback
+ * for any instruction that starts inside this page. Any other value
+ * means that an instruction that ends at this offset from the page
+ * start may loop back and will need loopback code to be generated.
+ *
+ * lend_dist is 0 when LEND points to the start of the page, but
+ * no instruction that starts inside this page may end at offset 0,
+ * so it's still correct.
+ *
+ * When an instruction ends at a page boundary it may only start in
+ * the previous page. lend_dist will be encoded as TARGET_PAGE_SIZE
+ * for the TB that contains this instruction.
+ */
+ if (lend_dist < (1u << TARGET_PAGE_BITS) + env->config->max_insn_size) {
+ target_ulong lbeg_off = env->sregs[LEND] - env->sregs[LBEG];
+
+ *cs_base = lend_dist;
+ if (lbeg_off < 256) {
+ *cs_base |= lbeg_off << XTENSA_CSBASE_LBEG_OFF_SHIFT;
+ }
+ }
}
if (xtensa_option_enabled(env->config, XTENSA_OPTION_EXTENDED_L32R) &&
(env->sregs[LITBASE] & 1)) {