diff options
39 files changed, 1444 insertions, 1193 deletions
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 0ff7f91..afa8a9d 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -74,7 +74,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; DATA_TYPE ret; uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, ATOMIC_MMU_IDX); @@ -95,7 +95,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R; uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, ATOMIC_MMU_IDX); @@ -110,7 +110,7 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W; uint16_t info = trace_mem_build_info(SHIFT, false, 0, true, ATOMIC_MMU_IDX); @@ -125,7 +125,7 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; DATA_TYPE ret; uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, ATOMIC_MMU_IDX); @@ -142,7 +142,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE val EXTRA_ARGS) \ { \ ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ DATA_TYPE ret; \ uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ ATOMIC_MMU_IDX); \ @@ -176,7 +176,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE xval EXTRA_ARGS) \ { \ ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ XDATA_TYPE cmp, old, new, val = xval; \ uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ ATOMIC_MMU_IDX); \ @@ -221,7 +221,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; DATA_TYPE ret; uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, ATOMIC_MMU_IDX); @@ -242,7 +242,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP_R; uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, ATOMIC_MMU_IDX); @@ -257,7 +257,7 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_W; uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, true, ATOMIC_MMU_IDX); @@ -274,7 +274,7 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; ABI_TYPE ret; uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, ATOMIC_MMU_IDX); @@ -291,7 +291,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE val EXTRA_ARGS) \ { \ ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ DATA_TYPE ret; \ uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ false, ATOMIC_MMU_IDX); \ @@ -323,7 +323,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ABI_TYPE xval EXTRA_ARGS) \ { \ ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP_RW; \ XDATA_TYPE ldo, ldn, old, new, val = xval; \ uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ false, ATOMIC_MMU_IDX); \ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index f24348e..b6d5fc6 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1742,18 +1742,22 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, #endif -/* Probe for a read-modify-write atomic operation. Do not allow unaligned - * operations, or io operations to proceed. Return the host address. */ +/* + * Probe for an atomic operation. Do not allow unaligned operations, + * or io operations to proceed. Return the host address. + * + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. + */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + TCGMemOpIdx oi, int size, int prot, + uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(tlbe); MemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); - int s_bits = mop & MO_SIZE; + uintptr_t index; + CPUTLBEntry *tlbe; + target_ulong tlb_addr; void *hostaddr; /* Adjust the given return address. */ @@ -1767,7 +1771,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } /* Enforce qemu required alignment. */ - if (unlikely(addr & ((1 << s_bits) - 1))) { + if (unlikely(addr & (size - 1))) { /* We get here if guest alignment was not requested, or was not enforced by cpu_unaligned_access above. We might widen the access and emulate, but for now @@ -1775,15 +1779,45 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + /* Check TLB entry and enforce page permissions. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - tlbe = tlb_entry(env, mmu_idx, addr); + if (prot & PAGE_WRITE) { + tlb_addr = tlb_addr_write(tlbe); + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_STORE, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; + } + + /* Let the guest notice RMW on a write-only page. */ + if ((prot & PAGE_READ) && + unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + /* + * Since we don't support reads and writes to different addresses, + * and we do have the proper page loaded for write, this shouldn't + * ever return. But just in case, handle via stop-the-world. + */ + goto stop_the_world; + } + } else /* if (prot & PAGE_READ) */ { + tlb_addr = tlbe->addr_read; + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK; } - tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1793,20 +1827,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } - /* Let the guest notice RMW on a write-only page. */ - if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, - mmu_idx, retaddr); - /* Since we don't support reads and writes to different addresses, - and we do have the proper page loaded for write, this shouldn't - ever return. But just in case, handle via stop-the-world. */ - goto stop_the_world; - } - hostaddr = (void *)((uintptr_t)addr + tlbe->addend); if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - notdirty_write(env_cpu(env), addr, 1 << s_bits, + notdirty_write(env_cpu(env), addr, size, &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } @@ -2669,7 +2693,12 @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) #define ATOMIC_MMU_DECLS -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) +#define ATOMIC_MMU_LOOKUP_RW \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, retaddr) +#define ATOMIC_MMU_LOOKUP_R \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, retaddr) +#define ATOMIC_MMU_LOOKUP_W \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, retaddr) #define ATOMIC_MMU_CLEANUP #define ATOMIC_MMU_IDX get_mmuidx(oi) @@ -2698,10 +2727,18 @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) #undef EXTRA_ARGS #undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP +#undef ATOMIC_MMU_LOOKUP_RW +#undef ATOMIC_MMU_LOOKUP_R +#undef ATOMIC_MMU_LOOKUP_W + #define EXTRA_ARGS , TCGMemOpIdx oi #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) +#define ATOMIC_MMU_LOOKUP_RW \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ | PAGE_WRITE, GETPC()) +#define ATOMIC_MMU_LOOKUP_R \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_READ, GETPC()) +#define ATOMIC_MMU_LOOKUP_W \ + atomic_mmu_lookup(env, addr, oi, DATA_SIZE, PAGE_WRITE, GETPC()) #define DATA_SIZE 1 #include "atomic_template.h" diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index 7627225..88e25c6 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -160,9 +160,8 @@ static void gen_empty_mem_helper(void) tcg_temp_free_ptr(ptr); } -static inline -void gen_plugin_cb_start(enum plugin_gen_from from, - enum plugin_gen_cb type, unsigned wr) +static void gen_plugin_cb_start(enum plugin_gen_from from, + enum plugin_gen_cb type, unsigned wr) { TCGOp *op; @@ -179,7 +178,7 @@ static void gen_wrapped(enum plugin_gen_from from, tcg_gen_plugin_cb_end(); } -static inline void plugin_gen_empty_callback(enum plugin_gen_from from) +static void plugin_gen_empty_callback(enum plugin_gen_from from) { switch (from) { case PLUGIN_GEN_AFTER_INSN: @@ -385,7 +384,7 @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) } static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, - void *func, unsigned tcg_flags, int *cb_idx) + void *func, int *cb_idx) { /* copy all ops until the call */ do { @@ -412,7 +411,7 @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); } op->args[*cb_idx] = (uintptr_t)func; - op->args[*cb_idx + 1] = tcg_flags; + op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; return op; } @@ -439,7 +438,7 @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); return op; } @@ -490,7 +489,7 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, if (type == PLUGIN_GEN_CB_MEM) { /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); } return op; @@ -513,9 +512,8 @@ static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) return !!(cb->rw & (w + 1)); } -static inline -void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject, - op_ok_fn ok) +static void inject_cb_type(const GArray *cbs, TCGOp *begin_op, + inject_fn inject, op_ok_fn ok) { TCGOp *end_op; TCGOp *op; diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h index 1916ee7..9829abe 100644 --- a/accel/tcg/plugin-helpers.h +++ b/accel/tcg/plugin-helpers.h @@ -1,5 +1,4 @@ #ifdef CONFIG_PLUGIN -/* Note: no TCG flags because those are overwritten later */ -DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) -DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) +DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr) +DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG, void, i32, i32, i64, ptr) #endif diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index fb2d43e..e67b161 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -1220,7 +1220,9 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, /* Macro to call the above, with local variables from the use context. */ #define ATOMIC_MMU_DECLS do {} while (0) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) +#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) +#define ATOMIC_MMU_LOOKUP_R ATOMIC_MMU_LOOKUP_RW +#define ATOMIC_MMU_LOOKUP_W ATOMIC_MMU_LOOKUP_RW #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) #define ATOMIC_MMU_IDX MMU_USER_IDX @@ -1250,12 +1252,12 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #undef EXTRA_ARGS #undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP +#undef ATOMIC_MMU_LOOKUP_RW #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) +#define ATOMIC_MMU_LOOKUP_RW atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) #define DATA_SIZE 16 #include "atomic_template.h" @@ -5803,6 +5803,9 @@ fi if test "$optreset" = "yes" ; then echo "HAVE_OPTRESET=y" >> $config_host_mak fi +if test "$tcg" = "enabled" -a "$tcg_interpreter" = "true" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak +fi if test "$fdatasync" = "yes" ; then echo "CONFIG_FDATASYNC=y" >> $config_host_mak fi diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index 3094c79..b974eb3 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -85,32 +85,14 @@ #define dh_retvar_ptr tcgv_ptr_temp(retval) #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) -#define dh_is_64bit_void 0 -#define dh_is_64bit_noreturn 0 -#define dh_is_64bit_i32 0 -#define dh_is_64bit_i64 1 -#define dh_is_64bit_ptr (sizeof(void *) == 8) -#define dh_is_64bit_cptr dh_is_64bit_ptr -#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) - -#define dh_is_signed_void 0 -#define dh_is_signed_noreturn 0 -#define dh_is_signed_i32 0 -#define dh_is_signed_s32 1 -#define dh_is_signed_i64 0 -#define dh_is_signed_s64 1 -#define dh_is_signed_f16 0 -#define dh_is_signed_f32 0 -#define dh_is_signed_f64 0 -#define dh_is_signed_tl 0 -#define dh_is_signed_int 1 -/* ??? This is highly specific to the host cpu. There are even special - extension instructions that may be required, e.g. ia64's addp4. But - for now we don't support any 64-bit targets with 32-bit pointers. */ -#define dh_is_signed_ptr 0 -#define dh_is_signed_cptr dh_is_signed_ptr -#define dh_is_signed_env dh_is_signed_ptr -#define dh_is_signed(t) dh_is_signed_##t +#define dh_typecode_void 0 +#define dh_typecode_noreturn 0 +#define dh_typecode_i32 2 +#define dh_typecode_s32 3 +#define dh_typecode_i64 4 +#define dh_typecode_s64 5 +#define dh_typecode_ptr 6 +#define dh_typecode(t) glue(dh_typecode_, dh_alias(t)) #define dh_callflag_i32 0 #define dh_callflag_s32 0 @@ -126,8 +108,7 @@ #define dh_callflag_noreturn TCG_CALL_NO_RETURN #define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) -#define dh_sizemask(t, n) \ - ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) +#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) #define dh_arg(t, n) \ glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h index 6888514..16cd318 100644 --- a/include/exec/helper-tcg.h +++ b/include/exec/helper-tcg.h @@ -13,50 +13,50 @@ #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) }, + .typemask = dh_typemask(ret, 0) }, #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) }, #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) }, #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) }, #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) }, #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) }, #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) }, #define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7) }, #include "helper.h" #include "trace/generated-helpers.h" diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h index c5a79a8..0fefbc6 100644 --- a/include/qemu/plugin.h +++ b/include/qemu/plugin.h @@ -79,7 +79,6 @@ enum plugin_dyn_cb_subtype { struct qemu_plugin_dyn_cb { union qemu_plugin_cb_sig f; void *userp; - unsigned tcg_flags; enum plugin_dyn_cb_subtype type; /* @rw applies to mem callbacks only (both regular and inline) */ enum qemu_plugin_mem_rw rw; diff --git a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h new file mode 100644 index 0000000..2a38a38 --- /dev/null +++ b/include/tcg/tcg-cond.h @@ -0,0 +1,101 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_COND_H +#define TCG_COND_H + +/* + * Conditions. Note that these are laid out for easy manipulation by + * the functions below: + * bit 0 is used for inverting; + * bit 1 is signed, + * bit 2 is unsigned, + * bit 3 is used with bit 0 for swapping signed/unsigned. + */ +typedef enum { + /* non-signed */ + TCG_COND_NEVER = 0 | 0 | 0 | 0, + TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + TCG_COND_EQ = 8 | 0 | 0 | 0, + TCG_COND_NE = 8 | 0 | 0 | 1, + /* signed */ + TCG_COND_LT = 0 | 0 | 2 | 0, + TCG_COND_GE = 0 | 0 | 2 | 1, + TCG_COND_LE = 8 | 0 | 2 | 0, + TCG_COND_GT = 8 | 0 | 2 | 1, + /* unsigned */ + TCG_COND_LTU = 0 | 4 | 0 | 0, + TCG_COND_GEU = 0 | 4 | 0 | 1, + TCG_COND_LEU = 8 | 4 | 0 | 0, + TCG_COND_GTU = 8 | 4 | 0 | 1, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + return c & 6 ? (TCGCond)(c ^ 9) : c; +} + +/* Create an "unsigned" version of a "signed" comparison. */ +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return c & 2 ? (TCGCond)(c ^ 6) : c; +} + +/* Create a "signed" version of an "unsigned" comparison. */ +static inline TCGCond tcg_signed_cond(TCGCond c) +{ + return c & 4 ? (TCGCond)(c ^ 6) : c; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) +{ + return (c & 4) != 0; +} + +/* + * Create a "high" version of a double-word comparison. + * This removes equality from a LTE or GTE comparison. + */ +static inline TCGCond tcg_high_cond(TCGCond c) +{ + switch (c) { + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GEU: + case TCG_COND_LEU: + return (TCGCond)(c ^ 8); + default: + return c; + } +} + +#endif /* TCG_COND_H */ diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index bbb0884..5bbec85 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -277,8 +277,8 @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) #ifdef TCG_TARGET_INTERPRETER /* These opcodes are only for use between the tci generator and interpreter. */ -DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) #endif #undef TLADDR_ARGS diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 064dab3..41a6c4b 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -33,6 +33,7 @@ #include "tcg/tcg-mo.h" #include "tcg-target.h" #include "qemu/int128.h" +#include "tcg/tcg-cond.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -52,6 +53,7 @@ #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) #define CPU_TEMP_BUF_NLONGS 128 +#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) /* Default target word size to pointer size. */ #ifndef TCG_TARGET_REG_BITS @@ -406,75 +408,6 @@ typedef TCGv_ptr TCGv_env; /* Used to align parameters. See the comment before tcgv_i32_temp. */ #define TCG_CALL_DUMMY_ARG ((TCGArg)0) -/* Conditions. Note that these are laid out for easy manipulation by - the functions below: - bit 0 is used for inverting; - bit 1 is signed, - bit 2 is unsigned, - bit 3 is used with bit 0 for swapping signed/unsigned. */ -typedef enum { - /* non-signed */ - TCG_COND_NEVER = 0 | 0 | 0 | 0, - TCG_COND_ALWAYS = 0 | 0 | 0 | 1, - TCG_COND_EQ = 8 | 0 | 0 | 0, - TCG_COND_NE = 8 | 0 | 0 | 1, - /* signed */ - TCG_COND_LT = 0 | 0 | 2 | 0, - TCG_COND_GE = 0 | 0 | 2 | 1, - TCG_COND_LE = 8 | 0 | 2 | 0, - TCG_COND_GT = 8 | 0 | 2 | 1, - /* unsigned */ - TCG_COND_LTU = 0 | 4 | 0 | 0, - TCG_COND_GEU = 0 | 4 | 0 | 1, - TCG_COND_LEU = 8 | 4 | 0 | 0, - TCG_COND_GTU = 8 | 4 | 0 | 1, -} TCGCond; - -/* Invert the sense of the comparison. */ -static inline TCGCond tcg_invert_cond(TCGCond c) -{ - return (TCGCond)(c ^ 1); -} - -/* Swap the operands in a comparison. */ -static inline TCGCond tcg_swap_cond(TCGCond c) -{ - return c & 6 ? (TCGCond)(c ^ 9) : c; -} - -/* Create an "unsigned" version of a "signed" comparison. */ -static inline TCGCond tcg_unsigned_cond(TCGCond c) -{ - return c & 2 ? (TCGCond)(c ^ 6) : c; -} - -/* Create a "signed" version of an "unsigned" comparison. */ -static inline TCGCond tcg_signed_cond(TCGCond c) -{ - return c & 4 ? (TCGCond)(c ^ 6) : c; -} - -/* Must a comparison be considered unsigned? */ -static inline bool is_unsigned_cond(TCGCond c) -{ - return (c & 4) != 0; -} - -/* Create a "high" version of a double-word comparison. - This removes equality from a LTE or GTE comparison. */ -static inline TCGCond tcg_high_cond(TCGCond c) -{ - switch (c) { - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GEU: - case TCG_COND_LEU: - return (TCGCond)(c ^ 8); - default: - return c; - } -} - typedef enum TCGTempVal { TEMP_VAL_DEAD, TEMP_VAL_REG, diff --git a/plugins/core.c b/plugins/core.c index 55d188a..e1bcdb5 100644 --- a/plugins/core.c +++ b/plugins/core.c @@ -295,33 +295,15 @@ void plugin_register_inline_op(GArray **arr, dyn_cb->inline_insn.imm = imm; } -static inline uint32_t cb_to_tcg_flags(enum qemu_plugin_cb_flags flags) -{ - uint32_t ret; - - switch (flags) { - case QEMU_PLUGIN_CB_RW_REGS: - ret = 0; - break; - case QEMU_PLUGIN_CB_R_REGS: - ret = TCG_CALL_NO_WG; - break; - case QEMU_PLUGIN_CB_NO_REGS: - default: - ret = TCG_CALL_NO_RWG; - } - return ret; -} - -inline void -plugin_register_dyn_cb__udata(GArray **arr, - qemu_plugin_vcpu_udata_cb_t cb, - enum qemu_plugin_cb_flags flags, void *udata) +void plugin_register_dyn_cb__udata(GArray **arr, + qemu_plugin_vcpu_udata_cb_t cb, + enum qemu_plugin_cb_flags flags, + void *udata) { struct qemu_plugin_dyn_cb *dyn_cb = plugin_get_dyn_cb(arr); dyn_cb->userp = udata; - dyn_cb->tcg_flags = cb_to_tcg_flags(flags); + /* Note flags are discarded as unused. */ dyn_cb->f.vcpu_udata = cb; dyn_cb->type = PLUGIN_CB_REGULAR; } @@ -336,7 +318,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr, dyn_cb = plugin_get_dyn_cb(arr); dyn_cb->userp = udata; - dyn_cb->tcg_flags = cb_to_tcg_flags(flags); + /* Note flags are discarded as unused. */ dyn_cb->type = PLUGIN_CB_REGULAR; dyn_cb->rw = rw; dyn_cb->f.generic = cb; diff --git a/target/hppa/helper.h b/target/hppa/helper.h index 2d483aa..0a629ff 100644 --- a/target/hppa/helper.h +++ b/target/hppa/helper.h @@ -1,12 +1,9 @@ #if TARGET_REGISTER_BITS == 64 # define dh_alias_tr i64 -# define dh_is_64bit_tr 1 #else # define dh_alias_tr i32 -# define dh_is_64bit_tr 0 #endif #define dh_ctype_tr target_ureg -#define dh_is_signed_tr 0 DEF_HELPER_2(excp, noreturn, env, int) DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tr) diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index 6c0c849..e68af5c 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -30,9 +30,6 @@ #define dh_ctype_Reg Reg * #define dh_ctype_ZMMReg ZMMReg * #define dh_ctype_MMXReg MMXReg * -#define dh_is_signed_Reg dh_is_signed_ptr -#define dh_is_signed_ZMMReg dh_is_signed_ptr -#define dh_is_signed_MMXReg dh_is_signed_ptr DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg) DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg) diff --git a/target/m68k/helper.h b/target/m68k/helper.h index 7780849..9842eea 100644 --- a/target/m68k/helper.h +++ b/target/m68k/helper.h @@ -17,7 +17,6 @@ DEF_HELPER_4(cas2l_parallel, void, env, i32, i32, i32) #define dh_alias_fp ptr #define dh_ctype_fp FPReg * -#define dh_is_signed_fp dh_is_signed_ptr DEF_HELPER_3(exts32, void, env, fp, s32) DEF_HELPER_3(extf32, void, env, fp, f32) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index c517b9f..4076aa2 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -109,11 +109,9 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * -#define dh_is_signed_avr dh_is_signed_ptr #define dh_alias_vsr ptr #define dh_ctype_vsr ppc_vsr_t * -#define dh_is_signed_vsr dh_is_signed_ptr DEF_HELPER_3(vavgub, void, avr, avr, avr) DEF_HELPER_3(vavguh, void, avr, avr, avr) @@ -697,7 +695,6 @@ DEF_HELPER_3(store_601_batu, void, env, i32, tl) #define dh_alias_fprp ptr #define dh_ctype_fprp ppc_fprp_t * -#define dh_is_signed_fprp dh_is_signed_ptr DEF_HELPER_4(dadd, void, env, fprp, fprp, fprp) DEF_HELPER_4(daddq, void, env, fprp, fprp, fprp) diff --git a/tcg/meson.build b/tcg/meson.build index 5be3915..c4c63b1 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -9,6 +9,12 @@ tcg_ss.add(files( 'tcg-op-gvec.c', 'tcg-op-vec.c', )) -tcg_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c')) + +if get_option('tcg_interpreter') + libffi = dependency('libffi', version: '>=3.0', required: true, + method: 'pkg-config', kwargs: static_kwargs) + specific_ss.add(libffi) + specific_ss.add(files('tci.c')) +endif specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss) diff --git a/tcg/optimize.c b/tcg/optimize.c index 37c9022..211a420 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "tcg/tcg-op.h" +#include "tcg-internal.h" #define CASE_OP_32_64(x) \ glue(glue(case INDEX_op_, x), _i32): \ @@ -1481,7 +1482,7 @@ void tcg_optimize(TCGContext *s) break; case INDEX_op_call: - if (!(op->args[nb_oargs + nb_iargs + 1] + if (!(tcg_call_flags(op) & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { if (test_bit(i, temps_used.l)) { diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index ce39ac2..a6ec94a 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -984,14 +984,18 @@ static void tcg_target_qemu_prologue(TCGContext *s) { int tmp_buf_size, frame_size; - /* The TCG temp buffer is at the top of the frame, immediately - below the frame pointer. */ + /* + * The TCG temp buffer is at the top of the frame, immediately + * below the frame pointer. Use the logical (aligned) offset here; + * the stack bias is applied in temp_allocate_frame(). + */ tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); - tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size, - tmp_buf_size); + tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size); - /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is - otherwise the minimal frame usable by callees. */ + /* + * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is + * otherwise the minimal frame usable by callees. + */ frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; frame_size += TCG_TARGET_STACK_ALIGN - 1; diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 181f865..92c91dc 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -27,6 +27,13 @@ #define TCG_HIGHWATER 1024 +typedef struct TCGHelperInfo { + void *func; + const char *name; + unsigned flags; + unsigned typemask; +} TCGHelperInfo; + extern TCGContext tcg_init_ctx; extern TCGContext **tcg_ctxs; extern unsigned int tcg_cur_ctxs; @@ -37,4 +44,19 @@ bool tcg_region_alloc(TCGContext *s); void tcg_region_initial_alloc(TCGContext *s); void tcg_region_prologue_set(TCGContext *s); +static inline void *tcg_call_func(TCGOp *op) +{ + return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op)]; +} + +static inline const TCGHelperInfo *tcg_call_info(TCGOp *op) +{ + return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; +} + +static inline unsigned tcg_call_flags(TCGOp *op) +{ + return tcg_call_info(op)->flags; +} + #endif /* TCG_INTERNAL_H */ @@ -60,6 +60,10 @@ #include "exec/log.h" #include "tcg-internal.h" +#ifdef CONFIG_TCG_INTERPRETER +#include <ffi.h> +#endif + /* Forward declarations for functions declared in tcg-target.c.inc and used here. */ static void tcg_target_init(TCGContext *s); @@ -143,7 +147,12 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, TCGReg base, intptr_t ofs); +#ifdef CONFIG_TCG_INTERPRETER +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, + ffi_cif *cif); +#else static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); +#endif static bool tcg_target_const_match(int64_t val, TCGType type, int ct); #ifdef TCG_TARGET_NEED_LDST_LABELS static int tcg_out_ldst_finalize(TCGContext *s); @@ -532,13 +541,6 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } -typedef struct TCGHelperInfo { - void *func; - const char *name; - unsigned flags; - unsigned sizemask; -} TCGHelperInfo; - #include "exec/helper-proto.h" static const TCGHelperInfo all_helpers[] = { @@ -546,6 +548,19 @@ static const TCGHelperInfo all_helpers[] = { }; static GHashTable *helper_table; +#ifdef CONFIG_TCG_INTERPRETER +static GHashTable *ffi_table; + +static ffi_type * const typecode_to_ffi[8] = { + [dh_typecode_void] = &ffi_type_void, + [dh_typecode_i32] = &ffi_type_uint32, + [dh_typecode_s32] = &ffi_type_sint32, + [dh_typecode_i64] = &ffi_type_uint64, + [dh_typecode_s64] = &ffi_type_sint64, + [dh_typecode_ptr] = &ffi_type_pointer, +}; +#endif + static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; static void process_op_defs(TCGContext *s); static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, @@ -589,6 +604,47 @@ static void tcg_context_init(unsigned max_cpus) (gpointer)&all_helpers[i]); } +#ifdef CONFIG_TCG_INTERPRETER + /* g_direct_hash/equal for direct comparisons on uint32_t. */ + ffi_table = g_hash_table_new(NULL, NULL); + for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { + struct { + ffi_cif cif; + ffi_type *args[]; + } *ca; + uint32_t typemask = all_helpers[i].typemask; + gpointer hash = (gpointer)(uintptr_t)typemask; + ffi_status status; + int nargs; + + if (g_hash_table_lookup(ffi_table, hash)) { + continue; + } + + /* Ignoring the return type, find the last non-zero field. */ + nargs = 32 - clz32(typemask >> 3); + nargs = DIV_ROUND_UP(nargs, 3); + + ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); + ca->cif.rtype = typecode_to_ffi[typemask & 7]; + ca->cif.nargs = nargs; + + if (nargs != 0) { + ca->cif.arg_types = ca->args; + for (i = 0; i < nargs; ++i) { + int typecode = extract32(typemask, (i + 1) * 3, 3); + ca->args[i] = typecode_to_ffi[typecode]; + } + } + + status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, + ca->cif.rtype, ca->cif.arg_types); + assert(status == FFI_OK); + + g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); + } +#endif + tcg_target_init(s); process_op_defs(s); @@ -729,10 +785,16 @@ void tcg_prologue_init(TCGContext *s) } #endif - /* Assert that goto_ptr is implemented completely. */ +#ifndef CONFIG_TCG_INTERPRETER + /* + * Assert that goto_ptr is implemented completely, setting an epilogue. + * For tci, we use NULL as the signal to return from the interpreter, + * so skip this check. + */ if (TCG_TARGET_HAS_goto_ptr) { tcg_debug_assert(tcg_code_gen_epilogue != NULL); } +#endif } void tcg_func_start(TCGContext *s) @@ -1395,13 +1457,12 @@ bool tcg_op_supported(TCGOpcode op) void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) { int i, real_args, nb_rets, pi; - unsigned sizemask, flags; - TCGHelperInfo *info; + unsigned typemask; + const TCGHelperInfo *info; TCGOp *op; info = g_hash_table_lookup(helper_table, (gpointer)func); - flags = info->flags; - sizemask = info->sizemask; + typemask = info->typemask; #ifdef CONFIG_PLUGIN /* detect non-plugin helpers */ @@ -1414,36 +1475,41 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) && !defined(CONFIG_TCG_INTERPRETER) /* We have 64-bit values in one register, but need to pass as two separate parameters. Split them. */ - int orig_sizemask = sizemask; + int orig_typemask = typemask; int orig_nargs = nargs; TCGv_i64 retl, reth; TCGTemp *split_args[MAX_OPC_PARAM]; retl = NULL; reth = NULL; - if (sizemask != 0) { - for (i = real_args = 0; i < nargs; ++i) { - int is_64bit = sizemask & (1 << (i+1)*2); - if (is_64bit) { - TCGv_i64 orig = temp_tcgv_i64(args[i]); - TCGv_i32 h = tcg_temp_new_i32(); - TCGv_i32 l = tcg_temp_new_i32(); - tcg_gen_extr_i64_i32(l, h, orig); - split_args[real_args++] = tcgv_i32_temp(h); - split_args[real_args++] = tcgv_i32_temp(l); - } else { - split_args[real_args++] = args[i]; - } + typemask = 0; + for (i = real_args = 0; i < nargs; ++i) { + int argtype = extract32(orig_typemask, (i + 1) * 3, 3); + bool is_64bit = (argtype & ~1) == dh_typecode_i64; + + if (is_64bit) { + TCGv_i64 orig = temp_tcgv_i64(args[i]); + TCGv_i32 h = tcg_temp_new_i32(); + TCGv_i32 l = tcg_temp_new_i32(); + tcg_gen_extr_i64_i32(l, h, orig); + split_args[real_args++] = tcgv_i32_temp(h); + typemask |= dh_typecode_i32 << (real_args * 3); + split_args[real_args++] = tcgv_i32_temp(l); + typemask |= dh_typecode_i32 << (real_args * 3); + } else { + split_args[real_args++] = args[i]; + typemask |= argtype << (real_args * 3); } - nargs = real_args; - args = split_args; - sizemask = 0; } + nargs = real_args; + args = split_args; #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { - int is_64bit = sizemask & (1 << (i+1)*2); - int is_signed = sizemask & (2 << (i+1)*2); - if (!is_64bit) { + int argtype = extract32(typemask, (i + 1) * 3, 3); + bool is_32bit = (argtype & ~1) == dh_typecode_i32; + bool is_signed = argtype & 1; + + if (is_32bit) { TCGv_i64 temp = tcg_temp_new_i64(); TCGv_i64 orig = temp_tcgv_i64(args[i]); if (is_signed) { @@ -1462,7 +1528,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) if (ret != NULL) { #if defined(__sparc__) && !defined(__arch64__) \ && !defined(CONFIG_TCG_INTERPRETER) - if (orig_sizemask & 1) { + if ((typemask & 6) == dh_typecode_i64) { /* The 32-bit ABI is going to return the 64-bit value in the %o0/%o1 register pair. Prepare for this by using two return temporaries, and reassemble below. */ @@ -1476,7 +1542,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) nb_rets = 1; } #else - if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { + if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { #ifdef HOST_WORDS_BIGENDIAN op->args[pi++] = temp_arg(ret + 1); op->args[pi++] = temp_arg(ret); @@ -1497,25 +1563,39 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) real_args = 0; for (i = 0; i < nargs; i++) { - int is_64bit = sizemask & (1 << (i+1)*2); - if (TCG_TARGET_REG_BITS < 64 && is_64bit) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - /* some targets want aligned 64 bit args */ - if (real_args & 1) { - op->args[pi++] = TCG_CALL_DUMMY_ARG; - real_args++; - } + int argtype = extract32(typemask, (i + 1) * 3, 3); + bool is_64bit = (argtype & ~1) == dh_typecode_i64; + bool want_align = false; + +#if defined(CONFIG_TCG_INTERPRETER) + /* + * Align all arguments, so that they land in predictable places + * for passing off to ffi_call. + */ + want_align = true; +#elif defined(TCG_TARGET_CALL_ALIGN_ARGS) + /* Some targets want aligned 64 bit args */ + want_align = is_64bit; #endif - /* If stack grows up, then we will be placing successive - arguments at lower addresses, which means we need to - reverse the order compared to how we would normally - treat either big or little-endian. For those arguments - that will wind up in registers, this still works for - HPPA (the only current STACK_GROWSUP target) since the - argument registers are *also* allocated in decreasing - order. If another such target is added, this logic may - have to get more complicated to differentiate between - stack arguments and register arguments. */ + + if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { + op->args[pi++] = TCG_CALL_DUMMY_ARG; + real_args++; + } + + if (TCG_TARGET_REG_BITS < 64 && is_64bit) { + /* + * If stack grows up, then we will be placing successive + * arguments at lower addresses, which means we need to + * reverse the order compared to how we would normally + * treat either big or little-endian. For those arguments + * that will wind up in registers, this still works for + * HPPA (the only current STACK_GROWSUP target) since the + * argument registers are *also* allocated in decreasing + * order. If another such target is added, this logic may + * have to get more complicated to differentiate between + * stack arguments and register arguments. + */ #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) op->args[pi++] = temp_arg(args[i] + 1); op->args[pi++] = temp_arg(args[i]); @@ -1531,7 +1611,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) real_args++; } op->args[pi++] = (uintptr_t)func; - op->args[pi++] = flags; + op->args[pi++] = (uintptr_t)info; TCGOP_CALLI(op) = real_args; /* Make sure the fields didn't overflow. */ @@ -1542,7 +1622,9 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) && !defined(CONFIG_TCG_INTERPRETER) /* Free all of the parts we allocated above. */ for (i = real_args = 0; i < orig_nargs; ++i) { - int is_64bit = orig_sizemask & (1 << (i+1)*2); + int argtype = extract32(orig_typemask, (i + 1) * 3, 3); + bool is_64bit = (argtype & ~1) == dh_typecode_i64; + if (is_64bit) { tcg_temp_free_internal(args[real_args++]); tcg_temp_free_internal(args[real_args++]); @@ -1550,7 +1632,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) real_args++; } } - if (orig_sizemask & 1) { + if ((orig_typemask & 6) == dh_typecode_i64) { /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. Note that describing these as TCGv_i64 eliminates an unnecessary zero-extension that tcg_gen_concat_i32_i64 would create. */ @@ -1560,8 +1642,10 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) } #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { - int is_64bit = sizemask & (1 << (i+1)*2); - if (!is_64bit) { + int argtype = extract32(typemask, (i + 1) * 3, 3); + bool is_32bit = (argtype & ~1) == dh_typecode_i32; + + if (is_32bit) { tcg_temp_free_internal(args[i]); } } @@ -1646,19 +1730,6 @@ static char *tcg_get_arg_str(TCGContext *s, char *buf, return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg)); } -/* Find helper name. */ -static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val) -{ - const char *ret = NULL; - if (helper_table) { - TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val); - if (info) { - ret = info->name; - } - } - return ret; -} - static const char * const cond_name[] = { [TCG_COND_NEVER] = "never", @@ -1749,15 +1820,28 @@ static void tcg_dump_ops(TCGContext *s, bool have_prefs) col += qemu_log(" " TARGET_FMT_lx, a); } } else if (c == INDEX_op_call) { + const TCGHelperInfo *info = tcg_call_info(op); + void *func = tcg_call_func(op); + /* variable number of arguments */ nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); nb_cargs = def->nb_cargs; - /* function name, flags, out args */ - col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, - tcg_find_helper(s, op->args[nb_oargs + nb_iargs]), - op->args[nb_oargs + nb_iargs + 1], nb_oargs); + col += qemu_log(" %s ", def->name); + + /* + * Print the function name from TCGHelperInfo, if available. + * Note that plugins have a template function for the info, + * but the actual function pointer comes from the plugin. + */ + if (func == info->func) { + col += qemu_log("%s", info->name); + } else { + col += qemu_log("plugin(%p)", func); + } + + col += qemu_log("$0x%x,$%d", info->flags, nb_oargs); for (i = 0; i < nb_oargs; i++) { col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf), op->args[i])); @@ -2144,7 +2228,6 @@ static void reachable_code_pass(TCGContext *s) QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { bool remove = dead; TCGLabel *label; - int call_flags; switch (op->opc) { case INDEX_op_set_label: @@ -2189,8 +2272,7 @@ static void reachable_code_pass(TCGContext *s) case INDEX_op_call: /* Notice noreturn helper calls, raising exceptions. */ - call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1]; - if (call_flags & TCG_CALL_NO_RETURN) { + if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) { dead = true; } break; @@ -2391,7 +2473,7 @@ static void liveness_pass_1(TCGContext *s) nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); - call_flags = op->args[nb_oargs + nb_iargs + 1]; + call_flags = tcg_call_flags(op); /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { @@ -2706,7 +2788,7 @@ static bool liveness_pass_2(TCGContext *s) if (opc == INDEX_op_call) { nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); - call_flags = op->args[nb_oargs + nb_iargs + 1]; + call_flags = tcg_call_flags(op); } else { nb_iargs = def->nb_iargs; nb_oargs = def->nb_oargs; @@ -2933,20 +3015,42 @@ static void check_regs(TCGContext *s) static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) { -#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64) - /* Sparc64 stack is accessed with offset of 2047 */ - s->current_frame_offset = (s->current_frame_offset + - (tcg_target_long)sizeof(tcg_target_long) - 1) & - ~(sizeof(tcg_target_long) - 1); -#endif - if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) > - s->frame_end) { - tcg_abort(); + intptr_t off, size, align; + + switch (ts->type) { + case TCG_TYPE_I32: + size = align = 4; + break; + case TCG_TYPE_I64: + case TCG_TYPE_V64: + size = align = 8; + break; + case TCG_TYPE_V128: + size = align = 16; + break; + case TCG_TYPE_V256: + /* Note that we do not require aligned storage for V256. */ + size = 32, align = 16; + break; + default: + g_assert_not_reached(); } - ts->mem_offset = s->current_frame_offset; + + assert(align <= TCG_TARGET_STACK_ALIGN); + off = ROUND_UP(s->current_frame_offset, align); + + /* If we've exhausted the stack frame, restart with a smaller TB. */ + if (off + size > s->frame_end) { + tcg_raise_tb_overflow(s); + } + s->current_frame_offset = off + size; + + ts->mem_offset = off; +#if defined(__sparc__) + ts->mem_offset += TCG_TARGET_STACK_BIAS; +#endif ts->mem_base = s->frame_temp; ts->mem_allocated = 1; - s->current_frame_offset += sizeof(tcg_target_long); } static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); @@ -3777,6 +3881,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) const int nb_oargs = TCGOP_CALLO(op); const int nb_iargs = TCGOP_CALLI(op); const TCGLifeData arg_life = op->life; + const TCGHelperInfo *info; int flags, nb_regs, i; TCGReg reg; TCGArg arg; @@ -3787,8 +3892,9 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) int allocate_args; TCGRegSet allocated_regs; - func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs]; - flags = op->args[nb_oargs + nb_iargs + 1]; + func_addr = tcg_call_func(op); + info = tcg_call_info(op); + flags = info->flags; nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); if (nb_regs > nb_iargs) { @@ -3880,7 +3986,16 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) save_globals(s, allocated_regs); } +#ifdef CONFIG_TCG_INTERPRETER + { + gpointer hash = (gpointer)(uintptr_t)info->typemask; + ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); + assert(cif != NULL); + tcg_out_call(s, func_addr, cif); + } +#else tcg_out_call(s, func_addr); +#endif /* assign output registers and emit moves if needed */ for(i = 0; i < nb_oargs; i++) { @@ -18,59 +18,31 @@ */ #include "qemu/osdep.h" - -/* Enable TCI assertions only when debugging TCG (and without NDEBUG defined). - * Without assertions, the interpreter runs much faster. */ -#if defined(CONFIG_DEBUG_TCG) -# define tci_assert(cond) assert(cond) -#else -# define tci_assert(cond) ((void)(cond)) -#endif - #include "qemu-common.h" #include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ #include "exec/cpu_ldst.h" #include "tcg/tcg-op.h" #include "qemu/compiler.h" +#include <ffi.h> -#if MAX_OPC_PARAM_IARGS != 6 -# error Fix needed, number of supported input arguments changed! -#endif -#if TCG_TARGET_REG_BITS == 32 -typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong); + +/* + * Enable TCI assertions only when debugging TCG (and without NDEBUG defined). + * Without assertions, the interpreter runs much faster. + */ +#if defined(CONFIG_DEBUG_TCG) +# define tci_assert(cond) assert(cond) #else -typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong, - tcg_target_ulong, tcg_target_ulong); +# define tci_assert(cond) ((void)(cond)) #endif __thread uintptr_t tci_tb_ptr; -static tcg_target_ulong tci_read_reg(const tcg_target_ulong *regs, TCGReg index) -{ - tci_assert(index < TCG_TARGET_NB_REGS); - return regs[index]; -} - -static void -tci_write_reg(tcg_target_ulong *regs, TCGReg index, tcg_target_ulong value) -{ - tci_assert(index < TCG_TARGET_NB_REGS); - tci_assert(index != TCG_AREG0); - tci_assert(index != TCG_REG_CALL_STACK); - regs[index] = value; -} - static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index, uint32_t low_index, uint64_t value) { - tci_write_reg(regs, low_index, value); - tci_write_reg(regs, high_index, value >> 32); + regs[low_index] = (uint32_t)value; + regs[high_index] = value >> 32; } /* Create a 64 bit value from two 32 bit values. */ @@ -79,49 +51,6 @@ static uint64_t tci_uint64(uint32_t high, uint32_t low) return ((uint64_t)high << 32) + low; } -/* Read constant byte from bytecode. */ -static uint8_t tci_read_b(const uint8_t **tb_ptr) -{ - return *(tb_ptr[0]++); -} - -/* Read register number from bytecode. */ -static TCGReg tci_read_r(const uint8_t **tb_ptr) -{ - uint8_t regno = tci_read_b(tb_ptr); - tci_assert(regno < TCG_TARGET_NB_REGS); - return regno; -} - -/* Read constant (native size) from bytecode. */ -static tcg_target_ulong tci_read_i(const uint8_t **tb_ptr) -{ - tcg_target_ulong value = *(const tcg_target_ulong *)(*tb_ptr); - *tb_ptr += sizeof(value); - return value; -} - -/* Read unsigned constant (32 bit) from bytecode. */ -static uint32_t tci_read_i32(const uint8_t **tb_ptr) -{ - uint32_t value = *(const uint32_t *)(*tb_ptr); - *tb_ptr += sizeof(value); - return value; -} - -/* Read signed constant (32 bit) from bytecode. */ -static int32_t tci_read_s32(const uint8_t **tb_ptr) -{ - int32_t value = *(const int32_t *)(*tb_ptr); - *tb_ptr += sizeof(value); - return value; -} - -static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) -{ - return tci_read_i(tb_ptr); -} - /* * Load sets of arguments all at once. The naming convention is: * tci_args_<arguments> @@ -133,223 +62,147 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) * I = immediate (tcg_target_ulong) * l = label or pointer * m = immediate (TCGMemOpIdx) + * n = immediate (call return length) * r = register * s = signed ldst offset */ -static void check_size(const uint8_t *start, const uint8_t **tb_ptr) +static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) { - const uint8_t *old_code_ptr = start - 2; - uint8_t op_size = old_code_ptr[1]; - tci_assert(*tb_ptr == old_code_ptr + op_size); + int diff = sextract32(insn, 12, 20); + *l0 = diff ? (void *)tb_ptr + diff : NULL; } -static void tci_args_l(const uint8_t **tb_ptr, void **l0) +static void tci_args_r(uint32_t insn, TCGReg *r0) { - const uint8_t *start = *tb_ptr; - - *l0 = (void *)tci_read_label(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); } -static void tci_args_rr(const uint8_t **tb_ptr, - TCGReg *r0, TCGReg *r1) +static void tci_args_nl(uint32_t insn, const void *tb_ptr, + uint8_t *n0, void **l1) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - - check_size(start, tb_ptr); + *n0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; } -static void tci_args_ri(const uint8_t **tb_ptr, - TCGReg *r0, tcg_target_ulong *i1) +static void tci_args_rl(uint32_t insn, const void *tb_ptr, + TCGReg *r0, void **l1) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *i1 = tci_read_i32(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; } -#if TCG_TARGET_REG_BITS == 64 -static void tci_args_rI(const uint8_t **tb_ptr, - TCGReg *r0, tcg_target_ulong *i1) +static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *i1 = tci_read_i(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); } -#endif -static void tci_args_rrm(const uint8_t **tb_ptr, - TCGReg *r0, TCGReg *r1, TCGMemOpIdx *m2) +static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *m2 = tci_read_i32(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *i1 = sextract32(insn, 12, 20); } -static void tci_args_rrr(const uint8_t **tb_ptr, - TCGReg *r0, TCGReg *r1, TCGReg *r2) +static void tci_args_rrm(uint32_t insn, TCGReg *r0, + TCGReg *r1, TCGMemOpIdx *m2) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *m2 = extract32(insn, 20, 12); } -static void tci_args_rrs(const uint8_t **tb_ptr, - TCGReg *r0, TCGReg *r1, int32_t *i2) +static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *i2 = tci_read_s32(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); } -static void tci_args_rrcl(const uint8_t **tb_ptr, - TCGReg *r0, TCGReg *r1, TCGCond *c2, void **l3) +static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *c2 = tci_read_b(tb_ptr); - *l3 = (void *)tci_read_label(tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = sextract32(insn, 16, 16); +} - check_size(start, tb_ptr); +static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, + uint8_t *i2, uint8_t *i3) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = extract32(insn, 16, 6); + *i3 = extract32(insn, 22, 6); } -static void tci_args_rrrc(const uint8_t **tb_ptr, +static void tci_args_rrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *c3 = tci_read_b(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *c3 = extract32(insn, 20, 4); } -static void tci_args_rrrm(const uint8_t **tb_ptr, +static void tci_args_rrrm(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGMemOpIdx *m3) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *m3 = tci_read_i32(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *m3 = extract32(insn, 20, 12); } -static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, +static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, uint8_t *i3, uint8_t *i4) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *i3 = tci_read_b(tb_ptr); - *i4 = tci_read_b(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *i3 = extract32(insn, 20, 6); + *i4 = extract32(insn, 26, 6); } -static void tci_args_rrrrm(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, - TCGReg *r2, TCGReg *r3, TCGMemOpIdx *m4) +static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, + TCGReg *r2, TCGReg *r3, TCGReg *r4) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *r3 = tci_read_r(tb_ptr); - *m4 = tci_read_i32(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); + *r4 = extract32(insn, 24, 4); } -#if TCG_TARGET_REG_BITS == 32 -static void tci_args_rrrr(const uint8_t **tb_ptr, +static void tci_args_rrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *r3 = tci_read_r(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); } -static void tci_args_rrrrcl(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, - TCGReg *r2, TCGReg *r3, TCGCond *c4, void **l5) -{ - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *r3 = tci_read_r(tb_ptr); - *c4 = tci_read_b(tb_ptr); - *l5 = (void *)tci_read_label(tb_ptr); - - check_size(start, tb_ptr); -} - -static void tci_args_rrrrrc(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, +static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *r3 = tci_read_r(tb_ptr); - *r4 = tci_read_r(tb_ptr); - *c5 = tci_read_b(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); + *r4 = extract32(insn, 24, 4); + *c5 = extract32(insn, 28, 4); } -static void tci_args_rrrrrr(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, +static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) { - const uint8_t *start = *tb_ptr; - - *r0 = tci_read_r(tb_ptr); - *r1 = tci_read_r(tb_ptr); - *r2 = tci_read_r(tb_ptr); - *r3 = tci_read_r(tb_ptr); - *r4 = tci_read_r(tb_ptr); - *r5 = tci_read_r(tb_ptr); - - check_size(start, tb_ptr); + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); + *r4 = extract32(insn, 24, 4); + *r5 = extract32(insn, 28, 4); } -#endif static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) { @@ -435,34 +288,155 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) return result; } -#define qemu_ld_ub \ - cpu_ldub_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_leuw \ - cpu_lduw_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_leul \ - cpu_ldl_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_leq \ - cpu_ldq_le_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_beuw \ - cpu_lduw_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_beul \ - cpu_ldl_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_ld_beq \ - cpu_ldq_be_mmuidx_ra(env, taddr, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_b(X) \ - cpu_stb_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_lew(X) \ - cpu_stw_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_lel(X) \ - cpu_stl_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_leq(X) \ - cpu_stq_le_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_bew(X) \ - cpu_stw_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_bel(X) \ - cpu_stl_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) -#define qemu_st_beq(X) \ - cpu_stq_be_mmuidx_ra(env, taddr, X, get_mmuidx(oi), (uintptr_t)tb_ptr) +static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr, + TCGMemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + uintptr_t ra = (uintptr_t)tb_ptr; + +#ifdef CONFIG_SOFTMMU + switch (mop) { + case MO_UB: + return helper_ret_ldub_mmu(env, taddr, oi, ra); + case MO_SB: + return helper_ret_ldsb_mmu(env, taddr, oi, ra); + case MO_LEUW: + return helper_le_lduw_mmu(env, taddr, oi, ra); + case MO_LESW: + return helper_le_ldsw_mmu(env, taddr, oi, ra); + case MO_LEUL: + return helper_le_ldul_mmu(env, taddr, oi, ra); + case MO_LESL: + return helper_le_ldsl_mmu(env, taddr, oi, ra); + case MO_LEQ: + return helper_le_ldq_mmu(env, taddr, oi, ra); + case MO_BEUW: + return helper_be_lduw_mmu(env, taddr, oi, ra); + case MO_BESW: + return helper_be_ldsw_mmu(env, taddr, oi, ra); + case MO_BEUL: + return helper_be_ldul_mmu(env, taddr, oi, ra); + case MO_BESL: + return helper_be_ldsl_mmu(env, taddr, oi, ra); + case MO_BEQ: + return helper_be_ldq_mmu(env, taddr, oi, ra); + default: + g_assert_not_reached(); + } +#else + void *haddr = g2h(env_cpu(env), taddr); + uint64_t ret; + + set_helper_retaddr(ra); + switch (mop) { + case MO_UB: + ret = ldub_p(haddr); + break; + case MO_SB: + ret = ldsb_p(haddr); + break; + case MO_LEUW: + ret = lduw_le_p(haddr); + break; + case MO_LESW: + ret = ldsw_le_p(haddr); + break; + case MO_LEUL: + ret = (uint32_t)ldl_le_p(haddr); + break; + case MO_LESL: + ret = (int32_t)ldl_le_p(haddr); + break; + case MO_LEQ: + ret = ldq_le_p(haddr); + break; + case MO_BEUW: + ret = lduw_be_p(haddr); + break; + case MO_BESW: + ret = ldsw_be_p(haddr); + break; + case MO_BEUL: + ret = (uint32_t)ldl_be_p(haddr); + break; + case MO_BESL: + ret = (int32_t)ldl_be_p(haddr); + break; + case MO_BEQ: + ret = ldq_be_p(haddr); + break; + default: + g_assert_not_reached(); + } + clear_helper_retaddr(); + return ret; +#endif +} + +static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val, + TCGMemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE); + uintptr_t ra = (uintptr_t)tb_ptr; + +#ifdef CONFIG_SOFTMMU + switch (mop) { + case MO_UB: + helper_ret_stb_mmu(env, taddr, val, oi, ra); + break; + case MO_LEUW: + helper_le_stw_mmu(env, taddr, val, oi, ra); + break; + case MO_LEUL: + helper_le_stl_mmu(env, taddr, val, oi, ra); + break; + case MO_LEQ: + helper_le_stq_mmu(env, taddr, val, oi, ra); + break; + case MO_BEUW: + helper_be_stw_mmu(env, taddr, val, oi, ra); + break; + case MO_BEUL: + helper_be_stl_mmu(env, taddr, val, oi, ra); + break; + case MO_BEQ: + helper_be_stq_mmu(env, taddr, val, oi, ra); + break; + default: + g_assert_not_reached(); + } +#else + void *haddr = g2h(env_cpu(env), taddr); + + set_helper_retaddr(ra); + switch (mop) { + case MO_UB: + stb_p(haddr, val); + break; + case MO_LEUW: + stw_le_p(haddr, val); + break; + case MO_LEUL: + stl_le_p(haddr, val); + break; + case MO_LEQ: + stq_le_p(haddr, val); + break; + case MO_BEUW: + stw_be_p(haddr, val); + break; + case MO_BEUL: + stl_be_p(haddr, val); + break; + case MO_BEQ: + stq_be_p(haddr, val); + break; + default: + g_assert_not_reached(); + } + clear_helper_retaddr(); +#endif +} #if TCG_TARGET_REG_BITS == 64 # define CASE_32_64(x) \ @@ -485,135 +459,171 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_ptr) { - const uint8_t *tb_ptr = v_tb_ptr; + const uint32_t *tb_ptr = v_tb_ptr; tcg_target_ulong regs[TCG_TARGET_NB_REGS]; - long tcg_temps[CPU_TEMP_BUF_NLONGS]; - uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS); + uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) + / sizeof(uint64_t)]; + void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)]; regs[TCG_AREG0] = (tcg_target_ulong)env; - regs[TCG_REG_CALL_STACK] = sp_value; + regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; + /* Other call_slots entries initialized at first use (see below). */ + call_slots[0] = NULL; tci_assert(tb_ptr); for (;;) { - TCGOpcode opc = tb_ptr[0]; - TCGReg r0, r1, r2, r3; + uint32_t insn; + TCGOpcode opc; + TCGReg r0, r1, r2, r3, r4, r5; tcg_target_ulong t1; TCGCond condition; target_ulong taddr; uint8_t pos, len; uint32_t tmp32; uint64_t tmp64; -#if TCG_TARGET_REG_BITS == 32 - TCGReg r4, r5; uint64_t T1, T2; -#endif TCGMemOpIdx oi; int32_t ofs; void *ptr; - /* Skip opcode and size entry. */ - tb_ptr += 2; + insn = *tb_ptr++; + opc = extract32(insn, 0, 8); switch (opc) { case INDEX_op_call: - tci_args_l(&tb_ptr, &ptr); + /* + * Set up the ffi_avalue array once, delayed until now + * because many TB's do not make any calls. In tcg_gen_callN, + * we arranged for every real argument to be "left-aligned" + * in each 64-bit slot. + */ + if (unlikely(call_slots[0] == NULL)) { + for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) { + call_slots[i] = &stack[i]; + } + } + + tci_args_nl(insn, tb_ptr, &len, &ptr); + + /* Helper functions may need to access the "return address" */ tci_tb_ptr = (uintptr_t)tb_ptr; -#if TCG_TARGET_REG_BITS == 32 - tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0), - tci_read_reg(regs, TCG_REG_R1), - tci_read_reg(regs, TCG_REG_R2), - tci_read_reg(regs, TCG_REG_R3), - tci_read_reg(regs, TCG_REG_R4), - tci_read_reg(regs, TCG_REG_R5), - tci_read_reg(regs, TCG_REG_R6), - tci_read_reg(regs, TCG_REG_R7), - tci_read_reg(regs, TCG_REG_R8), - tci_read_reg(regs, TCG_REG_R9), - tci_read_reg(regs, TCG_REG_R10), - tci_read_reg(regs, TCG_REG_R11)); - tci_write_reg(regs, TCG_REG_R0, tmp64); - tci_write_reg(regs, TCG_REG_R1, tmp64 >> 32); -#else - tmp64 = ((helper_function)ptr)(tci_read_reg(regs, TCG_REG_R0), - tci_read_reg(regs, TCG_REG_R1), - tci_read_reg(regs, TCG_REG_R2), - tci_read_reg(regs, TCG_REG_R3), - tci_read_reg(regs, TCG_REG_R4), - tci_read_reg(regs, TCG_REG_R5)); - tci_write_reg(regs, TCG_REG_R0, tmp64); -#endif + + { + void **pptr = ptr; + ffi_call(pptr[1], pptr[0], stack, call_slots); + } + + /* Any result winds up "left-aligned" in the stack[0] slot. */ + switch (len) { + case 0: /* void */ + break; + case 1: /* uint32_t */ + /* + * Note that libffi has an odd special case in that it will + * always widen an integral result to ffi_arg. + */ + if (sizeof(ffi_arg) == 4) { + regs[TCG_REG_R0] = *(uint32_t *)stack; + break; + } + /* fall through */ + case 2: /* uint64_t */ + if (TCG_TARGET_REG_BITS == 32) { + tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]); + } else { + regs[TCG_REG_R0] = stack[0]; + } + break; + default: + g_assert_not_reached(); + } break; + case INDEX_op_br: - tci_args_l(&tb_ptr, &ptr); + tci_args_l(insn, tb_ptr, &ptr); tb_ptr = ptr; continue; case INDEX_op_setcond_i32: - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition); + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare32(regs[r1], regs[r2], condition); break; + case INDEX_op_movcond_i32: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare32(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: - tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &condition); + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); T1 = tci_uint64(regs[r2], regs[r1]); T2 = tci_uint64(regs[r4], regs[r3]); regs[r0] = tci_compare64(T1, T2, condition); break; #elif TCG_TARGET_REG_BITS == 64 case INDEX_op_setcond_i64: - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &condition); + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare64(regs[r1], regs[r2], condition); break; + case INDEX_op_movcond_i64: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare64(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; #endif CASE_32_64(mov) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = regs[r1]; break; - case INDEX_op_tci_movi_i32: - tci_args_ri(&tb_ptr, &r0, &t1); + case INDEX_op_tci_movi: + tci_args_ri(insn, &r0, &t1); regs[r0] = t1; break; + case INDEX_op_tci_movl: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + regs[r0] = *(tcg_target_ulong *)ptr; + break; /* Load/store operations (32 bit). */ CASE_32_64(ld8u) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint8_t *)ptr; break; CASE_32_64(ld8s) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int8_t *)ptr; break; CASE_32_64(ld16u) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint16_t *)ptr; break; CASE_32_64(ld16s) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int16_t *)ptr; break; case INDEX_op_ld_i32: CASE_64(ld32u) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint32_t *)ptr; break; CASE_32_64(st8) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint8_t *)ptr = regs[r0]; break; CASE_32_64(st16) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint16_t *)ptr = regs[r0]; break; case INDEX_op_st_i32: CASE_64(st32) - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint32_t *)ptr = regs[r0]; break; @@ -621,180 +631,240 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (mixed 32/64 bit). */ CASE_32_64(add) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] + regs[r2]; break; CASE_32_64(sub) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] - regs[r2]; break; CASE_32_64(mul) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] * regs[r2]; break; CASE_32_64(and) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & regs[r2]; break; CASE_32_64(or) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | regs[r2]; break; CASE_32_64(xor) - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; +#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64 + CASE_32_64(andc) + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] & ~regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64 + CASE_32_64(orc) + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] | ~regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64 + CASE_32_64(eqv) + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ~(regs[r1] ^ regs[r2]); + break; +#endif +#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64 + CASE_32_64(nand) + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ~(regs[r1] & regs[r2]); + break; +#endif +#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64 + CASE_32_64(nor) + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ~(regs[r1] | regs[r2]); + break; +#endif /* Arithmetic operations (32 bit). */ case INDEX_op_div_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; break; case INDEX_op_divu_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; break; case INDEX_op_rem_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; break; case INDEX_op_remu_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; break; +#if TCG_TARGET_HAS_clz_i32 + case INDEX_op_clz_i32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_ctz_i32 + case INDEX_op_ctz_i32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_ctpop_i32 + case INDEX_op_ctpop_i32: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop32(regs[r1]); + break; +#endif /* Shift/rotate operations (32 bit). */ case INDEX_op_shl_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31); break; case INDEX_op_shr_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31); break; case INDEX_op_sar_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31); break; #if TCG_TARGET_HAS_rot_i32 case INDEX_op_rotl_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol32(regs[r1], regs[r2] & 31); break; case INDEX_op_rotr_i32: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror32(regs[r1], regs[r2] & 31); break; #endif #if TCG_TARGET_HAS_deposit_i32 case INDEX_op_deposit_i32: - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); break; #endif +#if TCG_TARGET_HAS_extract_i32 + case INDEX_op_extract_i32: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = extract32(regs[r1], pos, len); + break; +#endif +#if TCG_TARGET_HAS_sextract_i32 + case INDEX_op_sextract_i32: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = sextract32(regs[r1], pos, len); + break; +#endif case INDEX_op_brcond_i32: - tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr); - if (tci_compare32(regs[r0], regs[r1], condition)) { + tci_args_rl(insn, tb_ptr, &r0, &ptr); + if ((uint32_t)regs[r0]) { tb_ptr = ptr; } break; -#if TCG_TARGET_REG_BITS == 32 +#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32 case INDEX_op_add2_i32: - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); T1 = tci_uint64(regs[r3], regs[r2]); T2 = tci_uint64(regs[r5], regs[r4]); tci_write_reg64(regs, r1, r0, T1 + T2); break; +#endif +#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32 case INDEX_op_sub2_i32: - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); T1 = tci_uint64(regs[r3], regs[r2]); T2 = tci_uint64(regs[r5], regs[r4]); tci_write_reg64(regs, r1, r0, T1 - T2); break; - case INDEX_op_brcond2_i32: - tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &condition, &ptr); - T1 = tci_uint64(regs[r1], regs[r0]); - T2 = tci_uint64(regs[r3], regs[r2]); - if (tci_compare64(T1, T2, condition)) { - tb_ptr = ptr; - continue; - } - break; +#endif +#if TCG_TARGET_HAS_mulu2_i32 case INDEX_op_mulu2_i32: - tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); - tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); break; -#endif /* TCG_TARGET_REG_BITS == 32 */ +#endif +#if TCG_TARGET_HAS_muls2_i32 + case INDEX_op_muls2_i32: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); + break; +#endif #if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 CASE_32_64(ext8s) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (int8_t)regs[r1]; break; #endif #if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 CASE_32_64(ext16s) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (int16_t)regs[r1]; break; #endif #if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64 CASE_32_64(ext8u) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (uint8_t)regs[r1]; break; #endif #if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64 CASE_32_64(ext16u) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (uint16_t)regs[r1]; break; #endif #if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 CASE_32_64(bswap16) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = bswap16(regs[r1]); break; #endif #if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 CASE_32_64(bswap32) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = bswap32(regs[r1]); break; #endif #if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64 CASE_32_64(not) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = ~regs[r1]; break; #endif #if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64 CASE_32_64(neg) - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = -regs[r1]; break; #endif #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_tci_movi_i64: - tci_args_rI(&tb_ptr, &r0, &t1); - regs[r0] = t1; - break; - /* Load/store operations (64 bit). */ case INDEX_op_ld32s_i64: - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int32_t *)ptr; break; case INDEX_op_ld_i64: - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint64_t *)ptr; break; case INDEX_op_st_i64: - tci_args_rrs(&tb_ptr, &r0, &r1, &ofs); + tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint64_t *)ptr = regs[r0]; break; @@ -802,71 +872,131 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* Arithmetic operations (64 bit). */ case INDEX_op_div_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; break; case INDEX_op_divu_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; break; case INDEX_op_rem_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; break; case INDEX_op_remu_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; break; +#if TCG_TARGET_HAS_clz_i64 + case INDEX_op_clz_i64: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_ctz_i64 + case INDEX_op_ctz_i64: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; + break; +#endif +#if TCG_TARGET_HAS_ctpop_i64 + case INDEX_op_ctpop_i64: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop64(regs[r1]); + break; +#endif +#if TCG_TARGET_HAS_mulu2_i64 + case INDEX_op_mulu2_i64: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); + break; +#endif +#if TCG_TARGET_HAS_muls2_i64 + case INDEX_op_muls2_i64: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); + break; +#endif +#if TCG_TARGET_HAS_add2_i64 + case INDEX_op_add2_i64: + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); + T1 = regs[r2] + regs[r4]; + T2 = regs[r3] + regs[r5] + (T1 < regs[r2]); + regs[r0] = T1; + regs[r1] = T2; + break; +#endif +#if TCG_TARGET_HAS_add2_i64 + case INDEX_op_sub2_i64: + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); + T1 = regs[r2] - regs[r4]; + T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]); + regs[r0] = T1; + regs[r1] = T2; + break; +#endif /* Shift/rotate operations (64 bit). */ case INDEX_op_shl_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] << (regs[r2] & 63); break; case INDEX_op_shr_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] >> (regs[r2] & 63); break; case INDEX_op_sar_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63); break; #if TCG_TARGET_HAS_rot_i64 case INDEX_op_rotl_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol64(regs[r1], regs[r2] & 63); break; case INDEX_op_rotr_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror64(regs[r1], regs[r2] & 63); break; #endif #if TCG_TARGET_HAS_deposit_i64 case INDEX_op_deposit_i64: - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); break; #endif +#if TCG_TARGET_HAS_extract_i64 + case INDEX_op_extract_i64: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = extract64(regs[r1], pos, len); + break; +#endif +#if TCG_TARGET_HAS_sextract_i64 + case INDEX_op_sextract_i64: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = sextract64(regs[r1], pos, len); + break; +#endif case INDEX_op_brcond_i64: - tci_args_rrcl(&tb_ptr, &r0, &r1, &condition, &ptr); - if (tci_compare64(regs[r0], regs[r1], condition)) { + tci_args_rl(insn, tb_ptr, &r0, &ptr); + if (regs[r0]) { tb_ptr = ptr; } break; case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; break; case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = (uint32_t)regs[r1]; break; #if TCG_TARGET_HAS_bswap64_i64 case INDEX_op_bswap64_i64: - tci_args_rr(&tb_ptr, &r0, &r1); + tci_args_rr(insn, &r0, &r1); regs[r0] = bswap64(regs[r1]); break; #endif @@ -875,104 +1005,48 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, /* QEMU specific operations. */ case INDEX_op_exit_tb: - tci_args_l(&tb_ptr, &ptr); + tci_args_l(insn, tb_ptr, &ptr); return (uintptr_t)ptr; case INDEX_op_goto_tb: - tci_args_l(&tb_ptr, &ptr); + tci_args_l(insn, tb_ptr, &ptr); tb_ptr = *(void **)ptr; break; + case INDEX_op_goto_ptr: + tci_args_r(insn, &r0); + ptr = (void *)regs[r0]; + if (!ptr) { + return 0; + } + tb_ptr = ptr; + break; + case INDEX_op_qemu_ld_i32: if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); + tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; } else { - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); taddr = tci_uint64(regs[r2], regs[r1]); } - switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { - case MO_UB: - tmp32 = qemu_ld_ub; - break; - case MO_SB: - tmp32 = (int8_t)qemu_ld_ub; - break; - case MO_LEUW: - tmp32 = qemu_ld_leuw; - break; - case MO_LESW: - tmp32 = (int16_t)qemu_ld_leuw; - break; - case MO_LEUL: - tmp32 = qemu_ld_leul; - break; - case MO_BEUW: - tmp32 = qemu_ld_beuw; - break; - case MO_BESW: - tmp32 = (int16_t)qemu_ld_beuw; - break; - case MO_BEUL: - tmp32 = qemu_ld_beul; - break; - default: - g_assert_not_reached(); - } + tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr); regs[r0] = tmp32; break; case INDEX_op_qemu_ld_i64: if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); + tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); taddr = regs[r2]; } else { - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); taddr = tci_uint64(regs[r3], regs[r2]); + oi = regs[r4]; } - switch (get_memop(oi) & (MO_BSWAP | MO_SSIZE)) { - case MO_UB: - tmp64 = qemu_ld_ub; - break; - case MO_SB: - tmp64 = (int8_t)qemu_ld_ub; - break; - case MO_LEUW: - tmp64 = qemu_ld_leuw; - break; - case MO_LESW: - tmp64 = (int16_t)qemu_ld_leuw; - break; - case MO_LEUL: - tmp64 = qemu_ld_leul; - break; - case MO_LESL: - tmp64 = (int32_t)qemu_ld_leul; - break; - case MO_LEQ: - tmp64 = qemu_ld_leq; - break; - case MO_BEUW: - tmp64 = qemu_ld_beuw; - break; - case MO_BESW: - tmp64 = (int16_t)qemu_ld_beuw; - break; - case MO_BEUL: - tmp64 = qemu_ld_beul; - break; - case MO_BESL: - tmp64 = (int32_t)qemu_ld_beul; - break; - case MO_BEQ: - tmp64 = qemu_ld_beq; - break; - default: - g_assert_not_reached(); - } + tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); if (TCG_TARGET_REG_BITS == 32) { tci_write_reg64(regs, r1, r0, tmp64); } else { @@ -982,74 +1056,33 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, case INDEX_op_qemu_st_i32: if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); + tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; } else { - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); taddr = tci_uint64(regs[r2], regs[r1]); } tmp32 = regs[r0]; - switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { - case MO_UB: - qemu_st_b(tmp32); - break; - case MO_LEUW: - qemu_st_lew(tmp32); - break; - case MO_LEUL: - qemu_st_lel(tmp32); - break; - case MO_BEUW: - qemu_st_bew(tmp32); - break; - case MO_BEUL: - qemu_st_bel(tmp32); - break; - default: - g_assert_not_reached(); - } + tci_qemu_st(env, taddr, tmp32, oi, tb_ptr); break; case INDEX_op_qemu_st_i64: if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); + tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; tmp64 = regs[r0]; } else { if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); taddr = regs[r2]; } else { - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); taddr = tci_uint64(regs[r3], regs[r2]); + oi = regs[r4]; } tmp64 = tci_uint64(regs[r1], regs[r0]); } - switch (get_memop(oi) & (MO_BSWAP | MO_SIZE)) { - case MO_UB: - qemu_st_b(tmp64); - break; - case MO_LEUW: - qemu_st_lew(tmp64); - break; - case MO_LEUL: - qemu_st_lel(tmp64); - break; - case MO_LEQ: - qemu_st_leq(tmp64); - break; - case MO_BEUW: - qemu_st_bew(tmp64); - break; - case MO_BEUL: - qemu_st_bel(tmp64); - break; - case MO_BEQ: - qemu_st_beq(tmp64); - break; - default: - g_assert_not_reached(); - } + tci_qemu_st(env, taddr, tmp64, oi, tb_ptr); break; case INDEX_op_mb: @@ -1105,82 +1138,71 @@ static const char *str_c(TCGCond c) /* Disassemble TCI bytecode. */ int print_insn_tci(bfd_vma addr, disassemble_info *info) { - uint8_t buf[256]; - int length, status; + const uint32_t *tb_ptr = (const void *)(uintptr_t)addr; const TCGOpDef *def; const char *op_name; + uint32_t insn; TCGOpcode op; - TCGReg r0, r1, r2, r3; -#if TCG_TARGET_REG_BITS == 32 - TCGReg r4, r5; -#endif + TCGReg r0, r1, r2, r3, r4, r5; tcg_target_ulong i1; int32_t s2; TCGCond c; TCGMemOpIdx oi; uint8_t pos, len; void *ptr; - const uint8_t *tb_ptr; - - status = info->read_memory_func(addr, buf, 2, info); - if (status != 0) { - info->memory_error_func(status, addr, info); - return -1; - } - op = buf[0]; - length = buf[1]; - if (length < 2) { - info->fprintf_func(info->stream, "invalid length %d", length); - return 1; - } + /* TCI is always the host, so we don't need to load indirect. */ + insn = *tb_ptr++; - status = info->read_memory_func(addr + 2, buf + 2, length - 2, info); - if (status != 0) { - info->memory_error_func(status, addr + 2, info); - return -1; - } + info->fprintf_func(info->stream, "%08x ", insn); + op = extract32(insn, 0, 8); def = &tcg_op_defs[op]; op_name = def->name; - tb_ptr = buf + 2; switch (op) { case INDEX_op_br: - case INDEX_op_call: case INDEX_op_exit_tb: case INDEX_op_goto_tb: - tci_args_l(&tb_ptr, &ptr); + tci_args_l(insn, tb_ptr, &ptr); info->fprintf_func(info->stream, "%-12s %p", op_name, ptr); break; + case INDEX_op_goto_ptr: + tci_args_r(insn, &r0); + info->fprintf_func(info->stream, "%-12s %s", op_name, str_r(r0)); + break; + + case INDEX_op_call: + tci_args_nl(insn, tb_ptr, &len, &ptr); + info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr); + break; + case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: - tci_args_rrcl(&tb_ptr, &r0, &r1, &c, &ptr); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %p", - op_name, str_r(r0), str_r(r1), str_c(c), ptr); + tci_args_rl(insn, tb_ptr, &r0, &ptr); + info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p", + op_name, str_r(r0), ptr); break; case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: - tci_args_rrrc(&tb_ptr, &r0, &r1, &r2, &c); + tci_args_rrrc(insn, &r0, &r1, &r2, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c)); break; - case INDEX_op_tci_movi_i32: - tci_args_ri(&tb_ptr, &r0, &i1); + case INDEX_op_tci_movi: + tci_args_ri(insn, &r0, &i1); info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx, op_name, str_r(r0), i1); break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_tci_movi_i64: - tci_args_rI(&tb_ptr, &r0, &i1); - info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx, - op_name, str_r(r0), i1); + case INDEX_op_tci_movl: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + info->fprintf_func(info->stream, "%-12s %s, %p", + op_name, str_r(r0), ptr); break; -#endif case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: @@ -1201,7 +1223,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_st32_i64: case INDEX_op_st_i32: case INDEX_op_st_i64: - tci_args_rrs(&tb_ptr, &r0, &r1, &s2); + tci_args_rrs(insn, &r0, &r1, &s2); info->fprintf_func(info->stream, "%-12s %s, %s, %d", op_name, str_r(r0), str_r(r1), s2); break; @@ -1228,7 +1250,9 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_not_i64: case INDEX_op_neg_i32: case INDEX_op_neg_i64: - tci_args_rr(&tb_ptr, &r0, &r1); + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: + tci_args_rr(insn, &r0, &r1); info->fprintf_func(info->stream, "%-12s %s, %s", op_name, str_r(r0), str_r(r1)); break; @@ -1245,6 +1269,16 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_or_i64: case INDEX_op_xor_i32: case INDEX_op_xor_i64: + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: case INDEX_op_div_i32: case INDEX_op_div_i64: case INDEX_op_rem_i32: @@ -1263,48 +1297,59 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: case INDEX_op_rotr_i64: - tci_args_rrr(&tb_ptr, &r0, &r1, &r2); + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: + tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); break; case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: - tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); + tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d", op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); break; -#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_extract_i32: + case INDEX_op_extract_i64: + case INDEX_op_sextract_i32: + case INDEX_op_sextract_i64: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + info->fprintf_func(info->stream, "%-12s %s,%s,%d,%d", + op_name, str_r(r0), str_r(r1), pos, len); + break; + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: case INDEX_op_setcond2_i32: - tci_args_rrrrrc(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &c); + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_r(r3), str_r(r4), str_c(c)); break; - case INDEX_op_brcond2_i32: - tci_args_rrrrcl(&tb_ptr, &r0, &r1, &r2, &r3, &c, &ptr); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %p", - op_name, str_r(r0), str_r(r1), - str_r(r2), str_r(r3), str_c(c), ptr); - break; - case INDEX_op_mulu2_i32: - tci_args_rrrr(&tb_ptr, &r0, &r1, &r2, &r3); + case INDEX_op_mulu2_i64: + case INDEX_op_muls2_i32: + case INDEX_op_muls2_i64: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_r(r3)); break; case INDEX_op_add2_i32: + case INDEX_op_add2_i64: case INDEX_op_sub2_i32: - tci_args_rrrrrr(&tb_ptr, &r0, &r1, &r2, &r3, &r4, &r5); + case INDEX_op_sub2_i64: + tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_r(r3), str_r(r4), str_r(r5)); break; -#endif case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_st_i64: @@ -1317,30 +1362,38 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS); switch (len) { case 2: - tci_args_rrm(&tb_ptr, &r0, &r1, &oi); + tci_args_rrm(insn, &r0, &r1, &oi); info->fprintf_func(info->stream, "%-12s %s, %s, %x", op_name, str_r(r0), str_r(r1), oi); break; case 3: - tci_args_rrrm(&tb_ptr, &r0, &r1, &r2, &oi); + tci_args_rrrm(insn, &r0, &r1, &r2, &oi); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x", op_name, str_r(r0), str_r(r1), str_r(r2), oi); break; case 4: - tci_args_rrrrm(&tb_ptr, &r0, &r1, &r2, &r3, &oi); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %x", + tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4); + info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), - str_r(r2), str_r(r3), oi); + str_r(r2), str_r(r3), str_r(r4)); break; default: g_assert_not_reached(); } break; + case 0: + /* tcg_out_nop_fill uses zeros */ + if (insn == 0) { + info->fprintf_func(info->stream, "align"); + break; + } + /* fall through */ + default: info->fprintf_func(info->stream, "illegal opcode %d", op); break; } - return length; + return sizeof(insn); } diff --git a/tcg/tci/README b/tcg/tci/README index 9bb7d7a..f72a40a 100644 --- a/tcg/tci/README +++ b/tcg/tci/README @@ -23,10 +23,12 @@ This is what TCI (Tiny Code Interpreter) does. Like each TCG host frontend, TCI implements the code generator in tcg-target.c.inc, tcg-target.h. Both files are in directory tcg/tci. -The additional file tcg/tci.c adds the interpreter. +The additional file tcg/tci.c adds the interpreter and disassembler. -The bytecode consists of opcodes (same numeric values as those used by -TCG), command length and arguments of variable size and number. +The bytecode consists of opcodes (with only a few exceptions, with +the same same numeric values and semantics as used by TCG), and up +to six arguments packed into a 32-bit integer. See comments in tci.c +for details on the encoding. 3) Usage @@ -39,11 +41,6 @@ suggest using this option. Setting it automatically would need additional code in configure which must be fixed when new native TCG implementations are added. -System emulation should work on any 32 or 64 bit host. -User mode emulation might work. Maybe a new linker script (*.ld) -is needed. Byte order might be wrong (on big endian hosts) -and need fixes in configure. - For hosts with native TCG, the interpreter TCI can be enabled by configure --enable-tcg-interpreter @@ -118,13 +115,6 @@ u1 = linux-user-test works in the interpreter. These opcodes raise a runtime exception, so it is possible to see where code must be added. -* The pseudo code is not optimized and still ugly. For hosts with special - alignment requirements, it needs some fixes (maybe aligned bytecode - would also improve speed for hosts which support byte alignment). - -* A better disassembler for the pseudo code would be nice (a very primitive - disassembler is included in tcg-target.c.inc). - * It might be useful to have a runtime option which selects the native TCG or TCI, so QEMU would have to include two TCGs. Today, selecting TCI is a configure option, so you need two compilations of QEMU. diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h index 316730f..ae2dc3b 100644 --- a/tcg/tci/tcg-target-con-set.h +++ b/tcg/tci/tcg-target-con-set.h @@ -9,6 +9,7 @@ * Each operand should be a sequence of constraint letters as defined by * tcg-target-con-str.h; the constraint combination is inclusive or. */ +C_O0_I1(r) C_O0_I2(r, r) C_O0_I3(r, r, r) C_O0_I4(r, r, r, r) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 823ecd5..9651e7a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -22,24 +22,14 @@ * THE SOFTWARE. */ -/* TODO list: - * - See TODO comments in code. - */ - -/* Marker for missing code. */ -#define TODO() \ - do { \ - fprintf(stderr, "TODO %s:%u: %s()\n", \ - __FILE__, __LINE__, __func__); \ - tcg_abort(); \ - } while (0) - -/* Bitfield n...m (in 32 bit value). */ -#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m) +#include "../tcg-pool.c.inc" static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) { switch (op) { + case INDEX_op_goto_ptr: + return C_O0_I1(r); + case INDEX_op_ld8u_i32: case INDEX_op_ld8s_i32: case INDEX_op_ld16u_i32: @@ -73,6 +63,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_bswap32_i32: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: + case INDEX_op_extract_i32: + case INDEX_op_extract_i64: + case INDEX_op_sextract_i32: + case INDEX_op_sextract_i64: + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: @@ -128,24 +124,37 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_setcond_i64: case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: return C_O1_I2(r, r, r); case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(r, r); -#if TCG_TARGET_REG_BITS == 32 - /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */ case INDEX_op_add2_i32: + case INDEX_op_add2_i64: case INDEX_op_sub2_i32: + case INDEX_op_sub2_i64: return C_O2_I4(r, r, r, r, r, r); + +#if TCG_TARGET_REG_BITS == 32 case INDEX_op_brcond2_i32: return C_O0_I4(r, r, r, r); +#endif + case INDEX_op_mulu2_i32: + case INDEX_op_mulu2_i64: + case INDEX_op_muls2_i32: + case INDEX_op_muls2_i64: return C_O2_I2(r, r, r, r); + + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: case INDEX_op_setcond2_i32: return C_O1_I4(r, r, r, r, r); -#endif case INDEX_op_qemu_ld_i32: return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS @@ -170,8 +179,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) } static const int tcg_target_reg_alloc_order[] = { - TCG_REG_R0, - TCG_REG_R1, TCG_REG_R2, TCG_REG_R3, TCG_REG_R4, @@ -186,29 +193,16 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R13, TCG_REG_R14, TCG_REG_R15, + TCG_REG_R1, + TCG_REG_R0, }; #if MAX_OPC_PARAM_IARGS != 6 # error Fix needed, number of supported input arguments changed! #endif -static const int tcg_target_call_iarg_regs[] = { - TCG_REG_R0, - TCG_REG_R1, - TCG_REG_R2, - TCG_REG_R3, - TCG_REG_R4, - TCG_REG_R5, -#if TCG_TARGET_REG_BITS == 32 - /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */ - TCG_REG_R6, - TCG_REG_R7, - TCG_REG_R8, - TCG_REG_R9, - TCG_REG_R10, - TCG_REG_R11, -#endif -}; +/* No call arguments via registers. All will be stored on the "stack". */ +static const int tcg_target_call_iarg_regs[] = { }; static const int tcg_target_call_oarg_regs[] = { TCG_REG_R0, @@ -241,317 +235,281 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - /* tcg_out_reloc always uses the same type, addend. */ - tcg_debug_assert(type == sizeof(tcg_target_long)); - tcg_debug_assert(addend == 0); - tcg_debug_assert(value != 0); - if (TCG_TARGET_REG_BITS == 32) { - tcg_patch32(code_ptr, value); - } else { - tcg_patch64(code_ptr, value); - } - return true; -} + intptr_t diff = value - (intptr_t)(code_ptr + 1); -/* Write value (native size). */ -static void tcg_out_i(TCGContext *s, tcg_target_ulong v) -{ - if (TCG_TARGET_REG_BITS == 32) { - tcg_out32(s, v); - } else { - tcg_out64(s, v); - } -} - -/* Write opcode. */ -static void tcg_out_op_t(TCGContext *s, TCGOpcode op) -{ - tcg_out8(s, op); - tcg_out8(s, 0); -} - -/* Write register. */ -static void tcg_out_r(TCGContext *s, TCGArg t0) -{ - tcg_debug_assert(t0 < TCG_TARGET_NB_REGS); - tcg_out8(s, t0); -} + tcg_debug_assert(addend == 0); + tcg_debug_assert(type == 20); -/* Write label. */ -static void tci_out_label(TCGContext *s, TCGLabel *label) -{ - if (label->has_value) { - tcg_out_i(s, label->u.value); - tcg_debug_assert(label->u.value); - } else { - tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), label, 0); - s->code_ptr += sizeof(tcg_target_ulong); + if (diff == sextract32(diff, 0, type)) { + tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff)); + return true; } + return false; } static void stack_bounds_check(TCGReg base, target_long offset) { if (base == TCG_REG_CALL_STACK) { - tcg_debug_assert(offset < 0); - tcg_debug_assert(offset >= -(CPU_TEMP_BUF_NLONGS * sizeof(long))); + tcg_debug_assert(offset >= 0); + tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE + + TCG_STATIC_FRAME_SIZE)); } } static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tci_out_label(s, l0); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_out_reloc(s, s->code_ptr, 20, l0, 0); + insn = deposit32(insn, 0, 8, op); + tcg_out32(s, insn); } static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_i(s, (uintptr_t)p0); + tcg_insn_unit insn = 0; + intptr_t diff; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + /* Special case for exit_tb: map null -> 0. */ + if (p0 == NULL) { + diff = 0; + } else { + diff = p0 - (void *)(s->code_ptr + 1); + tcg_debug_assert(diff != 0); + if (diff != sextract32(diff, 0, 20)) { + tcg_raise_tb_overflow(s); + } + } + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 12, 20, diff); + tcg_out32(s, insn); } -static void tcg_out_op_v(TCGContext *s, TCGOpcode op) +static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); +} - old_code_ptr[1] = s->code_ptr - old_code_ptr; +static void tcg_out_op_v(TCGContext *s, TCGOpcode op) +{ + tcg_out32(s, (uint8_t)op); } static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out32(s, i1); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(i1 == sextract32(i1, 0, 20)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 20, i1); + tcg_out32(s, insn); } -#if TCG_TARGET_REG_BITS == 64 -static void tcg_out_op_rI(TCGContext *s, TCGOpcode op, - TCGReg r0, uint64_t i1) +static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out64(s, i1); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_out_reloc(s, s->code_ptr, 20, l1, 0); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); } -#endif static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + tcg_out32(s, insn); } static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGArg m2) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out32(s, m2); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(m2 == extract32(m2, 0, 12)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 20, 12, m2); + tcg_out32(s, insn); } static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + tcg_out32(s, insn); } static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, intptr_t i2) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_debug_assert(i2 == (int32_t)i2); - tcg_out32(s, i2); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(i2 == sextract32(i2, 0, 16)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 16, i2); + tcg_out32(s, insn); } -static void tcg_out_op_rrcl(TCGContext *s, TCGOpcode op, - TCGReg r0, TCGReg r1, TCGCond c2, TCGLabel *l3) +static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, + TCGReg r1, uint8_t b2, uint8_t b3) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out8(s, c2); - tci_out_label(s, l3); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(b2 == extract32(b2, 0, 6)); + tcg_debug_assert(b3 == extract32(b3, 0, 6)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 6, b2); + insn = deposit32(insn, 22, 6, b3); + tcg_out32(s, insn); } static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out8(s, c3); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, c3); + tcg_out32(s, insn); } static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out32(s, m3); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(m3 == extract32(m3, 0, 12)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 12, m3); + tcg_out32(s, insn); } static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out8(s, b3); - tcg_out8(s, b4); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_debug_assert(b3 == extract32(b3, 0, 6)); + tcg_debug_assert(b4 == extract32(b4, 0, 6)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 6, b3); + insn = deposit32(insn, 26, 6, b4); + tcg_out32(s, insn); } -static void tcg_out_op_rrrrm(TCGContext *s, TCGOpcode op, TCGReg r0, - TCGReg r1, TCGReg r2, TCGReg r3, TCGArg m4) +static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0, + TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out_r(s, r3); - tcg_out32(s, m4); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + insn = deposit32(insn, 24, 4, r4); + tcg_out32(s, insn); } -#if TCG_TARGET_REG_BITS == 32 static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out_r(s, r3); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; -} - -static void tcg_out_op_rrrrcl(TCGContext *s, TCGOpcode op, - TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3, - TCGCond c4, TCGLabel *l5) -{ - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out_r(s, r3); - tcg_out8(s, c4); - tci_out_label(s, l5); - - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + tcg_out32(s, insn); } static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4, TCGCond c5) { - uint8_t *old_code_ptr = s->code_ptr; - - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out_r(s, r3); - tcg_out_r(s, r4); - tcg_out8(s, c5); + tcg_insn_unit insn = 0; - old_code_ptr[1] = s->code_ptr - old_code_ptr; + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + insn = deposit32(insn, 24, 4, r4); + insn = deposit32(insn, 28, 4, c5); + tcg_out32(s, insn); } static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4, TCGReg r5) { - uint8_t *old_code_ptr = s->code_ptr; + tcg_insn_unit insn = 0; - tcg_out_op_t(s, op); - tcg_out_r(s, r0); - tcg_out_r(s, r1); - tcg_out_r(s, r2); - tcg_out_r(s, r3); - tcg_out_r(s, r4); - tcg_out_r(s, r5); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + insn = deposit32(insn, 24, 4, r4); + insn = deposit32(insn, 28, 4, r5); + tcg_out32(s, insn); +} - old_code_ptr[1] = s->code_ptr - old_code_ptr; +static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, + TCGReg base, intptr_t offset) +{ + stack_bounds_check(base, offset); + if (offset != sextract32(offset, 0, 16)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); + tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32 + ? INDEX_op_add_i32 : INDEX_op_add_i64), + TCG_REG_TMP, TCG_REG_TMP, base); + base = TCG_REG_TMP; + offset = 0; + } + tcg_out_op_rrs(s, op, val, base, offset); } -#endif static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, intptr_t offset) { - stack_bounds_check(base, offset); switch (type) { case TCG_TYPE_I32: - tcg_out_op_rrs(s, INDEX_op_ld_i32, val, base, offset); + tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset); break; #if TCG_TARGET_REG_BITS == 64 case TCG_TYPE_I64: - tcg_out_op_rrs(s, INDEX_op_ld_i64, val, base, offset); + tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset); break; #endif default: @@ -581,24 +539,46 @@ static void tcg_out_movi(TCGContext *s, TCGType type, { switch (type) { case TCG_TYPE_I32: - tcg_out_op_ri(s, INDEX_op_tci_movi_i32, ret, arg); - break; #if TCG_TARGET_REG_BITS == 64 + arg = (int32_t)arg; + /* fall through */ case TCG_TYPE_I64: - tcg_out_op_rI(s, INDEX_op_tci_movi_i64, ret, arg); - break; #endif + break; default: g_assert_not_reached(); } + + if (arg == sextract32(arg, 0, 20)) { + tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg); + } else { + tcg_insn_unit insn = 0; + + new_pool_label(s, arg, 20, s->code_ptr, 0); + insn = deposit32(insn, 0, 8, INDEX_op_tci_movl); + insn = deposit32(insn, 8, 4, ret); + tcg_out32(s, insn); + } } -static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, + ffi_cif *cif) { - uint8_t *old_code_ptr = s->code_ptr; - tcg_out_op_t(s, INDEX_op_call); - tcg_out_i(s, (uintptr_t)arg); - old_code_ptr[1] = s->code_ptr - old_code_ptr; + tcg_insn_unit insn = 0; + uint8_t which; + + if (cif->rtype == &ffi_type_void) { + which = 0; + } else if (cif->rtype->size == 4) { + which = 1; + } else { + tcg_debug_assert(cif->rtype->size == 8); + which = 2; + } + new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif); + insn = deposit32(insn, 0, 8, INDEX_op_call); + insn = deposit32(insn, 8, 4, which); + tcg_out32(s, insn); } #if TCG_TARGET_REG_BITS == 64 @@ -629,6 +609,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, set_jmp_reset_offset(s, args[0]); break; + case INDEX_op_goto_ptr: + tcg_out_op_r(s, opc, args[0]); + break; + case INDEX_op_br: tcg_out_op_l(s, opc, arg_label(args[0])); break; @@ -637,12 +621,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]); break; -#if TCG_TARGET_REG_BITS == 32 + CASE_32_64(movcond) case INDEX_op_setcond2_i32: tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], args[3], args[4], args[5]); break; -#endif CASE_32_64(ld8u) CASE_32_64(ld8s) @@ -657,8 +640,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_st_i32: CASE_64(st32) CASE_64(st) - stack_bounds_check(args[1], args[2]); - tcg_out_op_rrs(s, opc, args[0], args[1], args[2]); + tcg_out_ldst(s, opc, args[0], args[1], args[2]); break; CASE_32_64(add) @@ -681,6 +663,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ + CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ + CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); break; @@ -696,8 +680,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */ + CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */ + { + TCGArg pos = args[2], len = args[3]; + TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32; + + tcg_debug_assert(pos < max); + tcg_debug_assert(pos + len <= max); + + tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len); + } + break; + CASE_32_64(brcond) - tcg_out_op_rrcl(s, opc, args[0], args[1], args[2], arg_label(args[3])); + tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32 + ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64), + TCG_REG_TMP, args[0], args[1], args[2]); + tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); break; CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */ @@ -713,23 +713,28 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, CASE_32_64(bswap16) /* Optional (TCG_TARGET_HAS_bswap16_*). */ CASE_32_64(bswap32) /* Optional (TCG_TARGET_HAS_bswap32_*). */ CASE_64(bswap64) /* Optional (TCG_TARGET_HAS_bswap64_i64). */ + CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ tcg_out_op_rr(s, opc, args[0], args[1]); break; -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: + CASE_32_64(add2) + CASE_32_64(sub2) tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], args[3], args[4], args[5]); break; + +#if TCG_TARGET_REG_BITS == 32 case INDEX_op_brcond2_i32: - tcg_out_op_rrrrcl(s, opc, args[0], args[1], args[2], - args[3], args[4], arg_label(args[5])); + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, + args[0], args[1], args[2], args[3], args[4]); + tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); break; - case INDEX_op_mulu2_i32: +#endif + + CASE_32_64(mulu2) + CASE_32_64(muls2) tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); break; -#endif case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_st_i32: @@ -747,8 +752,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) { tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]); } else { - tcg_out_op_rrrrm(s, opc, args[0], args[1], - args[2], args[3], args[4]); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]); + tcg_out_op_rrrrr(s, opc, args[0], args[1], + args[2], args[3], TCG_REG_TMP); } break; @@ -794,6 +800,11 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) return ct & TCG_CT_CONST; } +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + memset(p, 0, sizeof(*p) * count); +} + static void tcg_target_init(TCGContext *s) { #if defined(CONFIG_DEBUG_TCG_INTERPRETER) @@ -810,17 +821,22 @@ static void tcg_target_init(TCGContext *s) tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1; /* Registers available for 64 bit operations. */ tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1; - /* TODO: Which registers should be set here? */ - tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1; + /* + * The interpreter "registers" are in the local stack frame and + * cannot be clobbered by the called helper functions. However, + * the interpreter assumes a 64-bit return value and assigns to + * the return value registers. + */ + tcg_target_call_clobber_regs = + MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS); s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - /* We use negative offsets from "sp" so that we can distinguish - stores that might pretend to be call arguments. */ - tcg_set_frame(s, TCG_REG_CALL_STACK, - -CPU_TEMP_BUF_NLONGS * sizeof(long), - CPU_TEMP_BUF_NLONGS * sizeof(long)); + /* The call arguments come first, followed by the temp storage. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + TCG_STATIC_FRAME_SIZE); } /* Generate global QEMU prologue and epilogue code. */ diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index d0b5f3f..7b6089f 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -41,7 +41,7 @@ #define TCG_TARGET_H #define TCG_TARGET_INTERPRETER 1 -#define TCG_TARGET_INSN_UNIT_SIZE 1 +#define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) @@ -68,26 +68,26 @@ #define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_ext8u_i32 1 #define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 +#define TCG_TARGET_HAS_andc_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 -#define TCG_TARGET_HAS_extract_i32 0 -#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract_i32 1 +#define TCG_TARGET_HAS_sextract_i32 1 #define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_eqv_i32 1 +#define TCG_TARGET_HAS_nand_i32 1 +#define TCG_TARGET_HAS_nor_i32 1 +#define TCG_TARGET_HAS_clz_i32 1 +#define TCG_TARGET_HAS_ctz_i32 1 +#define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 +#define TCG_TARGET_HAS_orc_i32 1 #define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_movcond_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_goto_ptr 0 +#define TCG_TARGET_HAS_goto_ptr 1 #define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -98,8 +98,8 @@ #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 -#define TCG_TARGET_HAS_extract_i64 0 -#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract_i64 1 +#define TCG_TARGET_HAS_sextract_i64 1 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 @@ -109,25 +109,25 @@ #define TCG_TARGET_HAS_ext8u_i64 1 #define TCG_TARGET_HAS_ext16u_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 1 +#define TCG_TARGET_HAS_nand_i64 1 +#define TCG_TARGET_HAS_nor_i64 1 +#define TCG_TARGET_HAS_clz_i64 1 +#define TCG_TARGET_HAS_ctz_i64 1 +#define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 0 +#define TCG_TARGET_HAS_orc_i64 1 #define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_movcond_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 #else @@ -156,15 +156,17 @@ typedef enum { TCG_REG_R14, TCG_REG_R15, + TCG_REG_TMP = TCG_REG_R13, TCG_AREG0 = TCG_REG_R14, TCG_REG_CALL_STACK = TCG_REG_R15, } TCGReg; /* Used for function call generation. */ #define TCG_TARGET_CALL_STACK_OFFSET 0 -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_STACK_ALIGN 8 #define HAVE_TCG_QEMU_TB_EXEC +#define TCG_TARGET_NEED_POOL_LABELS /* We could notice __i386__ or __s390x__ and reduce the barriers depending on the host. But if you want performance, you use the normal backend. diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker index 7eeecac..7e6997e 100644 --- a/tests/docker/dockerfiles/alpine.docker +++ b/tests/docker/dockerfiles/alpine.docker @@ -22,6 +22,7 @@ ENV PACKAGES \ libaio-dev \ libbpf-dev \ libcap-ng-dev \ + libffi-dev \ libjpeg-turbo-dev \ libnfs-dev \ libpng-dev \ diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker index efc1349..03e0440 100644 --- a/tests/docker/dockerfiles/centos8.docker +++ b/tests/docker/dockerfiles/centos8.docker @@ -17,6 +17,7 @@ ENV PACKAGES \ libbpf-devel \ libepoxy-devel \ libfdt-devel \ + libffi-devel \ libgcrypt-devel \ lzo-devel \ make \ diff --git a/tests/docker/dockerfiles/debian10.docker b/tests/docker/dockerfiles/debian10.docker index 63cf835..4ffe476 100644 --- a/tests/docker/dockerfiles/debian10.docker +++ b/tests/docker/dockerfiles/debian10.docker @@ -26,6 +26,7 @@ RUN apt update && \ gdb-multiarch \ gettext \ git \ + libffi-dev \ libncurses5-dev \ ninja-build \ pkg-config \ diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker index 66cdb06..8004fd8 100644 --- a/tests/docker/dockerfiles/fedora-i386-cross.docker +++ b/tests/docker/dockerfiles/fedora-i386-cross.docker @@ -6,6 +6,7 @@ ENV PACKAGES \ findutils \ gcc \ git \ + libffi-devel.i686 \ libtasn1-devel.i686 \ libzstd-devel.i686 \ make \ diff --git a/tests/docker/dockerfiles/fedora-win32-cross.docker b/tests/docker/dockerfiles/fedora-win32-cross.docker index 3733df6..a638afb 100644 --- a/tests/docker/dockerfiles/fedora-win32-cross.docker +++ b/tests/docker/dockerfiles/fedora-win32-cross.docker @@ -19,6 +19,7 @@ ENV PACKAGES \ mingw32-gmp \ mingw32-gnutls \ mingw32-gtk3 \ + mingw32-libffi \ mingw32-libjpeg-turbo \ mingw32-libpng \ mingw32-libtasn1 \ diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker index 2564ce4..f53007a 100644 --- a/tests/docker/dockerfiles/fedora-win64-cross.docker +++ b/tests/docker/dockerfiles/fedora-win64-cross.docker @@ -18,6 +18,7 @@ ENV PACKAGES \ mingw64-glib2 \ mingw64-gmp \ mingw64-gtk3 \ + mingw64-libffi \ mingw64-libjpeg-turbo \ mingw64-libpng \ mingw64-libtasn1 \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index 0979c0e..00cac5d 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -33,6 +33,7 @@ ENV PACKAGES \ libepoxy-devel \ libfdt-devel \ libbpf-devel \ + libffi-devel \ libiscsi-devel \ libjpeg-devel \ libpmem-devel \ diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker index 98a5273..24d1647 100644 --- a/tests/docker/dockerfiles/ubuntu.docker +++ b/tests/docker/dockerfiles/ubuntu.docker @@ -28,6 +28,7 @@ ENV PACKAGES \ libdrm-dev \ libepoxy-dev \ libfdt-dev \ + libffi-dev \ libgbm-dev \ libgnutls28-dev \ libgtk-3-dev \ diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker index c0d3642..2f1ec7c 100644 --- a/tests/docker/dockerfiles/ubuntu1804.docker +++ b/tests/docker/dockerfiles/ubuntu1804.docker @@ -16,6 +16,7 @@ ENV PACKAGES \ libdrm-dev \ libepoxy-dev \ libfdt-dev \ + libffi-dev \ libgbm-dev \ libgtk-3-dev \ libibverbs-dev \ diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker index f1e0eba..fe993fe 100644 --- a/tests/docker/dockerfiles/ubuntu2004.docker +++ b/tests/docker/dockerfiles/ubuntu2004.docker @@ -19,6 +19,7 @@ ENV PACKAGES flex bison \ libdrm-dev \ libepoxy-dev \ libfdt-dev \ + libffi-dev \ libgbm-dev \ libgtk-3-dev \ libibverbs-dev \ diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target index b29fae4..63cf1b2 100644 --- a/tests/tcg/Makefile.target +++ b/tests/tcg/Makefile.target @@ -81,8 +81,10 @@ LDFLAGS= QEMU_OPTS= -# If TCG debugging is enabled things are a lot slower -ifeq ($(CONFIG_DEBUG_TCG),y) +# If TCG debugging, or TCI is enabled things are a lot slower +ifneq ($(CONFIG_TCG_INTERPRETER),) +TIMEOUT=90 +else ifneq ($(CONFIG_DEBUG_TCG),) TIMEOUT=60 else TIMEOUT=15 diff --git a/util/oslib-win32.c b/util/oslib-win32.c index ee3a369..af559ef 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -58,7 +58,11 @@ void *qemu_try_memalign(size_t alignment, size_t size) void *ptr; g_assert(size != 0); - g_assert(is_power_of_2(alignment)); + if (alignment < sizeof(void *)) { + alignment = sizeof(void *); + } else { + g_assert(is_power_of_2(alignment)); + } ptr = _aligned_malloc(size, alignment); trace_qemu_memalign(alignment, size, ptr); return ptr; |