Diffstat (limited to 'tcg/tcg.c')
-rw-r--r-- | tcg/tcg.c | 2007 |
1 file changed, 1390 insertions(+), 617 deletions(-)
@@ -34,6 +34,7 @@ #include "qemu/cacheflush.h" #include "qemu/cacheinfo.h" #include "qemu/timer.h" +#include "exec/target_page.h" #include "exec/translation-block.h" #include "exec/tlb-common.h" #include "tcg/startup.h" @@ -56,6 +57,7 @@ #include "tcg/tcg-temp-internal.h" #include "tcg-internal.h" #include "tcg/perf.h" +#include "tcg-has.h" #ifdef CONFIG_USER_ONLY #include "user/guest-base.h" #endif @@ -66,6 +68,11 @@ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend); +static void tcg_out_nop_fill(tcg_insn_unit *p, int count); + +typedef struct TCGLabelQemuLdst TCGLabelQemuLdst; +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l); +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l); /* The CIE and FDE header definitions will be common to all hosts. */ typedef struct { @@ -90,18 +97,17 @@ typedef struct QEMU_PACKED { DebugFrameFDEHeader fde; } DebugFrameHeader; -typedef struct TCGLabelQemuLdst { +struct TCGLabelQemuLdst { bool is_ld; /* qemu_ld: true, qemu_st: false */ MemOpIdx oi; TCGType type; /* result type of a load */ - TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */ - TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */ + TCGReg addr_reg; /* reg index for guest virtual addr */ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */ const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */ QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next; -} TCGLabelQemuLdst; +}; static void tcg_register_jit_int(const void *buf, size_t size, const void *debug_frame, @@ -128,9 +134,11 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]); +static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest); +static void tcg_out_mb(TCGContext *s, unsigned bar); +static void tcg_out_br(TCGContext *s, TCGLabel *l); +static void tcg_out_set_carry(TCGContext *s); +static void tcg_out_set_borrow(TCGContext *s); #if TCG_TARGET_MAYBE_vec static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src); @@ -165,6 +173,10 @@ static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, { g_assert_not_reached(); } +int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve) +{ + return 0; +} #endif static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); @@ -175,9 +187,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); static bool tcg_target_const_match(int64_t val, int ct, TCGType type, TCGCond cond, int vece); -#ifdef TCG_TARGET_NEED_LDST_LABELS -static int tcg_out_ldst_finalize(TCGContext *s); -#endif #ifndef CONFIG_USER_ONLY #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) @@ -634,6 +643,197 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, } } +/* + * Allocate a new TCGLabelQemuLdst entry. 
+ */ + +__attribute__((unused)) +static TCGLabelQemuLdst *new_ldst_label(TCGContext *s) +{ + TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l)); + + memset(l, 0, sizeof(*l)); + QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next); + + return l; +} + +/* + * Allocate new constant pool entries. + */ + +typedef struct TCGLabelPoolData { + struct TCGLabelPoolData *next; + tcg_insn_unit *label; + intptr_t addend; + int rtype; + unsigned nlong; + tcg_target_ulong data[]; +} TCGLabelPoolData; + +static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype, + tcg_insn_unit *label, intptr_t addend) +{ + TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData) + + sizeof(tcg_target_ulong) * nlong); + + n->label = label; + n->addend = addend; + n->rtype = rtype; + n->nlong = nlong; + return n; +} + +static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n) +{ + TCGLabelPoolData *i, **pp; + int nlong = n->nlong; + + /* Insertion sort on the pool. */ + for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) { + if (nlong > i->nlong) { + break; + } + if (nlong < i->nlong) { + continue; + } + if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) { + break; + } + } + n->next = *pp; + *pp = n; +} + +/* The "usual" for generic integer code. */ +__attribute__((unused)) +static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype, + tcg_insn_unit *label, intptr_t addend) +{ + TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend); + n->data[0] = d; + new_pool_insert(s, n); +} + +/* For v64 or v128, depending on the host. */ +__attribute__((unused)) +static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label, + intptr_t addend, tcg_target_ulong d0, + tcg_target_ulong d1) +{ + TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + new_pool_insert(s, n); +} + +/* For v128 or v256, depending on the host. */ +__attribute__((unused)) +static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label, + intptr_t addend, tcg_target_ulong d0, + tcg_target_ulong d1, tcg_target_ulong d2, + tcg_target_ulong d3) +{ + TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + n->data[2] = d2; + n->data[3] = d3; + new_pool_insert(s, n); +} + +/* For v256, for 32-bit host. */ +__attribute__((unused)) +static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label, + intptr_t addend, tcg_target_ulong d0, + tcg_target_ulong d1, tcg_target_ulong d2, + tcg_target_ulong d3, tcg_target_ulong d4, + tcg_target_ulong d5, tcg_target_ulong d6, + tcg_target_ulong d7) +{ + TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend); + n->data[0] = d0; + n->data[1] = d1; + n->data[2] = d2; + n->data[3] = d3; + n->data[4] = d4; + n->data[5] = d5; + n->data[6] = d6; + n->data[7] = d7; + new_pool_insert(s, n); +} + +/* + * Generate TB finalization at the end of block + */ + +static int tcg_out_ldst_finalize(TCGContext *s) +{ + TCGLabelQemuLdst *lb; + + /* qemu_ld/st slow paths */ + QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) { + if (lb->is_ld + ? !tcg_out_qemu_ld_slow_path(s, lb) + : !tcg_out_qemu_st_slow_path(s, lb)) { + return -2; + } + + /* + * Test for (pending) buffer overflow. The assumption is that any + * one operation beginning below the high water mark cannot overrun + * the buffer completely. Thus we can test for overflow after + * generating code without having to check during generation. 
+ */ + if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) { + return -1; + } + } + return 0; +} + +static int tcg_out_pool_finalize(TCGContext *s) +{ + TCGLabelPoolData *p = s->pool_labels; + TCGLabelPoolData *l = NULL; + void *a; + + if (p == NULL) { + return 0; + } + + /* + * ??? Round up to qemu_icache_linesize, but then do not round + * again when allocating the next TranslationBlock structure. + */ + a = (void *)ROUND_UP((uintptr_t)s->code_ptr, + sizeof(tcg_target_ulong) * p->nlong); + tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr); + s->data_gen_ptr = a; + + for (; p != NULL; p = p->next) { + size_t size = sizeof(tcg_target_ulong) * p->nlong; + uintptr_t value; + + if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) { + if (unlikely(a > s->code_gen_highwater)) { + return -1; + } + memcpy(a, p->data, size); + a += size; + l = p; + } + + value = (uintptr_t)tcg_splitwx_to_rx(a) - size; + if (!patch_reloc(p->label, p->rtype, value, p->addend)) { + return -2; + } + } + + s->code_ptr = a; + return 0; +} + #define C_PFX1(P, A) P##A #define C_PFX2(P, A, B) P##A##_##B #define C_PFX3(P, A, B, C) P##A##_##B##_##C @@ -664,10 +864,12 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { + C_Dynamic = -2, + C_NotImplemented = -1, #include "tcg-target-con-set.h" } TCGConstraintSetIndex; -static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); +static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned); #undef C_O0_I1 #undef C_O0_I2 @@ -688,31 +890,35 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); /* Put all of the constraint sets into an array, indexed by the enum. */ -#define C_O0_I1(I1) { .args_ct_str = { #I1 } }, -#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } }, -#define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } }, -#define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } }, +typedef struct TCGConstraintSet { + uint8_t nb_oargs, nb_iargs; + const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGConstraintSet; + +#define C_O0_I1(I1) { 0, 1, { #I1 } }, +#define C_O0_I2(I1, I2) { 0, 2, { #I1, #I2 } }, +#define C_O0_I3(I1, I2, I3) { 0, 3, { #I1, #I2, #I3 } }, +#define C_O0_I4(I1, I2, I3, I4) { 0, 4, { #I1, #I2, #I3, #I4 } }, -#define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } }, -#define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } }, -#define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } }, -#define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } }, +#define C_O1_I1(O1, I1) { 1, 1, { #O1, #I1 } }, +#define C_O1_I2(O1, I1, I2) { 1, 2, { #O1, #I1, #I2 } }, +#define C_O1_I3(O1, I1, I2, I3) { 1, 3, { #O1, #I1, #I2, #I3 } }, +#define C_O1_I4(O1, I1, I2, I3, I4) { 1, 4, { #O1, #I1, #I2, #I3, #I4 } }, -#define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } }, -#define C_N1O1_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, #O2, #I1 } }, -#define C_N2_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, "&" #O2, #I1 } }, +#define C_N1_I2(O1, I1, I2) { 1, 2, { "&" #O1, #I1, #I2 } }, +#define C_N1O1_I1(O1, O2, I1) { 2, 1, { "&" #O1, #O2, #I1 } }, +#define C_N2_I1(O1, O2, I1) { 2, 1, { "&" #O1, "&" #O2, #I1 } }, -#define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } }, -#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, -#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, 
-#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, -#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, +#define C_O2_I1(O1, O2, I1) { 2, 1, { #O1, #O2, #I1 } }, +#define C_O2_I2(O1, O2, I1, I2) { 2, 2, { #O1, #O2, #I1, #I2 } }, +#define C_O2_I3(O1, O2, I1, I2, I3) { 2, 3, { #O1, #O2, #I1, #I2, #I3 } }, +#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } }, +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, -static const TCGTargetOpDef constraint_sets[] = { +static const TCGConstraintSet constraint_sets[] = { #include "tcg-target-con-set.h" }; - #undef C_O0_I1 #undef C_O0_I2 #undef C_O0_I3 @@ -752,6 +958,164 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) +/* + * TCGOutOp is the base class for a set of structures that describe how + * to generate code for a given TCGOpcode. + * + * @static_constraint: + * C_NotImplemented: The TCGOpcode is not supported by the backend. + * C_Dynamic: Use @dynamic_constraint to select a constraint set + * based on any of @type, @flags, or host isa. + * Otherwise: The register allocation constrains for the TCGOpcode. + * + * Subclasses of TCGOutOp will define a set of output routines that may + * be used. Such routines will often be selected by the set of registers + * and constants that come out of register allocation. The set of + * routines that are provided will guide the set of constraints that are + * legal. In particular, assume that tcg_optimize() has done its job in + * swapping commutative operands and folding operations for which all + * operands are constant. 
+ */ +typedef struct TCGOutOp { + TCGConstraintSetIndex static_constraint; + TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); +} TCGOutOp; + +typedef struct TCGOutOpAddSubCarry { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); + void (*out_rii)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2); +} TCGOutOpAddSubCarry; + +typedef struct TCGOutOpBinary { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); +} TCGOutOpBinary; + +typedef struct TCGOutOpBrcond { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, TCGReg a2, TCGLabel *label); + void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, tcg_target_long a2, TCGLabel *label); +} TCGOutOpBrcond; + +typedef struct TCGOutOpBrcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l); +} TCGOutOpBrcond2; + +typedef struct TCGOutOpBswap { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags); +} TCGOutOpBswap; + +typedef struct TCGOutOpDeposit { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len); + void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a2, unsigned ofs, unsigned len); +} TCGOutOpDeposit; + +typedef struct TCGOutOpDivRem { + TCGOutOp base; + void (*out_rr01r)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4); +} TCGOutOpDivRem; + +typedef struct TCGOutOpExtract { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len); +} TCGOutOpExtract; + +typedef struct TCGOutOpExtract2 { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned shr); +} TCGOutOpExtract2; + +typedef struct TCGOutOpLoad { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, intptr_t offset); +} TCGOutOpLoad; + +typedef struct TCGOutOpMovcond { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf); +} TCGOutOpMovcond; + +typedef struct TCGOutOpMul2 { + TCGOutOp base; + void (*out_rrrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3); +} TCGOutOpMul2; + +typedef struct TCGOutOpQemuLdSt { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt; + +typedef struct TCGOutOpQemuLdSt2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt2; + +typedef struct TCGOutOpUnary { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); +} TCGOutOpUnary; + +typedef struct TCGOutOpSetcond { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond, + 
TCGReg ret, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg a1, tcg_target_long a2); +} TCGOutOpSetcond; + +typedef struct TCGOutOpSetcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh); +} TCGOutOpSetcond2; + +typedef struct TCGOutOpStore { + TCGOutOp base; + void (*out_r)(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t offset); + void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, intptr_t offset); +} TCGOutOpStore; + +typedef struct TCGOutOpSubtract { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); +} TCGOutOpSubtract; + #include "tcg-target.c.inc" #ifndef CONFIG_TCG_INTERPRETER @@ -761,6 +1125,144 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - < MIN_TLB_MASK_TABLE_OFS); #endif +#if TCG_TARGET_REG_BITS == 64 +/* + * We require these functions for slow-path function calls. + * Adapt them generically for opcode output. + */ + +static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_exts_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_exts_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_exts_i32_i64, +}; + +static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extu_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_extu_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extu_i32_i64, +}; + +static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extrl_i64_i32(s, a0, a1); +} + +static const TCGOutOpUnary outop_extrl_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL, +}; +#endif + +static const TCGOutOp outop_goto_ptr = { + .static_constraint = C_O0_I1(r), +}; + +static const TCGOutOpLoad outop_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tcg_out_ld, +}; + +/* + * Register V as the TCGOutOp for O. + * This verifies that V is of type T, otherwise give a nice compiler error. + * This prevents trivial mistakes within each arch/tcg-target.c.inc. + */ +#define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) + +/* Register allocation descriptions for every TCGOpcode. 
*/ +static const TCGOutOp * const all_outop[NB_OPS] = { + OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci), + OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco), + /* addc1o is implemented with set_carry + addcio */ + OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32), + OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), + OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit), + OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), + OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld), + OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), + OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), + OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2), + OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), + OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond), + OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), + OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld), + OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2), + OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st), + OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2), + OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), + OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), + OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_st, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16), + OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), + OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo), + /* subb1o is implemented with set_borrow + subbio */ + OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), + + [INDEX_op_goto_ptr] = &outop_goto_ptr, + +#if TCG_TARGET_REG_BITS == 32 + 
OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), + OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), +#else + OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), + OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), + OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), + OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), + OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), + OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), + OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), + OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st), +#endif +}; + +#undef OUTOP + /* * All TCG threads except the parent (i.e. the one that called tcg_context_init * and registered the target's TCG globals) must register with this function @@ -1293,39 +1795,19 @@ static void init_call_layout(TCGHelperInfo *info) } static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; -static void process_op_defs(TCGContext *s); +static void process_constraint_sets(void); static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type, TCGReg reg, const char *name); -static void tcg_context_init(unsigned max_cpus) +static void tcg_context_init(unsigned max_threads) { TCGContext *s = &tcg_init_ctx; - int op, total_args, n, i; - TCGOpDef *def; - TCGArgConstraint *args_ct; + int n, i; TCGTemp *ts; memset(s, 0, sizeof(*s)); s->nb_globals = 0; - /* Count total number of arguments and allocate the corresponding - space */ - total_args = 0; - for(op = 0; op < NB_OPS; op++) { - def = &tcg_op_defs[op]; - n = def->nb_iargs + def->nb_oargs; - total_args += n; - } - - args_ct = g_new0(TCGArgConstraint, total_args); - - for(op = 0; op < NB_OPS; op++) { - def = &tcg_op_defs[op]; - def->args_ct = args_ct; - n = def->nb_iargs + def->nb_oargs; - args_ct += n; - } - init_call_layout(&info_helper_ld32_mmu); init_call_layout(&info_helper_ld64_mmu); init_call_layout(&info_helper_ld128_mmu); @@ -1334,7 +1816,7 @@ static void tcg_context_init(unsigned max_cpus) init_call_layout(&info_helper_st128_mmu); tcg_target_init(s); - process_op_defs(s); + process_constraint_sets(); /* Reverse the order of the saved registers, assuming they're all at the start of tcg_target_reg_alloc_order. */ @@ -1356,15 +1838,15 @@ static void tcg_context_init(unsigned max_cpus) * In user-mode we simply share the init context among threads, since we * use a single region. See the documentation tcg_region_init() for the * reasoning behind this. - * In system-mode we will have at most max_cpus TCG threads. + * In system-mode we will have at most max_threads TCG threads. 
*/ #ifdef CONFIG_USER_ONLY tcg_ctxs = &tcg_ctx; tcg_cur_ctxs = 1; tcg_max_ctxs = 1; #else - tcg_max_ctxs = max_cpus; - tcg_ctxs = g_new0(TCGContext *, max_cpus); + tcg_max_ctxs = max_threads; + tcg_ctxs = g_new0(TCGContext *, max_threads); #endif tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0)); @@ -1372,10 +1854,10 @@ static void tcg_context_init(unsigned max_cpus) tcg_env = temp_tcgv_ptr(ts); } -void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus) +void tcg_init(size_t tb_size, int splitwx, unsigned max_threads) { - tcg_context_init(max_cpus); - tcg_region_init(tb_size, splitwx, max_cpus); + tcg_context_init(max_threads); + tcg_region_init(tb_size, splitwx, max_threads); } /* @@ -1399,7 +1881,6 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s) goto retry; } qatomic_set(&s->code_gen_ptr, next); - s->data_gen_ptr = NULL; return tb; } @@ -1416,21 +1897,17 @@ void tcg_prologue_init(void) tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); #endif -#ifdef TCG_TARGET_NEED_POOL_LABELS s->pool_labels = NULL; -#endif qemu_thread_jit_write(); /* Generate the prologue. */ tcg_target_qemu_prologue(s); -#ifdef TCG_TARGET_NEED_POOL_LABELS /* Allow the prologue to put e.g. guest_base into a pool entry. */ { int result = tcg_out_pool_finalize(s); tcg_debug_assert(result == 0); } -#endif prologue_size = tcg_current_code_size(s); perf_report_prologue(s->code_gen_ptr, prologue_size); @@ -1490,7 +1967,7 @@ void tcg_func_start(TCGContext *s) s->nb_temps = s->nb_globals; /* No temps have been previously allocated for size or locality. */ - memset(s->free_temps, 0, sizeof(s->free_temps)); + tcg_temp_ebb_reset_freed(s); /* No constant temps have been previously allocated. */ for (int i = 0; i < TCG_TYPE_COUNT; ++i) { @@ -1512,10 +1989,7 @@ void tcg_func_start(TCGContext *s) s->emit_before_op = NULL; QSIMPLEQ_INIT(&s->labels); - tcg_debug_assert(s->addr_type == TCG_TYPE_I32 || - s->addr_type == TCG_TYPE_I64); - - tcg_debug_assert(s->insn_start_words > 0); + tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); } static TCGTemp *tcg_temp_alloc(TCGContext *s) @@ -1894,6 +2368,11 @@ TCGv_i64 tcg_constant_i64(int64_t val) return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val)); } +TCGv_vaddr tcg_constant_vaddr(uintptr_t val) +{ + return temp_tcgv_vaddr(tcg_constant_internal(TCG_TYPE_PTR, val)); +} + TCGv_ptr tcg_constant_ptr_int(intptr_t val) { return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val)); @@ -1932,12 +2411,34 @@ TCGTemp *tcgv_i32_temp(TCGv_i32 v) } #endif /* CONFIG_DEBUG_TCG */ -/* Return true if OP may appear in the opcode stream. - Test the runtime variable that controls each opcode. */ -bool tcg_op_supported(TCGOpcode op) +/* + * Return true if OP may appear in the opcode stream with TYPE. + * Test the runtime variable that controls each opcode. 
+ */ +bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) { - const bool have_vec - = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256; + bool has_type; + + switch (type) { + case TCG_TYPE_I32: + has_type = true; + break; + case TCG_TYPE_I64: + has_type = TCG_TARGET_REG_BITS == 64; + break; + case TCG_TYPE_V64: + has_type = TCG_TARGET_HAS_v64; + break; + case TCG_TYPE_V128: + has_type = TCG_TARGET_HAS_v128; + break; + case TCG_TYPE_V256: + has_type = TCG_TARGET_HAS_v256; + break; + default: + has_type = false; + break; + } switch (op) { case INDEX_op_discard: @@ -1949,221 +2450,56 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_exit_tb: case INDEX_op_goto_tb: case INDEX_op_goto_ptr: - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: return true; - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - return TCG_TARGET_HAS_qemu_st8_i32; - - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: - return TCG_TARGET_HAS_qemu_ldst_i128; - - case INDEX_op_mov_i32: - case INDEX_op_setcond_i32: - case INDEX_op_brcond_i32: - case INDEX_op_movcond_i32: - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_neg_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + tcg_debug_assert(type <= TCG_TYPE_REG); return true; - case INDEX_op_negsetcond_i32: - return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - return TCG_TARGET_HAS_div_i32; - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - return TCG_TARGET_HAS_rem_i32; - case INDEX_op_div2_i32: - case INDEX_op_divu2_i32: - return TCG_TARGET_HAS_div2_i32; - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return TCG_TARGET_HAS_rot_i32; - case INDEX_op_deposit_i32: - return TCG_TARGET_HAS_deposit_i32; - case INDEX_op_extract_i32: - return TCG_TARGET_HAS_extract_i32; - case INDEX_op_sextract_i32: - return TCG_TARGET_HAS_sextract_i32; - case INDEX_op_extract2_i32: - return TCG_TARGET_HAS_extract2_i32; - case INDEX_op_add2_i32: - return TCG_TARGET_HAS_add2_i32; - case INDEX_op_sub2_i32: - return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_mulu2_i32: - return TCG_TARGET_HAS_mulu2_i32; - case INDEX_op_muls2_i32: - return TCG_TARGET_HAS_muls2_i32; - case INDEX_op_muluh_i32: - return TCG_TARGET_HAS_muluh_i32; - case INDEX_op_mulsh_i32: - return TCG_TARGET_HAS_mulsh_i32; - case INDEX_op_ext8s_i32: - return TCG_TARGET_HAS_ext8s_i32; - case INDEX_op_ext16s_i32: - return TCG_TARGET_HAS_ext16s_i32; - case INDEX_op_ext8u_i32: - return TCG_TARGET_HAS_ext8u_i32; - case INDEX_op_ext16u_i32: - return TCG_TARGET_HAS_ext16u_i32; - case INDEX_op_bswap16_i32: - return TCG_TARGET_HAS_bswap16_i32; - case INDEX_op_bswap32_i32: - return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_not_i32: - return TCG_TARGET_HAS_not_i32; - case INDEX_op_andc_i32: - return TCG_TARGET_HAS_andc_i32; - case INDEX_op_orc_i32: - return 
TCG_TARGET_HAS_orc_i32; - case INDEX_op_eqv_i32: - return TCG_TARGET_HAS_eqv_i32; - case INDEX_op_nand_i32: - return TCG_TARGET_HAS_nand_i32; - case INDEX_op_nor_i32: - return TCG_TARGET_HAS_nor_i32; - case INDEX_op_clz_i32: - return TCG_TARGET_HAS_clz_i32; - case INDEX_op_ctz_i32: - return TCG_TARGET_HAS_ctz_i32; - case INDEX_op_ctpop_i32: - return TCG_TARGET_HAS_ctpop_i32; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + if (TCG_TARGET_REG_BITS == 32) { + tcg_debug_assert(type == TCG_TYPE_I64); + return true; + } + tcg_debug_assert(type == TCG_TYPE_I128); + goto do_lookup; + + case INDEX_op_add: + case INDEX_op_and: + case INDEX_op_brcond: + case INDEX_op_deposit: + case INDEX_op_extract: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld: + case INDEX_op_mov: + case INDEX_op_movcond: + case INDEX_op_negsetcond: + case INDEX_op_or: + case INDEX_op_setcond: + case INDEX_op_sextract: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st: + case INDEX_op_xor: + return has_type; case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_mov_i64: - case INDEX_op_setcond_i64: - case INDEX_op_brcond_i64: - case INDEX_op_movcond_i64: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_neg_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_st32: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - return TCG_TARGET_REG_BITS == 64; - - case INDEX_op_negsetcond_i64: - return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - return TCG_TARGET_HAS_div_i64; - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - return TCG_TARGET_HAS_rem_i64; - case INDEX_op_div2_i64: - case INDEX_op_divu2_i64: - return TCG_TARGET_HAS_div2_i64; - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return TCG_TARGET_HAS_rot_i64; - case INDEX_op_deposit_i64: - return TCG_TARGET_HAS_deposit_i64; - case INDEX_op_extract_i64: - return TCG_TARGET_HAS_extract_i64; - case INDEX_op_sextract_i64: - return TCG_TARGET_HAS_sextract_i64; - case INDEX_op_extract2_i64: - return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_ext8s_i64: - return TCG_TARGET_HAS_ext8s_i64; - case INDEX_op_ext16s_i64: - return TCG_TARGET_HAS_ext16s_i64; - case INDEX_op_ext32s_i64: - return TCG_TARGET_HAS_ext32s_i64; - case INDEX_op_ext8u_i64: - return TCG_TARGET_HAS_ext8u_i64; - case INDEX_op_ext16u_i64: - return TCG_TARGET_HAS_ext16u_i64; - case INDEX_op_ext32u_i64: - return TCG_TARGET_HAS_ext32u_i64; - case INDEX_op_bswap16_i64: - return TCG_TARGET_HAS_bswap16_i64; - case INDEX_op_bswap32_i64: - return TCG_TARGET_HAS_bswap32_i64; - case INDEX_op_bswap64_i64: - return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_not_i64: - return TCG_TARGET_HAS_not_i64; - case INDEX_op_andc_i64: - return TCG_TARGET_HAS_andc_i64; - case INDEX_op_orc_i64: - return TCG_TARGET_HAS_orc_i64; - case INDEX_op_eqv_i64: - return TCG_TARGET_HAS_eqv_i64; - case 
INDEX_op_nand_i64: - return TCG_TARGET_HAS_nand_i64; - case INDEX_op_nor_i64: - return TCG_TARGET_HAS_nor_i64; - case INDEX_op_clz_i64: - return TCG_TARGET_HAS_clz_i64; - case INDEX_op_ctz_i64: - return TCG_TARGET_HAS_ctz_i64; - case INDEX_op_ctpop_i64: - return TCG_TARGET_HAS_ctpop_i64; - case INDEX_op_add2_i64: - return TCG_TARGET_HAS_add2_i64; - case INDEX_op_sub2_i64: - return TCG_TARGET_HAS_sub2_i64; - case INDEX_op_mulu2_i64: - return TCG_TARGET_HAS_mulu2_i64; - case INDEX_op_muls2_i64: - return TCG_TARGET_HAS_muls2_i64; - case INDEX_op_muluh_i64: - return TCG_TARGET_HAS_muluh_i64; - case INDEX_op_mulsh_i64: - return TCG_TARGET_HAS_mulsh_i64; + return TCG_TARGET_REG_BITS == 64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -2176,67 +2512,106 @@ bool tcg_op_supported(TCGOpcode op) case INDEX_op_or_vec: case INDEX_op_xor_vec: case INDEX_op_cmp_vec: - return have_vec; + return has_type; case INDEX_op_dup2_vec: - return have_vec && TCG_TARGET_REG_BITS == 32; + return has_type && TCG_TARGET_REG_BITS == 32; case INDEX_op_not_vec: - return have_vec && TCG_TARGET_HAS_not_vec; + return has_type && TCG_TARGET_HAS_not_vec; case INDEX_op_neg_vec: - return have_vec && TCG_TARGET_HAS_neg_vec; + return has_type && TCG_TARGET_HAS_neg_vec; case INDEX_op_abs_vec: - return have_vec && TCG_TARGET_HAS_abs_vec; + return has_type && TCG_TARGET_HAS_abs_vec; case INDEX_op_andc_vec: - return have_vec && TCG_TARGET_HAS_andc_vec; + return has_type && TCG_TARGET_HAS_andc_vec; case INDEX_op_orc_vec: - return have_vec && TCG_TARGET_HAS_orc_vec; + return has_type && TCG_TARGET_HAS_orc_vec; case INDEX_op_nand_vec: - return have_vec && TCG_TARGET_HAS_nand_vec; + return has_type && TCG_TARGET_HAS_nand_vec; case INDEX_op_nor_vec: - return have_vec && TCG_TARGET_HAS_nor_vec; + return has_type && TCG_TARGET_HAS_nor_vec; case INDEX_op_eqv_vec: - return have_vec && TCG_TARGET_HAS_eqv_vec; + return has_type && TCG_TARGET_HAS_eqv_vec; case INDEX_op_mul_vec: - return have_vec && TCG_TARGET_HAS_mul_vec; + return has_type && TCG_TARGET_HAS_mul_vec; case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: - return have_vec && TCG_TARGET_HAS_shi_vec; + return has_type && TCG_TARGET_HAS_shi_vec; case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: - return have_vec && TCG_TARGET_HAS_shs_vec; + return has_type && TCG_TARGET_HAS_shs_vec; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: - return have_vec && TCG_TARGET_HAS_shv_vec; + return has_type && TCG_TARGET_HAS_shv_vec; case INDEX_op_rotli_vec: - return have_vec && TCG_TARGET_HAS_roti_vec; + return has_type && TCG_TARGET_HAS_roti_vec; case INDEX_op_rotls_vec: - return have_vec && TCG_TARGET_HAS_rots_vec; + return has_type && TCG_TARGET_HAS_rots_vec; case INDEX_op_rotlv_vec: case INDEX_op_rotrv_vec: - return have_vec && TCG_TARGET_HAS_rotv_vec; + return has_type && TCG_TARGET_HAS_rotv_vec; case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: case INDEX_op_ussub_vec: - return have_vec && TCG_TARGET_HAS_sat_vec; + return has_type && TCG_TARGET_HAS_sat_vec; case INDEX_op_smin_vec: case INDEX_op_umin_vec: case INDEX_op_smax_vec: case INDEX_op_umax_vec: - return have_vec && TCG_TARGET_HAS_minmax_vec; + return has_type && TCG_TARGET_HAS_minmax_vec; case INDEX_op_bitsel_vec: - return have_vec && TCG_TARGET_HAS_bitsel_vec; + return has_type && TCG_TARGET_HAS_bitsel_vec; case INDEX_op_cmpsel_vec: - return have_vec && TCG_TARGET_HAS_cmpsel_vec; + return has_type && TCG_TARGET_HAS_cmpsel_vec; default: - 
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); + if (op < INDEX_op_last_generic) { + const TCGOutOp *outop; + TCGConstraintSetIndex con_set; + + if (!has_type) { + return false; + } + + do_lookup: + outop = all_outop[op]; + tcg_debug_assert(outop != NULL); + + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + if (con_set >= 0) { + return true; + } + tcg_debug_assert(con_set == C_NotImplemented); + return false; + } + tcg_debug_assert(op < NB_OPS); return true; + + case INDEX_op_last_generic: + g_assert_not_reached(); } } +bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len) +{ + unsigned width; + + tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64); + width = (type == TCG_TYPE_I32 ? 32 : 64); + + tcg_debug_assert(ofs < width); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= width - ofs); + + return TCG_TARGET_deposit_valid(type, ofs, len); +} + static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); static void tcg_gen_callN(void *func, TCGHelperInfo *info, @@ -2573,7 +2948,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) nb_oargs = 0; col += ne_fprintf(f, "\n ----"); - for (i = 0, k = s->insn_start_words; i < k; ++i) { + for (i = 0, k = INSN_START_WORDS; i < k; ++i) { col += ne_fprintf(f, " %016" PRIx64, tcg_get_insn_start_param(op, i)); } @@ -2610,17 +2985,23 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) col += ne_fprintf(f, ",%s", t); } } else { - col += ne_fprintf(f, " %s ", def->name); + if (def->flags & TCG_OPF_INT) { + col += ne_fprintf(f, " %s_i%d ", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op))); + } else if (def->flags & TCG_OPF_VECTOR) { + col += ne_fprintf(f, "%s v%d,e%d,", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op)), + 8 << TCGOP_VECE(op)); + } else { + col += ne_fprintf(f, " %s ", def->name); + } nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; - if (def->flags & TCG_OPF_VECTOR) { - col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op), - 8 << TCGOP_VECE(op)); - } - k = 0; for (i = 0; i < nb_oargs; i++) { const char *sep = k ? 
"," : ""; @@ -2635,16 +3016,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) op->args[k++])); } switch (c) { - case INDEX_op_brcond_i32: - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - case INDEX_op_movcond_i32: + case INDEX_op_brcond: + case INDEX_op_setcond: + case INDEX_op_negsetcond: + case INDEX_op_movcond: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: - case INDEX_op_brcond_i64: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: - case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) @@ -2655,20 +3032,10 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } i = 1; break; - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { const char *s_al, *s_op, *s_at; MemOpIdx oi = op->args[k++]; @@ -2691,11 +3058,9 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) i = 1; } break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: { TCGArg flags = op->args[k]; const char *name = NULL; @@ -2736,8 +3101,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) switch (c) { case INDEX_op_set_label: case INDEX_op_br: - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: case INDEX_op_brcond2_i32: col += ne_fprintf(f, "%s$L%d", k ? 
"," : "", arg_label(op->args[k])->id); @@ -2890,10 +3254,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } /* we give more priority to constraints with less registers */ -static int get_constraint_priority(const TCGOpDef *def, int k) +static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k) { - const TCGArgConstraint *arg_ct = &def->args_ct[k]; - int n = ctpop64(arg_ct->regs); + int n; + + arg_ct += k; + n = ctpop64(arg_ct->regs); /* * Sort constraints of a single register first, which includes output @@ -2922,10 +3288,9 @@ static int get_constraint_priority(const TCGOpDef *def, int k) } /* sort from highest priority to lowest */ -static void sort_constraints(TCGOpDef *def, int start, int n) +static void sort_constraints(TCGArgConstraint *a, int start, int n) { int i, j; - TCGArgConstraint *a = def->args_ct; for (i = 0; i < n; i++) { a[start + i].sort_index = start + i; @@ -2935,8 +3300,8 @@ static void sort_constraints(TCGOpDef *def, int start, int n) } for (i = 0; i < n - 1; i++) { for (j = i + 1; j < n; j++) { - int p1 = get_constraint_priority(def, a[start + i].sort_index); - int p2 = get_constraint_priority(def, a[start + j].sort_index); + int p1 = get_constraint_priority(a, a[start + i].sort_index); + int p2 = get_constraint_priority(a, a[start + j].sort_index); if (p1 < p2) { int tmp = a[start + i].sort_index; a[start + i].sort_index = a[start + j].sort_index; @@ -2946,56 +3311,39 @@ static void sort_constraints(TCGOpDef *def, int start, int n) } } -static void process_op_defs(TCGContext *s) -{ - TCGOpcode op; +static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS]; +static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS]; - for (op = 0; op < NB_OPS; op++) { - TCGOpDef *def = &tcg_op_defs[op]; - const TCGTargetOpDef *tdefs; +static void process_constraint_sets(void) +{ + for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) { + const TCGConstraintSet *tdefs = &constraint_sets[c]; + TCGArgConstraint *args_ct = all_cts[c]; + int nb_oargs = tdefs->nb_oargs; + int nb_iargs = tdefs->nb_iargs; + int nb_args = nb_oargs + nb_iargs; bool saw_alias_pair = false; - int i, o, i2, o2, nb_args; - - if (def->flags & TCG_OPF_NOT_PRESENT) { - continue; - } - - nb_args = def->nb_iargs + def->nb_oargs; - if (nb_args == 0) { - continue; - } - - /* - * Macro magic should make it impossible, but double-check that - * the array index is in range. Since the signness of an enum - * is implementation defined, force the result to unsigned. - */ - unsigned con_set = tcg_target_op_def(op); - tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); - tdefs = &constraint_sets[con_set]; - for (i = 0; i < nb_args; i++) { + for (int i = 0; i < nb_args; i++) { const char *ct_str = tdefs->args_ct_str[i]; - bool input_p = i >= def->nb_oargs; - - /* Incomplete TCGTargetOpDef entry. */ - tcg_debug_assert(ct_str != NULL); + bool input_p = i >= nb_oargs; + int o; switch (*ct_str) { case '0' ... '9': o = *ct_str - '0'; tcg_debug_assert(input_p); - tcg_debug_assert(o < def->nb_oargs); - tcg_debug_assert(def->args_ct[o].regs != 0); - tcg_debug_assert(!def->args_ct[o].oalias); - def->args_ct[i] = def->args_ct[o]; + tcg_debug_assert(o < nb_oargs); + tcg_debug_assert(args_ct[o].regs != 0); + tcg_debug_assert(!args_ct[o].oalias); + args_ct[i] = args_ct[o]; /* The output sets oalias. */ - def->args_ct[o].oalias = 1; - def->args_ct[o].alias_index = i; + args_ct[o].oalias = 1; + args_ct[o].alias_index = i; /* The input sets ialias. 
*/ - def->args_ct[i].ialias = 1; - def->args_ct[i].alias_index = o; - if (def->args_ct[i].pair) { + args_ct[i].ialias = 1; + args_ct[i].alias_index = o; + if (args_ct[i].pair) { saw_alias_pair = true; } tcg_debug_assert(ct_str[1] == '\0'); @@ -3003,41 +3351,41 @@ static void process_op_defs(TCGContext *s) case '&': tcg_debug_assert(!input_p); - def->args_ct[i].newreg = true; + args_ct[i].newreg = true; ct_str++; break; case 'p': /* plus */ /* Allocate to the register after the previous. */ - tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + tcg_debug_assert(i > (input_p ? nb_oargs : 0)); o = i - 1; - tcg_debug_assert(!def->args_ct[o].pair); - tcg_debug_assert(!def->args_ct[o].ct); - def->args_ct[i] = (TCGArgConstraint){ + tcg_debug_assert(!args_ct[o].pair); + tcg_debug_assert(!args_ct[o].ct); + args_ct[i] = (TCGArgConstraint){ .pair = 2, .pair_index = o, - .regs = def->args_ct[o].regs << 1, - .newreg = def->args_ct[o].newreg, + .regs = args_ct[o].regs << 1, + .newreg = args_ct[o].newreg, }; - def->args_ct[o].pair = 1; - def->args_ct[o].pair_index = i; + args_ct[o].pair = 1; + args_ct[o].pair_index = i; tcg_debug_assert(ct_str[1] == '\0'); continue; case 'm': /* minus */ /* Allocate to the register before the previous. */ - tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + tcg_debug_assert(i > (input_p ? nb_oargs : 0)); o = i - 1; - tcg_debug_assert(!def->args_ct[o].pair); - tcg_debug_assert(!def->args_ct[o].ct); - def->args_ct[i] = (TCGArgConstraint){ + tcg_debug_assert(!args_ct[o].pair); + tcg_debug_assert(!args_ct[o].ct); + args_ct[i] = (TCGArgConstraint){ .pair = 1, .pair_index = o, - .regs = def->args_ct[o].regs >> 1, - .newreg = def->args_ct[o].newreg, + .regs = args_ct[o].regs >> 1, + .newreg = args_ct[o].newreg, }; - def->args_ct[o].pair = 2; - def->args_ct[o].pair_index = i; + args_ct[o].pair = 2; + args_ct[o].pair_index = i; tcg_debug_assert(ct_str[1] == '\0'); continue; } @@ -3045,16 +3393,21 @@ static void process_op_defs(TCGContext *s) do { switch (*ct_str) { case 'i': - def->args_ct[i].ct |= TCG_CT_CONST; + args_ct[i].ct |= TCG_CT_CONST; + break; +#ifdef TCG_REG_ZERO + case 'z': + args_ct[i].ct |= TCG_CT_REG_ZERO; break; +#endif /* Include all of the target-specific constraints. */ #undef CONST #define CONST(CASE, MASK) \ - case CASE: def->args_ct[i].ct |= MASK; break; + case CASE: args_ct[i].ct |= MASK; break; #define REGS(CASE, MASK) \ - case CASE: def->args_ct[i].regs |= MASK; break; + case CASE: args_ct[i].regs |= MASK; break; #include "tcg-target-con-str.h" @@ -3065,15 +3418,12 @@ static void process_op_defs(TCGContext *s) case '&': case 'p': case 'm': - /* Typo in TCGTargetOpDef constraint. */ + /* Typo in TCGConstraintSet constraint. */ g_assert_not_reached(); } } while (*++ct_str != '\0'); } - /* TCGTargetOpDef entry with too much information? */ - tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); - /* * Fix up output pairs that are aliased with inputs. * When we created the alias, we copied pair from the output. @@ -3094,51 +3444,53 @@ static void process_op_defs(TCGContext *s) * first output to pair=3, and the pair_index'es to match. */ if (saw_alias_pair) { - for (i = def->nb_oargs; i < nb_args; i++) { + for (int i = nb_oargs; i < nb_args; i++) { + int o, o2, i2; + /* * Since [0-9pm] must be alone in the constraint string, * the only way they can both be set is if the pair comes * from the output alias. 
*/ - if (!def->args_ct[i].ialias) { + if (!args_ct[i].ialias) { continue; } - switch (def->args_ct[i].pair) { + switch (args_ct[i].pair) { case 0: break; case 1: - o = def->args_ct[i].alias_index; - o2 = def->args_ct[o].pair_index; - tcg_debug_assert(def->args_ct[o].pair == 1); - tcg_debug_assert(def->args_ct[o2].pair == 2); - if (def->args_ct[o2].oalias) { + o = args_ct[i].alias_index; + o2 = args_ct[o].pair_index; + tcg_debug_assert(args_ct[o].pair == 1); + tcg_debug_assert(args_ct[o2].pair == 2); + if (args_ct[o2].oalias) { /* Case 1a */ - i2 = def->args_ct[o2].alias_index; - tcg_debug_assert(def->args_ct[i2].pair == 2); - def->args_ct[i2].pair_index = i; - def->args_ct[i].pair_index = i2; + i2 = args_ct[o2].alias_index; + tcg_debug_assert(args_ct[i2].pair == 2); + args_ct[i2].pair_index = i; + args_ct[i].pair_index = i2; } else { /* Case 1b */ - def->args_ct[i].pair_index = i; + args_ct[i].pair_index = i; } break; case 2: - o = def->args_ct[i].alias_index; - o2 = def->args_ct[o].pair_index; - tcg_debug_assert(def->args_ct[o].pair == 2); - tcg_debug_assert(def->args_ct[o2].pair == 1); - if (def->args_ct[o2].oalias) { + o = args_ct[i].alias_index; + o2 = args_ct[o].pair_index; + tcg_debug_assert(args_ct[o].pair == 2); + tcg_debug_assert(args_ct[o2].pair == 1); + if (args_ct[o2].oalias) { /* Case 1a */ - i2 = def->args_ct[o2].alias_index; - tcg_debug_assert(def->args_ct[i2].pair == 1); - def->args_ct[i2].pair_index = i; - def->args_ct[i].pair_index = i2; + i2 = args_ct[o2].alias_index; + tcg_debug_assert(args_ct[i2].pair == 1); + args_ct[i2].pair_index = i; + args_ct[i].pair_index = i2; } else { /* Case 2 */ - def->args_ct[i].pair = 3; - def->args_ct[o2].pair = 3; - def->args_ct[i].pair_index = o2; - def->args_ct[o2].pair_index = i; + args_ct[i].pair = 3; + args_ct[o2].pair = 3; + args_ct[i].pair_index = o2; + args_ct[o2].pair_index = i; } break; default: @@ -3148,9 +3500,40 @@ static void process_op_defs(TCGContext *s) } /* sort the constraints (XXX: this is just an heuristic) */ - sort_constraints(def, 0, def->nb_oargs); - sort_constraints(def, def->nb_oargs, def->nb_iargs); + sort_constraints(args_ct, 0, nb_oargs); + sort_constraints(args_ct, nb_oargs, nb_iargs); + } +} + +static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) +{ + TCGOpcode opc = op->opc; + TCGType type = TCGOP_TYPE(op); + unsigned flags = TCGOP_FLAGS(op); + const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOutOp *outop = all_outop[opc]; + TCGConstraintSetIndex con_set; + + if (def->flags & TCG_OPF_NOT_PRESENT) { + return empty_cts; + } + + if (outop) { + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + } else { + con_set = tcg_target_op_def(opc, type, flags); } + tcg_debug_assert(con_set >= 0); + tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); + + /* The constraint arguments must match TCGOpcode arguments. 
*/ + tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); + tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs); + + return all_cts[con_set]; } static void remove_label_use(TCGOp *op, int idx) @@ -3173,8 +3556,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) case INDEX_op_br: remove_label_use(op, 0); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: remove_label_use(op, 3); break; case INDEX_op_brcond2_i32: @@ -3246,17 +3628,21 @@ TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) } TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); + + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); + + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } @@ -3271,8 +3657,7 @@ static void move_label_uses(TCGLabel *to, TCGLabel *from) case INDEX_op_br: op->args[0] = label_arg(to); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op->args[3] = label_arg(to); break; case INDEX_op_brcond2_i32: @@ -3591,6 +3976,17 @@ liveness_pass_0(TCGContext *s) } } +static void assert_carry_dead(TCGContext *s) +{ + /* + * Carry operations can be separated by a few insns like mov, + * load or store, but they should always be "close", and + * carry-out operations should always be paired with carry-in. + * At various boundaries, carry must have been consumed. + */ + tcg_debug_assert(!s->carry_live); +} + /* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ @@ -3601,27 +3997,28 @@ liveness_pass_1(TCGContext *s) int nb_temps = s->nb_temps; TCGOp *op, *op_prev; TCGRegSet *prefs; - int i; prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); - for (i = 0; i < nb_temps; ++i) { + for (int i = 0; i < nb_temps; ++i) { s->temps[i].state_ptr = prefs + i; } /* ??? Should be redundant with the exit_tb that ends the TB. */ la_func_end(s, nb_globals, nb_temps); + s->carry_live = false; QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; - bool have_opc_new2; TCGLifeData arg_life = 0; TCGTemp *ts; TCGOpcode opc = op->opc; - const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOpDef *def; + const TCGArgConstraint *args_ct; switch (opc) { case INDEX_op_call: + assert_carry_dead(s); { const TCGHelperInfo *info = tcg_call_info(op); int call_flags = tcg_call_flags(op); @@ -3631,7 +4028,7 @@ liveness_pass_1(TCGContext *s) /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state != TS_DEAD) { goto do_not_remove_call; @@ -3642,7 +4039,7 @@ liveness_pass_1(TCGContext *s) do_not_remove_call: /* Output args are dead. */ - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -3665,7 +4062,7 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this helper. 
*/ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -3685,7 +4082,7 @@ liveness_pass_1(TCGContext *s) * order so that if a temp is used more than once, the stack * reset to max happens before the register reset to 0. */ - for (i = nb_iargs - 1; i >= 0; i--) { + for (int i = nb_iargs - 1; i >= 0; i--) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -3713,7 +4110,7 @@ liveness_pass_1(TCGContext *s) * If a temp is used once, this produces a single set bit; * if a temp is used multiple times, this produces a set. */ - for (i = 0; i < nb_iargs; i++) { + for (int i = 0; i < nb_iargs; i++) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -3733,6 +4130,7 @@ liveness_pass_1(TCGContext *s) } break; case INDEX_op_insn_start: + assert_carry_dead(s); break; case INDEX_op_discard: /* mark the temporary as dead */ @@ -3741,62 +4139,15 @@ liveness_pass_1(TCGContext *s) la_reset_pref(ts); break; - case INDEX_op_add2_i32: - opc_new = INDEX_op_add_i32; - goto do_addsub2; - case INDEX_op_sub2_i32: - opc_new = INDEX_op_sub_i32; - goto do_addsub2; - case INDEX_op_add2_i64: - opc_new = INDEX_op_add_i64; - goto do_addsub2; - case INDEX_op_sub2_i64: - opc_new = INDEX_op_sub_i64; - do_addsub2: - nb_iargs = 4; - nb_oargs = 2; - /* Test if the high part of the operation is dead, but not - the low part. The result can be optimized to a simple - add or sub. This happens often for x86_64 guest when the - cpu mode is set to 32 bit. */ - if (arg_temp(op->args[1])->state == TS_DEAD) { - if (arg_temp(op->args[0])->state == TS_DEAD) { - goto do_remove; - } - /* Replace the opcode and adjust the args in place, - leaving 3 unused args at the end. */ - op->opc = opc = opc_new; - op->args[1] = op->args[2]; - op->args[2] = op->args[4]; - /* Fall through and mark the single-word operation live. */ - nb_iargs = 2; - nb_oargs = 1; - } - goto do_not_remove; - - case INDEX_op_mulu2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_muluh_i32; - have_opc_new2 = TCG_TARGET_HAS_muluh_i32; - goto do_mul2; - case INDEX_op_muls2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_mulsh_i32; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; - goto do_mul2; - case INDEX_op_mulu2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_muluh_i64; - have_opc_new2 = TCG_TARGET_HAS_muluh_i64; - goto do_mul2; - case INDEX_op_muls2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_mulsh_i64; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; + case INDEX_op_muls2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_mulsh; goto do_mul2; + case INDEX_op_mulu2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_muluh; do_mul2: - nb_iargs = 2; - nb_oargs = 2; + assert_carry_dead(s); if (arg_temp(op->args[1])->state == TS_DEAD) { if (arg_temp(op->args[0])->state == TS_DEAD) { /* Both parts of the operation are dead. */ @@ -3806,7 +4157,8 @@ liveness_pass_1(TCGContext *s) op->opc = opc = opc_new; op->args[1] = op->args[2]; op->args[2] = op->args[3]; - } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { + } else if (arg_temp(op->args[0])->state == TS_DEAD && + tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { /* The low part of the operation is dead; generate the high. 
*/ op->opc = opc = opc_new2; op->args[0] = op->args[1]; @@ -3816,19 +4168,94 @@ liveness_pass_1(TCGContext *s) goto do_not_remove; } /* Mark the single-word operation live. */ - nb_oargs = 1; goto do_not_remove; - default: - /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ - nb_iargs = def->nb_iargs; - nb_oargs = def->nb_oargs; + case INDEX_op_addco: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_add; + goto do_default; + + case INDEX_op_addcio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_addci; + goto do_default; + + case INDEX_op_subbo: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, but this may also require canonicalization. */ + op->opc = opc = INDEX_op_sub; + ts = arg_temp(op->args[2]); + if (ts->kind == TEMP_CONST) { + ts = tcg_constant_internal(ts->type, -ts->val); + if (ts->state_ptr == NULL) { + tcg_debug_assert(temp_idx(ts) == nb_temps); + nb_temps++; + ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); + ts->state = TS_DEAD; + la_reset_pref(ts); + } + op->args[2] = temp_arg(ts); + op->opc = opc = INDEX_op_add; + } + goto do_default; + + case INDEX_op_subbio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_subbi; + goto do_default; + + case INDEX_op_addc1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to add, add +1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_add, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, 1); + op->args[2] = temp_arg(ts); + goto do_default; + + case INDEX_op_subb1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, add -1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, -1); + op->args[2] = temp_arg(ts); + goto do_default; - /* Test if the operation can be removed because all - its outputs are dead. We assume that nb_oargs == 0 - implies side effects */ - if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for (i = 0; i < nb_oargs; i++) { + default: + do_default: + /* + * Test if the operation can be removed because all + * its outputs are dead. We assume that nb_oargs == 0 + * implies side effects. + */ + def = &tcg_op_defs[opc]; + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { + for (int i = def->nb_oargs - 1; i >= 0; i--) { if (arg_temp(op->args[i])->state != TS_DEAD) { goto do_not_remove; } @@ -3842,7 +4269,11 @@ liveness_pass_1(TCGContext *s) break; do_not_remove: - for (i = 0; i < nb_oargs; i++) { + def = &tcg_op_defs[opc]; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); /* Remember the preference of the uses that followed. */ @@ -3863,12 +4294,16 @@ liveness_pass_1(TCGContext *s) /* If end of basic block, update. 
*/ if (def->flags & TCG_OPF_BB_EXIT) { + assert_carry_dead(s); la_func_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_COND_BRANCH) { + assert_carry_dead(s); la_bb_sync(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_BB_END) { + assert_carry_dead(s); la_bb_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + assert_carry_dead(s); la_global_sync(s, nb_globals); if (def->flags & TCG_OPF_CALL_CLOBBER) { la_cross_call(s, nb_temps); @@ -3876,15 +4311,18 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this opcode. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; } } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = false; + } /* Input arguments are live for preceding opcodes. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { /* For operands that were dead, initially allow @@ -3893,11 +4331,13 @@ liveness_pass_1(TCGContext *s) ts->state &= ~TS_DEAD; } } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = true; + } /* Incorporate constraints for this operand. */ switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: /* Note that these are TCG_OPF_NOT_PRESENT and do not have proper constraints. That said, special case moves to propagate preferences backward. */ @@ -3908,8 +4348,9 @@ liveness_pass_1(TCGContext *s) break; default: - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - const TCGArgConstraint *ct = &def->args_ct[i]; + args_ct = opcode_args_ct(op); + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + const TCGArgConstraint *ct = &args_ct[i]; TCGRegSet set, *pset; ts = arg_temp(op->args[i]); @@ -3932,6 +4373,7 @@ liveness_pass_1(TCGContext *s) } op->life = arg_life; } + assert_carry_dead(s); } /* Liveness analysis: Convert indirect regs to direct temporaries. */ @@ -4002,10 +4444,8 @@ liveness_pass_2(TCGContext *s) arg_ts = arg_temp(op->args[i]); dir_ts = arg_ts->state_ptr; if (dir_ts && arg_ts->state == TS_DEAD) { - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_ld_i32 - : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld, + arg_ts->type, 3); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -4054,7 +4494,7 @@ liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + if (opc == INDEX_op_mov) { arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; if (dir_ts) { @@ -4065,10 +4505,8 @@ liveness_pass_2(TCGContext *s) arg_ts->state = 0; if (NEED_SYNC_ARG(0)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, + arg_ts->type, 3); TCGTemp *out_ts = dir_ts; if (IS_DEAD_ARG(0)) { @@ -4101,10 +4539,8 @@ liveness_pass_2(TCGContext *s) /* Sync outputs upon their last write. */ if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? 
INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, + arg_ts->type, 3); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); @@ -4462,6 +4898,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, ts->mem_coherent = 0; break; case TEMP_VAL_MEM: + if (!ts->mem_allocated) { + temp_allocate_frame(s, ts); + } reg = tcg_reg_alloc(s, desired_regs, allocated_regs, preferred_regs, ts->indirect_base); tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset); @@ -4514,9 +4953,8 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) all globals are stored at their canonical location. */ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) { - int i; - - for (i = s->nb_globals; i < s->nb_temps; i++) { + assert_carry_dead(s); + for (int i = s->nb_globals; i < s->nb_temps; i++) { TCGTemp *ts = &s->temps[i]; switch (ts->kind) { @@ -4547,6 +4985,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) */ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) { + assert_carry_dead(s); sync_globals(s, allocated_regs); for (int i = s->nb_globals; i < s->nb_temps; i++) { @@ -4696,6 +5135,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; TCGRegSet dup_out_regs, dup_in_regs; + const TCGArgConstraint *dup_args_ct; TCGTemp *its, *ots; TCGType itype, vtype; unsigned vece; @@ -4710,11 +5150,11 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) itype = its->type; vece = TCGOP_VECE(op); - vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + vtype = TCGOP_TYPE(op); if (its->val_type == TEMP_VAL_CONST) { /* Propagate constant via movi -> dupi. */ - tcg_target_ulong val = its->val; + tcg_target_ulong val = dup_const(vece, its->val); if (IS_DEAD_ARG(1)) { temp_dead(s, its); } @@ -4722,8 +5162,9 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) return; } - dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; - dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs; + dup_args_ct = opcode_args_ct(op); + dup_out_regs = dup_args_ct[0].regs; + dup_in_regs = dup_args_ct[1].regs; /* Allocate the output register now. 
*/ if (ots->val_type != TEMP_VAL_REG) { @@ -4809,12 +5250,17 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) int i, k, nb_iargs, nb_oargs; TCGReg reg; TCGArg arg; + const TCGArgConstraint *args_ct; const TCGArgConstraint *arg_ct; TCGTemp *ts; TCGArg new_args[TCG_MAX_OP_ARGS]; int const_args[TCG_MAX_OP_ARGS]; TCGCond op_cond; + if (def->flags & TCG_OPF_CARRY_IN) { + tcg_debug_assert(s->carry_live); + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -4827,22 +5273,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) o_allocated_regs = s->reserved_regs; switch (op->opc) { - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op_cond = op->args[2]; break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: + case INDEX_op_setcond: + case INDEX_op_negsetcond: case INDEX_op_cmp_vec: op_cond = op->args[3]; break; case INDEX_op_brcond2_i32: op_cond = op->args[4]; break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: + case INDEX_op_movcond: case INDEX_op_setcond2_i32: case INDEX_op_cmpsel_vec: op_cond = op->args[5]; @@ -4853,6 +5295,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) break; } + args_ct = opcode_args_ct(op); + /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { TCGRegSet i_preferred_regs, i_required_regs; @@ -4860,18 +5304,28 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) TCGTemp *ts2; int i1, i2; - i = def->args_ct[nb_oargs + k].sort_index; + i = args_ct[nb_oargs + k].sort_index; arg = op->args[i]; - arg_ct = &def->args_ct[i]; + arg_ct = &args_ct[i]; ts = arg_temp(arg); - if (ts->val_type == TEMP_VAL_CONST - && tcg_target_const_match(ts->val, arg_ct->ct, ts->type, - op_cond, TCGOP_VECE(op))) { - /* constant is OK for instruction */ - const_args[i] = 1; - new_args[i] = ts->val; - continue; + if (ts->val_type == TEMP_VAL_CONST) { +#ifdef TCG_REG_ZERO + if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) { + /* Hardware zero register: indicate register via non-const. */ + const_args[i] = 0; + new_args[i] = TCG_REG_ZERO; + continue; + } +#endif + + if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type, + op_cond, TCGOP_VECE(op))) { + /* constant is OK for instruction */ + const_args[i] = 1; + new_args[i] = ts->val; + continue; + } } reg = ts->reg; @@ -4892,7 +5346,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) * register and move it. */ if (temp_readonly(ts) || !IS_DEAD_ARG(i) - || def->args_ct[arg_ct->alias_index].newreg) { + || args_ct[arg_ct->alias_index].newreg) { allocate_new_reg = true; } else if (ts->val_type == TEMP_VAL_REG) { /* @@ -5063,6 +5517,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { + assert_carry_dead(s); /* XXX: permit generic clobber register list ? */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { @@ -5077,10 +5532,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } /* satisfy the output constraints */ - for(k = 0; k < nb_oargs; k++) { - i = def->args_ct[k].sort_index; + for (k = 0; k < nb_oargs; k++) { + i = args_ct[k].sort_index; arg = op->args[i]; - arg_ct = &def->args_ct[i]; + arg_ct = &args_ct[i]; ts = arg_temp(arg); /* ENV should not be modified. 
*/ @@ -5139,50 +5594,345 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } /* emit instruction */ + TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); + case INDEX_op_addc1o: + tcg_out_set_carry(s); + /* fall through */ + case INDEX_op_add: + case INDEX_op_addcio: + case INDEX_op_addco: + case INDEX_op_and: + case INDEX_op_andc: + case INDEX_op_clz: + case INDEX_op_ctz: + case INDEX_op_divs: + case INDEX_op_divu: + case INDEX_op_eqv: + case INDEX_op_mul: + case INDEX_op_mulsh: + case INDEX_op_muluh: + case INDEX_op_nand: + case INDEX_op_nor: + case INDEX_op_or: + case INDEX_op_orc: + case INDEX_op_rems: + case INDEX_op_remu: + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: + case INDEX_op_xor: + { + const TCGOutOpBinary *out = + container_of(all_outop[op->opc], TCGOutOpBinary, base); + + /* Constants should never appear in the first source operand. */ + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + + case INDEX_op_sub: + { + const TCGOutOpSubtract *out = &outop_sub; + + /* + * Constants should never appear in the second source operand. + * These are folded to add with negative constant. + */ + tcg_debug_assert(!const_args[2]); + if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + + case INDEX_op_subb1o: + tcg_out_set_borrow(s); + /* fall through */ + case INDEX_op_addci: + case INDEX_op_subbi: + case INDEX_op_subbio: + case INDEX_op_subbo: + { + const TCGOutOpAddSubCarry *out = + container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); + + if (const_args[2]) { + if (const_args[1]) { + out->out_rii(s, type, new_args[0], + new_args[1], new_args[2]); + } else { + out->out_rri(s, type, new_args[0], + new_args[1], new_args[2]); + } + } else if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + + case INDEX_op_bswap64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* fall through */ + case INDEX_op_ctpop: + case INDEX_op_neg: + case INDEX_op_not: + { + const TCGOutOpUnary *out = + container_of(all_outop[op->opc], TCGOutOpUnary, base); + + /* Constants should have been folded. 
*/ + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1]); + } + break; + + case INDEX_op_bswap16: + case INDEX_op_bswap32: + { + const TCGOutOpBswap *out = + container_of(all_outop[op->opc], TCGOutOpBswap, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); + } break; - case INDEX_op_ext8s_i64: - tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); + + case INDEX_op_deposit: + { + const TCGOutOpDeposit *out = &outop_deposit; + + if (const_args[2]) { + tcg_debug_assert(!const_args[1]); + out->out_rri(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } else if (const_args[1]) { + tcg_debug_assert(new_args[1] == 0); + tcg_debug_assert(!const_args[2]); + out->out_rzr(s, type, new_args[0], new_args[2], + new_args[3], new_args[4]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } + } break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext8u(s, new_args[0], new_args[1]); + + case INDEX_op_divs2: + case INDEX_op_divu2: + { + const TCGOutOpDivRem *out = + container_of(all_outop[op->opc], TCGOutOpDivRem, base); + + /* Only used by x86 and s390x, which use matching constraints. */ + tcg_debug_assert(new_args[0] == new_args[2]); + tcg_debug_assert(new_args[1] == new_args[3]); + tcg_debug_assert(!const_args[4]); + out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); + } break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); + + case INDEX_op_extract: + case INDEX_op_sextract: + { + const TCGOutOpExtract *out = + container_of(all_outop[op->opc], TCGOutOpExtract, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_ext16s_i64: - tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); + + case INDEX_op_extract2: + { + const TCGOutOpExtract2 *out = &outop_extract2; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext16u(s, new_args[0], new_args[1]); + + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_ld: + { + const TCGOutOpLoad *out = + container_of(all_outop[op->opc], TCGOutOpLoad, base); + + tcg_debug_assert(!const_args[1]); + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } break; - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, new_args[0], new_args[1]); + + case INDEX_op_muls2: + case INDEX_op_mulu2: + { + const TCGOutOpMul2 *out = + container_of(all_outop[op->opc], TCGOutOpMul2, base); + + tcg_debug_assert(!const_args[2]); + tcg_debug_assert(!const_args[3]); + out->out_rrrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, new_args[0], new_args[1]); + + case INDEX_op_st32: + /* Use tcg_op_st w/ I32. 
*/ + type = TCG_TYPE_I32; + /* fall through */ + case INDEX_op_st: + case INDEX_op_st8: + case INDEX_op_st16: + { + const TCGOutOpStore *out = + container_of(all_outop[op->opc], TCGOutOpStore, base); + + if (const_args[0]) { + out->out_i(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_r(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; - case INDEX_op_ext_i32_i64: - tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); + + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + { + const TCGOutOpQemuLdSt *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base); + + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } break; - case INDEX_op_extu_i32_i64: - tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); + + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + { + const TCGOutOpQemuLdSt2 *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base); + + out->out(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_extrl_i64_i32: - tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); + + case INDEX_op_brcond: + { + const TCGOutOpBrcond *out = &outop_brcond; + TCGCond cond = new_args[2]; + TCGLabel *label = arg_label(new_args[3]); + + tcg_debug_assert(!const_args[0]); + if (const_args[1]) { + out->out_ri(s, type, cond, new_args[0], new_args[1], label); + } else { + out->out_rr(s, type, cond, new_args[0], new_args[1], label); + } + } break; - default: - if (def->flags & TCG_OPF_VECTOR) { - tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op), - new_args, const_args); - } else { - tcg_out_op(s, op->opc, new_args, const_args); + + case INDEX_op_movcond: + { + const TCGOutOpMovcond *out = &outop_movcond; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + out->out(s, type, cond, new_args[0], + new_args[1], new_args[2], const_args[2], + new_args[3], const_args[3], + new_args[4], const_args[4]); + } + break; + + case INDEX_op_setcond: + case INDEX_op_negsetcond: + { + const TCGOutOpSetcond *out = + container_of(all_outop[op->opc], TCGOutOpSetcond, base); + TCGCond cond = new_args[3]; + + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } + } + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + { + const TCGOutOpBrcond2 *out = &outop_brcond2; + TCGCond cond = new_args[4]; + TCGLabel *label = arg_label(new_args[5]); + + tcg_debug_assert(!const_args[0]); + tcg_debug_assert(!const_args[1]); + out->out(s, cond, new_args[0], new_args[1], + new_args[2], const_args[2], + new_args[3], const_args[3], label); } break; + case INDEX_op_setcond2_i32: + { + const TCGOutOpSetcond2 *out = &outop_setcond2; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out(s, cond, new_args[0], new_args[1], new_args[2], + new_args[3], const_args[3], new_args[4], const_args[4]); + } + break; +#else + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + g_assert_not_reached(); +#endif + + case INDEX_op_goto_ptr: + tcg_debug_assert(!const_args[0]); + tcg_out_goto_ptr(s, new_args[0]); + break; + + default: + tcg_debug_assert(def->flags & TCG_OPF_VECTOR); + tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, + TCGOP_VECE(op), new_args, const_args); + break; + } + + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = false; + } + if (def->flags & TCG_OPF_CARRY_OUT) { + 
s->carry_live = true; } /* move the outputs in the correct register if needed */ @@ -5204,7 +5954,7 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) { const TCGLifeData arg_life = op->life; TCGTemp *ots, *itsl, *itsh; - TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64; + TCGType vtype = TCGOP_TYPE(op); /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */ tcg_debug_assert(TCG_TARGET_REG_BITS == 32); @@ -5220,8 +5970,7 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) /* Allocate the output register now. */ if (ots->val_type != TEMP_VAL_REG) { TCGRegSet allocated_regs = s->reserved_regs; - TCGRegSet dup_out_regs = - tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; + TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs; TCGReg oreg; /* Make sure to not spill the input registers. */ @@ -5506,7 +6255,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc, MemOp host_atom, bool allow_two_ops) { - MemOp align = get_alignment_bits(opc); + MemOp align = memop_alignment_bits(opc); MemOp size = opc & MO_SIZE; MemOp half = size ? size - 1 : 0; MemOp atom = opc & MO_ATOM_MASK; @@ -5852,7 +6601,7 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, */ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, - ldst->addrlo_reg, -1); + ldst->addr_reg, -1); tcg_out_helper_load_slots(s, 1, mov, parm); tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot, @@ -5860,7 +6609,7 @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, next_arg += 2; } else { nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, - ldst->addrlo_reg, ldst->addrhi_reg); + ldst->addr_reg, -1); tcg_out_helper_load_slots(s, nmov, mov, parm); next_arg += nmov; } @@ -6017,21 +6766,22 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, /* Handle addr argument. */ loc = &info->in[next_arg]; - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { + tcg_debug_assert(s->addr_type <= TCG_TYPE_REG); + if (TCG_TARGET_REG_BITS == 32) { /* - * 32-bit host with 32-bit guest: zero-extend the guest address + * 32-bit host (and thus 32-bit guest): zero-extend the guest address * to 64-bits for the helper by storing the low part. Later, * after we have processed the register inputs, we will load a * zero for the high part. */ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN, TCG_TYPE_I32, TCG_TYPE_I32, - ldst->addrlo_reg, -1); + ldst->addr_reg, -1); next_arg += 2; nmov += 1; } else { n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type, - ldst->addrlo_reg, ldst->addrhi_reg); + ldst->addr_reg, -1); next_arg += n; nmov += n; } @@ -6079,7 +6829,7 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, g_assert_not_reached(); } - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) { + if (TCG_TARGET_REG_BITS == 32) { /* Zero extend the address by loading a zero for the high part. 
*/ loc = &info->in[1 + !HOST_BIG_ENDIAN]; tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm); @@ -6090,7 +6840,7 @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst, int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) { - int i, start_words, num_insns; + int i, num_insns; TCGOp *op; if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) @@ -6121,6 +6871,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) } #endif + /* Do not reuse any EBB that may be allocated within the TB. */ + tcg_temp_ebb_reset_freed(s); + tcg_optimize(s); reachable_code_pass(s); @@ -6172,27 +6925,39 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) */ s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr); s->code_ptr = s->code_buf; + s->data_gen_ptr = NULL; -#ifdef TCG_TARGET_NEED_LDST_LABELS QSIMPLEQ_INIT(&s->ldst_labels); -#endif -#ifdef TCG_TARGET_NEED_POOL_LABELS s->pool_labels = NULL; -#endif - start_words = s->insn_start_words; s->gen_insn_data = - tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words); + tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * INSN_START_WORDS); tcg_out_tb_start(s); num_insns = -1; + s->carry_live = false; QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_extrl_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* + * If TCG_TYPE_I32 is represented in some canonical form, + * e.g. zero or sign-extended, then emit as a unary op. + * Otherwise we can treat this as a plain move. + * If the output dies, treat this as a plain move, because + * this will be implemented with a store. + */ + if (TCG_TARGET_HAS_extr_i64_i32) { + TCGLifeData arg_life = op->life; + if (!IS_DEAD_ARG(0)) { + goto do_default; + } + } + /* fall through */ + case INDEX_op_mov: case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); break; @@ -6200,6 +6965,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_reg_alloc_dup(s, op); break; case INDEX_op_insn_start: + assert_carry_dead(s); if (num_insns >= 0) { size_t off = tcg_current_code_size(s); s->gen_insn_end_off[num_insns] = off; @@ -6207,8 +6973,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) assert(s->gen_insn_end_off[num_insns] == off); } num_insns++; - for (i = 0; i < start_words; ++i) { - s->gen_insn_data[num_insns * start_words + i] = + for (i = 0; i < INSN_START_WORDS; ++i) { + s->gen_insn_data[num_insns * INSN_START_WORDS + i] = tcg_get_insn_start_param(op, i); } break; @@ -6220,6 +6986,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_out_label(s, arg_label(op->args[0])); break; case INDEX_op_call: + assert_carry_dead(s); tcg_reg_alloc_call(s, op); break; case INDEX_op_exit_tb: @@ -6228,14 +6995,22 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) case INDEX_op_goto_tb: tcg_out_goto_tb(s, op->args[0]); break; + case INDEX_op_br: + tcg_out_br(s, arg_label(op->args[0])); + break; + case INDEX_op_mb: + tcg_out_mb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; } /* fall through */ default: + do_default: /* Sanity check that we've not introduced any unhandled opcodes. 
 */
-            tcg_debug_assert(tcg_op_supported(opc));
+            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
+                                              TCGOP_FLAGS(op)));
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
@@ -6254,22 +7029,20 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
             return -2;
         }
     }
+    assert_carry_dead(s);
+
     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
 
     /* Generate TB finalization at the end of block */
-#ifdef TCG_TARGET_NEED_LDST_LABELS
     i = tcg_out_ldst_finalize(s);
     if (i < 0) {
         return i;
     }
-#endif
-#ifdef TCG_TARGET_NEED_POOL_LABELS
     i = tcg_out_pool_finalize(s);
     if (i < 0) {
         return i;
     }
-#endif
     if (!tcg_resolve_relocs(s)) {
         return -2;
     }
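
Note on the carry-liveness bookkeeping added above (s->carry_live, assert_carry_dead, TCG_OPF_CARRY_IN/OUT): the rule is easiest to see in a tiny standalone model. The following is a sketch, not QEMU code; the flag values and opcode names are invented for illustration only. Walking ops backwards, a CARRY_IN op makes the carry live for earlier ops, a CARRY_OUT op satisfies that demand, and at block boundaries the carry must already be dead:

/* Minimal model of the backward carry-liveness rule in liveness_pass_1.
 * Names and flag encodings are invented for this sketch. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum { F_NONE = 0, F_CARRY_IN = 1, F_CARRY_OUT = 2, F_BB_END = 4 };

typedef struct { const char *name; unsigned flags; } Op;

int main(void)
{
    /* addco feeds addci, as add2 is now decomposed into a carry pair. */
    Op ops[] = {
        { "addco", F_CARRY_OUT },
        { "addci", F_CARRY_IN },
        { "br",    F_BB_END },
    };
    bool carry_live = false;

    for (int i = (int)(sizeof(ops) / sizeof(ops[0])) - 1; i >= 0; i--) {
        if (ops[i].flags & F_BB_END) {
            assert(!carry_live);        /* assert_carry_dead() */
        }
        if (ops[i].flags & F_CARRY_OUT) {
            carry_live = false;         /* producer satisfies the demand */
        }
        if (ops[i].flags & F_CARRY_IN) {
            carry_live = true;          /* demand a carry from earlier ops */
        }
        printf("%-6s carry_live=%d\n", ops[i].name, carry_live);
    }
    assert(!carry_live);                /* dead again at the start of the BB */
    return 0;
}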
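
On the dead-borrow lowering of INDEX_op_subbo in liveness_pass_1 above: when the borrow output is unused, a register-register subbo simply becomes sub, while subtracting a constant is canonicalized to an add of the negated constant, so backends only need an immediate form for add. This sketch models that decision with invented helper and enum names; it is not the QEMU code path:

#include <stdint.h>
#include <stdio.h>

typedef enum { OP_SUB, OP_ADD } LoweredOp;

/* Model of the canonicalization: x - C  ->  x + (-C) when C is constant. */
static LoweredOp lower_sub_dead_borrow(int64_t *imm, int rhs_is_const)
{
    if (rhs_is_const) {
        *imm = -*imm;               /* fold the negation into the constant */
        return OP_ADD;
    }
    return OP_SUB;                  /* register operand: keep the subtract */
}

int main(void)
{
    int64_t imm = 5;
    LoweredOp op = lower_sub_dead_borrow(&imm, 1);
    printf("%s %lld\n", op == OP_ADD ? "add" : "sub", (long long)imm);
    return 0;
}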
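
On the new emission path in tcg_reg_alloc_op above: instead of one large tcg_out_op switch, each opcode is dispatched through a per-opcode record (TCGOutOpBinary, TCGOutOpUnary, ...) found via all_outop[] and container_of, and the register-register or register-immediate callback is chosen from const_args. The sketch below models only the shape of that dispatch; the struct layout, names, and mnemonics are invented stand-ins, not the QEMU definitions:

#include <stddef.h>
#include <stdio.h>

typedef struct OutOpBase { const char *mnemonic; } OutOpBase;

typedef struct {
    OutOpBase base;
    void (*out_rrr)(const OutOpBase *, int d, int a, int b);
    void (*out_rri)(const OutOpBase *, int d, int a, long imm);
} OutOpBinary;

static void emit_rrr(const OutOpBase *b, int d, int a, int r)
{
    printf("%s r%d, r%d, r%d\n", b->mnemonic, d, a, r);
}
static void emit_rri(const OutOpBase *b, int d, int a, long imm)
{
    printf("%si r%d, r%d, #%ld\n", b->mnemonic, d, a, imm);
}

static const OutOpBinary outop_add = { { "add" }, emit_rrr, emit_rri };

enum { IDX_add };
static const OutOpBase *all_out[] = { [IDX_add] = &outop_add.base };

int main(void)
{
    /* Downcast from the base pointer, analogous to
     * container_of(all_outop[op->opc], TCGOutOpBinary, base). */
    const OutOpBinary *out = (const OutOpBinary *)all_out[IDX_add];

    out->out_rrr(&out->base, 0, 1, 2);   /* both inputs in registers */
    out->out_rri(&out->base, 0, 1, 64);  /* constant satisfied as immediate */
    return 0;
}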
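
On the TCG_REG_ZERO / TCG_CT_REG_ZERO case added to the input-constraint loop: a constant zero input can be satisfied by the host's hardwired zero register and is then reported as a non-constant register operand. A sketch of that selection, with made-up constraint bits and a made-up register number (not the QEMU macros):

#include <stdbool.h>
#include <stdio.h>

#define CT_REG_ZERO  0x1
#define CT_CONST_IMM 0x2
#define REG_ZERO     31   /* purely illustrative encoding of a zero register */

static void pick_input(long val, bool is_const, int ct,
                       int *reg, bool *as_const)
{
    if (is_const && val == 0 && (ct & CT_REG_ZERO)) {
        *reg = REG_ZERO;         /* use the hardware zero register */
        *as_const = false;
        return;
    }
    if (is_const && (ct & CT_CONST_IMM)) {
        *as_const = true;        /* encode as an immediate */
        return;
    }
    *as_const = false;           /* otherwise load the value into a register */
}

int main(void)
{
    int reg = -1;
    bool as_const;

    pick_input(0, true, CT_REG_ZERO | CT_CONST_IMM, &reg, &as_const);
    printf("reg=%d const=%d\n", reg, as_const);
    return 0;
}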
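
On the tcg_reg_alloc_dup change that propagates constants through dup_const(vece, its->val): the element must be replicated across the 64-bit immediate before it reaches the vector move. Below is a simplified reimplementation of that replication for the integer element sizes, for illustration only; the real dup_const also covers the 64-bit case via a macro and asserts on invalid element sizes:

#include <stdint.h>
#include <stdio.h>

static uint64_t dup_const_sketch(unsigned vece, uint64_t val)
{
    switch (vece) {
    case 0: return 0x0101010101010101ull * (uint8_t)val;   /* MO_8  */
    case 1: return 0x0001000100010001ull * (uint16_t)val;  /* MO_16 */
    case 2: return 0x0000000100000001ull * (uint32_t)val;  /* MO_32 */
    default: return val;                                    /* MO_64 */
    }
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)dup_const_sketch(0, 0xab));
    printf("%016llx\n", (unsigned long long)dup_const_sketch(1, 0xabcd));
    return 0;
}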
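
Finally, on the simplified slow-path argument marshalling (single addr_reg, "32-bit host and thus 32-bit guest"): the 64-bit guest-address argument for the helper is built by placing the 32-bit address in the low slot and an explicit zero in the high slot, with slot order depending on host endianness. A sketch under the assumption of a little-endian host; HOST_BIG_ENDIAN is hard-coded here rather than taken from the build:

#include <stdint.h>
#include <stdio.h>

#define HOST_BIG_ENDIAN 0   /* assumption for this sketch */

int main(void)
{
    uint32_t guest_addr = 0xdeadbeef;
    uint32_t slots[2];

    slots[HOST_BIG_ENDIAN]  = guest_addr;  /* low half of the i64 argument */
    slots[!HOST_BIG_ENDIAN] = 0;           /* zero-extend the high half */

    printf("arg64 = %#llx\n",
           ((unsigned long long)slots[1] << 32) | slots[0]);
    return 0;
}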