diff options
Diffstat (limited to 'tcg/optimize.c')
-rw-r--r-- | tcg/optimize.c | 2061 |
1 files changed, 1231 insertions, 830 deletions
diff --git a/tcg/optimize.c b/tcg/optimize.c index 8886f70..62a128b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -28,15 +28,8 @@ #include "qemu/interval-tree.h" #include "tcg/tcg-op-common.h" #include "tcg-internal.h" +#include "tcg-has.h" -#define CASE_OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - -#define CASE_OP_32_64_VEC(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64): \ - glue(glue(case INDEX_op_, x), _vec) typedef struct MemCopyInfo { IntervalTreeNode itree; @@ -46,13 +39,12 @@ typedef struct MemCopyInfo { } MemCopyInfo; typedef struct TempOptInfo { - bool is_const; TCGTemp *prev_copy; TCGTemp *next_copy; QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; - uint64_t val; uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ - uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ + uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ + uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ } TempOptInfo; typedef struct OptContext { @@ -64,70 +56,45 @@ typedef struct OptContext { QSIMPLEQ_HEAD(, MemCopyInfo) mem_free; /* In flight values from optimization. */ - uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ - uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ - uint64_t s_mask; /* mask of clrsb(value) bits */ TCGType type; + int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ } OptContext; -/* Calculate the smask for a specific value. */ -static uint64_t smask_from_value(uint64_t value) +static inline TempOptInfo *ts_info(TCGTemp *ts) { - int rep = clrsb64(value); - return ~(~0ull >> rep); + return ts->state_ptr; } -/* - * Calculate the smask for a given set of known-zeros. - * If there are lots of zeros on the left, we can consider the remainder - * an unsigned field, and thus the corresponding signed field is one bit - * larger. - */ -static uint64_t smask_from_zmask(uint64_t zmask) +static inline TempOptInfo *arg_info(TCGArg arg) { - /* - * Only the 0 bits are significant for zmask, thus the msb itself - * must be zero, else we have no sign information. - */ - int rep = clz64(zmask); - if (rep == 0) { - return 0; - } - rep -= 1; - return ~(~0ull >> rep); + return ts_info(arg_temp(arg)); } -/* - * Recreate a properly left-aligned smask after manipulation. - * Some bit-shuffling, particularly shifts and rotates, may - * retain sign bits on the left, but may scatter disconnected - * sign bits on the right. Retain only what remains to the left. - */ -static uint64_t smask_from_smask(int64_t smask) +static inline bool ti_is_const(TempOptInfo *ti) { - /* Only the 1 bits are significant for smask */ - return smask_from_zmask(~smask); + /* If all bits that are not known zeros are known ones, it's constant. */ + return ti->z_mask == ti->o_mask; } -static inline TempOptInfo *ts_info(TCGTemp *ts) +static inline uint64_t ti_const_val(TempOptInfo *ti) { - return ts->state_ptr; + /* If constant, both z_mask and o_mask contain the value. */ + return ti->z_mask; } -static inline TempOptInfo *arg_info(TCGArg arg) +static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) { - return ts_info(arg_temp(arg)); + return ti_is_const(ti) && ti_const_val(ti) == val; } static inline bool ts_is_const(TCGTemp *ts) { - return ts_info(ts)->is_const; + return ti_is_const(ts_info(ts)); } static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val) { - TempOptInfo *ti = ts_info(ts); - return ti->is_const && ti->val == val; + return ti_is_const_val(ts_info(ts), val); } static inline bool arg_is_const(TCGArg arg) @@ -135,6 +102,11 @@ static inline bool arg_is_const(TCGArg arg) return ts_is_const(arg_temp(arg)); } +static inline uint64_t arg_const_val(TCGArg arg) +{ + return ti_const_val(arg_info(arg)); +} + static inline bool arg_is_const_val(TCGArg arg, uint64_t val) { return ts_is_const_val(arg_temp(arg), val); @@ -171,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) ti->prev_copy = ts; QSIMPLEQ_INIT(&ti->mem_copy); if (ts->kind == TEMP_CONST) { - ti->is_const = true; - ti->val = ts->val; ti->z_mask = ts->val; - ti->s_mask = smask_from_value(ts->val); + ti->o_mask = ts->val; + ti->s_mask = INT64_MIN >> clrsb64(ts->val); } else { - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; } } @@ -263,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts) pi->next_copy = ti->next_copy; ti->next_copy = ts; ti->prev_copy = ts; - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { @@ -371,6 +342,18 @@ static TCGArg arg_new_temp(OptContext *ctx) return temp_arg(ts); } +static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); +} + +static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -390,15 +373,13 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) switch (ctx->type) { case TCG_TYPE_I32: - new_op = INDEX_op_mov_i32; - break; case TCG_TYPE_I64: - new_op = INDEX_op_mov_i64; + new_op = INDEX_op_mov; break; case TCG_TYPE_V64: case TCG_TYPE_V128: case TCG_TYPE_V256: - /* TCGOP_VECL and TCGOP_VECE remain unchanged. */ + /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */ new_op = INDEX_op_mov_vec; break; default: @@ -409,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) op->args[1] = src; di->z_mask = si->z_mask; + di->o_mask = si->o_mask; di->s_mask = si->s_mask; if (src_ts->type == dst_ts->type) { @@ -418,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) di->prev_copy = src_ts; ni->prev_copy = dst_ts; si->next_copy = dst_ts; - di->is_const = si->is_const; - di->val = si->val; if (!QSIMPLEQ_EMPTY(&si->mem_copy) && cmp_better_copy(src_ts, dst_ts) == dst_ts) { move_mem_copies(dst_ts, src_ts); } + } else if (dst_ts->type == TCG_TYPE_I32) { + di->z_mask = (int32_t)di->z_mask; + di->o_mask = (int32_t)di->o_mask; + di->s_mask |= INT32_MIN; + } else { + di->z_mask |= MAKE_64BIT_MASK(32, 32); + di->o_mask = (uint32_t)di->o_mask; + di->s_mask = INT64_MIN; } return true; } @@ -436,162 +424,163 @@ static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } -static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) +static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, + uint64_t x, uint64_t y) { uint64_t l64, h64; switch (op) { - CASE_OP_32_64(add): + case INDEX_op_add: return x + y; - CASE_OP_32_64(sub): + case INDEX_op_sub: return x - y; - CASE_OP_32_64(mul): + case INDEX_op_mul: return x * y; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: return x & y; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: return x | y; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: return x ^ y; - case INDEX_op_shl_i32: - return (uint32_t)x << (y & 31); - - case INDEX_op_shl_i64: + case INDEX_op_shl: + if (type == TCG_TYPE_I32) { + return (uint32_t)x << (y & 31); + } return (uint64_t)x << (y & 63); - case INDEX_op_shr_i32: - return (uint32_t)x >> (y & 31); - - case INDEX_op_shr_i64: + case INDEX_op_shr: + if (type == TCG_TYPE_I32) { + return (uint32_t)x >> (y & 31); + } return (uint64_t)x >> (y & 63); - case INDEX_op_sar_i32: - return (int32_t)x >> (y & 31); - - case INDEX_op_sar_i64: + case INDEX_op_sar: + if (type == TCG_TYPE_I32) { + return (int32_t)x >> (y & 31); + } return (int64_t)x >> (y & 63); - case INDEX_op_rotr_i32: - return ror32(x, y & 31); - - case INDEX_op_rotr_i64: + case INDEX_op_rotr: + if (type == TCG_TYPE_I32) { + return ror32(x, y & 31); + } return ror64(x, y & 63); - case INDEX_op_rotl_i32: - return rol32(x, y & 31); - - case INDEX_op_rotl_i64: + case INDEX_op_rotl: + if (type == TCG_TYPE_I32) { + return rol32(x, y & 31); + } return rol64(x, y & 63); - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: return ~x; - CASE_OP_32_64(neg): + case INDEX_op_neg: return -x; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: return x & ~y; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: return x | ~y; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: return ~(x ^ y); - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: return ~(x & y); - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: return ~(x | y); - case INDEX_op_clz_i32: - return (uint32_t)x ? clz32(x) : y; - - case INDEX_op_clz_i64: + case INDEX_op_clz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? clz32(x) : y; + } return x ? clz64(x) : y; - case INDEX_op_ctz_i32: - return (uint32_t)x ? ctz32(x) : y; - - case INDEX_op_ctz_i64: + case INDEX_op_ctz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? ctz32(x) : y; + } return x ? ctz64(x) : y; - case INDEX_op_ctpop_i32: - return ctpop32(x); - - case INDEX_op_ctpop_i64: - return ctpop64(x); - - CASE_OP_32_64(ext8s): - return (int8_t)x; + case INDEX_op_ctpop: + return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); - CASE_OP_32_64(ext16s): - return (int16_t)x; - - CASE_OP_32_64(ext8u): - return (uint8_t)x; - - CASE_OP_32_64(ext16u): - return (uint16_t)x; - - CASE_OP_32_64(bswap16): + case INDEX_op_bswap16: x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; - CASE_OP_32_64(bswap32): + case INDEX_op_bswap32: x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t)x : x; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: return bswap64(x); case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: return (int32_t)x; case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_ext32u_i64: return (uint32_t)x; case INDEX_op_extrh_i64_i32: return (uint64_t)x >> 32; - case INDEX_op_muluh_i32: - return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; - case INDEX_op_mulsh_i32: - return ((int64_t)(int32_t)x * (int32_t)y) >> 32; - - case INDEX_op_muluh_i64: + case INDEX_op_muluh: + if (type == TCG_TYPE_I32) { + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + } mulu64(&l64, &h64, x, y); return h64; - case INDEX_op_mulsh_i64: + + case INDEX_op_mulsh: + if (type == TCG_TYPE_I32) { + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + } muls64(&l64, &h64, x, y); return h64; - case INDEX_op_div_i32: + case INDEX_op_divs: /* Avoid crashing on divide by zero, otherwise undefined. */ - return (int32_t)x / ((int32_t)y ? : 1); - case INDEX_op_divu_i32: - return (uint32_t)x / ((uint32_t)y ? : 1); - case INDEX_op_div_i64: + if (type == TCG_TYPE_I32) { + return (int32_t)x / ((int32_t)y ? : 1); + } return (int64_t)x / ((int64_t)y ? : 1); - case INDEX_op_divu_i64: + + case INDEX_op_divu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x / ((uint32_t)y ? : 1); + } return (uint64_t)x / ((uint64_t)y ? : 1); - case INDEX_op_rem_i32: - return (int32_t)x % ((int32_t)y ? : 1); - case INDEX_op_remu_i32: - return (uint32_t)x % ((uint32_t)y ? : 1); - case INDEX_op_rem_i64: + case INDEX_op_rems: + if (type == TCG_TYPE_I32) { + return (int32_t)x % ((int32_t)y ? : 1); + } return (int64_t)x % ((int64_t)y ? : 1); - case INDEX_op_remu_i64: + + case INDEX_op_remu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x % ((uint32_t)y ? : 1); + } return (uint64_t)x % ((uint64_t)y ? : 1); default: @@ -602,7 +591,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) static uint64_t do_constant_folding(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { - uint64_t res = do_constant_folding_2(op, x, y); + uint64_t res = do_constant_folding_2(op, type, x, y); if (type == TCG_TYPE_I32) { res = (int32_t)res; } @@ -710,8 +699,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) { if (arg_is_const(x) && arg_is_const(y)) { - uint64_t xv = arg_info(x)->val; - uint64_t yv = arg_info(y)->val; + uint64_t xv = arg_const_val(x); + uint64_t yv = arg_const_val(y); switch (type) { case TCG_TYPE_I32: @@ -752,12 +741,18 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, #define NO_DEST temp_arg(NULL) +static int pref_commutative(TempOptInfo *ti) +{ + /* Slight preference for non-zero constants second. */ + return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; +} + static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += arg_is_const(a1); - sum -= arg_is_const(a2); + sum += pref_commutative(arg_info(a1)); + sum -= pref_commutative(arg_info(a2)); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -772,10 +767,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += arg_is_const(p1[0]); - sum += arg_is_const(p1[1]); - sum -= arg_is_const(p2[0]); - sum -= arg_is_const(p2[1]); + sum += pref_commutative(arg_info(p1[0])); + sum += pref_commutative(arg_info(p1[1])); + sum -= pref_commutative(arg_info(p2[0])); + sum -= pref_commutative(arg_info(p2[1])); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -789,10 +784,12 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) * Return -1 if the condition can't be simplified, * and the result of the condition (0 or 1) if it can. */ +static bool fold_and(OptContext *ctx, TCGOp *op); static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, TCGArg *p1, TCGArg *p2, TCGArg *pcond) { TCGCond cond; + TempOptInfo *i1; bool swap; int r; @@ -810,19 +807,21 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, return -1; } + i1 = arg_info(*p1); + /* * TSTNE x,x -> NE x,0 - * TSTNE x,-1 -> NE x,0 + * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x */ - if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) { + if (args_are_copies(*p1, *p2) || + (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_eqne_cond(cond); return -1; } - /* TSTNE x,sign -> LT x,0 */ - if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32 - ? INT32_MIN : INT64_MIN))) { + /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ + if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_ltge_cond(cond); return -1; @@ -830,14 +829,13 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst) { - TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_and_i32 : INDEX_op_and_i64); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg tmp = arg_new_temp(ctx); op2->args[0] = tmp; op2->args[1] = *p1; op2->args[2] = *p2; + fold_and(ctx, op2); *p1 = tmp; *p2 = arg_new_constant(ctx, 0); @@ -865,13 +863,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) bh = args[3]; if (arg_is_const(bl) && arg_is_const(bh)) { - tcg_target_ulong blv = arg_info(bl)->val; - tcg_target_ulong bhv = arg_info(bh)->val; + tcg_target_ulong blv = arg_const_val(bl); + tcg_target_ulong bhv = arg_const_val(bh); uint64_t b = deposit64(blv, 32, 32, bhv); if (arg_is_const(al) && arg_is_const(ah)) { - tcg_target_ulong alv = arg_info(al)->val; - tcg_target_ulong ahv = arg_info(ah)->val; + tcg_target_ulong alv = arg_const_val(al); + tcg_target_ulong ahv = arg_const_val(ah); uint64_t a = deposit64(alv, 32, 32, ahv); r = do_constant_folding_cond_64(a, b, c); @@ -925,17 +923,20 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { - TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg t1 = arg_new_temp(ctx); TCGArg t2 = arg_new_temp(ctx); op1->args[0] = t1; op1->args[1] = al; op1->args[2] = bl; + fold_and(ctx, op1); + op2->args[0] = t2; op2->args[1] = ah; op2->args[2] = bh; + fold_and(ctx, op1); args[0] = t1; args[1] = t2; @@ -964,37 +965,31 @@ static void copy_propagate(OptContext *ctx, TCGOp *op, } } -static void finish_folding(OptContext *ctx, TCGOp *op) +static void finish_bb(OptContext *ctx) +{ + /* We only optimize memory barriers across basic blocks. */ + ctx->prev_mb = NULL; +} + +static void finish_ebb(OptContext *ctx) +{ + finish_bb(ctx); + /* We only optimize across extended basic blocks. */ + memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); + remove_mem_copy_all(ctx); +} + +static bool finish_folding(OptContext *ctx, TCGOp *op) { const TCGOpDef *def = &tcg_op_defs[op->opc]; int i, nb_oargs; - /* - * We only optimize extended basic blocks. If the opcode ends a BB - * and is not a conditional branch, reset all temp data. - */ - if (def->flags & TCG_OPF_BB_END) { - ctx->prev_mb = NULL; - if (!(def->flags & TCG_OPF_COND_BRANCH)) { - memset(&ctx->temps_used, 0, sizeof(ctx->temps_used)); - remove_mem_copy_all(ctx); - } - return; - } - nb_oargs = def->nb_oargs; for (i = 0; i < nb_oargs; i++) { TCGTemp *ts = arg_temp(op->args[i]); reset_ts(ctx, ts); - /* - * Save the corresponding known-zero/sign bits mask for the - * first output argument (only one supported so far). - */ - if (i == 0) { - ts_info(ts)->z_mask = ctx->z_mask; - ts_info(ts)->s_mask = ctx->s_mask; - } } + return true; } /* @@ -1011,9 +1006,8 @@ static void finish_folding(OptContext *ctx, TCGOp *op) static bool fold_const1(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t; + uint64_t t = arg_const_val(op->args[1]); - t = arg_info(op->args[1])->val; t = do_constant_folding(op->opc, ctx->type, t, 0); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1023,8 +1017,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op) static bool fold_const2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t1 = arg_info(op->args[1])->val; - uint64_t t2 = arg_info(op->args[2])->val; + uint64_t t1 = arg_const_val(op->args[1]); + uint64_t t2 = arg_const_val(op->args[2]); t1 = do_constant_folding(op->opc, ctx->type, t1, t2); return tcg_opt_gen_movi(ctx, op, op->args[0], t1); @@ -1044,11 +1038,23 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) return fold_const2(ctx, op); } -static bool fold_masks(OptContext *ctx, TCGOp *op) +/* + * Record "zero" and "sign" masks for the single output of @op. + * See TempOptInfo definition of z_mask and s_mask. + * If z_mask allows, fold the output to constant zero. + * The passed s_mask may be augmented by z_mask. + */ +static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, + int64_t s_mask, uint64_t a_mask) { - uint64_t a_mask = ctx->a_mask; - uint64_t z_mask = ctx->z_mask; - uint64_t s_mask = ctx->s_mask; + const TCGOpDef *def = &tcg_op_defs[op->opc]; + TCGTemp *ts; + TempOptInfo *ti; + int rep; + + /* Only single-output opcodes are supported here. */ + tcg_debug_assert(def->nb_oargs == 1); /* * 32-bit ops generate 32-bit results, which for the purpose of @@ -1058,22 +1064,76 @@ static bool fold_masks(OptContext *ctx, TCGOp *op) * type changing opcodes. */ if (ctx->type == TCG_TYPE_I32) { - a_mask = (int32_t)a_mask; z_mask = (int32_t)z_mask; - s_mask |= MAKE_64BIT_MASK(32, 32); - ctx->z_mask = z_mask; - ctx->s_mask = s_mask; + o_mask = (int32_t)o_mask; + s_mask |= INT32_MIN; + a_mask = (uint32_t)a_mask; } - if (z_mask == 0) { - return tcg_opt_gen_movi(ctx, op, op->args[0], 0); + /* Bits that are known 1 and bits that are known 0 must not overlap. */ + tcg_debug_assert((o_mask & ~z_mask) == 0); + + /* All bits that are not known zero are known one is a constant. */ + if (z_mask == o_mask) { + return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask); } + + /* If no bits are affected, the operation devolves to a copy. */ if (a_mask == 0) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } + + ts = arg_temp(op->args[0]); + reset_ts(ctx, ts); + + ti = ts_info(ts); + ti->z_mask = z_mask; + + /* Canonicalize s_mask and incorporate data from z_mask. */ + rep = clz64(~s_mask); + rep = MAX(rep, clz64(z_mask)); + rep = MAX(rep, clz64(~o_mask)); + rep = MAX(rep - 1, 0); + ti->s_mask = INT64_MIN >> rep; + return false; } +static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask, + uint64_t o_mask, int64_t s_mask, uint64_t a_mask) +{ + fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, -1); + return true; +} + +static bool fold_masks_zos(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, uint64_t s_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1); +} + +static bool fold_masks_zo(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1); +} + +static bool fold_masks_zs(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t s_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1); +} + +static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1); +} + +static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) +{ + return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1); +} + /* * Convert @op to NOT, if NOT is supported by the host. * Return true f the conversion is successful, which will still @@ -1087,12 +1147,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) switch (ctx->type) { case TCG_TYPE_I32: - not_op = INDEX_op_not_i32; - have_not = TCG_TARGET_HAS_not_i32; - break; case TCG_TYPE_I64: - not_op = INDEX_op_not_i64; - have_not = TCG_TARGET_HAS_not_i64; + not_op = INDEX_op_not; + have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0); break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -1183,13 +1240,19 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) * 3) those that produce information about the result value. */ +static bool fold_addco(OptContext *ctx, TCGOp *op); +static bool fold_or(OptContext *ctx, TCGOp *op); +static bool fold_orc(OptContext *ctx, TCGOp *op); +static bool fold_subbo(OptContext *ctx, TCGOp *op); +static bool fold_xor(OptContext *ctx, TCGOp *op); + static bool fold_add(OptContext *ctx, TCGOp *op) { if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } /* We cannot as yet do_constant_folding with vectors. */ @@ -1199,145 +1262,319 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) fold_xi_to_x(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } -static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) +static void squash_prev_carryout(OptContext *ctx, TCGOp *op) { - bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); - bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); + TempOptInfo *t2; - if (a_const && b_const) { - uint64_t al = arg_info(op->args[2])->val; - uint64_t ah = arg_info(op->args[3])->val; - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - TCGArg rl, rh; - TCGOp *op2; + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_addco: + op->opc = INDEX_op_add; + fold_add(ctx, op); + break; + case INDEX_op_addcio: + op->opc = INDEX_op_addci; + break; + case INDEX_op_addc1o: + op->opc = INDEX_op_add; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, 1); + } + break; + default: + g_assert_not_reached(); + } +} - if (ctx->type == TCG_TYPE_I32) { - uint64_t a = deposit64(al, 32, 32, ah); - uint64_t b = deposit64(bl, 32, 32, bh); +static bool fold_addci(OptContext *ctx, TCGOp *op) +{ + fold_commutative(ctx, op); - if (add) { - a += b; - } else { - a -= b; - } + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + op->opc = INDEX_op_add; + + if (ctx->carry_state > 0) { + TempOptInfo *t2 = arg_info(op->args[2]); - al = sextract64(a, 0, 32); - ah = sextract64(a, 32, 32); + /* + * Propagate the known carry-in into a constant, if possible. + * Otherwise emit a second add +1. + */ + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); } else { - Int128 a = int128_make128(al, ah); - Int128 b = int128_make128(bl, bh); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); - if (add) { - a = int128_add(a, b); - } else { - a = int128_sub(a, b); - } + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_add(ctx, op2); - al = int128_getlo(a); - ah = int128_gethi(a); + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, 1); } + } - rl = op->args[0]; - rh = op->args[1]; + ctx->carry_state = -1; + return fold_add(ctx, op); +} - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); +static bool fold_addcio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t sum, max; - tcg_opt_gen_movi(ctx, op, rl, al); - tcg_opt_gen_movi(ctx, op2, rh, ah); - return true; + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && + !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { + carry_out = 0; } - /* Fold sub2 r,x,i to add2 r,x,-i */ - if (!add && b_const) { - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; + if (ctx->carry_state < 0) { + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + } - /* Negate the two parts without assembling and disassembling. */ - bl = -bl; - bh = ~bh + !bl; + squash_prev_carryout(ctx, op); + if (ctx->carry_state == 0) { + goto do_addco; + } - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add2_i32 : INDEX_op_add2_i64); - op->args[4] = arg_new_constant(ctx, bl); - op->args[5] = arg_new_constant(ctx, bh); + /* Propagate the known carry-in into a constant, if possible. */ + max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + if (ti_is_const(t2)) { + uint64_t v = ti_const_val(t2) & max; + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + /* max + known carry in produces known carry out. */ + carry_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1) & max; + if (v < max) { + op->args[1] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + carry_out = 1; } - return false; + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_addc1o; + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + + do_addco: + op->opc = INDEX_op_addco; + return fold_addco(ctx, op); } -static bool fold_add2(OptContext *ctx, TCGOp *op) +static bool fold_addco(OptContext *ctx, TCGOp *op) { - /* Note that the high and low parts may be independently swapped. */ - swap_commutative(op->args[0], &op->args[2], &op->args[4]); - swap_commutative(op->args[1], &op->args[3], &op->args[5]); + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t ign; + + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); - return fold_addsub2(ctx, op, true); + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + + if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + /* Given sign-extension of z_mask for I32, we need not truncate. */ + carry_out = uadd64_overflow(v1, v2, &ign); + } else if (v2 == 0) { + carry_out = 0; + } + } else { + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { + carry_out = 0; + } + } + ctx->carry_state = carry_out; + return finish_folding(ctx, op); } static bool fold_and(OptContext *ctx, TCGOp *op) { - uint64_t z1, z2; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || - fold_xi_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, -1) || - fold_xx_to_x(ctx, op)) { + if (fold_const2_commutative(ctx, op)) { return true; } - z1 = arg_info(op->args[1])->z_mask; - z2 = arg_info(op->args[2])->z_mask; - ctx->z_mask = z1 & z2; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = t1->z_mask & t2->z_mask; + o_mask = t1->o_mask & t2->o_mask; /* * Sign repetitions are perforce all identical, whether they are 1 or 0. * Bitwise operations preserve the relative quantity of the repetitions. */ - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer affected bits from it. - */ - if (arg_is_const(op->args[2])) { - ctx->a_mask = z1 & ~z2; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known zero, masked by those known one. */ + a_mask = t1->z_mask & ~t2->o_mask; + + if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { + if (ti_is_const(t2)) { + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + uint64_t val = ti_const_val(t2); + if (!(val & (val + 1))) { + unsigned len = ctz64(~val); + if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { + op->opc = INDEX_op_extract; + op->args[2] = 0; + op->args[3] = len; + } + } + } else { + fold_xx_to_x(ctx, op); + } } - - return fold_masks(ctx, op); + return true; } static bool fold_andc(OptContext *ctx, TCGOp *op) { - uint64_t z1; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, 0) || + if (fold_const2(ctx, op)) { + return true; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + if (ti_is_const(t2)) { + /* Fold andc r,x,i to and r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_and; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_and_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_and(ctx, op); + } + if (fold_xx_to_i(ctx, op, 0) || fold_ix_to_not(ctx, op, -1)) { return true; } - z1 = arg_info(op->args[1])->z_mask; + z_mask = t1->z_mask & ~t2->o_mask; + o_mask = t1->o_mask & ~t2->z_mask; + s_mask = t1->s_mask & t2->s_mask; - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer anything from it. - */ - if (arg_is_const(op->args[2])) { - uint64_t z2 = ~arg_info(op->args[2])->z_mask; - ctx->a_mask = z1 & ~z2; - z1 &= z2; + /* Affected bits are those not known zero, masked by those known zero. */ + a_mask = t1->z_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); +} + +static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) +{ + /* If true and false values are the same, eliminate the cmp. */ + if (args_are_copies(op->args[2], op->args[3])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]); } - ctx->z_mask = z1; - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { + uint64_t tv = arg_const_val(op->args[2]); + uint64_t fv = arg_const_val(op->args[3]); + + if (tv == -1 && fv == 0) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); + } + if (tv == 0 && fv == -1) { + if (TCG_TARGET_HAS_not_vec) { + op->opc = INDEX_op_not_vec; + return fold_not(ctx, op); + } else { + op->opc = INDEX_op_xor_vec; + op->args[2] = arg_new_constant(ctx, -1); + return fold_xor(ctx, op); + } + } + } + if (arg_is_const(op->args[2])) { + uint64_t tv = arg_const_val(op->args[2]); + if (tv == -1) { + op->opc = INDEX_op_or_vec; + op->args[2] = op->args[3]; + return fold_or(ctx, op); + } + if (tv == 0 && TCG_TARGET_HAS_andc_vec) { + op->opc = INDEX_op_andc_vec; + op->args[2] = op->args[1]; + op->args[1] = op->args[3]; + return fold_andc(ctx, op); + } + } + if (arg_is_const(op->args[3])) { + uint64_t fv = arg_const_val(op->args[3]); + if (fv == 0) { + op->opc = INDEX_op_and_vec; + return fold_and(ctx, op); + } + if (fv == -1 && TCG_TARGET_HAS_orc_vec) { + op->opc = INDEX_op_orc_vec; + op->args[2] = op->args[1]; + op->args[1] = op->args[3]; + return fold_orc(ctx, op); + } + } + return finish_folding(ctx, op); } static bool fold_brcond(OptContext *ctx, TCGOp *op) @@ -1351,8 +1588,11 @@ static bool fold_brcond(OptContext *ctx, TCGOp *op) if (i > 0) { op->opc = INDEX_op_br; op->args[0] = op->args[3]; + finish_ebb(ctx); + } else { + finish_bb(ctx); } - return false; + return true; } static bool fold_brcond2(OptContext *ctx, TCGOp *op) @@ -1422,14 +1662,14 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) break; do_brcond_low: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[1] = op->args[2]; op->args[2] = cond; op->args[3] = label; return fold_brcond(ctx, op); do_brcond_high: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; @@ -1443,64 +1683,62 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) } op->opc = INDEX_op_br; op->args[0] = label; - break; + finish_ebb(ctx); + return true; } - return false; + + finish_bb(ctx); + return true; } static bool fold_bswap(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, sign; - - if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1 = arg_info(op->args[1]); + int flags = op->args[2]; - t = do_constant_folding(op->opc, ctx->type, t, op->args[2]); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], + do_constant_folding(op->opc, ctx->type, + ti_const_val(t1), flags)); } - z_mask = arg_info(op->args[1])->z_mask; + z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = 0; switch (op->opc) { - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: z_mask = bswap16(z_mask); - sign = INT16_MIN; + o_mask = bswap16(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int16_t)z_mask; + o_mask = (int16_t)o_mask; + s_mask = INT16_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(16, 48); + } break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: z_mask = bswap32(z_mask); - sign = INT32_MIN; + o_mask = bswap32(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; + s_mask = INT32_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(32, 32); + } break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: z_mask = bswap64(z_mask); - sign = INT64_MIN; + o_mask = bswap64(o_mask); break; default: g_assert_not_reached(); } - s_mask = smask_from_zmask(z_mask); - - switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { - case TCG_BSWAP_OZ: - break; - case TCG_BSWAP_OS: - /* If the sign bit may be 1, force all the bits above to 1. */ - if (z_mask & sign) { - z_mask |= sign; - s_mask = sign << 1; - } - break; - default: - /* The high bits are undefined: force all bits above the sign to 1. */ - z_mask |= sign << 1; - s_mask = 0; - break; - } - ctx->z_mask = z_mask; - ctx->s_mask = s_mask; - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_call(OptContext *ctx, TCGOp *op) @@ -1540,12 +1778,44 @@ static bool fold_call(OptContext *ctx, TCGOp *op) return true; } +static bool fold_cmp_vec(OptContext *ctx, TCGOp *op) +{ + /* Canonicalize the comparison to put immediate second. */ + if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { + op->args[3] = tcg_swap_cond(op->args[3]); + } + return finish_folding(ctx, op); +} + +static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op) +{ + /* If true and false values are the same, eliminate the cmp. */ + if (args_are_copies(op->args[3], op->args[4])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); + } + + /* Canonicalize the comparison to put immediate second. */ + if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { + op->args[5] = tcg_swap_cond(op->args[5]); + } + /* + * Canonicalize the "false" input reg to match the destination, + * so that the tcg backend can implement "move if true". + */ + if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { + op->args[5] = tcg_invert_cond(op->args[5]); + } + return finish_folding(ctx, op); +} + static bool fold_count_zeros(OptContext *ctx, TCGOp *op) { - uint64_t z_mask; + uint64_t z_mask, s_mask; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); - if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + if (ti_is_const(t1)) { + uint64_t t = ti_const_val(t1); if (t != 0) { t = do_constant_folding(op->opc, ctx->type, t, 0); @@ -1564,79 +1834,79 @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op) default: g_assert_not_reached(); } - ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask; - ctx->s_mask = smask_from_zmask(ctx->z_mask); - return false; + s_mask = ~z_mask; + z_mask |= t2->z_mask; + s_mask &= t2->s_mask; + + return fold_masks_zs(ctx, op, z_mask, s_mask); } static bool fold_ctpop(OptContext *ctx, TCGOp *op) { + uint64_t z_mask; + if (fold_const1(ctx, op)) { return true; } switch (ctx->type) { case TCG_TYPE_I32: - ctx->z_mask = 32 | 31; + z_mask = 32 | 31; break; case TCG_TYPE_I64: - ctx->z_mask = 64 | 63; + z_mask = 64 | 63; break; default: g_assert_not_reached(); } - ctx->s_mask = smask_from_zmask(ctx->z_mask); - return false; + return fold_masks_z(ctx, op, z_mask); } static bool fold_deposit(OptContext *ctx, TCGOp *op) { - TCGOpcode and_opc; - - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t1 = arg_info(op->args[1])->val; - uint64_t t2 = arg_info(op->args[2])->val; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int ofs = op->args[3]; + int len = op->args[4]; + int width = 8 * tcg_type_size(ctx->type); + uint64_t z_mask, o_mask, s_mask; - t1 = deposit64(t1, op->args[3], op->args[4], t2); - return tcg_opt_gen_movi(ctx, op, op->args[0], t1); - } - - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - break; - default: - g_assert_not_reached(); + if (ti_is_const(t1) && ti_is_const(t2)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], + deposit64(ti_const_val(t1), ofs, len, + ti_const_val(t2))); } /* Inserting a value into zero at offset 0. */ - if (arg_is_const_val(op->args[1], 0) && op->args[3] == 0) { - uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]); + if (ti_is_const_val(t1, 0) && ofs == 0) { + uint64_t mask = MAKE_64BIT_MASK(0, len); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = op->args[2]; op->args[2] = arg_new_constant(ctx, mask); - ctx->z_mask = mask & arg_info(op->args[1])->z_mask; - return false; + return fold_and(ctx, op); } /* Inserting zero into a value. */ - if (arg_is_const_val(op->args[2], 0)) { - uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0); + if (ti_is_const_val(t2, 0)) { + uint64_t mask = deposit64(-1, ofs, len, 0); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[2] = arg_new_constant(ctx, mask); - ctx->z_mask = mask & arg_info(op->args[1])->z_mask; - return false; + return fold_and(ctx, op); } - ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask, - op->args[3], op->args[4], - arg_info(op->args[2])->z_mask); - return false; + /* The s_mask from the top portion of the deposit is still valid. */ + if (ofs + len == width) { + s_mask = t2->s_mask << ofs; + } else { + s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len); + } + + z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); + o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_divide(OptContext *ctx, TCGOp *op) @@ -1645,24 +1915,24 @@ static bool fold_divide(OptContext *ctx, TCGOp *op) fold_xi_to_x(ctx, op, 1)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_dup(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t t = arg_const_val(op->args[1]); t = dup_const(TCGOP_VECE(op), t); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } - return false; + return finish_folding(ctx, op); } static bool fold_dup2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, - arg_info(op->args[2])->val); + uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, + arg_const_val(op->args[2])); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1670,152 +1940,144 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) op->opc = INDEX_op_dup_vec; TCGOP_VECE(op) = MO_32; } - return false; + return finish_folding(ctx, op); } static bool fold_eqv(OptContext *ctx, TCGOp *op) { - if (fold_const2_commutative(ctx, op) || - fold_xi_to_x(ctx, op, -1) || - fold_xi_to_not(ctx, op, 0)) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + + if (fold_const2_commutative(ctx, op)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return false; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold eqv r,x,i to xor r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_xor; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_xor_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_xor(ctx, op); + } + + t1 = arg_info(op->args[1]); + + z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; + uint64_t z_mask, o_mask, a_mask; + TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; - if (arg_is_const(op->args[1])) { - uint64_t t; - - t = arg_info(op->args[1])->val; - t = extract64(t, pos, len); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], + extract64(ti_const_val(t1), pos, len)); } - z_mask_old = arg_info(op->args[1])->z_mask; - z_mask = extract64(z_mask_old, pos, len); - if (pos == 0) { - ctx->a_mask = z_mask_old ^ z_mask; - } - ctx->z_mask = z_mask; - ctx->s_mask = smask_from_zmask(z_mask); + z_mask = extract64(t1->z_mask, pos, len); + o_mask = extract64(t1->o_mask, pos, len); + a_mask = pos ? -1 : t1->z_mask ^ z_mask; - return fold_masks(ctx, op); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); } static bool fold_extract2(OptContext *ctx, TCGOp *op) { - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t v1 = arg_info(op->args[1])->val; - uint64_t v2 = arg_info(op->args[2])->val; - int shr = op->args[3]; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + uint64_t z1 = t1->z_mask; + uint64_t z2 = t2->z_mask; + uint64_t o1 = t1->o_mask; + uint64_t o2 = t2->o_mask; + int shr = op->args[3]; - if (op->opc == INDEX_op_extract2_i64) { - v1 >>= shr; - v2 <<= 64 - shr; - } else { - v1 = (uint32_t)v1 >> shr; - v2 = (uint64_t)((int32_t)v2 << (32 - shr)); - } - return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); + if (ctx->type == TCG_TYPE_I32) { + z1 = (uint32_t)z1 >> shr; + o1 = (uint32_t)o1 >> shr; + z2 = (uint64_t)((int32_t)z2 << (32 - shr)); + o2 = (uint64_t)((int32_t)o2 << (32 - shr)); + } else { + z1 >>= shr; + o1 >>= shr; + z2 <<= 64 - shr; + o2 <<= 64 - shr; } - return false; + + return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); } static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask_old, s_mask, z_mask, sign; - bool type_change = false; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask = arg_info(op->args[1])->z_mask; - s_mask = arg_info(op->args[1])->s_mask; - s_mask_old = s_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = t1->s_mask; switch (op->opc) { - CASE_OP_32_64(ext8s): - sign = INT8_MIN; - z_mask = (uint8_t)z_mask; - break; - CASE_OP_32_64(ext16s): - sign = INT16_MIN; - z_mask = (uint16_t)z_mask; - break; case INDEX_op_ext_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32s_i64: - sign = INT32_MIN; - z_mask = (uint32_t)z_mask; + s_mask |= INT32_MIN; + z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; break; default: g_assert_not_reached(); } - - if (z_mask & sign) { - z_mask |= sign; - } - s_mask |= sign << 1; - - ctx->z_mask = z_mask; - ctx->s_mask = s_mask; - if (!type_change) { - ctx->a_mask = s_mask & ~s_mask_old; - } - - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; - bool type_change = false; + uint64_t z_mask, o_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask_old = z_mask = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; switch (op->opc) { - CASE_OP_32_64(ext8u): - z_mask = (uint8_t)z_mask; - break; - CASE_OP_32_64(ext16u): - z_mask = (uint16_t)z_mask; - break; case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32u_i64: z_mask = (uint32_t)z_mask; + o_mask = (uint32_t)o_mask; break; case INDEX_op_extrh_i64_i32: - type_change = true; z_mask >>= 32; + o_mask >>= 32; break; default: g_assert_not_reached(); } - - ctx->z_mask = z_mask; - ctx->s_mask = smask_from_zmask(z_mask); - if (!type_change) { - ctx->a_mask = z_mask_old ^ z_mask; - } - return fold_masks(ctx, op); + return fold_masks_zo(ctx, op, z_mask, o_mask); } static bool fold_mb(OptContext *ctx, TCGOp *op) @@ -1849,8 +2111,15 @@ static bool fold_mov(OptContext *ctx, TCGOp *op) static bool fold_movcond(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *tt, *ft; int i; + /* If true and false values are the same, eliminate the cmp. */ + if (args_are_copies(op->args[3], op->args[4])) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]); + } + /* * Canonicalize the "false" input reg to match the destination reg so * that the tcg backend can implement a "move if true" operation. @@ -1865,53 +2134,33 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); } - ctx->z_mask = arg_info(op->args[3])->z_mask - | arg_info(op->args[4])->z_mask; - ctx->s_mask = arg_info(op->args[3])->s_mask - & arg_info(op->args[4])->s_mask; + tt = arg_info(op->args[3]); + ft = arg_info(op->args[4]); + z_mask = tt->z_mask | ft->z_mask; + o_mask = tt->o_mask & ft->o_mask; + s_mask = tt->s_mask & ft->s_mask; - if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { - uint64_t tv = arg_info(op->args[3])->val; - uint64_t fv = arg_info(op->args[4])->val; - TCGOpcode opc, negopc = 0; + if (ti_is_const(tt) && ti_is_const(ft)) { + uint64_t tv = ti_const_val(tt); + uint64_t fv = ti_const_val(ft); TCGCond cond = op->args[5]; - switch (ctx->type) { - case TCG_TYPE_I32: - opc = INDEX_op_setcond_i32; - if (TCG_TARGET_HAS_negsetcond_i32) { - negopc = INDEX_op_negsetcond_i32; - } - tv = (int32_t)tv; - fv = (int32_t)fv; - break; - case TCG_TYPE_I64: - opc = INDEX_op_setcond_i64; - if (TCG_TARGET_HAS_negsetcond_i64) { - negopc = INDEX_op_negsetcond_i64; - } - break; - default: - g_assert_not_reached(); - } - if (tv == 1 && fv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = cond; } else if (fv == 1 && tv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; + op->args[3] = tcg_invert_cond(cond); + } else if (tv == -1 && fv == 0) { + op->opc = INDEX_op_negsetcond; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = INDEX_op_negsetcond; op->args[3] = tcg_invert_cond(cond); - } else if (negopc) { - if (tv == -1 && fv == 0) { - op->opc = negopc; - op->args[3] = cond; - } else if (fv == -1 && tv == 0) { - op->opc = negopc; - op->args[3] = tcg_invert_cond(cond); - } } } - return false; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_mul(OptContext *ctx, TCGOp *op) @@ -1921,7 +2170,7 @@ static bool fold_mul(OptContext *ctx, TCGOp *op) fold_xi_to_x(ctx, op, 1)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) @@ -1930,7 +2179,7 @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) fold_xi_to_i(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_multiply2(OptContext *ctx, TCGOp *op) @@ -1938,28 +2187,30 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) swap_commutative(op->args[0], &op->args[2], &op->args[3]); if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t a = arg_info(op->args[2])->val; - uint64_t b = arg_info(op->args[3])->val; + uint64_t a = arg_const_val(op->args[2]); + uint64_t b = arg_const_val(op->args[3]); uint64_t h, l; TCGArg rl, rh; TCGOp *op2; switch (op->opc) { - case INDEX_op_mulu2_i32: - l = (uint64_t)(uint32_t)a * (uint32_t)b; - h = (int32_t)(l >> 32); - l = (int32_t)l; - break; - case INDEX_op_muls2_i32: - l = (int64_t)(int32_t)a * (int32_t)b; - h = l >> 32; - l = (int32_t)l; - break; - case INDEX_op_mulu2_i64: - mulu64(&l, &h, a, b); + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t)(uint32_t)a * (uint32_t)b; + h = (int32_t)(l >> 32); + l = (int32_t)l; + } else { + mulu64(&l, &h, a, b); + } break; - case INDEX_op_muls2_i64: - muls64(&l, &h, a, b); + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t)(int32_t)a * (int32_t)b; + h = l >> 32; + l = (int32_t)l; + } else { + muls64(&l, &h, a, b); + } break; default: g_assert_not_reached(); @@ -1969,39 +2220,42 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); return true; } - return false; + return finish_folding(ctx, op); } static bool fold_nand(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, -1)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return false; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask & t2->o_mask); + o_mask = ~(t1->z_mask & t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) { /* Set to 1 all bits to the left of the rightmost. */ uint64_t z_mask = arg_info(op->args[1])->z_mask; - ctx->z_mask = -(z_mask & -z_mask); + z_mask = -(z_mask & -z_mask); - /* - * Because of fold_sub_to_neg, we want to always return true, - * via finish_folding. - */ - finish_folding(ctx, op); - return true; + return fold_masks_z(ctx, op, z_mask); } static bool fold_neg(OptContext *ctx, TCGOp *op) @@ -2011,83 +2265,138 @@ static bool fold_neg(OptContext *ctx, TCGOp *op) static bool fold_nor(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, 0)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return false; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask | t2->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_not(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1; + if (fold_const1(ctx, op)) { return true; } - ctx->s_mask = arg_info(op->args[1])->s_mask; - - /* Because of fold_to_not, we want to always return true, via finish. */ - finish_folding(ctx, op); - return true; + t1 = arg_info(op->args[1]); + return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); } static bool fold_or(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, 0) || fold_xx_to_x(ctx, op)) { return true; } - ctx->z_mask = arg_info(op->args[1])->z_mask - | arg_info(op->args[2])->z_mask; - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = t1->z_mask | t2->z_mask; + o_mask = t1->o_mask | t2->o_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known one, masked by those known zero. */ + a_mask = ~t1->o_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_orc(OptContext *ctx, TCGOp *op) { - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, -1) || - fold_xi_to_x(ctx, op, -1) || + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; + + if (fold_const2(ctx, op)) { + return true; + } + + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold orc r,x,i to or r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_or; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_or_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_or(ctx, op); + } + if (fold_xx_to_i(ctx, op, -1) || fold_ix_to_not(ctx, op, 0)) { return true; } + t1 = arg_info(op->args[1]); - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return false; + z_mask = t1->z_mask | ~t2->o_mask; + o_mask = t1->o_mask | ~t2->z_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known one, masked by those known one. */ + a_mask = ~t1->o_mask & t2->o_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } -static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) +static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) { const TCGOpDef *def = &tcg_op_defs[op->opc]; MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; MemOp mop = get_memop(oi); int width = 8 * memop_size(mop); + uint64_t z_mask = -1, s_mask = 0; if (width < 64) { - ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); - if (!(mop & MO_SIGN)) { - ctx->z_mask = MAKE_64BIT_MASK(0, width); - ctx->s_mask <<= 1; + if (mop & MO_SIGN) { + s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1)); + } else { + z_mask = MAKE_64BIT_MASK(0, width); } } /* Opcodes that touch guest memory stop the mb optimization. */ ctx->prev_mb = NULL; - return false; + + return fold_masks_zs(ctx, op, z_mask, s_mask); +} + +static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op) +{ + /* Opcodes that touch guest memory stop the mb optimization. */ + ctx->prev_mb = NULL; + return finish_folding(ctx, op); } static bool fold_qemu_st(OptContext *ctx, TCGOp *op) { /* Opcodes that touch guest memory stop the mb optimization. */ ctx->prev_mb = NULL; - return false; + return true; } static bool fold_remainder(OptContext *ctx, TCGOp *op) @@ -2096,10 +2405,11 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op) fold_xx_to_i(ctx, op, 0)) { return true; } - return false; + return finish_folding(ctx, op); } -static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) +/* Return 1 if finished, -1 if simplified, 0 if unchanged. */ +static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) { uint64_t a_zmask, b_val; TCGCond cond; @@ -2109,7 +2419,7 @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) } a_zmask = arg_info(op->args[1])->z_mask; - b_val = arg_info(op->args[2])->val; + b_val = arg_const_val(op->args[2]); cond = op->args[3]; if (ctx->type == TCG_TYPE_I32) { @@ -2164,47 +2474,27 @@ static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode add_opc, xor_opc, neg_opc; - if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } - switch (ctx->type) { - case TCG_TYPE_I32: - add_opc = INDEX_op_add_i32; - neg_opc = INDEX_op_neg_i32; - xor_opc = INDEX_op_xor_i32; - break; - case TCG_TYPE_I64: - add_opc = INDEX_op_add_i64; - neg_opc = INDEX_op_neg_i64; - xor_opc = INDEX_op_xor_i64; - break; - default: - g_assert_not_reached(); - } - if (!inv) { - op->opc = neg_opc; + op->opc = INDEX_op_neg; } else if (neg) { - op->opc = add_opc; + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); } else { - op->opc = xor_opc; + op->opc = INDEX_op_xor; op->args[2] = arg_new_constant(ctx, 1); } - return false; + return -1; } } - - return false; + return 0; } static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; - TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; @@ -2217,79 +2507,52 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } src2 = op->args[2]; - val = arg_info(src2)->val; + val = arg_const_val(src2); if (!is_power_of_2(val)) { return; } sh = ctz64(val); - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - sub_opc = INDEX_op_sub_i32; - xor_opc = INDEX_op_xor_i32; - shr_opc = INDEX_op_shr_i32; - neg_opc = INDEX_op_neg_i32; - if (TCG_TARGET_extract_i32_valid(sh, 1)) { - uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0; - sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0; - } - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - sub_opc = INDEX_op_sub_i64; - xor_opc = INDEX_op_xor_i64; - shr_opc = INDEX_op_shr_i64; - neg_opc = INDEX_op_neg_i64; - if (TCG_TARGET_extract_i64_valid(sh, 1)) { - uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0; - sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0; - } - break; - default: - g_assert_not_reached(); - } - ret = op->args[0]; src1 = op->args[1]; inv = cond == TCG_COND_TSTEQ; - if (sh && sext_opc && neg && !inv) { - op->opc = sext_opc; + if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_sextract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; return; - } else if (sh && uext_opc) { - op->opc = uext_opc; + } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_extract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; } else { if (sh) { - op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3); + op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3); op2->args[0] = ret; op2->args[1] = src1; op2->args[2] = arg_new_constant(ctx, sh); src1 = ret; } - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = src1; op->args[2] = arg_new_constant(ctx, 1); } if (neg && inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); op2->args[0] = ret; op2->args[1] = ret; - op2->args[2] = arg_new_constant(ctx, 1); + op2->args[2] = arg_new_constant(ctx, -1); } else if (inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (neg) { - op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2); + op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); op2->args[0] = ret; op2->args[1] = ret; } @@ -2303,14 +2566,15 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op) return tcg_opt_gen_movi(ctx, op, op->args[0], i); } - if (fold_setcond_zmask(ctx, op, false)) { + i = fold_setcond_zmask(ctx, op, false); + if (i > 0) { return true; } - fold_setcond_tst_pow2(ctx, op, false); + if (i == 0) { + fold_setcond_tst_pow2(ctx, op, false); + } - ctx->z_mask = 1; - ctx->s_mask = smask_from_zmask(1); - return false; + return fold_masks_z(ctx, op, 1); } static bool fold_negsetcond(OptContext *ctx, TCGOp *op) @@ -2321,14 +2585,16 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op) return tcg_opt_gen_movi(ctx, op, op->args[0], -i); } - if (fold_setcond_zmask(ctx, op, true)) { + i = fold_setcond_zmask(ctx, op, true); + if (i > 0) { return true; } - fold_setcond_tst_pow2(ctx, op, true); + if (i == 0) { + fold_setcond_tst_pow2(ctx, op, true); + } /* Value is {0,-1} so all bits are repetitions of the sign. */ - ctx->s_mask = -1; - return false; + return fold_masks_s(ctx, op, -1); } static bool fold_setcond2(OptContext *ctx, TCGOp *op) @@ -2384,7 +2650,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) case TCG_COND_TSTEQ: case TCG_COND_TSTNE: - if (arg_is_const_val(op->args[2], 0)) { + if (arg_is_const_val(op->args[3], 0)) { goto do_setcond_high; } if (arg_is_const_val(op->args[4], 0)) { @@ -2398,20 +2664,18 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) do_setcond_low: op->args[2] = op->args[3]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); } - ctx->z_mask = 1; - ctx->s_mask = smask_from_zmask(1); - return false; + return fold_masks_z(ctx, op, 1); do_setcond_const: return tcg_opt_gen_movi(ctx, op, op->args[0], i); @@ -2419,37 +2683,30 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) static bool fold_sextract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, s_mask_old; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; - if (arg_is_const(op->args[1])) { - uint64_t t; - - t = arg_info(op->args[1])->val; - t = sextract64(t, pos, len); - return tcg_opt_gen_movi(ctx, op, op->args[0], t); + if (ti_is_const(t1)) { + return tcg_opt_gen_movi(ctx, op, op->args[0], + sextract64(ti_const_val(t1), pos, len)); } - z_mask = arg_info(op->args[1])->z_mask; - z_mask = sextract64(z_mask, pos, len); - ctx->z_mask = z_mask; - - s_mask_old = arg_info(op->args[1])->s_mask; - s_mask = sextract64(s_mask_old, pos, len); - s_mask |= MAKE_64BIT_MASK(len, 64 - len); - ctx->s_mask = s_mask; + s_mask = t1->s_mask >> pos; + s_mask |= -1ull << (len - 1); + a_mask = pos ? -1 : s_mask & ~t1->s_mask; - if (pos == 0) { - ctx->a_mask = s_mask & ~s_mask_old; - } + z_mask = sextract64(t1->z_mask, pos, len); + o_mask = sextract64(t1->o_mask, pos, len); - return fold_masks(ctx, op); + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_shift(OptContext *ctx, TCGOp *op) { - uint64_t s_mask, z_mask, sign; + uint64_t s_mask, z_mask, o_mask; + TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || fold_ix_to_i(ctx, op, 0) || @@ -2457,43 +2714,43 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) return true; } - s_mask = arg_info(op->args[1])->s_mask; - z_mask = arg_info(op->args[1])->z_mask; - - if (arg_is_const(op->args[2])) { - int sh = arg_info(op->args[2])->val; + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + s_mask = t1->s_mask; + z_mask = t1->z_mask; + o_mask = t1->o_mask; - ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + if (ti_is_const(t2)) { + int sh = ti_const_val(t2); + z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); - ctx->s_mask = smask_from_smask(s_mask); - return fold_masks(ctx, op); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } switch (op->opc) { - CASE_OP_32_64(sar): + case INDEX_op_sar: /* * Arithmetic right shift will not reduce the number of * input sign repetitions. */ - ctx->s_mask = s_mask; - break; - CASE_OP_32_64(shr): + return fold_masks_s(ctx, op, s_mask); + case INDEX_op_shr: /* * If the sign bit is known zero, then logical right shift - * will not reduced the number of input sign repetitions. + * will not reduce the number of input sign repetitions. */ - sign = (s_mask & -s_mask) >> 1; - if (sign && !(z_mask & sign)) { - ctx->s_mask = s_mask; + if (~z_mask & -s_mask) { + return fold_masks_s(ctx, op, s_mask); } break; default: break; } - return false; + return finish_folding(ctx, op); } static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) @@ -2501,17 +2758,14 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) TCGOpcode neg_op; bool have_neg; - if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { + if (!arg_is_const_val(op->args[1], 0)) { return false; } switch (ctx->type) { case TCG_TYPE_I32: - neg_op = INDEX_op_neg_i32; - have_neg = true; - break; case TCG_TYPE_I64: - neg_op = INDEX_op_neg_i64; + neg_op = INDEX_op_neg; have_neg = true; break; case TCG_TYPE_V64: @@ -2540,60 +2794,195 @@ static bool fold_sub_vec(OptContext *ctx, TCGOp *op) fold_sub_to_neg(ctx, op)) { return true; } - return false; + return finish_folding(ctx, op); } static bool fold_sub(OptContext *ctx, TCGOp *op) { - if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) { + if (fold_const2(ctx, op) || + fold_xx_to_i(ctx, op, 0) || + fold_xi_to_x(ctx, op, 0) || + fold_sub_to_neg(ctx, op)) { return true; } /* Fold sub r,x,i to add r,x,-i */ if (arg_is_const(op->args[2])) { - uint64_t val = arg_info(op->args[2])->val; + uint64_t val = arg_const_val(op->args[2]); - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add_i32 : INDEX_op_add_i64); + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -val); } - return false; + return finish_folding(ctx, op); +} + +static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_subbo: + op->opc = INDEX_op_sub; + fold_sub(ctx, op); + break; + case INDEX_op_subbio: + op->opc = INDEX_op_subbi; + break; + case INDEX_op_subb1o: + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op->opc = INDEX_op_sub; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, -1); + } + break; + default: + g_assert_not_reached(); + } } -static bool fold_sub2(OptContext *ctx, TCGOp *op) +static bool fold_subbi(OptContext *ctx, TCGOp *op) { - return fold_addsub2(ctx, op, false); + TempOptInfo *t2; + int borrow_in = ctx->carry_state; + + if (borrow_in < 0) { + return finish_folding(ctx, op); + } + ctx->carry_state = -1; + + squash_prev_borrowout(ctx, op); + if (borrow_in == 0) { + op->opc = INDEX_op_sub; + return fold_sub(ctx, op); + } + + /* + * Propagate the known carry-in into any constant, then negate to + * transform from sub to add. If there is no constant, emit a + * separate add -1. + */ + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_sub(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, -1); + } + op->opc = INDEX_op_add; + return fold_add(ctx, op); +} + +static bool fold_subbio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int borrow_out = -1; + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_borrowout(ctx, op); + if (ctx->carry_state == 0) { + goto do_subbo; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* Propagate the known borrow-in into a constant, if possible. */ + if (ti_is_const(t2)) { + uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + uint64_t v = ti_const_val(t2) & max; + + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_subbo; + } + /* subtracting max + 1 produces known borrow out. */ + borrow_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1); + if (v != 0) { + op->args[2] = arg_new_constant(ctx, v - 1); + goto do_subbo; + } + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_subb1o; + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); + + do_subbo: + op->opc = INDEX_op_subbo; + return fold_subbo(ctx, op); +} + +static bool fold_subbo(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int borrow_out = -1; + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + if (v2 == 0) { + borrow_out = 0; + } else if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + borrow_out = v1 < v2; + } + } + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); } static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) { + uint64_t z_mask = -1, s_mask = 0; + /* We can't do any folding with a load, but we can record bits. */ switch (op->opc) { - CASE_OP_32_64(ld8s): - ctx->s_mask = MAKE_64BIT_MASK(8, 56); + case INDEX_op_ld8s: + s_mask = INT8_MIN; break; - CASE_OP_32_64(ld8u): - ctx->z_mask = MAKE_64BIT_MASK(0, 8); - ctx->s_mask = MAKE_64BIT_MASK(9, 55); + case INDEX_op_ld8u: + z_mask = MAKE_64BIT_MASK(0, 8); break; - CASE_OP_32_64(ld16s): - ctx->s_mask = MAKE_64BIT_MASK(16, 48); + case INDEX_op_ld16s: + s_mask = INT16_MIN; break; - CASE_OP_32_64(ld16u): - ctx->z_mask = MAKE_64BIT_MASK(0, 16); - ctx->s_mask = MAKE_64BIT_MASK(17, 47); + case INDEX_op_ld16u: + z_mask = MAKE_64BIT_MASK(0, 16); break; - case INDEX_op_ld32s_i64: - ctx->s_mask = MAKE_64BIT_MASK(32, 32); + case INDEX_op_ld32s: + s_mask = INT32_MIN; break; - case INDEX_op_ld32u_i64: - ctx->z_mask = MAKE_64BIT_MASK(0, 32); - ctx->s_mask = MAKE_64BIT_MASK(33, 31); + case INDEX_op_ld32u: + z_mask = MAKE_64BIT_MASK(0, 32); break; default: g_assert_not_reached(); } - return false; + return fold_masks_zs(ctx, op, z_mask, s_mask); } static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) @@ -2603,7 +2992,7 @@ static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op) TCGType type; if (op->args[1] != tcgv_ptr_arg(tcg_env)) { - return false; + return finish_folding(ctx, op); } type = ctx->type; @@ -2626,23 +3015,20 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op) if (op->args[1] != tcgv_ptr_arg(tcg_env)) { remove_mem_copy_all(ctx); - return false; + return true; } switch (op->opc) { - CASE_OP_32_64(st8): + case INDEX_op_st8: lm1 = 0; break; - CASE_OP_32_64(st16): + case INDEX_op_st16: lm1 = 1; break; - case INDEX_op_st32_i64: - case INDEX_op_st_i32: + case INDEX_op_st32: lm1 = 3; break; - case INDEX_op_st_i64: - lm1 = 7; - break; + case INDEX_op_st: case INDEX_op_st_vec: lm1 = tcg_type_size(ctx->type) - 1; break; @@ -2650,7 +3036,7 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op) g_assert_not_reached(); } remove_mem_copy_in(ctx, ofs, ofs + lm1); - return false; + return true; } static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) @@ -2660,8 +3046,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) TCGType type; if (op->args[1] != tcgv_ptr_arg(tcg_env)) { - fold_tcg_st(ctx, op); - return false; + return fold_tcg_st(ctx, op); } src = arg_temp(op->args[0]); @@ -2683,11 +3068,14 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) last = ofs + tcg_type_size(type) - 1; remove_mem_copy_in(ctx, ofs, last); record_mem_copy(ctx, type, src, ofs, last); - return false; + return true; } static bool fold_xor(OptContext *ctx, TCGOp *op) { + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; + if (fold_const2_commutative(ctx, op) || fold_xx_to_i(ctx, op, 0) || fold_xi_to_x(ctx, op, 0) || @@ -2695,11 +3083,14 @@ static bool fold_xor(OptContext *ctx, TCGOp *op) return true; } - ctx->z_mask = arg_info(op->args[1])->z_mask - | arg_info(op->args[2])->z_mask; - ctx->s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); + o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } /* Propagate constants and copies, fold constant expressions. */ @@ -2737,62 +3128,59 @@ void tcg_optimize(TCGContext *s) copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); /* Pre-compute the type of the operation. */ - if (def->flags & TCG_OPF_VECTOR) { - ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); - } else if (def->flags & TCG_OPF_64BIT) { - ctx.type = TCG_TYPE_I64; - } else { - ctx.type = TCG_TYPE_I32; - } - - /* Assume all bits affected, no bits known zero, no sign reps. */ - ctx.a_mask = -1; - ctx.z_mask = -1; - ctx.s_mask = 0; + ctx.type = TCGOP_TYPE(op); /* * Process each opcode. * Sorted alphabetically by opcode as much as possible. */ switch (opc) { - CASE_OP_32_64(add): + case INDEX_op_add: done = fold_add(&ctx, op); break; case INDEX_op_add_vec: done = fold_add_vec(&ctx, op); break; - CASE_OP_32_64(add2): - done = fold_add2(&ctx, op); + case INDEX_op_addci: + done = fold_addci(&ctx, op); + break; + case INDEX_op_addcio: + done = fold_addcio(&ctx, op); + break; + case INDEX_op_addco: + done = fold_addco(&ctx, op); break; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: done = fold_and(&ctx, op); break; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: done = fold_andc(&ctx, op); break; - CASE_OP_32_64(brcond): + case INDEX_op_brcond: done = fold_brcond(&ctx, op); break; case INDEX_op_brcond2_i32: done = fold_brcond2(&ctx, op); break; - CASE_OP_32_64(bswap16): - CASE_OP_32_64(bswap32): - case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: done = fold_bswap(&ctx, op); break; - CASE_OP_32_64(clz): - CASE_OP_32_64(ctz): + case INDEX_op_clz: + case INDEX_op_ctz: done = fold_count_zeros(&ctx, op); break; - CASE_OP_32_64(ctpop): + case INDEX_op_ctpop: done = fold_ctpop(&ctx, op); break; - CASE_OP_32_64(deposit): + case INDEX_op_deposit: done = fold_deposit(&ctx, op); break; - CASE_OP_32_64(div): - CASE_OP_32_64(divu): + case INDEX_op_divs: + case INDEX_op_divu: done = fold_divide(&ctx, op); break; case INDEX_op_dup_vec: @@ -2801,149 +3189,162 @@ void tcg_optimize(TCGContext *s) case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: done = fold_eqv(&ctx, op); break; - CASE_OP_32_64(extract): + case INDEX_op_extract: done = fold_extract(&ctx, op); break; - CASE_OP_32_64(extract2): + case INDEX_op_extract2: done = fold_extract2(&ctx, op); break; - CASE_OP_32_64(ext8s): - CASE_OP_32_64(ext16s): - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: done = fold_exts(&ctx, op); break; - CASE_OP_32_64(ext8u): - CASE_OP_32_64(ext16u): - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: done = fold_extu(&ctx, op); break; - CASE_OP_32_64(ld8s): - CASE_OP_32_64(ld8u): - CASE_OP_32_64(ld16s): - CASE_OP_32_64(ld16u): - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: + case INDEX_op_ld8s: + case INDEX_op_ld8u: + case INDEX_op_ld16s: + case INDEX_op_ld16u: + case INDEX_op_ld32s: + case INDEX_op_ld32u: done = fold_tcg_ld(&ctx, op); break; - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld: case INDEX_op_ld_vec: done = fold_tcg_ld_memcopy(&ctx, op); break; - CASE_OP_32_64(st8): - CASE_OP_32_64(st16): - case INDEX_op_st32_i64: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: done = fold_tcg_st(&ctx, op); break; - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_st: case INDEX_op_st_vec: done = fold_tcg_st_memcopy(&ctx, op); break; case INDEX_op_mb: done = fold_mb(&ctx, op); break; - CASE_OP_32_64_VEC(mov): + case INDEX_op_mov: + case INDEX_op_mov_vec: done = fold_mov(&ctx, op); break; - CASE_OP_32_64(movcond): + case INDEX_op_movcond: done = fold_movcond(&ctx, op); break; - CASE_OP_32_64(mul): + case INDEX_op_mul: done = fold_mul(&ctx, op); break; - CASE_OP_32_64(mulsh): - CASE_OP_32_64(muluh): + case INDEX_op_mulsh: + case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; - CASE_OP_32_64(muls2): - CASE_OP_32_64(mulu2): + case INDEX_op_muls2: + case INDEX_op_mulu2: done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: done = fold_nand(&ctx, op); break; - CASE_OP_32_64(neg): + case INDEX_op_neg: done = fold_neg(&ctx, op); break; - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: done = fold_nor(&ctx, op); break; - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: done = fold_not(&ctx, op); break; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: done = fold_or(&ctx, op); break; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: done = fold_orc(&ctx, op); break; - case INDEX_op_qemu_ld_a32_i32: - case INDEX_op_qemu_ld_a64_i32: - case INDEX_op_qemu_ld_a32_i64: - case INDEX_op_qemu_ld_a64_i64: - case INDEX_op_qemu_ld_a32_i128: - case INDEX_op_qemu_ld_a64_i128: - done = fold_qemu_ld(&ctx, op); - break; - case INDEX_op_qemu_st8_a32_i32: - case INDEX_op_qemu_st8_a64_i32: - case INDEX_op_qemu_st_a32_i32: - case INDEX_op_qemu_st_a64_i32: - case INDEX_op_qemu_st_a32_i64: - case INDEX_op_qemu_st_a64_i64: - case INDEX_op_qemu_st_a32_i128: - case INDEX_op_qemu_st_a64_i128: + case INDEX_op_qemu_ld: + done = fold_qemu_ld_1reg(&ctx, op); + break; + case INDEX_op_qemu_ld2: + done = fold_qemu_ld_2reg(&ctx, op); + break; + case INDEX_op_qemu_st: + case INDEX_op_qemu_st2: done = fold_qemu_st(&ctx, op); break; - CASE_OP_32_64(rem): - CASE_OP_32_64(remu): + case INDEX_op_rems: + case INDEX_op_remu: done = fold_remainder(&ctx, op); break; - CASE_OP_32_64(rotl): - CASE_OP_32_64(rotr): - CASE_OP_32_64(sar): - CASE_OP_32_64(shl): - CASE_OP_32_64(shr): + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: done = fold_shift(&ctx, op); break; - CASE_OP_32_64(setcond): + case INDEX_op_setcond: done = fold_setcond(&ctx, op); break; - CASE_OP_32_64(negsetcond): + case INDEX_op_negsetcond: done = fold_negsetcond(&ctx, op); break; case INDEX_op_setcond2_i32: done = fold_setcond2(&ctx, op); break; - CASE_OP_32_64(sextract): + case INDEX_op_cmp_vec: + done = fold_cmp_vec(&ctx, op); + break; + case INDEX_op_cmpsel_vec: + done = fold_cmpsel_vec(&ctx, op); + break; + case INDEX_op_bitsel_vec: + done = fold_bitsel_vec(&ctx, op); + break; + case INDEX_op_sextract: done = fold_sextract(&ctx, op); break; - CASE_OP_32_64(sub): + case INDEX_op_sub: done = fold_sub(&ctx, op); break; + case INDEX_op_subbi: + done = fold_subbi(&ctx, op); + break; + case INDEX_op_subbio: + done = fold_subbio(&ctx, op); + break; + case INDEX_op_subbo: + done = fold_subbo(&ctx, op); + break; case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; - CASE_OP_32_64(sub2): - done = fold_sub2(&ctx, op); - break; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: done = fold_xor(&ctx, op); break; + case INDEX_op_set_label: + case INDEX_op_br: + case INDEX_op_exit_tb: + case INDEX_op_goto_tb: + case INDEX_op_goto_ptr: + finish_ebb(&ctx); + done = true; + break; default: + done = finish_folding(&ctx, op); break; } - - if (!done) { - finish_folding(&ctx, op); - } + tcg_debug_assert(done); } } |