Diffstat (limited to 'tcg/optimize.c')
-rw-r--r-- | tcg/optimize.c | 1054
1 file changed, 611 insertions(+), 443 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index f922f86..10a76c5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -30,14 +30,6 @@
 #include "tcg-internal.h"
 #include "tcg-has.h"

-#define CASE_OP_32_64(x)                        \
-        glue(glue(case INDEX_op_, x), _i32):    \
-        glue(glue(case INDEX_op_, x), _i64)
-
-#define CASE_OP_32_64_VEC(x)                    \
-        glue(glue(case INDEX_op_, x), _i32):    \
-        glue(glue(case INDEX_op_, x), _i64):    \
-        glue(glue(case INDEX_op_, x), _vec)

 typedef struct MemCopyInfo {
     IntervalTreeNode itree;
@@ -66,6 +58,7 @@ typedef struct OptContext {

     /* In flight values from optimization. */
     TCGType type;
+    int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
 } OptContext;

 static inline TempOptInfo *ts_info(TCGTemp *ts)
@@ -344,6 +337,18 @@ static TCGArg arg_new_temp(OptContext *ctx)
     return temp_arg(ts);
 }

+static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
+                               TCGOpcode opc, unsigned narg)
+{
+    return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
+}
+
+static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
+                                TCGOpcode opc, unsigned narg)
+{
+    return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
+}
+
 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
 {
     TCGTemp *dst_ts = arg_temp(dst);
@@ -363,10 +368,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)

     switch (ctx->type) {
     case TCG_TYPE_I32:
-        new_op = INDEX_op_mov_i32;
-        break;
     case TCG_TYPE_I64:
-        new_op = INDEX_op_mov_i64;
+        new_op = INDEX_op_mov;
         break;
     case TCG_TYPE_V64:
     case TCG_TYPE_V128:
@@ -409,162 +412,163 @@ static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
     return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
 }

-static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
+static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
+                                      uint64_t x, uint64_t y)
 {
     uint64_t l64, h64;

     switch (op) {
-    CASE_OP_32_64(add):
+    case INDEX_op_add:
         return x + y;

-    CASE_OP_32_64(sub):
+    case INDEX_op_sub:
         return x - y;

-    CASE_OP_32_64(mul):
+    case INDEX_op_mul:
         return x * y;

-    CASE_OP_32_64_VEC(and):
+    case INDEX_op_and:
+    case INDEX_op_and_vec:
         return x & y;

-    CASE_OP_32_64_VEC(or):
+    case INDEX_op_or:
+    case INDEX_op_or_vec:
         return x | y;

-    CASE_OP_32_64_VEC(xor):
+    case INDEX_op_xor:
+    case INDEX_op_xor_vec:
         return x ^ y;

-    case INDEX_op_shl_i32:
-        return (uint32_t)x << (y & 31);
-
-    case INDEX_op_shl_i64:
+    case INDEX_op_shl:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x << (y & 31);
+        }
         return (uint64_t)x << (y & 63);

-    case INDEX_op_shr_i32:
-        return (uint32_t)x >> (y & 31);
-
-    case INDEX_op_shr_i64:
+    case INDEX_op_shr:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x >> (y & 31);
+        }
         return (uint64_t)x >> (y & 63);

-    case INDEX_op_sar_i32:
-        return (int32_t)x >> (y & 31);
-
-    case INDEX_op_sar_i64:
+    case INDEX_op_sar:
+        if (type == TCG_TYPE_I32) {
+            return (int32_t)x >> (y & 31);
+        }
         return (int64_t)x >> (y & 63);

-    case INDEX_op_rotr_i32:
-        return ror32(x, y & 31);
-
-    case INDEX_op_rotr_i64:
+    case INDEX_op_rotr:
+        if (type == TCG_TYPE_I32) {
+            return ror32(x, y & 31);
+        }
         return ror64(x, y & 63);

-    case INDEX_op_rotl_i32:
-        return rol32(x, y & 31);
-
-    case INDEX_op_rotl_i64:
+    case INDEX_op_rotl:
+        if (type == TCG_TYPE_I32) {
+            return rol32(x, y & 31);
+        }
         return rol64(x, y & 63);

-    CASE_OP_32_64_VEC(not):
+    case INDEX_op_not:
+    case INDEX_op_not_vec:
         return ~x;

-    CASE_OP_32_64(neg):
+    case INDEX_op_neg:
         return -x;

-    CASE_OP_32_64_VEC(andc):
+    case INDEX_op_andc:
+    case INDEX_op_andc_vec:
         return x & ~y;

-    CASE_OP_32_64_VEC(orc):
+    case INDEX_op_orc:
+    case INDEX_op_orc_vec:
         return x | ~y;

-    CASE_OP_32_64_VEC(eqv):
+    case INDEX_op_eqv:
+    case INDEX_op_eqv_vec:
         return ~(x ^ y);

-    CASE_OP_32_64_VEC(nand):
+    case INDEX_op_nand:
+    case INDEX_op_nand_vec:
         return ~(x & y);

-    CASE_OP_32_64_VEC(nor):
+    case INDEX_op_nor:
+    case INDEX_op_nor_vec:
         return ~(x | y);

-    case INDEX_op_clz_i32:
-        return (uint32_t)x ? clz32(x) : y;
-
-    case INDEX_op_clz_i64:
+    case INDEX_op_clz:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x ? clz32(x) : y;
+        }
         return x ? clz64(x) : y;

-    case INDEX_op_ctz_i32:
-        return (uint32_t)x ? ctz32(x) : y;
-
-    case INDEX_op_ctz_i64:
+    case INDEX_op_ctz:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x ? ctz32(x) : y;
+        }
         return x ? ctz64(x) : y;

-    case INDEX_op_ctpop_i32:
-        return ctpop32(x);
-
-    case INDEX_op_ctpop_i64:
-        return ctpop64(x);
-
-    CASE_OP_32_64(ext8s):
-        return (int8_t)x;
+    case INDEX_op_ctpop:
+        return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);

-    CASE_OP_32_64(ext16s):
-        return (int16_t)x;
-
-    CASE_OP_32_64(ext8u):
-        return (uint8_t)x;
-
-    CASE_OP_32_64(ext16u):
-        return (uint16_t)x;
-
-    CASE_OP_32_64(bswap16):
+    case INDEX_op_bswap16:
         x = bswap16(x);
         return y & TCG_BSWAP_OS ? (int16_t)x : x;

-    CASE_OP_32_64(bswap32):
+    case INDEX_op_bswap32:
         x = bswap32(x);
         return y & TCG_BSWAP_OS ? (int32_t)x : x;

-    case INDEX_op_bswap64_i64:
+    case INDEX_op_bswap64:
         return bswap64(x);

     case INDEX_op_ext_i32_i64:
-    case INDEX_op_ext32s_i64:
         return (int32_t)x;

     case INDEX_op_extu_i32_i64:
     case INDEX_op_extrl_i64_i32:
-    case INDEX_op_ext32u_i64:
         return (uint32_t)x;

     case INDEX_op_extrh_i64_i32:
         return (uint64_t)x >> 32;

-    case INDEX_op_muluh_i32:
-        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
-    case INDEX_op_mulsh_i32:
-        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
-
-    case INDEX_op_muluh_i64:
+    case INDEX_op_muluh:
+        if (type == TCG_TYPE_I32) {
+            return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+        }
         mulu64(&l64, &h64, x, y);
         return h64;
-    case INDEX_op_mulsh_i64:
+
+    case INDEX_op_mulsh:
+        if (type == TCG_TYPE_I32) {
+            return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+        }
         muls64(&l64, &h64, x, y);
         return h64;

-    case INDEX_op_div_i32:
+    case INDEX_op_divs:
         /* Avoid crashing on divide by zero, otherwise undefined. */
-        return (int32_t)x / ((int32_t)y ? : 1);
-    case INDEX_op_divu_i32:
-        return (uint32_t)x / ((uint32_t)y ? : 1);
-    case INDEX_op_div_i64:
+        if (type == TCG_TYPE_I32) {
+            return (int32_t)x / ((int32_t)y ? : 1);
+        }
         return (int64_t)x / ((int64_t)y ? : 1);
-    case INDEX_op_divu_i64:
+
+    case INDEX_op_divu:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x / ((uint32_t)y ? : 1);
+        }
         return (uint64_t)x / ((uint64_t)y ? : 1);

-    case INDEX_op_rem_i32:
-        return (int32_t)x % ((int32_t)y ? : 1);
-    case INDEX_op_remu_i32:
-        return (uint32_t)x % ((uint32_t)y ? : 1);
-    case INDEX_op_rem_i64:
+    case INDEX_op_rems:
+        if (type == TCG_TYPE_I32) {
+            return (int32_t)x % ((int32_t)y ? : 1);
+        }
         return (int64_t)x % ((int64_t)y ? : 1);
-    case INDEX_op_remu_i64:
+
+    case INDEX_op_remu:
+        if (type == TCG_TYPE_I32) {
+            return (uint32_t)x % ((uint32_t)y ? : 1);
+        }
         return (uint64_t)x % ((uint64_t)y ? : 1);

     default:
@@ -575,7 +579,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                     uint64_t x, uint64_t y)
 {
-    uint64_t res = do_constant_folding_2(op, x, y);
+    uint64_t res = do_constant_folding_2(op, type, x, y);

     if (type == TCG_TYPE_I32) {
         res = (int32_t)res;
     }
@@ -725,12 +729,18 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,

 #define NO_DEST  temp_arg(NULL)

+static int pref_commutative(TempOptInfo *ti)
+{
+    /* Slight preference for non-zero constants second. */
+    return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
+}
+
 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
     int sum = 0;

-    sum += arg_is_const(a1);
-    sum -= arg_is_const(a2);
+    sum += pref_commutative(arg_info(a1));
+    sum -= pref_commutative(arg_info(a2));

     /* Prefer the constant in second argument, and then the form
        op a, a, b, which is better handled on non-RISC hosts. */
@@ -745,10 +755,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 {
     int sum = 0;
-    sum += arg_is_const(p1[0]);
-    sum += arg_is_const(p1[1]);
-    sum -= arg_is_const(p2[0]);
-    sum -= arg_is_const(p2[1]);
+    sum += pref_commutative(arg_info(p1[0]));
+    sum += pref_commutative(arg_info(p1[1]));
+    sum -= pref_commutative(arg_info(p2[0]));
+    sum -= pref_commutative(arg_info(p2[1]));
     if (sum > 0) {
         TCGArg t;
         t = p1[0], p1[0] = p2[0], p2[0] = t;
@@ -806,9 +816,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,

     /* Expand to AND with a temporary if no backend support. */
     if (!TCG_TARGET_HAS_tst) {
-        TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32
-                             ? INDEX_op_and_i32 : INDEX_op_and_i64);
-        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3);
+        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
         TCGArg tmp = arg_new_temp(ctx);

         op2->args[0] = tmp;
@@ -901,8 +909,8 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)

     /* Expand to AND with a temporary if no backend support. */
     if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
-        TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
-        TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3);
+        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
+        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
         TCGArg t1 = arg_new_temp(ctx);
         TCGArg t2 = arg_new_temp(ctx);
@@ -1101,12 +1109,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)

     switch (ctx->type) {
     case TCG_TYPE_I32:
-        not_op = INDEX_op_not_i32;
-        have_not = TCG_TARGET_HAS_not_i32;
-        break;
     case TCG_TYPE_I64:
-        not_op = INDEX_op_not_i64;
-        have_not = TCG_TARGET_HAS_not_i64;
+        not_op = INDEX_op_not;
+        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
         break;
     case TCG_TYPE_V64:
     case TCG_TYPE_V128:
@@ -1197,8 +1202,10 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
  *   3) those that produce information about the result value.
  */

+static bool fold_addco(OptContext *ctx, TCGOp *op);
 static bool fold_or(OptContext *ctx, TCGOp *op);
 static bool fold_orc(OptContext *ctx, TCGOp *op);
+static bool fold_subbo(OptContext *ctx, TCGOp *op);
 static bool fold_xor(OptContext *ctx, TCGOp *op);

 static bool fold_add(OptContext *ctx, TCGOp *op)
@@ -1220,80 +1227,168 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op)
     return finish_folding(ctx, op);
 }

-static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
+static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
 {
-    bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
-    bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
+    TempOptInfo *t2;

-    if (a_const && b_const) {
-        uint64_t al = arg_info(op->args[2])->val;
-        uint64_t ah = arg_info(op->args[3])->val;
-        uint64_t bl = arg_info(op->args[4])->val;
-        uint64_t bh = arg_info(op->args[5])->val;
-        TCGArg rl, rh;
-        TCGOp *op2;
+    op = QTAILQ_PREV(op, link);
+    switch (op->opc) {
+    case INDEX_op_addco:
+        op->opc = INDEX_op_add;
+        fold_add(ctx, op);
+        break;
+    case INDEX_op_addcio:
+        op->opc = INDEX_op_addci;
+        break;
+    case INDEX_op_addc1o:
+        op->opc = INDEX_op_add;
+        t2 = arg_info(op->args[2]);
+        if (ti_is_const(t2)) {
+            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
+            /* Perform other constant folding, if needed. */
+            fold_add(ctx, op);
+        } else {
+            TCGArg ret = op->args[0];
+            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
+            op->args[0] = ret;
+            op->args[1] = ret;
+            op->args[2] = arg_new_constant(ctx, 1);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}

-        if (ctx->type == TCG_TYPE_I32) {
-            uint64_t a = deposit64(al, 32, 32, ah);
-            uint64_t b = deposit64(bl, 32, 32, bh);
+static bool fold_addci(OptContext *ctx, TCGOp *op)
+{
+    fold_commutative(ctx, op);

-            if (add) {
-                a += b;
-            } else {
-                a -= b;
-            }
+    if (ctx->carry_state < 0) {
+        return finish_folding(ctx, op);
+    }
+
+    squash_prev_carryout(ctx, op);
+    op->opc = INDEX_op_add;
+
+    if (ctx->carry_state > 0) {
+        TempOptInfo *t2 = arg_info(op->args[2]);

-            al = sextract64(a, 0, 32);
-            ah = sextract64(a, 32, 32);
+        /*
+         * Propagate the known carry-in into a constant, if possible.
+         * Otherwise emit a second add +1.
+         */
+        if (ti_is_const(t2)) {
+            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
         } else {
-            Int128 a = int128_make128(al, ah);
-            Int128 b = int128_make128(bl, bh);
+            TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);

-            if (add) {
-                a = int128_add(a, b);
-            } else {
-                a = int128_sub(a, b);
-            }
+            op2->args[0] = op->args[0];
+            op2->args[1] = op->args[1];
+            op2->args[2] = op->args[2];
+            fold_add(ctx, op2);

-            al = int128_getlo(a);
-            ah = int128_gethi(a);
+            op->args[1] = op->args[0];
+            op->args[2] = arg_new_constant(ctx, 1);
         }
+    }

-        rl = op->args[0];
-        rh = op->args[1];
+    ctx->carry_state = -1;
+    return fold_add(ctx, op);
+}

-        /* The proper opcode is supplied by tcg_opt_gen_mov. */
-        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
+static bool fold_addcio(OptContext *ctx, TCGOp *op)
+{
+    TempOptInfo *t1, *t2;
+    int carry_out = -1;
+    uint64_t sum, max;

-        tcg_opt_gen_movi(ctx, op, rl, al);
-        tcg_opt_gen_movi(ctx, op2, rh, ah);
-        return true;
+    fold_commutative(ctx, op);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+
+    /*
+     * The z_mask value is >= the maximum value that can be represented
+     * with the known zero bits.  So adding the z_mask values will not
+     * overflow if and only if the true values cannot overflow.
+     */
+    if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
+        !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
+        carry_out = 0;
     }

-    /* Fold sub2 r,x,i to add2 r,x,-i */
-    if (!add && b_const) {
-        uint64_t bl = arg_info(op->args[4])->val;
-        uint64_t bh = arg_info(op->args[5])->val;
+    if (ctx->carry_state < 0) {
+        ctx->carry_state = carry_out;
+        return finish_folding(ctx, op);
+    }

-        /* Negate the two parts without assembling and disassembling. */
-        bl = -bl;
-        bh = ~bh + !bl;
+    squash_prev_carryout(ctx, op);
+    if (ctx->carry_state == 0) {
+        goto do_addco;
+    }

-        op->opc = (ctx->type == TCG_TYPE_I32
-                   ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
-        op->args[4] = arg_new_constant(ctx, bl);
-        op->args[5] = arg_new_constant(ctx, bh);
+    /* Propagate the known carry-in into a constant, if possible. */
+    max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
+    if (ti_is_const(t2)) {
+        uint64_t v = ti_const_val(t2) & max;
+        if (v < max) {
+            op->args[2] = arg_new_constant(ctx, v + 1);
+            goto do_addco;
+        }
+        /* max + known carry in produces known carry out. */
+        carry_out = 1;
     }
+    if (ti_is_const(t1)) {
+        uint64_t v = ti_const_val(t1) & max;
+        if (v < max) {
+            op->args[1] = arg_new_constant(ctx, v + 1);
+            goto do_addco;
+        }
+        carry_out = 1;
+    }
+
+    /* Adjust the opcode to remember the known carry-in. */
+    op->opc = INDEX_op_addc1o;

+    ctx->carry_state = carry_out;
     return finish_folding(ctx, op);
+
+ do_addco:
+    op->opc = INDEX_op_addco;
+    return fold_addco(ctx, op);
 }

-static bool fold_add2(OptContext *ctx, TCGOp *op)
+static bool fold_addco(OptContext *ctx, TCGOp *op)
 {
-    /* Note that the high and low parts may be independently swapped. */
-    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
-    swap_commutative(op->args[1], &op->args[3], &op->args[5]);
+    TempOptInfo *t1, *t2;
+    int carry_out = -1;
+    uint64_t ign;

-    return fold_addsub2(ctx, op, true);
+    fold_commutative(ctx, op);
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+
+    if (ti_is_const(t2)) {
+        uint64_t v2 = ti_const_val(t2);
+
+        if (ti_is_const(t1)) {
+            uint64_t v1 = ti_const_val(t1);
+            /* Given sign-extension of z_mask for I32, we need not truncate. */
+            carry_out = uadd64_overflow(v1, v2, &ign);
+        } else if (v2 == 0) {
+            carry_out = 0;
+        }
+    } else {
+        /*
+         * The z_mask value is >= the maximum value that can be represented
+         * with the known zero bits.  So adding the z_mask values will not
+         * overflow if and only if the true values cannot overflow.
+         */
+        if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
+            carry_out = 0;
+        }
+    }
+    ctx->carry_state = carry_out;
+    return finish_folding(ctx, op);
 }

 static bool fold_and(OptContext *ctx, TCGOp *op)
@@ -1348,6 +1443,25 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
     t2 = arg_info(op->args[2]);
     z_mask = t1->z_mask;

+    if (ti_is_const(t2)) {
+        /* Fold andc r,x,i to and r,x,~i. */
+        switch (ctx->type) {
+        case TCG_TYPE_I32:
+        case TCG_TYPE_I64:
+            op->opc = INDEX_op_and;
+            break;
+        case TCG_TYPE_V64:
+        case TCG_TYPE_V128:
+        case TCG_TYPE_V256:
+            op->opc = INDEX_op_and_vec;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
+        return fold_and(ctx, op);
+    }
+
     /*
      * Known-zeros does not imply known-ones.  Therefore unless
      * arg2 is constant, we can't infer anything from it.
@@ -1504,14 +1618,14 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
         break;

     do_brcond_low:
-        op->opc = INDEX_op_brcond_i32;
+        op->opc = INDEX_op_brcond;
         op->args[1] = op->args[2];
         op->args[2] = cond;
         op->args[3] = label;
         return fold_brcond(ctx, op);

     do_brcond_high:
-        op->opc = INDEX_op_brcond_i32;
+        op->opc = INDEX_op_brcond;
         op->args[0] = op->args[1];
         op->args[1] = op->args[3];
         op->args[2] = cond;
@@ -1547,17 +1661,15 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)

     z_mask = t1->z_mask;
     switch (op->opc) {
-    case INDEX_op_bswap16_i32:
-    case INDEX_op_bswap16_i64:
+    case INDEX_op_bswap16:
         z_mask = bswap16(z_mask);
         sign = INT16_MIN;
         break;
-    case INDEX_op_bswap32_i32:
-    case INDEX_op_bswap32_i64:
+    case INDEX_op_bswap32:
         z_mask = bswap32(z_mask);
         sign = INT32_MIN;
         break;
-    case INDEX_op_bswap64_i64:
+    case INDEX_op_bswap64:
         z_mask = bswap64(z_mask);
         sign = INT64_MIN;
         break;
@@ -1713,8 +1825,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     TempOptInfo *t2 = arg_info(op->args[2]);
     int ofs = op->args[3];
     int len = op->args[4];
-    int width;
-    TCGOpcode and_opc;
+    int width = 8 * tcg_type_size(ctx->type);
     uint64_t z_mask, s_mask;

     if (ti_is_const(t1) && ti_is_const(t2)) {
@@ -1723,24 +1834,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
                                             ti_const_val(t2)));
     }

-    switch (ctx->type) {
-    case TCG_TYPE_I32:
-        and_opc = INDEX_op_and_i32;
-        width = 32;
-        break;
-    case TCG_TYPE_I64:
-        and_opc = INDEX_op_and_i64;
-        width = 64;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
     /* Inserting a value into zero at offset 0. */
     if (ti_is_const_val(t1, 0) && ofs == 0) {
         uint64_t mask = MAKE_64BIT_MASK(0, len);

-        op->opc = and_opc;
+        op->opc = INDEX_op_and;
         op->args[1] = op->args[2];
         op->args[2] = arg_new_constant(ctx, mask);
         return fold_and(ctx, op);
@@ -1750,7 +1848,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     if (ti_is_const_val(t2, 0)) {
         uint64_t mask = deposit64(-1, ofs, len, 0);

-        op->opc = and_opc;
+        op->opc = INDEX_op_and;
         op->args[2] = arg_new_constant(ctx, mask);
         return fold_and(ctx, op);
     }
@@ -1803,6 +1901,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
 static bool fold_eqv(OptContext *ctx, TCGOp *op)
 {
     uint64_t s_mask;
+    TempOptInfo *t1, *t2;

     if (fold_const2_commutative(ctx, op) ||
         fold_xi_to_x(ctx, op, -1) ||
@@ -1810,8 +1909,28 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
         return true;
     }

-    s_mask = arg_info(op->args[1])->s_mask
-           & arg_info(op->args[2])->s_mask;
+    t2 = arg_info(op->args[2]);
+    if (ti_is_const(t2)) {
+        /* Fold eqv r,x,i to xor r,x,~i. */
+        switch (ctx->type) {
+        case TCG_TYPE_I32:
+        case TCG_TYPE_I64:
+            op->opc = INDEX_op_xor;
+            break;
+        case TCG_TYPE_V64:
+        case TCG_TYPE_V128:
+        case TCG_TYPE_V256:
+            op->opc = INDEX_op_xor_vec;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
+        return fold_xor(ctx, op);
+    }
+
+    t1 = arg_info(op->args[1]);
+    s_mask = t1->s_mask & t2->s_mask;
     return fold_masks_s(ctx, op, s_mask);
 }

@@ -1843,12 +1962,12 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
         uint64_t v2 = arg_info(op->args[2])->val;
         int shr = op->args[3];

-        if (op->opc == INDEX_op_extract2_i64) {
-            v1 >>= shr;
-            v2 <<= 64 - shr;
-        } else {
+        if (ctx->type == TCG_TYPE_I32) {
             v1 = (uint32_t)v1 >> shr;
             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
+        } else {
+            v1 >>= shr;
+            v2 <<= 64 - shr;
         }
         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
     }
@@ -1857,8 +1976,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)

 static bool fold_exts(OptContext *ctx, TCGOp *op)
 {
-    uint64_t s_mask_old, s_mask, z_mask;
-    bool type_change = false;
+    uint64_t s_mask, z_mask;
     TempOptInfo *t1;

     if (fold_const1(ctx, op)) {
@@ -1868,72 +1986,38 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
     t1 = arg_info(op->args[1]);
     z_mask = t1->z_mask;
     s_mask = t1->s_mask;
-    s_mask_old = s_mask;

     switch (op->opc) {
-    CASE_OP_32_64(ext8s):
-        s_mask |= INT8_MIN;
-        z_mask = (int8_t)z_mask;
-        break;
-    CASE_OP_32_64(ext16s):
-        s_mask |= INT16_MIN;
-        z_mask = (int16_t)z_mask;
-        break;
     case INDEX_op_ext_i32_i64:
-        type_change = true;
-        QEMU_FALLTHROUGH;
-    case INDEX_op_ext32s_i64:
         s_mask |= INT32_MIN;
         z_mask = (int32_t)z_mask;
         break;
     default:
         g_assert_not_reached();
     }
-
-    if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
-        return true;
-    }
-
     return fold_masks_zs(ctx, op, z_mask, s_mask);
 }

 static bool fold_extu(OptContext *ctx, TCGOp *op)
 {
-    uint64_t z_mask_old, z_mask;
-    bool type_change = false;
+    uint64_t z_mask;

     if (fold_const1(ctx, op)) {
         return true;
     }

-    z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
-
+    z_mask = arg_info(op->args[1])->z_mask;
     switch (op->opc) {
-    CASE_OP_32_64(ext8u):
-        z_mask = (uint8_t)z_mask;
-        break;
-    CASE_OP_32_64(ext16u):
-        z_mask = (uint16_t)z_mask;
-        break;
     case INDEX_op_extrl_i64_i32:
     case INDEX_op_extu_i32_i64:
-        type_change = true;
-        QEMU_FALLTHROUGH;
-    case INDEX_op_ext32u_i64:
         z_mask = (uint32_t)z_mask;
         break;
     case INDEX_op_extrh_i64_i32:
-        type_change = true;
         z_mask >>= 32;
         break;
     default:
         g_assert_not_reached();
     }
-
-    if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
-        return true;
-    }
-
     return fold_masks_z(ctx, op, z_mask);
 }
@@ -1999,42 +2083,20 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
     if (ti_is_const(tt) && ti_is_const(ft)) {
         uint64_t tv = ti_const_val(tt);
         uint64_t fv = ti_const_val(ft);
-        TCGOpcode opc, negopc = 0;
         TCGCond cond = op->args[5];

-        switch (ctx->type) {
-        case TCG_TYPE_I32:
-            opc = INDEX_op_setcond_i32;
-            if (TCG_TARGET_HAS_negsetcond_i32) {
-                negopc = INDEX_op_negsetcond_i32;
-            }
-            tv = (int32_t)tv;
-            fv = (int32_t)fv;
-            break;
-        case TCG_TYPE_I64:
-            opc = INDEX_op_setcond_i64;
-            if (TCG_TARGET_HAS_negsetcond_i64) {
-                negopc = INDEX_op_negsetcond_i64;
-            }
-            break;
-        default:
-            g_assert_not_reached();
-        }
-
         if (tv == 1 && fv == 0) {
-            op->opc = opc;
+            op->opc = INDEX_op_setcond;
             op->args[3] = cond;
         } else if (fv == 1 && tv == 0) {
-            op->opc = opc;
+            op->opc = INDEX_op_setcond;
+            op->args[3] = tcg_invert_cond(cond);
+        } else if (tv == -1 && fv == 0) {
+            op->opc = INDEX_op_negsetcond;
+            op->args[3] = cond;
+        } else if (fv == -1 && tv == 0) {
+            op->opc = INDEX_op_negsetcond;
             op->args[3] = tcg_invert_cond(cond);
-        } else if (negopc) {
-            if (tv == -1 && fv == 0) {
-                op->opc = negopc;
-                op->args[3] = cond;
-            } else if (fv == -1 && tv == 0) {
-                op->opc = negopc;
-                op->args[3] = tcg_invert_cond(cond);
-            }
         }
     }
@@ -2072,21 +2134,23 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
         TCGOp *op2;

         switch (op->opc) {
-        case INDEX_op_mulu2_i32:
-            l = (uint64_t)(uint32_t)a * (uint32_t)b;
-            h = (int32_t)(l >> 32);
-            l = (int32_t)l;
-            break;
-        case INDEX_op_muls2_i32:
-            l = (int64_t)(int32_t)a * (int32_t)b;
-            h = l >> 32;
-            l = (int32_t)l;
-            break;
-        case INDEX_op_mulu2_i64:
-            mulu64(&l, &h, a, b);
+        case INDEX_op_mulu2:
+            if (ctx->type == TCG_TYPE_I32) {
+                l = (uint64_t)(uint32_t)a * (uint32_t)b;
+                h = (int32_t)(l >> 32);
+                l = (int32_t)l;
+            } else {
+                mulu64(&l, &h, a, b);
+            }
             break;
-        case INDEX_op_muls2_i64:
-            muls64(&l, &h, a, b);
+        case INDEX_op_muls2:
+            if (ctx->type == TCG_TYPE_I32) {
+                l = (int64_t)(int32_t)a * (int32_t)b;
+                h = l >> 32;
+                l = (int32_t)l;
+            } else {
+                muls64(&l, &h, a, b);
+            }
             break;
         default:
             g_assert_not_reached();
@@ -2096,7 +2160,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
         rh = op->args[1];

         /* The proper opcode is supplied by tcg_opt_gen_mov. */
-        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
+        op2 = opt_insert_before(ctx, op, 0, 2);

         tcg_opt_gen_movi(ctx, op, rl, l);
         tcg_opt_gen_movi(ctx, op2, rh, h);
@@ -2176,6 +2240,7 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
 static bool fold_orc(OptContext *ctx, TCGOp *op)
 {
     uint64_t s_mask;
+    TempOptInfo *t1, *t2;

     if (fold_const2(ctx, op) ||
         fold_xx_to_i(ctx, op, -1) ||
@@ -2184,8 +2249,28 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
         return true;
     }

-    s_mask = arg_info(op->args[1])->s_mask
-           & arg_info(op->args[2])->s_mask;
+    t2 = arg_info(op->args[2]);
+    if (ti_is_const(t2)) {
+        /* Fold orc r,x,i to or r,x,~i. */
+        switch (ctx->type) {
+        case TCG_TYPE_I32:
+        case TCG_TYPE_I64:
+            op->opc = INDEX_op_or;
+            break;
+        case TCG_TYPE_V64:
+        case TCG_TYPE_V128:
+        case TCG_TYPE_V256:
+            op->opc = INDEX_op_or_vec;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
+        return fold_or(ctx, op);
+    }
+
+    t1 = arg_info(op->args[1]);
+    s_mask = t1->s_mask & t2->s_mask;
     return fold_masks_s(ctx, op, s_mask);
 }
@@ -2300,34 +2385,17 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
             break;
         }
         if (convert) {
-            TCGOpcode add_opc, xor_opc, neg_opc;
-
             if (!inv && !neg) {
                 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
             }

-            switch (ctx->type) {
-            case TCG_TYPE_I32:
-                add_opc = INDEX_op_add_i32;
-                neg_opc = INDEX_op_neg_i32;
-                xor_opc = INDEX_op_xor_i32;
-                break;
-            case TCG_TYPE_I64:
-                add_opc = INDEX_op_add_i64;
-                neg_opc = INDEX_op_neg_i64;
-                xor_opc = INDEX_op_xor_i64;
-                break;
-            default:
-                g_assert_not_reached();
-            }
-
             if (!inv) {
-                op->opc = neg_opc;
+                op->opc = INDEX_op_neg;
             } else if (neg) {
-                op->opc = add_opc;
+                op->opc = INDEX_op_add;
                 op->args[2] = arg_new_constant(ctx, -1);
             } else {
-                op->opc = xor_opc;
+                op->opc = INDEX_op_xor;
                 op->args[2] = arg_new_constant(ctx, 1);
             }
             return -1;
@@ -2338,8 +2406,6 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)

 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
 {
-    TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
-    TCGOpcode uext_opc = 0, sext_opc = 0;
     TCGCond cond = op->args[3];
     TCGArg ret, src1, src2;
     TCGOp *op2;
@@ -2358,77 +2424,46 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
     }
     sh = ctz64(val);

-    switch (ctx->type) {
-    case TCG_TYPE_I32:
-        and_opc = INDEX_op_and_i32;
-        sub_opc = INDEX_op_sub_i32;
-        xor_opc = INDEX_op_xor_i32;
-        shr_opc = INDEX_op_shr_i32;
-        neg_opc = INDEX_op_neg_i32;
-        if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
-            uext_opc = INDEX_op_extract_i32;
-        }
-        if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
-            sext_opc = INDEX_op_sextract_i32;
-        }
-        break;
-    case TCG_TYPE_I64:
-        and_opc = INDEX_op_and_i64;
-        sub_opc = INDEX_op_sub_i64;
-        xor_opc = INDEX_op_xor_i64;
-        shr_opc = INDEX_op_shr_i64;
-        neg_opc = INDEX_op_neg_i64;
-        if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
-            uext_opc = INDEX_op_extract_i64;
-        }
-        if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
-            sext_opc = INDEX_op_sextract_i64;
-        }
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
     ret = op->args[0];
     src1 = op->args[1];
     inv = cond == TCG_COND_TSTEQ;

-    if (sh && sext_opc && neg && !inv) {
-        op->opc = sext_opc;
+    if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
+        op->opc = INDEX_op_sextract;
         op->args[1] = src1;
         op->args[2] = sh;
         op->args[3] = 1;
         return;
-    } else if (sh && uext_opc) {
-        op->opc = uext_opc;
+    } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
+        op->opc = INDEX_op_extract;
         op->args[1] = src1;
         op->args[2] = sh;
         op->args[3] = 1;
     } else {
         if (sh) {
-            op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3);
+            op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
             op2->args[0] = ret;
             op2->args[1] = src1;
             op2->args[2] = arg_new_constant(ctx, sh);
             src1 = ret;
         }
-        op->opc = and_opc;
+        op->opc = INDEX_op_and;
         op->args[1] = src1;
         op->args[2] = arg_new_constant(ctx, 1);
     }

     if (neg && inv) {
-        op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3);
+        op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
         op2->args[0] = ret;
         op2->args[1] = ret;
-        op2->args[2] = arg_new_constant(ctx, 1);
+        op2->args[2] = arg_new_constant(ctx, -1);
     } else if (inv) {
-        op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3);
+        op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
         op2->args[0] = ret;
         op2->args[1] = ret;
         op2->args[2] = arg_new_constant(ctx, 1);
     } else if (neg) {
-        op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2);
+        op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
         op2->args[0] = ret;
         op2->args[1] = ret;
     }
@@ -2540,14 +2575,14 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)

     do_setcond_low:
         op->args[2] = op->args[3];
         op->args[3] = cond;
-        op->opc = INDEX_op_setcond_i32;
+        op->opc = INDEX_op_setcond;
         return fold_setcond(ctx, op);

     do_setcond_high:
         op->args[1] = op->args[2];
         op->args[2] = op->args[4];
         op->args[3] = cond;
-        op->opc = INDEX_op_setcond_i32;
+        op->opc = INDEX_op_setcond;
         return fold_setcond(ctx, op);
 }
@@ -2607,13 +2642,13 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
     }

     switch (op->opc) {
-    CASE_OP_32_64(sar):
+    case INDEX_op_sar:
         /*
          * Arithmetic right shift will not reduce the number of
          * input sign repetitions.
          */
         return fold_masks_s(ctx, op, s_mask);
-    CASE_OP_32_64(shr):
+    case INDEX_op_shr:
         /*
          * If the sign bit is known zero, then logical right shift
          * will not reduce the number of input sign repetitions.
@@ -2640,11 +2675,8 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)

     switch (ctx->type) {
     case TCG_TYPE_I32:
-        neg_op = INDEX_op_neg_i32;
-        have_neg = true;
-        break;
     case TCG_TYPE_I64:
-        neg_op = INDEX_op_neg_i64;
+        neg_op = INDEX_op_neg;
         have_neg = true;
         break;
     case TCG_TYPE_V64:
@@ -2689,16 +2721,149 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
     if (arg_is_const(op->args[2])) {
         uint64_t val = arg_info(op->args[2])->val;

-        op->opc = (ctx->type == TCG_TYPE_I32
-                   ? INDEX_op_add_i32 : INDEX_op_add_i64);
+        op->opc = INDEX_op_add;
         op->args[2] = arg_new_constant(ctx, -val);
     }
     return finish_folding(ctx, op);
 }

-static bool fold_sub2(OptContext *ctx, TCGOp *op)
+static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
+{
+    TempOptInfo *t2;
+
+    op = QTAILQ_PREV(op, link);
+    switch (op->opc) {
+    case INDEX_op_subbo:
+        op->opc = INDEX_op_sub;
+        fold_sub(ctx, op);
+        break;
+    case INDEX_op_subbio:
+        op->opc = INDEX_op_subbi;
+        break;
+    case INDEX_op_subb1o:
+        t2 = arg_info(op->args[2]);
+        if (ti_is_const(t2)) {
+            op->opc = INDEX_op_add;
+            op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
+            /* Perform other constant folding, if needed. */
+            fold_add(ctx, op);
+        } else {
+            TCGArg ret = op->args[0];
+            op->opc = INDEX_op_sub;
+            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
+            op->args[0] = ret;
+            op->args[1] = ret;
+            op->args[2] = arg_new_constant(ctx, -1);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static bool fold_subbi(OptContext *ctx, TCGOp *op)
+{
+    TempOptInfo *t2;
+    int borrow_in = ctx->carry_state;
+
+    if (borrow_in < 0) {
+        return finish_folding(ctx, op);
+    }
+    ctx->carry_state = -1;
+
+    squash_prev_borrowout(ctx, op);
+    if (borrow_in == 0) {
+        op->opc = INDEX_op_sub;
+        return fold_sub(ctx, op);
+    }
+
+    /*
+     * Propagate the known carry-in into any constant, then negate to
+     * transform from sub to add.  If there is no constant, emit a
+     * separate add -1.
+     */
+    t2 = arg_info(op->args[2]);
+    if (ti_is_const(t2)) {
+        op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
+    } else {
+        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
+
+        op2->args[0] = op->args[0];
+        op2->args[1] = op->args[1];
+        op2->args[2] = op->args[2];
+        fold_sub(ctx, op2);
+
+        op->args[1] = op->args[0];
+        op->args[2] = arg_new_constant(ctx, -1);
+    }
+    op->opc = INDEX_op_add;
+    return fold_add(ctx, op);
+}
+
+static bool fold_subbio(OptContext *ctx, TCGOp *op)
+{
+    TempOptInfo *t1, *t2;
+    int borrow_out = -1;
+
+    if (ctx->carry_state < 0) {
+        return finish_folding(ctx, op);
+    }
+
+    squash_prev_borrowout(ctx, op);
+    if (ctx->carry_state == 0) {
+        goto do_subbo;
+    }
+
+    t1 = arg_info(op->args[1]);
+    t2 = arg_info(op->args[2]);
+
+    /* Propagate the known borrow-in into a constant, if possible. */
+    if (ti_is_const(t2)) {
+        uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
+        uint64_t v = ti_const_val(t2) & max;
+
+        if (v < max) {
+            op->args[2] = arg_new_constant(ctx, v + 1);
+            goto do_subbo;
+        }
+        /* subtracting max + 1 produces known borrow out. */
+        borrow_out = 1;
+    }
+    if (ti_is_const(t1)) {
+        uint64_t v = ti_const_val(t1);
+        if (v != 0) {
+            op->args[1] = arg_new_constant(ctx, v - 1);
+            goto do_subbo;
+        }
+    }
+
+    /* Adjust the opcode to remember the known carry-in. */
+    op->opc = INDEX_op_subb1o;
+
+    ctx->carry_state = borrow_out;
+    return finish_folding(ctx, op);
+
+ do_subbo:
+    op->opc = INDEX_op_subbo;
+    return fold_subbo(ctx, op);
+}
+
+static bool fold_subbo(OptContext *ctx, TCGOp *op)
 {
-    return fold_addsub2(ctx, op, false);
+    TempOptInfo *t1 = arg_info(op->args[1]);
+    TempOptInfo *t2 = arg_info(op->args[2]);
+    int borrow_out = -1;
+
+    if (ti_is_const(t2)) {
+        uint64_t v2 = ti_const_val(t2);
+        if (v2 == 0) {
+            borrow_out = 0;
+        } else if (ti_is_const(t1)) {
+            uint64_t v1 = ti_const_val(t1);
+            borrow_out = v1 < v2;
+        }
+    }
+    ctx->carry_state = borrow_out;
+    return finish_folding(ctx, op);
 }

 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
@@ -2707,22 +2872,22 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)

     /* We can't do any folding with a load, but we can record bits. */
     switch (op->opc) {
-    CASE_OP_32_64(ld8s):
+    case INDEX_op_ld8s:
         s_mask = INT8_MIN;
         break;
-    CASE_OP_32_64(ld8u):
+    case INDEX_op_ld8u:
         z_mask = MAKE_64BIT_MASK(0, 8);
         break;
-    CASE_OP_32_64(ld16s):
+    case INDEX_op_ld16s:
         s_mask = INT16_MIN;
         break;
-    CASE_OP_32_64(ld16u):
+    case INDEX_op_ld16u:
         z_mask = MAKE_64BIT_MASK(0, 16);
         break;
-    case INDEX_op_ld32s_i64:
+    case INDEX_op_ld32s:
         s_mask = INT32_MIN;
         break;
-    case INDEX_op_ld32u_i64:
+    case INDEX_op_ld32u:
         z_mask = MAKE_64BIT_MASK(0, 32);
         break;
     default:
@@ -2765,19 +2930,16 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
     }

     switch (op->opc) {
-    CASE_OP_32_64(st8):
+    case INDEX_op_st8:
         lm1 = 0;
         break;
-    CASE_OP_32_64(st16):
+    case INDEX_op_st16:
         lm1 = 1;
         break;
-    case INDEX_op_st32_i64:
-    case INDEX_op_st_i32:
+    case INDEX_op_st32:
         lm1 = 3;
         break;
-    case INDEX_op_st_i64:
-        lm1 = 7;
-        break;
+    case INDEX_op_st:
     case INDEX_op_st_vec:
         lm1 = tcg_type_size(ctx->type) - 1;
         break;
@@ -2881,44 +3043,52 @@ void tcg_optimize(TCGContext *s)
          * Sorted alphabetically by opcode as much as possible.
          */
         switch (opc) {
-        CASE_OP_32_64(add):
+        case INDEX_op_add:
             done = fold_add(&ctx, op);
             break;
         case INDEX_op_add_vec:
             done = fold_add_vec(&ctx, op);
             break;
-        CASE_OP_32_64(add2):
-            done = fold_add2(&ctx, op);
+        case INDEX_op_addci:
+            done = fold_addci(&ctx, op);
+            break;
+        case INDEX_op_addcio:
+            done = fold_addcio(&ctx, op);
+            break;
+        case INDEX_op_addco:
+            done = fold_addco(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(and):
+        case INDEX_op_and:
+        case INDEX_op_and_vec:
             done = fold_and(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(andc):
+        case INDEX_op_andc:
+        case INDEX_op_andc_vec:
             done = fold_andc(&ctx, op);
             break;
-        CASE_OP_32_64(brcond):
+        case INDEX_op_brcond:
             done = fold_brcond(&ctx, op);
             break;
         case INDEX_op_brcond2_i32:
             done = fold_brcond2(&ctx, op);
             break;
-        CASE_OP_32_64(bswap16):
-        CASE_OP_32_64(bswap32):
-        case INDEX_op_bswap64_i64:
+        case INDEX_op_bswap16:
+        case INDEX_op_bswap32:
+        case INDEX_op_bswap64:
             done = fold_bswap(&ctx, op);
             break;
-        CASE_OP_32_64(clz):
-        CASE_OP_32_64(ctz):
+        case INDEX_op_clz:
+        case INDEX_op_ctz:
             done = fold_count_zeros(&ctx, op);
             break;
-        CASE_OP_32_64(ctpop):
+        case INDEX_op_ctpop:
             done = fold_ctpop(&ctx, op);
             break;
-        CASE_OP_32_64(deposit):
+        case INDEX_op_deposit:
             done = fold_deposit(&ctx, op);
             break;
-        CASE_OP_32_64(div):
-        CASE_OP_32_64(divu):
+        case INDEX_op_divs:
+        case INDEX_op_divu:
             done = fold_divide(&ctx, op);
             break;
         case INDEX_op_dup_vec:
@@ -2927,123 +3097,114 @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_dup2_vec:
             done = fold_dup2(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(eqv):
+        case INDEX_op_eqv:
+        case INDEX_op_eqv_vec:
             done = fold_eqv(&ctx, op);
             break;
-        CASE_OP_32_64(extract):
+        case INDEX_op_extract:
             done = fold_extract(&ctx, op);
             break;
-        CASE_OP_32_64(extract2):
+        case INDEX_op_extract2:
             done = fold_extract2(&ctx, op);
             break;
-        CASE_OP_32_64(ext8s):
-        CASE_OP_32_64(ext16s):
-        case INDEX_op_ext32s_i64:
         case INDEX_op_ext_i32_i64:
             done = fold_exts(&ctx, op);
             break;
-        CASE_OP_32_64(ext8u):
-        CASE_OP_32_64(ext16u):
-        case INDEX_op_ext32u_i64:
         case INDEX_op_extu_i32_i64:
         case INDEX_op_extrl_i64_i32:
         case INDEX_op_extrh_i64_i32:
             done = fold_extu(&ctx, op);
             break;
-        CASE_OP_32_64(ld8s):
-        CASE_OP_32_64(ld8u):
-        CASE_OP_32_64(ld16s):
-        CASE_OP_32_64(ld16u):
-        case INDEX_op_ld32s_i64:
-        case INDEX_op_ld32u_i64:
+        case INDEX_op_ld8s:
+        case INDEX_op_ld8u:
+        case INDEX_op_ld16s:
+        case INDEX_op_ld16u:
+        case INDEX_op_ld32s:
+        case INDEX_op_ld32u:
             done = fold_tcg_ld(&ctx, op);
             break;
-        case INDEX_op_ld_i32:
-        case INDEX_op_ld_i64:
+        case INDEX_op_ld:
         case INDEX_op_ld_vec:
             done = fold_tcg_ld_memcopy(&ctx, op);
             break;
-        CASE_OP_32_64(st8):
-        CASE_OP_32_64(st16):
-        case INDEX_op_st32_i64:
+        case INDEX_op_st8:
+        case INDEX_op_st16:
+        case INDEX_op_st32:
             done = fold_tcg_st(&ctx, op);
             break;
-        case INDEX_op_st_i32:
-        case INDEX_op_st_i64:
+        case INDEX_op_st:
         case INDEX_op_st_vec:
             done = fold_tcg_st_memcopy(&ctx, op);
             break;
         case INDEX_op_mb:
             done = fold_mb(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(mov):
+        case INDEX_op_mov:
+        case INDEX_op_mov_vec:
             done = fold_mov(&ctx, op);
             break;
-        CASE_OP_32_64(movcond):
+        case INDEX_op_movcond:
             done = fold_movcond(&ctx, op);
             break;
-        CASE_OP_32_64(mul):
+        case INDEX_op_mul:
             done = fold_mul(&ctx, op);
             break;
-        CASE_OP_32_64(mulsh):
-        CASE_OP_32_64(muluh):
+        case INDEX_op_mulsh:
+        case INDEX_op_muluh:
             done = fold_mul_highpart(&ctx, op);
             break;
-        CASE_OP_32_64(muls2):
-        CASE_OP_32_64(mulu2):
+        case INDEX_op_muls2:
+        case INDEX_op_mulu2:
             done = fold_multiply2(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(nand):
+        case INDEX_op_nand:
+        case INDEX_op_nand_vec:
             done = fold_nand(&ctx, op);
             break;
-        CASE_OP_32_64(neg):
+        case INDEX_op_neg:
             done = fold_neg(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(nor):
+        case INDEX_op_nor:
+        case INDEX_op_nor_vec:
             done = fold_nor(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(not):
+        case INDEX_op_not:
+        case INDEX_op_not_vec:
             done = fold_not(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(or):
+        case INDEX_op_or:
+        case INDEX_op_or_vec:
             done = fold_or(&ctx, op);
             break;
-        CASE_OP_32_64_VEC(orc):
+        case INDEX_op_orc:
+        case INDEX_op_orc_vec:
             done = fold_orc(&ctx, op);
             break;
-        case INDEX_op_qemu_ld_i32:
+        case INDEX_op_qemu_ld:
             done = fold_qemu_ld_1reg(&ctx, op);
             break;
-        case INDEX_op_qemu_ld_i64:
-            if (TCG_TARGET_REG_BITS == 64) {
-                done = fold_qemu_ld_1reg(&ctx, op);
-                break;
-            }
-            QEMU_FALLTHROUGH;
-        case INDEX_op_qemu_ld_i128:
+        case INDEX_op_qemu_ld2:
             done = fold_qemu_ld_2reg(&ctx, op);
             break;
-        case INDEX_op_qemu_st8_i32:
-        case INDEX_op_qemu_st_i32:
-        case INDEX_op_qemu_st_i64:
-        case INDEX_op_qemu_st_i128:
+        case INDEX_op_qemu_st:
+        case INDEX_op_qemu_st2:
             done = fold_qemu_st(&ctx, op);
             break;
-        CASE_OP_32_64(rem):
-        CASE_OP_32_64(remu):
+        case INDEX_op_rems:
+        case INDEX_op_remu:
             done = fold_remainder(&ctx, op);
             break;
-        CASE_OP_32_64(rotl):
-        CASE_OP_32_64(rotr):
-        CASE_OP_32_64(sar):
-        CASE_OP_32_64(shl):
-        CASE_OP_32_64(shr):
+        case INDEX_op_rotl:
+        case INDEX_op_rotr:
+        case INDEX_op_sar:
+        case INDEX_op_shl:
+        case INDEX_op_shr:
             done = fold_shift(&ctx, op);
             break;
-        CASE_OP_32_64(setcond):
+        case INDEX_op_setcond:
             done = fold_setcond(&ctx, op);
             break;
-        CASE_OP_32_64(negsetcond):
+        case INDEX_op_negsetcond:
             done = fold_negsetcond(&ctx, op);
             break;
         case INDEX_op_setcond2_i32:
@@ -3058,19 +3219,26 @@ void tcg_optimize(TCGContext *s)
         case INDEX_op_bitsel_vec:
             done = fold_bitsel_vec(&ctx, op);
             break;
-        CASE_OP_32_64(sextract):
+        case INDEX_op_sextract:
             done = fold_sextract(&ctx, op);
             break;
-        CASE_OP_32_64(sub):
+        case INDEX_op_sub:
             done = fold_sub(&ctx, op);
             break;
+        case INDEX_op_subbi:
+            done = fold_subbi(&ctx, op);
+            break;
+        case INDEX_op_subbio:
+            done = fold_subbio(&ctx, op);
+            break;
+        case INDEX_op_subbo:
+            done = fold_subbo(&ctx, op);
+            break;
         case INDEX_op_sub_vec:
             done = fold_sub_vec(&ctx, op);
             break;
-        CASE_OP_32_64(sub2):
-            done = fold_sub2(&ctx, op);
-            break;
-        CASE_OP_32_64_VEC(xor):
+        case INDEX_op_xor:
+        case INDEX_op_xor_vec:
             done = fold_xor(&ctx, op);
             break;
         case INDEX_op_set_label: