diff options
Diffstat (limited to 'tcg/optimize.c')
-rw-r--r-- | tcg/optimize.c | 1482 |
1 files changed, 871 insertions, 611 deletions
diff --git a/tcg/optimize.c b/tcg/optimize.c index f922f86..62a128b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -30,14 +30,6 @@ #include "tcg-internal.h" #include "tcg-has.h" -#define CASE_OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - -#define CASE_OP_32_64_VEC(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64): \ - glue(glue(case INDEX_op_, x), _vec) typedef struct MemCopyInfo { IntervalTreeNode itree; @@ -47,12 +39,11 @@ typedef struct MemCopyInfo { } MemCopyInfo; typedef struct TempOptInfo { - bool is_const; TCGTemp *prev_copy; TCGTemp *next_copy; QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; - uint64_t val; uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ + uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ } TempOptInfo; @@ -66,6 +57,7 @@ typedef struct OptContext { /* In flight values from optimization. */ TCGType type; + int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ } OptContext; static inline TempOptInfo *ts_info(TCGTemp *ts) @@ -80,12 +72,14 @@ static inline TempOptInfo *arg_info(TCGArg arg) static inline bool ti_is_const(TempOptInfo *ti) { - return ti->is_const; + /* If all bits that are not known zeros are known ones, it's constant. */ + return ti->z_mask == ti->o_mask; } static inline uint64_t ti_const_val(TempOptInfo *ti) { - return ti->val; + /* If constant, both z_mask and o_mask contain the value. */ + return ti->z_mask; } static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) @@ -108,6 +102,11 @@ static inline bool arg_is_const(TCGArg arg) return ts_is_const(arg_temp(arg)); } +static inline uint64_t arg_const_val(TCGArg arg) +{ + return ti_const_val(arg_info(arg)); +} + static inline bool arg_is_const_val(TCGArg arg, uint64_t val) { return ts_is_const_val(arg_temp(arg), val); @@ -144,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) ti->prev_copy = ts; QSIMPLEQ_INIT(&ti->mem_copy); if (ts->kind == TEMP_CONST) { - ti->is_const = true; - ti->val = ts->val; ti->z_mask = ts->val; + ti->o_mask = ts->val; ti->s_mask = INT64_MIN >> clrsb64(ts->val); } else { - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; } } @@ -236,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts) pi->next_copy = ti->next_copy; ti->next_copy = ts; ti->prev_copy = ts; - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { @@ -344,6 +342,18 @@ static TCGArg arg_new_temp(OptContext *ctx) return temp_arg(ts); } +static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); +} + +static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -363,10 +373,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) switch (ctx->type) { case TCG_TYPE_I32: - new_op = INDEX_op_mov_i32; - break; case TCG_TYPE_I64: - new_op = INDEX_op_mov_i64; + new_op = INDEX_op_mov; break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -382,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) op->args[1] = src; di->z_mask = si->z_mask; + di->o_mask = si->o_mask; di->s_mask = si->s_mask; if (src_ts->type == dst_ts->type) { @@ -391,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) di->prev_copy = src_ts; ni->prev_copy = dst_ts; si->next_copy = dst_ts; - di->is_const = si->is_const; - di->val = si->val; if (!QSIMPLEQ_EMPTY(&si->mem_copy) && cmp_better_copy(src_ts, dst_ts) == dst_ts) { move_mem_copies(dst_ts, src_ts); } + } else if (dst_ts->type == TCG_TYPE_I32) { + di->z_mask = (int32_t)di->z_mask; + di->o_mask = (int32_t)di->o_mask; + di->s_mask |= INT32_MIN; + } else { + di->z_mask |= MAKE_64BIT_MASK(32, 32); + di->o_mask = (uint32_t)di->o_mask; + di->s_mask = INT64_MIN; } return true; } @@ -409,162 +424,163 @@ static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } -static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) +static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, + uint64_t x, uint64_t y) { uint64_t l64, h64; switch (op) { - CASE_OP_32_64(add): + case INDEX_op_add: return x + y; - CASE_OP_32_64(sub): + case INDEX_op_sub: return x - y; - CASE_OP_32_64(mul): + case INDEX_op_mul: return x * y; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: return x & y; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: return x | y; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: return x ^ y; - case INDEX_op_shl_i32: - return (uint32_t)x << (y & 31); - - case INDEX_op_shl_i64: + case INDEX_op_shl: + if (type == TCG_TYPE_I32) { + return (uint32_t)x << (y & 31); + } return (uint64_t)x << (y & 63); - case INDEX_op_shr_i32: - return (uint32_t)x >> (y & 31); - - case INDEX_op_shr_i64: + case INDEX_op_shr: + if (type == TCG_TYPE_I32) { + return (uint32_t)x >> (y & 31); + } return (uint64_t)x >> (y & 63); - case INDEX_op_sar_i32: - return (int32_t)x >> (y & 31); - - case INDEX_op_sar_i64: + case INDEX_op_sar: + if (type == TCG_TYPE_I32) { + return (int32_t)x >> (y & 31); + } return (int64_t)x >> (y & 63); - case INDEX_op_rotr_i32: - return ror32(x, y & 31); - - case INDEX_op_rotr_i64: + case INDEX_op_rotr: + if (type == TCG_TYPE_I32) { + return ror32(x, y & 31); + } return ror64(x, y & 63); - case INDEX_op_rotl_i32: - return rol32(x, y & 31); - - case INDEX_op_rotl_i64: + case INDEX_op_rotl: + if (type == TCG_TYPE_I32) { + return rol32(x, y & 31); + } return rol64(x, y & 63); - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: return ~x; - CASE_OP_32_64(neg): + case INDEX_op_neg: return -x; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: return x & ~y; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: return x | ~y; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: return ~(x ^ y); - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: return ~(x & y); - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: return ~(x | y); - case INDEX_op_clz_i32: - return (uint32_t)x ? clz32(x) : y; - - case INDEX_op_clz_i64: + case INDEX_op_clz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? clz32(x) : y; + } return x ? clz64(x) : y; - case INDEX_op_ctz_i32: - return (uint32_t)x ? ctz32(x) : y; - - case INDEX_op_ctz_i64: + case INDEX_op_ctz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? ctz32(x) : y; + } return x ? ctz64(x) : y; - case INDEX_op_ctpop_i32: - return ctpop32(x); + case INDEX_op_ctpop: + return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); - case INDEX_op_ctpop_i64: - return ctpop64(x); - - CASE_OP_32_64(ext8s): - return (int8_t)x; - - CASE_OP_32_64(ext16s): - return (int16_t)x; - - CASE_OP_32_64(ext8u): - return (uint8_t)x; - - CASE_OP_32_64(ext16u): - return (uint16_t)x; - - CASE_OP_32_64(bswap16): + case INDEX_op_bswap16: x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; - CASE_OP_32_64(bswap32): + case INDEX_op_bswap32: x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t)x : x; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: return bswap64(x); case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: return (int32_t)x; case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_ext32u_i64: return (uint32_t)x; case INDEX_op_extrh_i64_i32: return (uint64_t)x >> 32; - case INDEX_op_muluh_i32: - return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; - case INDEX_op_mulsh_i32: - return ((int64_t)(int32_t)x * (int32_t)y) >> 32; - - case INDEX_op_muluh_i64: + case INDEX_op_muluh: + if (type == TCG_TYPE_I32) { + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + } mulu64(&l64, &h64, x, y); return h64; - case INDEX_op_mulsh_i64: + + case INDEX_op_mulsh: + if (type == TCG_TYPE_I32) { + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + } muls64(&l64, &h64, x, y); return h64; - case INDEX_op_div_i32: + case INDEX_op_divs: /* Avoid crashing on divide by zero, otherwise undefined. */ - return (int32_t)x / ((int32_t)y ? : 1); - case INDEX_op_divu_i32: - return (uint32_t)x / ((uint32_t)y ? : 1); - case INDEX_op_div_i64: + if (type == TCG_TYPE_I32) { + return (int32_t)x / ((int32_t)y ? : 1); + } return (int64_t)x / ((int64_t)y ? : 1); - case INDEX_op_divu_i64: + + case INDEX_op_divu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x / ((uint32_t)y ? : 1); + } return (uint64_t)x / ((uint64_t)y ? : 1); - case INDEX_op_rem_i32: - return (int32_t)x % ((int32_t)y ? : 1); - case INDEX_op_remu_i32: - return (uint32_t)x % ((uint32_t)y ? : 1); - case INDEX_op_rem_i64: + case INDEX_op_rems: + if (type == TCG_TYPE_I32) { + return (int32_t)x % ((int32_t)y ? : 1); + } return (int64_t)x % ((int64_t)y ? : 1); - case INDEX_op_remu_i64: + + case INDEX_op_remu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x % ((uint32_t)y ? : 1); + } return (uint64_t)x % ((uint64_t)y ? : 1); default: @@ -575,7 +591,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) static uint64_t do_constant_folding(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { - uint64_t res = do_constant_folding_2(op, x, y); + uint64_t res = do_constant_folding_2(op, type, x, y); if (type == TCG_TYPE_I32) { res = (int32_t)res; } @@ -683,8 +699,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) { if (arg_is_const(x) && arg_is_const(y)) { - uint64_t xv = arg_info(x)->val; - uint64_t yv = arg_info(y)->val; + uint64_t xv = arg_const_val(x); + uint64_t yv = arg_const_val(y); switch (type) { case TCG_TYPE_I32: @@ -725,12 +741,18 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, #define NO_DEST temp_arg(NULL) +static int pref_commutative(TempOptInfo *ti) +{ + /* Slight preference for non-zero constants second. */ + return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2; +} + static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += arg_is_const(a1); - sum -= arg_is_const(a2); + sum += pref_commutative(arg_info(a1)); + sum -= pref_commutative(arg_info(a2)); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -745,10 +767,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += arg_is_const(p1[0]); - sum += arg_is_const(p1[1]); - sum -= arg_is_const(p2[0]); - sum -= arg_is_const(p2[1]); + sum += pref_commutative(arg_info(p1[0])); + sum += pref_commutative(arg_info(p1[1])); + sum -= pref_commutative(arg_info(p2[0])); + sum -= pref_commutative(arg_info(p2[1])); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -762,6 +784,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) * Return -1 if the condition can't be simplified, * and the result of the condition (0 or 1) if it can. */ +static bool fold_and(OptContext *ctx, TCGOp *op); static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, TCGArg *p1, TCGArg *p2, TCGArg *pcond) { @@ -791,14 +814,14 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x */ if (args_are_copies(*p1, *p2) || - (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) { + (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_eqne_cond(cond); return -1; } /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ - if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) { + if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_ltge_cond(cond); return -1; @@ -806,14 +829,13 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst) { - TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_and_i32 : INDEX_op_and_i64); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg tmp = arg_new_temp(ctx); op2->args[0] = tmp; op2->args[1] = *p1; op2->args[2] = *p2; + fold_and(ctx, op2); *p1 = tmp; *p2 = arg_new_constant(ctx, 0); @@ -841,13 +863,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) bh = args[3]; if (arg_is_const(bl) && arg_is_const(bh)) { - tcg_target_ulong blv = arg_info(bl)->val; - tcg_target_ulong bhv = arg_info(bh)->val; + tcg_target_ulong blv = arg_const_val(bl); + tcg_target_ulong bhv = arg_const_val(bh); uint64_t b = deposit64(blv, 32, 32, bhv); if (arg_is_const(al) && arg_is_const(ah)) { - tcg_target_ulong alv = arg_info(al)->val; - tcg_target_ulong ahv = arg_info(ah)->val; + tcg_target_ulong alv = arg_const_val(al); + tcg_target_ulong ahv = arg_const_val(ah); uint64_t a = deposit64(alv, 32, 32, ahv); r = do_constant_folding_cond_64(a, b, c); @@ -901,17 +923,20 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { - TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg t1 = arg_new_temp(ctx); TCGArg t2 = arg_new_temp(ctx); op1->args[0] = t1; op1->args[1] = al; op1->args[2] = bl; + fold_and(ctx, op1); + op2->args[0] = t2; op2->args[1] = ah; op2->args[2] = bh; + fold_and(ctx, op1); args[0] = t1; args[1] = t2; @@ -981,9 +1006,8 @@ static bool finish_folding(OptContext *ctx, TCGOp *op) static bool fold_const1(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t; + uint64_t t = arg_const_val(op->args[1]); - t = arg_info(op->args[1])->val; t = do_constant_folding(op->opc, ctx->type, t, 0); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -993,8 +1017,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op) static bool fold_const2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t1 = arg_info(op->args[1])->val; - uint64_t t2 = arg_info(op->args[2])->val; + uint64_t t1 = arg_const_val(op->args[1]); + uint64_t t2 = arg_const_val(op->args[2]); t1 = do_constant_folding(op->opc, ctx->type, t1, t2); return tcg_opt_gen_movi(ctx, op, op->args[0], t1); @@ -1020,8 +1044,9 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) * If z_mask allows, fold the output to constant zero. * The passed s_mask may be augmented by z_mask. */ -static bool fold_masks_zs(OptContext *ctx, TCGOp *op, - uint64_t z_mask, int64_t s_mask) +static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, + int64_t s_mask, uint64_t a_mask) { const TCGOpDef *def = &tcg_op_defs[op->opc]; TCGTemp *ts; @@ -1040,11 +1065,22 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op, */ if (ctx->type == TCG_TYPE_I32) { z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; s_mask |= INT32_MIN; + a_mask = (uint32_t)a_mask; + } + + /* Bits that are known 1 and bits that are known 0 must not overlap. */ + tcg_debug_assert((o_mask & ~z_mask) == 0); + + /* All bits that are not known zero are known one is a constant. */ + if (z_mask == o_mask) { + return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask); } - if (z_mask == 0) { - return tcg_opt_gen_movi(ctx, op, op->args[0], 0); + /* If no bits are affected, the operation devolves to a copy. */ + if (a_mask == 0) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } ts = arg_temp(op->args[0]); @@ -1056,36 +1092,46 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op, /* Canonicalize s_mask and incorporate data from z_mask. */ rep = clz64(~s_mask); rep = MAX(rep, clz64(z_mask)); + rep = MAX(rep, clz64(~o_mask)); rep = MAX(rep - 1, 0); ti->s_mask = INT64_MIN >> rep; + return false; +} + +static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask, + uint64_t o_mask, int64_t s_mask, uint64_t a_mask) +{ + fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, -1); return true; } -static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) +static bool fold_masks_zos(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, uint64_t s_mask) { - return fold_masks_zs(ctx, op, z_mask, 0); + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1); } -static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) +static bool fold_masks_zo(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask) { - return fold_masks_zs(ctx, op, -1, s_mask); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1); } -/* - * An "affected" mask bit is 0 if and only if the result is identical - * to the first input. Thus if the entire mask is 0, the operation - * is equivalent to a copy. - */ -static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask) +static bool fold_masks_zs(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t s_mask) { - if (ctx->type == TCG_TYPE_I32) { - a_mask = (uint32_t)a_mask; - } - if (a_mask == 0) { - return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); - } - return false; + return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1); +} + +static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1); +} + +static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) +{ + return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1); } /* @@ -1101,12 +1147,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) switch (ctx->type) { case TCG_TYPE_I32: - not_op = INDEX_op_not_i32; - have_not = TCG_TARGET_HAS_not_i32; - break; case TCG_TYPE_I64: - not_op = INDEX_op_not_i64; - have_not = TCG_TARGET_HAS_not_i64; + not_op = INDEX_op_not; + have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0); break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -1197,8 +1240,10 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) * 3) those that produce information about the result value. */ +static bool fold_addco(OptContext *ctx, TCGOp *op); static bool fold_or(OptContext *ctx, TCGOp *op); static bool fold_orc(OptContext *ctx, TCGOp *op); +static bool fold_subbo(OptContext *ctx, TCGOp *op); static bool fold_xor(OptContext *ctx, TCGOp *op); static bool fold_add(OptContext *ctx, TCGOp *op) @@ -1220,108 +1265,184 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } -static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) +static void squash_prev_carryout(OptContext *ctx, TCGOp *op) { - bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); - bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); + TempOptInfo *t2; - if (a_const && b_const) { - uint64_t al = arg_info(op->args[2])->val; - uint64_t ah = arg_info(op->args[3])->val; - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - TCGArg rl, rh; - TCGOp *op2; + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_addco: + op->opc = INDEX_op_add; + fold_add(ctx, op); + break; + case INDEX_op_addcio: + op->opc = INDEX_op_addci; + break; + case INDEX_op_addc1o: + op->opc = INDEX_op_add; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, 1); + } + break; + default: + g_assert_not_reached(); + } +} - if (ctx->type == TCG_TYPE_I32) { - uint64_t a = deposit64(al, 32, 32, ah); - uint64_t b = deposit64(bl, 32, 32, bh); +static bool fold_addci(OptContext *ctx, TCGOp *op) +{ + fold_commutative(ctx, op); - if (add) { - a += b; - } else { - a -= b; - } + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + op->opc = INDEX_op_add; - al = sextract64(a, 0, 32); - ah = sextract64(a, 32, 32); + if (ctx->carry_state > 0) { + TempOptInfo *t2 = arg_info(op->args[2]); + + /* + * Propagate the known carry-in into a constant, if possible. + * Otherwise emit a second add +1. + */ + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); } else { - Int128 a = int128_make128(al, ah); - Int128 b = int128_make128(bl, bh); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); - if (add) { - a = int128_add(a, b); - } else { - a = int128_sub(a, b); - } + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_add(ctx, op2); - al = int128_getlo(a); - ah = int128_gethi(a); + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, 1); } + } - rl = op->args[0]; - rh = op->args[1]; + ctx->carry_state = -1; + return fold_add(ctx, op); +} - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); +static bool fold_addcio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t sum, max; - tcg_opt_gen_movi(ctx, op, rl, al); - tcg_opt_gen_movi(ctx, op2, rh, ah); - return true; + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && + !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { + carry_out = 0; } - /* Fold sub2 r,x,i to add2 r,x,-i */ - if (!add && b_const) { - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; + if (ctx->carry_state < 0) { + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + } - /* Negate the two parts without assembling and disassembling. */ - bl = -bl; - bh = ~bh + !bl; + squash_prev_carryout(ctx, op); + if (ctx->carry_state == 0) { + goto do_addco; + } - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add2_i32 : INDEX_op_add2_i64); - op->args[4] = arg_new_constant(ctx, bl); - op->args[5] = arg_new_constant(ctx, bh); + /* Propagate the known carry-in into a constant, if possible. */ + max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + if (ti_is_const(t2)) { + uint64_t v = ti_const_val(t2) & max; + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + /* max + known carry in produces known carry out. */ + carry_out = 1; } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1) & max; + if (v < max) { + op->args[1] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + carry_out = 1; + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_addc1o; + ctx->carry_state = carry_out; return finish_folding(ctx, op); + + do_addco: + op->opc = INDEX_op_addco; + return fold_addco(ctx, op); } -static bool fold_add2(OptContext *ctx, TCGOp *op) +static bool fold_addco(OptContext *ctx, TCGOp *op) { - /* Note that the high and low parts may be independently swapped. */ - swap_commutative(op->args[0], &op->args[2], &op->args[4]); - swap_commutative(op->args[1], &op->args[3], &op->args[5]); + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t ign; - return fold_addsub2(ctx, op, true); + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + + if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + /* Given sign-extension of z_mask for I32, we need not truncate. */ + carry_out = uadd64_overflow(v1, v2, &ign); + } else if (v2 == 0) { + carry_out = 0; + } + } else { + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { + carry_out = 0; + } + } + ctx->carry_state = carry_out; + return finish_folding(ctx, op); } static bool fold_and(OptContext *ctx, TCGOp *op) { - uint64_t z1, z2, z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || - fold_xi_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, -1) || - fold_xx_to_x(ctx, op)) { + if (fold_const2_commutative(ctx, op)) { return true; } t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z1 = t1->z_mask; - z2 = t2->z_mask; - - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer affected bits from it. - */ - if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) { - return true; - } - z_mask = z1 & z2; + z_mask = t1->z_mask & t2->z_mask; + o_mask = t1->o_mask & t2->o_mask; /* * Sign repetitions are perforce all identical, whether they are 1 or 0. @@ -1329,39 +1450,76 @@ static bool fold_and(OptContext *ctx, TCGOp *op) */ s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + /* Affected bits are those not known zero, masked by those known one. */ + a_mask = t1->z_mask & ~t2->o_mask; + + if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { + if (ti_is_const(t2)) { + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + uint64_t val = ti_const_val(t2); + if (!(val & (val + 1))) { + unsigned len = ctz64(~val); + if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { + op->opc = INDEX_op_extract; + op->args[2] = 0; + op->args[3] = len; + } + } + } else { + fold_xx_to_x(ctx, op); + } + } + return true; } static bool fold_andc(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, 0) || - fold_ix_to_not(ctx, op, -1)) { + if (fold_const2(ctx, op)) { return true; } t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z_mask = t1->z_mask; - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer anything from it. - */ if (ti_is_const(t2)) { - uint64_t v2 = ti_const_val(t2); - if (fold_affected_mask(ctx, op, z_mask & v2)) { - return true; + /* Fold andc r,x,i to and r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_and; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_and_vec; + break; + default: + g_assert_not_reached(); } - z_mask &= ~v2; + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_and(ctx, op); + } + if (fold_xx_to_i(ctx, op, 0) || + fold_ix_to_not(ctx, op, -1)) { + return true; } + z_mask = t1->z_mask & ~t2->o_mask; + o_mask = t1->o_mask & ~t2->z_mask; s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + /* Affected bits are those not known zero, masked by those known zero. */ + a_mask = t1->z_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) @@ -1372,8 +1530,8 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t tv = arg_info(op->args[2])->val; - uint64_t fv = arg_info(op->args[3])->val; + uint64_t tv = arg_const_val(op->args[2]); + uint64_t fv = arg_const_val(op->args[3]); if (tv == -1 && fv == 0) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); @@ -1390,7 +1548,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } } if (arg_is_const(op->args[2])) { - uint64_t tv = arg_info(op->args[2])->val; + uint64_t tv = arg_const_val(op->args[2]); if (tv == -1) { op->opc = INDEX_op_or_vec; op->args[2] = op->args[3]; @@ -1404,7 +1562,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } } if (arg_is_const(op->args[3])) { - uint64_t fv = arg_info(op->args[3])->val; + uint64_t fv = arg_const_val(op->args[3]); if (fv == 0) { op->opc = INDEX_op_and_vec; return fold_and(ctx, op); @@ -1504,14 +1662,14 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) break; do_brcond_low: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[1] = op->args[2]; op->args[2] = cond; op->args[3] = label; return fold_brcond(ctx, op); do_brcond_high: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; @@ -1535,55 +1693,52 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) static bool fold_bswap(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, sign; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1 = arg_info(op->args[1]); + int flags = op->args[2]; if (ti_is_const(t1)) { return tcg_opt_gen_movi(ctx, op, op->args[0], do_constant_folding(op->opc, ctx->type, - ti_const_val(t1), - op->args[2])); + ti_const_val(t1), flags)); } z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = 0; + switch (op->opc) { - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: z_mask = bswap16(z_mask); - sign = INT16_MIN; + o_mask = bswap16(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int16_t)z_mask; + o_mask = (int16_t)o_mask; + s_mask = INT16_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(16, 48); + } break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: z_mask = bswap32(z_mask); - sign = INT32_MIN; + o_mask = bswap32(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; + s_mask = INT32_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(32, 32); + } break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: z_mask = bswap64(z_mask); - sign = INT64_MIN; + o_mask = bswap64(o_mask); break; default: g_assert_not_reached(); } - s_mask = 0; - switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { - case TCG_BSWAP_OZ: - break; - case TCG_BSWAP_OS: - /* If the sign bit may be 1, force all the bits above to 1. */ - if (z_mask & sign) { - z_mask |= sign; - } - /* The value and therefore s_mask is explicitly sign-extended. */ - s_mask = sign; - break; - default: - /* The high bits are undefined: force all bits above the sign to 1. */ - z_mask |= sign << 1; - break; - } - - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_call(OptContext *ctx, TCGOp *op) @@ -1713,9 +1868,8 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) TempOptInfo *t2 = arg_info(op->args[2]); int ofs = op->args[3]; int len = op->args[4]; - int width; - TCGOpcode and_opc; - uint64_t z_mask, s_mask; + int width = 8 * tcg_type_size(ctx->type); + uint64_t z_mask, o_mask, s_mask; if (ti_is_const(t1) && ti_is_const(t2)) { return tcg_opt_gen_movi(ctx, op, op->args[0], @@ -1723,24 +1877,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) ti_const_val(t2))); } - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - width = 32; - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - width = 64; - break; - default: - g_assert_not_reached(); - } - /* Inserting a value into zero at offset 0. */ if (ti_is_const_val(t1, 0) && ofs == 0) { uint64_t mask = MAKE_64BIT_MASK(0, len); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = op->args[2]; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); @@ -1750,7 +1891,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) if (ti_is_const_val(t2, 0)) { uint64_t mask = deposit64(-1, ofs, len, 0); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); } @@ -1763,7 +1904,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) } z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); - return fold_masks_zs(ctx, op, z_mask, s_mask); + o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_divide(OptContext *ctx, TCGOp *op) @@ -1778,7 +1921,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op) static bool fold_dup(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t t = arg_const_val(op->args[1]); t = dup_const(TCGOP_VECE(op), t); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1788,8 +1931,8 @@ static bool fold_dup(OptContext *ctx, TCGOp *op) static bool fold_dup2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, - arg_info(op->args[2])->val); + uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, + arg_const_val(op->args[2])); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1802,22 +1945,45 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) static bool fold_eqv(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || - fold_xi_to_x(ctx, op, -1) || - fold_xi_to_not(ctx, op, 0)) { + if (fold_const2_commutative(ctx, op)) { return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold eqv r,x,i to xor r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_xor; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_xor_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_xor(ctx, op); + } + + t1 = arg_info(op->args[1]); + + z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; + uint64_t z_mask, o_mask, a_mask; TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; @@ -1827,38 +1993,41 @@ static bool fold_extract(OptContext *ctx, TCGOp *op) extract64(ti_const_val(t1), pos, len)); } - z_mask_old = t1->z_mask; - z_mask = extract64(z_mask_old, pos, len); - if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { - return true; - } + z_mask = extract64(t1->z_mask, pos, len); + o_mask = extract64(t1->o_mask, pos, len); + a_mask = pos ? -1 : t1->z_mask ^ z_mask; - return fold_masks_z(ctx, op, z_mask); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); } static bool fold_extract2(OptContext *ctx, TCGOp *op) { - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t v1 = arg_info(op->args[1])->val; - uint64_t v2 = arg_info(op->args[2])->val; - int shr = op->args[3]; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + uint64_t z1 = t1->z_mask; + uint64_t z2 = t2->z_mask; + uint64_t o1 = t1->o_mask; + uint64_t o2 = t2->o_mask; + int shr = op->args[3]; - if (op->opc == INDEX_op_extract2_i64) { - v1 >>= shr; - v2 <<= 64 - shr; - } else { - v1 = (uint32_t)v1 >> shr; - v2 = (uint64_t)((int32_t)v2 << (32 - shr)); - } - return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); + if (ctx->type == TCG_TYPE_I32) { + z1 = (uint32_t)z1 >> shr; + o1 = (uint32_t)o1 >> shr; + z2 = (uint64_t)((int32_t)z2 << (32 - shr)); + o2 = (uint64_t)((int32_t)o2 << (32 - shr)); + } else { + z1 >>= shr; + o1 >>= shr; + z2 <<= 64 - shr; + o2 <<= 64 - shr; } - return finish_folding(ctx, op); + + return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); } static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask_old, s_mask, z_mask; - bool type_change = false; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1; if (fold_const1(ctx, op)) { @@ -1867,74 +2036,48 @@ static bool fold_exts(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); z_mask = t1->z_mask; + o_mask = t1->o_mask; s_mask = t1->s_mask; - s_mask_old = s_mask; switch (op->opc) { - CASE_OP_32_64(ext8s): - s_mask |= INT8_MIN; - z_mask = (int8_t)z_mask; - break; - CASE_OP_32_64(ext16s): - s_mask |= INT16_MIN; - z_mask = (int16_t)z_mask; - break; case INDEX_op_ext_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32s_i64: s_mask |= INT32_MIN; z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { - return true; - } - - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; - bool type_change = false; + uint64_t z_mask, o_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask_old = z_mask = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; switch (op->opc) { - CASE_OP_32_64(ext8u): - z_mask = (uint8_t)z_mask; - break; - CASE_OP_32_64(ext16u): - z_mask = (uint16_t)z_mask; - break; case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32u_i64: z_mask = (uint32_t)z_mask; + o_mask = (uint32_t)o_mask; break; case INDEX_op_extrh_i64_i32: - type_change = true; z_mask >>= 32; + o_mask >>= 32; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { - return true; - } - - return fold_masks_z(ctx, op, z_mask); + return fold_masks_zo(ctx, op, z_mask, o_mask); } static bool fold_mb(OptContext *ctx, TCGOp *op) @@ -1968,7 +2111,7 @@ static bool fold_mov(OptContext *ctx, TCGOp *op) static bool fold_movcond(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *tt, *ft; int i; @@ -1994,51 +2137,30 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) tt = arg_info(op->args[3]); ft = arg_info(op->args[4]); z_mask = tt->z_mask | ft->z_mask; + o_mask = tt->o_mask & ft->o_mask; s_mask = tt->s_mask & ft->s_mask; if (ti_is_const(tt) && ti_is_const(ft)) { uint64_t tv = ti_const_val(tt); uint64_t fv = ti_const_val(ft); - TCGOpcode opc, negopc = 0; TCGCond cond = op->args[5]; - switch (ctx->type) { - case TCG_TYPE_I32: - opc = INDEX_op_setcond_i32; - if (TCG_TARGET_HAS_negsetcond_i32) { - negopc = INDEX_op_negsetcond_i32; - } - tv = (int32_t)tv; - fv = (int32_t)fv; - break; - case TCG_TYPE_I64: - opc = INDEX_op_setcond_i64; - if (TCG_TARGET_HAS_negsetcond_i64) { - negopc = INDEX_op_negsetcond_i64; - } - break; - default: - g_assert_not_reached(); - } - if (tv == 1 && fv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = cond; } else if (fv == 1 && tv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; + op->args[3] = tcg_invert_cond(cond); + } else if (tv == -1 && fv == 0) { + op->opc = INDEX_op_negsetcond; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = INDEX_op_negsetcond; op->args[3] = tcg_invert_cond(cond); - } else if (negopc) { - if (tv == -1 && fv == 0) { - op->opc = negopc; - op->args[3] = cond; - } else if (fv == -1 && tv == 0) { - op->opc = negopc; - op->args[3] = tcg_invert_cond(cond); - } } } - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_mul(OptContext *ctx, TCGOp *op) @@ -2065,28 +2187,30 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) swap_commutative(op->args[0], &op->args[2], &op->args[3]); if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t a = arg_info(op->args[2])->val; - uint64_t b = arg_info(op->args[3])->val; + uint64_t a = arg_const_val(op->args[2]); + uint64_t b = arg_const_val(op->args[3]); uint64_t h, l; TCGArg rl, rh; TCGOp *op2; switch (op->opc) { - case INDEX_op_mulu2_i32: - l = (uint64_t)(uint32_t)a * (uint32_t)b; - h = (int32_t)(l >> 32); - l = (int32_t)l; - break; - case INDEX_op_muls2_i32: - l = (int64_t)(int32_t)a * (int32_t)b; - h = l >> 32; - l = (int32_t)l; - break; - case INDEX_op_mulu2_i64: - mulu64(&l, &h, a, b); + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t)(uint32_t)a * (uint32_t)b; + h = (int32_t)(l >> 32); + l = (int32_t)l; + } else { + mulu64(&l, &h, a, b); + } break; - case INDEX_op_muls2_i64: - muls64(&l, &h, a, b); + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t)(int32_t)a * (int32_t)b; + h = l >> 32; + l = (int32_t)l; + } else { + muls64(&l, &h, a, b); + } break; default: g_assert_not_reached(); @@ -2096,7 +2220,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); @@ -2107,16 +2231,22 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) static bool fold_nand(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, -1)) { return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask & t2->o_mask); + o_mask = ~(t1->z_mask & t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) @@ -2135,29 +2265,39 @@ static bool fold_neg(OptContext *ctx, TCGOp *op) static bool fold_nor(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, 0)) { return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask | t2->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_not(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1; + if (fold_const1(ctx, op)) { return true; } - return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask); + + t1 = arg_info(op->args[1]); + return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); } static bool fold_or(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || @@ -2168,25 +2308,59 @@ static bool fold_or(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); + z_mask = t1->z_mask | t2->z_mask; + o_mask = t1->o_mask | t2->o_mask; s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + /* Affected bits are those not known one, masked by those known zero. */ + a_mask = ~t1->o_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_orc(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; + TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, -1) || - fold_xi_to_x(ctx, op, -1) || + if (fold_const2(ctx, op)) { + return true; + } + + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold orc r,x,i to or r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_or; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_or_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_or(ctx, op); + } + if (fold_xx_to_i(ctx, op, -1) || fold_ix_to_not(ctx, op, 0)) { return true; } + t1 = arg_info(op->args[1]); - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + z_mask = t1->z_mask | ~t2->o_mask; + o_mask = t1->o_mask | ~t2->z_mask; + s_mask = t1->s_mask & t2->s_mask; + + /* Affected bits are those not known one, masked by those known one. */ + a_mask = ~t1->o_mask & t2->o_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) @@ -2245,7 +2419,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) } a_zmask = arg_info(op->args[1])->z_mask; - b_val = arg_info(op->args[2])->val; + b_val = arg_const_val(op->args[2]); cond = op->args[3]; if (ctx->type == TCG_TYPE_I32) { @@ -2300,34 +2474,17 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode add_opc, xor_opc, neg_opc; - if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } - switch (ctx->type) { - case TCG_TYPE_I32: - add_opc = INDEX_op_add_i32; - neg_opc = INDEX_op_neg_i32; - xor_opc = INDEX_op_xor_i32; - break; - case TCG_TYPE_I64: - add_opc = INDEX_op_add_i64; - neg_opc = INDEX_op_neg_i64; - xor_opc = INDEX_op_xor_i64; - break; - default: - g_assert_not_reached(); - } - if (!inv) { - op->opc = neg_opc; + op->opc = INDEX_op_neg; } else if (neg) { - op->opc = add_opc; + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); } else { - op->opc = xor_opc; + op->opc = INDEX_op_xor; op->args[2] = arg_new_constant(ctx, 1); } return -1; @@ -2338,8 +2495,6 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; - TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; @@ -2352,83 +2507,52 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } src2 = op->args[2]; - val = arg_info(src2)->val; + val = arg_const_val(src2); if (!is_power_of_2(val)) { return; } sh = ctz64(val); - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - sub_opc = INDEX_op_sub_i32; - xor_opc = INDEX_op_xor_i32; - shr_opc = INDEX_op_shr_i32; - neg_opc = INDEX_op_neg_i32; - if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { - uext_opc = INDEX_op_extract_i32; - } - if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) { - sext_opc = INDEX_op_sextract_i32; - } - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - sub_opc = INDEX_op_sub_i64; - xor_opc = INDEX_op_xor_i64; - shr_opc = INDEX_op_shr_i64; - neg_opc = INDEX_op_neg_i64; - if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { - uext_opc = INDEX_op_extract_i64; - } - if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) { - sext_opc = INDEX_op_sextract_i64; - } - break; - default: - g_assert_not_reached(); - } - ret = op->args[0]; src1 = op->args[1]; inv = cond == TCG_COND_TSTEQ; - if (sh && sext_opc && neg && !inv) { - op->opc = sext_opc; + if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_sextract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; return; - } else if (sh && uext_opc) { - op->opc = uext_opc; + } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_extract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; } else { if (sh) { - op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3); + op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3); op2->args[0] = ret; op2->args[1] = src1; op2->args[2] = arg_new_constant(ctx, sh); src1 = ret; } - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = src1; op->args[2] = arg_new_constant(ctx, 1); } if (neg && inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); op2->args[0] = ret; op2->args[1] = ret; - op2->args[2] = arg_new_constant(ctx, 1); + op2->args[2] = arg_new_constant(ctx, -1); } else if (inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (neg) { - op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2); + op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); op2->args[0] = ret; op2->args[1] = ret; } @@ -2540,14 +2664,14 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) do_setcond_low: op->args[2] = op->args[3]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); } @@ -2559,7 +2683,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) static bool fold_sextract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, s_mask_old; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; @@ -2569,21 +2693,19 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op) sextract64(ti_const_val(t1), pos, len)); } - s_mask_old = t1->s_mask; - s_mask = s_mask_old >> pos; + s_mask = t1->s_mask >> pos; s_mask |= -1ull << (len - 1); - - if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { - return true; - } + a_mask = pos ? -1 : s_mask & ~t1->s_mask; z_mask = sextract64(t1->z_mask, pos, len); - return fold_masks_zs(ctx, op, z_mask, s_mask); + o_mask = sextract64(t1->o_mask, pos, len); + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_shift(OptContext *ctx, TCGOp *op) { - uint64_t s_mask, z_mask; + uint64_t s_mask, z_mask, o_mask; TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || @@ -2596,24 +2718,26 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) t2 = arg_info(op->args[2]); s_mask = t1->s_mask; z_mask = t1->z_mask; + o_mask = t1->o_mask; if (ti_is_const(t2)) { int sh = ti_const_val(t2); z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } switch (op->opc) { - CASE_OP_32_64(sar): + case INDEX_op_sar: /* * Arithmetic right shift will not reduce the number of * input sign repetitions. */ return fold_masks_s(ctx, op, s_mask); - CASE_OP_32_64(shr): + case INDEX_op_shr: /* * If the sign bit is known zero, then logical right shift * will not reduce the number of input sign repetitions. @@ -2634,17 +2758,14 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) TCGOpcode neg_op; bool have_neg; - if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { + if (!arg_is_const_val(op->args[1], 0)) { return false; } switch (ctx->type) { case TCG_TYPE_I32: - neg_op = INDEX_op_neg_i32; - have_neg = true; - break; case TCG_TYPE_I64: - neg_op = INDEX_op_neg_i64; + neg_op = INDEX_op_neg; have_neg = true; break; case TCG_TYPE_V64: @@ -2687,18 +2808,151 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) /* Fold sub r,x,i to add r,x,-i */ if (arg_is_const(op->args[2])) { - uint64_t val = arg_info(op->args[2])->val; + uint64_t val = arg_const_val(op->args[2]); - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add_i32 : INDEX_op_add_i64); + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -val); } return finish_folding(ctx, op); } -static bool fold_sub2(OptContext *ctx, TCGOp *op) +static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_subbo: + op->opc = INDEX_op_sub; + fold_sub(ctx, op); + break; + case INDEX_op_subbio: + op->opc = INDEX_op_subbi; + break; + case INDEX_op_subb1o: + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op->opc = INDEX_op_sub; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, -1); + } + break; + default: + g_assert_not_reached(); + } +} + +static bool fold_subbi(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + int borrow_in = ctx->carry_state; + + if (borrow_in < 0) { + return finish_folding(ctx, op); + } + ctx->carry_state = -1; + + squash_prev_borrowout(ctx, op); + if (borrow_in == 0) { + op->opc = INDEX_op_sub; + return fold_sub(ctx, op); + } + + /* + * Propagate the known carry-in into any constant, then negate to + * transform from sub to add. If there is no constant, emit a + * separate add -1. + */ + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_sub(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, -1); + } + op->opc = INDEX_op_add; + return fold_add(ctx, op); +} + +static bool fold_subbio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int borrow_out = -1; + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_borrowout(ctx, op); + if (ctx->carry_state == 0) { + goto do_subbo; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* Propagate the known borrow-in into a constant, if possible. */ + if (ti_is_const(t2)) { + uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + uint64_t v = ti_const_val(t2) & max; + + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_subbo; + } + /* subtracting max + 1 produces known borrow out. */ + borrow_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1); + if (v != 0) { + op->args[2] = arg_new_constant(ctx, v - 1); + goto do_subbo; + } + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_subb1o; + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); + + do_subbo: + op->opc = INDEX_op_subbo; + return fold_subbo(ctx, op); +} + +static bool fold_subbo(OptContext *ctx, TCGOp *op) { - return fold_addsub2(ctx, op, false); + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int borrow_out = -1; + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + if (v2 == 0) { + borrow_out = 0; + } else if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + borrow_out = v1 < v2; + } + } + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); } static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) @@ -2707,22 +2961,22 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) /* We can't do any folding with a load, but we can record bits. */ switch (op->opc) { - CASE_OP_32_64(ld8s): + case INDEX_op_ld8s: s_mask = INT8_MIN; break; - CASE_OP_32_64(ld8u): + case INDEX_op_ld8u: z_mask = MAKE_64BIT_MASK(0, 8); break; - CASE_OP_32_64(ld16s): + case INDEX_op_ld16s: s_mask = INT16_MIN; break; - CASE_OP_32_64(ld16u): + case INDEX_op_ld16u: z_mask = MAKE_64BIT_MASK(0, 16); break; - case INDEX_op_ld32s_i64: + case INDEX_op_ld32s: s_mask = INT32_MIN; break; - case INDEX_op_ld32u_i64: + case INDEX_op_ld32u: z_mask = MAKE_64BIT_MASK(0, 32); break; default: @@ -2765,19 +3019,16 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op) } switch (op->opc) { - CASE_OP_32_64(st8): + case INDEX_op_st8: lm1 = 0; break; - CASE_OP_32_64(st16): + case INDEX_op_st16: lm1 = 1; break; - case INDEX_op_st32_i64: - case INDEX_op_st_i32: + case INDEX_op_st32: lm1 = 3; break; - case INDEX_op_st_i64: - lm1 = 7; - break; + case INDEX_op_st: case INDEX_op_st_vec: lm1 = tcg_type_size(ctx->type) - 1; break; @@ -2822,7 +3073,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) static bool fold_xor(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || @@ -2834,9 +3085,12 @@ static bool fold_xor(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z_mask = t1->z_mask | t2->z_mask; + + z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); + o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } /* Propagate constants and copies, fold constant expressions. */ @@ -2881,44 +3135,52 @@ void tcg_optimize(TCGContext *s) * Sorted alphabetically by opcode as much as possible. */ switch (opc) { - CASE_OP_32_64(add): + case INDEX_op_add: done = fold_add(&ctx, op); break; case INDEX_op_add_vec: done = fold_add_vec(&ctx, op); break; - CASE_OP_32_64(add2): - done = fold_add2(&ctx, op); + case INDEX_op_addci: + done = fold_addci(&ctx, op); + break; + case INDEX_op_addcio: + done = fold_addcio(&ctx, op); + break; + case INDEX_op_addco: + done = fold_addco(&ctx, op); break; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: done = fold_and(&ctx, op); break; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: done = fold_andc(&ctx, op); break; - CASE_OP_32_64(brcond): + case INDEX_op_brcond: done = fold_brcond(&ctx, op); break; case INDEX_op_brcond2_i32: done = fold_brcond2(&ctx, op); break; - CASE_OP_32_64(bswap16): - CASE_OP_32_64(bswap32): - case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: done = fold_bswap(&ctx, op); break; - CASE_OP_32_64(clz): - CASE_OP_32_64(ctz): + case INDEX_op_clz: + case INDEX_op_ctz: done = fold_count_zeros(&ctx, op); break; - CASE_OP_32_64(ctpop): + case INDEX_op_ctpop: done = fold_ctpop(&ctx, op); break; - CASE_OP_32_64(deposit): + case INDEX_op_deposit: done = fold_deposit(&ctx, op); break; - CASE_OP_32_64(div): - CASE_OP_32_64(divu): + case INDEX_op_divs: + case INDEX_op_divu: done = fold_divide(&ctx, op); break; case INDEX_op_dup_vec: @@ -2927,123 +3189,114 @@ void tcg_optimize(TCGContext *s) case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: done = fold_eqv(&ctx, op); break; - CASE_OP_32_64(extract): + case INDEX_op_extract: done = fold_extract(&ctx, op); break; - CASE_OP_32_64(extract2): + case INDEX_op_extract2: done = fold_extract2(&ctx, op); break; - CASE_OP_32_64(ext8s): - CASE_OP_32_64(ext16s): - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: done = fold_exts(&ctx, op); break; - CASE_OP_32_64(ext8u): - CASE_OP_32_64(ext16u): - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: done = fold_extu(&ctx, op); break; - CASE_OP_32_64(ld8s): - CASE_OP_32_64(ld8u): - CASE_OP_32_64(ld16s): - CASE_OP_32_64(ld16u): - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: + case INDEX_op_ld8s: + case INDEX_op_ld8u: + case INDEX_op_ld16s: + case INDEX_op_ld16u: + case INDEX_op_ld32s: + case INDEX_op_ld32u: done = fold_tcg_ld(&ctx, op); break; - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld: case INDEX_op_ld_vec: done = fold_tcg_ld_memcopy(&ctx, op); break; - CASE_OP_32_64(st8): - CASE_OP_32_64(st16): - case INDEX_op_st32_i64: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: done = fold_tcg_st(&ctx, op); break; - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_st: case INDEX_op_st_vec: done = fold_tcg_st_memcopy(&ctx, op); break; case INDEX_op_mb: done = fold_mb(&ctx, op); break; - CASE_OP_32_64_VEC(mov): + case INDEX_op_mov: + case INDEX_op_mov_vec: done = fold_mov(&ctx, op); break; - CASE_OP_32_64(movcond): + case INDEX_op_movcond: done = fold_movcond(&ctx, op); break; - CASE_OP_32_64(mul): + case INDEX_op_mul: done = fold_mul(&ctx, op); break; - CASE_OP_32_64(mulsh): - CASE_OP_32_64(muluh): + case INDEX_op_mulsh: + case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; - CASE_OP_32_64(muls2): - CASE_OP_32_64(mulu2): + case INDEX_op_muls2: + case INDEX_op_mulu2: done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: done = fold_nand(&ctx, op); break; - CASE_OP_32_64(neg): + case INDEX_op_neg: done = fold_neg(&ctx, op); break; - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: done = fold_nor(&ctx, op); break; - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: done = fold_not(&ctx, op); break; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: done = fold_or(&ctx, op); break; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: done = fold_orc(&ctx, op); break; - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: done = fold_qemu_ld_1reg(&ctx, op); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - done = fold_qemu_ld_1reg(&ctx, op); - break; - } - QEMU_FALLTHROUGH; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: done = fold_qemu_ld_2reg(&ctx, op); break; - case INDEX_op_qemu_st8_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st: + case INDEX_op_qemu_st2: done = fold_qemu_st(&ctx, op); break; - CASE_OP_32_64(rem): - CASE_OP_32_64(remu): + case INDEX_op_rems: + case INDEX_op_remu: done = fold_remainder(&ctx, op); break; - CASE_OP_32_64(rotl): - CASE_OP_32_64(rotr): - CASE_OP_32_64(sar): - CASE_OP_32_64(shl): - CASE_OP_32_64(shr): + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: done = fold_shift(&ctx, op); break; - CASE_OP_32_64(setcond): + case INDEX_op_setcond: done = fold_setcond(&ctx, op); break; - CASE_OP_32_64(negsetcond): + case INDEX_op_negsetcond: done = fold_negsetcond(&ctx, op); break; case INDEX_op_setcond2_i32: @@ -3058,19 +3311,26 @@ void tcg_optimize(TCGContext *s) case INDEX_op_bitsel_vec: done = fold_bitsel_vec(&ctx, op); break; - CASE_OP_32_64(sextract): + case INDEX_op_sextract: done = fold_sextract(&ctx, op); break; - CASE_OP_32_64(sub): + case INDEX_op_sub: done = fold_sub(&ctx, op); break; + case INDEX_op_subbi: + done = fold_subbi(&ctx, op); + break; + case INDEX_op_subbio: + done = fold_subbio(&ctx, op); + break; + case INDEX_op_subbo: + done = fold_subbo(&ctx, op); + break; case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; - CASE_OP_32_64(sub2): - done = fold_sub2(&ctx, op); - break; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: done = fold_xor(&ctx, op); break; case INDEX_op_set_label: |