Diffstat (limited to 'tcg')
-rw-r--r--  tcg/aarch64/tcg-target.c.inc     |  10
-rw-r--r--  tcg/arm/tcg-target.c.inc         |  10
-rw-r--r--  tcg/i386/tcg-target.c.inc        |  10
-rw-r--r--  tcg/loongarch64/tcg-target.c.inc |   4
-rw-r--r--  tcg/meson.build                  |   4
-rw-r--r--  tcg/mips/tcg-target.c.inc        |   6
-rw-r--r--  tcg/optimize.c                   | 454
-rw-r--r--  tcg/perf.c                       |   2
-rw-r--r--  tcg/ppc/tcg-target.c.inc         |  14
-rw-r--r--  tcg/riscv/tcg-target.c.inc       |   6
-rw-r--r--  tcg/s390x/tcg-target.c.inc       |   4
-rw-r--r--  tcg/sparc64/tcg-target.c.inc     |   4
-rw-r--r--  tcg/tcg-op-gvec.c                | 380
-rw-r--r--  tcg/tcg-op-ldst.c                |   3
-rw-r--r--  tcg/tcg.c                        |  12
15 files changed, 553 insertions, 370 deletions
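All of the backend hunks below update the same softmmu fast path: the TLB index is derived from the page number of the guest address, and the comparator keeps the page part of the address plus any alignment bits that must be zero. The following is a minimal standalone C sketch of that arithmetic, not QEMU code, assuming illustrative values (4 KiB pages, 32-byte TLB entries, a 512-entry TLB); the real values come from the target configuration and from CPUTLBDescFast.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS   12   /* assumed: 4 KiB target pages */
#define TARGET_PAGE_MASK   ((uint64_t)-1 << TARGET_PAGE_BITS)
#define CPU_TLB_ENTRY_BITS 5    /* assumed: log2(sizeof(CPUTLBEntry)) */

int main(void)
{
    uint64_t addr = 0x12345678;                            /* guest virtual address */
    uint64_t mask = (uint64_t)0x1ff << CPU_TLB_ENTRY_BITS; /* assumed f[mmu_idx].mask, 512 entries */
    uint64_t a_mask = 3;                                   /* alignment mask for a 4-byte access */

    /* TLB index, pre-scaled to a CPUTLBEntry offset: what AND_LSR/SRLI/etc. compute below. */
    uint64_t entry_ofs = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask;

    /* Comparator: page part of the address, plus the alignment bits that must be zero. */
    uint64_t cmp = addr & (TARGET_PAGE_MASK | a_mask);

    printf("entry offset 0x%" PRIx64 ", comparator 0x%" PRIx64 "\n", entry_ofs, cmp);
    return 0;
}

With TARGET_PAGE_BITS known at compile time, the shift amounts and page masks become immediates rather than values read from TCGContext, which is what lets the hunks drop s->page_bits and s->page_mask.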
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4cb647c..3b088b7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1661,7 +1661,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, unsigned s_mask = (1u << s_bits) - 1; unsigned mem_index = get_mmuidx(oi); TCGReg addr_adj; - TCGType mask_type; uint64_t compare_mask; ldst = new_ldst_label(s); @@ -1669,9 +1668,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->oi = oi; ldst->addr_reg = addr_reg; - mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32 - ? TCG_TYPE_I64 : TCG_TYPE_I32); - /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); @@ -1679,9 +1675,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tlb_mask_table_ofs(s, mem_index), 1, 0); /* Extract the TLB index from the address into X0. */ - tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, + tcg_out_insn(s, 3502S, AND_LSR, TCG_TYPE_I64, TCG_REG_TMP0, TCG_REG_TMP0, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); /* Add the tlb_table pointer, forming the CPUTLBEntry address. */ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0); @@ -1707,7 +1703,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_insn(s, 3401, ADDI, addr_type, addr_adj, addr_reg, s_mask - a_mask); } - compare_mask = (uint64_t)s->page_mask | a_mask; + compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; /* Store the page mask part of the address into TMP2. */ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2, diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 447e435..836894b 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1427,7 +1427,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the tlb index from the address into R0. */ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addr, - SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS)); + SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); /* * Add the tlb_table pointer, creating the CPUTLBEntry address in R1. @@ -1463,8 +1463,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, addr, s_mask - a_mask); } - if (use_armv7_instructions && s->page_bits <= 16) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask)); + if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, t_addr, TCG_REG_TMP, 0); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, @@ -1475,10 +1475,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addr, a_mask); } tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, - SHIFT_IMM_LSR(s->page_bits)); + SHIFT_IMM_LSR(TARGET_PAGE_BITS)); tcg_out_dat_reg(s, (a_mask ? 
COND_EQ : COND_AL), ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, - SHIFT_IMM_LSL(s->page_bits)); + SHIFT_IMM_LSL(TARGET_PAGE_BITS)); } } else if (a_mask) { ldst = new_ldst_label(s); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 09fce27..088c6c9 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2199,16 +2199,14 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); if (TCG_TYPE_PTR == TCG_TYPE_I64) { hrexw = P_REXW; - if (s->page_bits + s->tlb_dyn_max_bits > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; } } tcg_out_mov(s, tlbtype, TCG_REG_L0, addr); tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, fast_ofs + offsetof(CPUTLBDescFast, mask)); @@ -2227,7 +2225,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, addr, s_mask - a_mask); } - tlb_mask = s->page_mask | a_mask; + tlb_mask = TARGET_PAGE_MASK | a_mask; tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e5580d6..10c6921 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1065,7 +1065,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); @@ -1091,7 +1091,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); } tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, - a_bits, s->page_bits - 1); + a_bits, TARGET_PAGE_BITS - 1); /* Compare masked address with the TLB entry. */ ldst->label_ptr[0] = s->code_ptr; diff --git a/tcg/meson.build b/tcg/meson.build index bd2821e..706a6eb 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -27,5 +27,5 @@ if host_os == 'linux' tcg_ss.add(files('perf.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 2c0457e..400eafb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1199,9 +1199,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the TLB index from the address into TMP3. */ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) { tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } else { - tcg_out_dsrl(s, TCG_TMP3, addr, s->page_bits - CPU_TLB_ENTRY_BITS); + tcg_out_dsrl(s, TCG_TMP3, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); @@ -1224,7 +1224,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * For unaligned accesses, compare against the end of the access to * verify that it does not cross a page boundary. 
*/ - tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask); + tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask); if (a_mask < s_mask) { tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32 diff --git a/tcg/optimize.c b/tcg/optimize.c index 10a76c5..62a128b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -39,12 +39,11 @@ typedef struct MemCopyInfo { } MemCopyInfo; typedef struct TempOptInfo { - bool is_const; TCGTemp *prev_copy; TCGTemp *next_copy; QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy; - uint64_t val; uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ + uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */ uint64_t s_mask; /* mask bit is 1 if value bit matches msb */ } TempOptInfo; @@ -73,12 +72,14 @@ static inline TempOptInfo *arg_info(TCGArg arg) static inline bool ti_is_const(TempOptInfo *ti) { - return ti->is_const; + /* If all bits that are not known zeros are known ones, it's constant. */ + return ti->z_mask == ti->o_mask; } static inline uint64_t ti_const_val(TempOptInfo *ti) { - return ti->val; + /* If constant, both z_mask and o_mask contain the value. */ + return ti->z_mask; } static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val) @@ -101,6 +102,11 @@ static inline bool arg_is_const(TCGArg arg) return ts_is_const(arg_temp(arg)); } +static inline uint64_t arg_const_val(TCGArg arg) +{ + return ti_const_val(arg_info(arg)); +} + static inline bool arg_is_const_val(TCGArg arg, uint64_t val) { return ts_is_const_val(arg_temp(arg), val); @@ -137,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts) ti->prev_copy = ts; QSIMPLEQ_INIT(&ti->mem_copy); if (ts->kind == TEMP_CONST) { - ti->is_const = true; - ti->val = ts->val; ti->z_mask = ts->val; + ti->o_mask = ts->val; ti->s_mask = INT64_MIN >> clrsb64(ts->val); } else { - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; } } @@ -229,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts) pi->next_copy = ti->next_copy; ti->next_copy = ts; ti->prev_copy = ts; - ti->is_const = false; ti->z_mask = -1; + ti->o_mask = 0; ti->s_mask = 0; if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) { @@ -385,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) op->args[1] = src; di->z_mask = si->z_mask; + di->o_mask = si->o_mask; di->s_mask = si->s_mask; if (src_ts->type == dst_ts->type) { @@ -394,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) di->prev_copy = src_ts; ni->prev_copy = dst_ts; si->next_copy = dst_ts; - di->is_const = si->is_const; - di->val = si->val; if (!QSIMPLEQ_EMPTY(&si->mem_copy) && cmp_better_copy(src_ts, dst_ts) == dst_ts) { move_mem_copies(dst_ts, src_ts); } + } else if (dst_ts->type == TCG_TYPE_I32) { + di->z_mask = (int32_t)di->z_mask; + di->o_mask = (int32_t)di->o_mask; + di->s_mask |= INT32_MIN; + } else { + di->z_mask |= MAKE_64BIT_MASK(32, 32); + di->o_mask = (uint32_t)di->o_mask; + di->s_mask = INT64_MIN; } return true; } @@ -687,8 +699,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) { if (arg_is_const(x) && arg_is_const(y)) { - uint64_t xv = arg_info(x)->val; - uint64_t yv = arg_info(y)->val; + uint64_t xv = arg_const_val(x); + uint64_t yv = arg_const_val(y); switch (type) { case TCG_TYPE_I32: @@ -772,6 +784,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) * Return -1 if the condition can't be simplified, * and the result of the condition (0 or 1) if it can. 
*/ +static bool fold_and(OptContext *ctx, TCGOp *op); static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, TCGArg *p1, TCGArg *p2, TCGArg *pcond) { @@ -801,14 +814,14 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x */ if (args_are_copies(*p1, *p2) || - (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) { + (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_eqne_cond(cond); return -1; } /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */ - if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) { + if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) { *p2 = arg_new_constant(ctx, 0); *pcond = tcg_tst_ltge_cond(cond); return -1; @@ -822,6 +835,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, op2->args[0] = tmp; op2->args[1] = *p1; op2->args[2] = *p2; + fold_and(ctx, op2); *p1 = tmp; *p2 = arg_new_constant(ctx, 0); @@ -849,13 +863,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) bh = args[3]; if (arg_is_const(bl) && arg_is_const(bh)) { - tcg_target_ulong blv = arg_info(bl)->val; - tcg_target_ulong bhv = arg_info(bh)->val; + tcg_target_ulong blv = arg_const_val(bl); + tcg_target_ulong bhv = arg_const_val(bh); uint64_t b = deposit64(blv, 32, 32, bhv); if (arg_is_const(al) && arg_is_const(ah)) { - tcg_target_ulong alv = arg_info(al)->val; - tcg_target_ulong ahv = arg_info(ah)->val; + tcg_target_ulong alv = arg_const_val(al); + tcg_target_ulong ahv = arg_const_val(ah); uint64_t a = deposit64(alv, 32, 32, ahv); r = do_constant_folding_cond_64(a, b, c); @@ -917,9 +931,12 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) op1->args[0] = t1; op1->args[1] = al; op1->args[2] = bl; + fold_and(ctx, op1); + op2->args[0] = t2; op2->args[1] = ah; op2->args[2] = bh; + fold_and(ctx, op2); args[0] = t1; args[1] = t2; @@ -989,9 +1006,8 @@ static bool finish_folding(OptContext *ctx, TCGOp *op) static bool fold_const1(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t; + uint64_t t = arg_const_val(op->args[1]); - t = arg_info(op->args[1])->val; t = do_constant_folding(op->opc, ctx->type, t, 0); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1001,8 +1017,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op) static bool fold_const2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t1 = arg_info(op->args[1])->val; - uint64_t t2 = arg_info(op->args[2])->val; + uint64_t t1 = arg_const_val(op->args[1]); + uint64_t t2 = arg_const_val(op->args[2]); t1 = do_constant_folding(op->opc, ctx->type, t1, t2); return tcg_opt_gen_movi(ctx, op, op->args[0], t1); @@ -1028,8 +1044,9 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) * If z_mask allows, fold the output to constant zero. * The passed s_mask may be augmented by z_mask.
-static bool fold_masks_zs(OptContext *ctx, TCGOp *op, - uint64_t z_mask, int64_t s_mask) +static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, + int64_t s_mask, uint64_t a_mask) { const TCGOpDef *def = &tcg_op_defs[op->opc]; TCGTemp *ts; @@ -1048,11 +1065,22 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op, */ if (ctx->type == TCG_TYPE_I32) { z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; s_mask |= INT32_MIN; + a_mask = (uint32_t)a_mask; + } + + /* Bits that are known 1 and bits that are known 0 must not overlap. */ + tcg_debug_assert((o_mask & ~z_mask) == 0); + + /* If all bits that are not known zero are known one, it's a constant. */ + if (z_mask == o_mask) { + return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask); } - if (z_mask == 0) { - return tcg_opt_gen_movi(ctx, op, op->args[0], 0); + /* If no bits are affected, the operation devolves to a copy. */ + if (a_mask == 0) { + return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } ts = arg_temp(op->args[0]); @@ -1064,36 +1092,46 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op, /* Canonicalize s_mask and incorporate data from z_mask. */ rep = clz64(~s_mask); rep = MAX(rep, clz64(z_mask)); + rep = MAX(rep, clz64(~o_mask)); rep = MAX(rep - 1, 0); ti->s_mask = INT64_MIN >> rep; + return false; +} +static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask, + uint64_t o_mask, int64_t s_mask, uint64_t a_mask) +{ + fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask); + return true; } -static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) +static bool fold_masks_zos(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask, uint64_t s_mask) { - return fold_masks_zs(ctx, op, z_mask, 0); + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1); } -static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) +static bool fold_masks_zo(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t o_mask) { - return fold_masks_zs(ctx, op, -1, s_mask); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1); } -/* - * An "affected" mask bit is 0 if and only if the result is identical - * to the first input. Thus if the entire mask is 0, the operation - * is equivalent to a copy. - */ -static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask) +static bool fold_masks_zs(OptContext *ctx, TCGOp *op, + uint64_t z_mask, uint64_t s_mask) { - if (ctx->type == TCG_TYPE_I32) { - a_mask = (uint32_t)a_mask; - } - if (a_mask == 0) { - return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); - } - return false; + return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1); +} + +static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask) +{ + return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1); +} + +static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask) +{ + return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1); } /* @@ -1393,30 +1431,18 @@ static bool fold_addco(OptContext *ctx, TCGOp *op) static bool fold_and(OptContext *ctx, TCGOp *op) { - uint64_t z1, z2, z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || - fold_xi_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, -1) || - fold_xx_to_x(ctx, op)) { + if (fold_const2_commutative(ctx, op)) { return true; } t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z1 = t1->z_mask; - z2 = t2->z_mask; - - /* - * Known-zeros does not imply known-ones.
Therefore unless - * arg2 is constant, we can't infer affected bits from it. - */ - if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) { - return true; - } - z_mask = z1 & z2; + z_mask = t1->z_mask & t2->z_mask; + o_mask = t1->o_mask & t2->o_mask; /* * Sign repetitions are perforce all identical, whether they are 1 or 0. @@ -1424,24 +1450,44 @@ static bool fold_and(OptContext *ctx, TCGOp *op) */ s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + /* Affected bits are those not known zero, masked by those known one. */ + a_mask = t1->z_mask & ~t2->o_mask; + + if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) { + if (ti_is_const(t2)) { + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + uint64_t val = ti_const_val(t2); + if (!(val & (val + 1))) { + unsigned len = ctz64(~val); + if (TCG_TARGET_extract_valid(ctx->type, 0, len)) { + op->opc = INDEX_op_extract; + op->args[2] = 0; + op->args[3] = len; + } + } + } else { + fold_xx_to_x(ctx, op); + } + } + return true; } static bool fold_andc(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, 0) || - fold_xi_to_x(ctx, op, 0) || - fold_ix_to_not(ctx, op, -1)) { + if (fold_const2(ctx, op)) { return true; } t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z_mask = t1->z_mask; if (ti_is_const(t2)) { /* Fold andc r,x,i to and r,x,~i. */ @@ -1461,21 +1507,19 @@ static bool fold_andc(OptContext *ctx, TCGOp *op) op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); return fold_and(ctx, op); } - - /* - * Known-zeros does not imply known-ones. Therefore unless - * arg2 is constant, we can't infer anything from it. - */ - if (ti_is_const(t2)) { - uint64_t v2 = ti_const_val(t2); - if (fold_affected_mask(ctx, op, z_mask & v2)) { - return true; - } - z_mask &= ~v2; + if (fold_xx_to_i(ctx, op, 0) || + fold_ix_to_not(ctx, op, -1)) { + return true; } + z_mask = t1->z_mask & ~t2->o_mask; + o_mask = t1->o_mask & ~t2->z_mask; s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + /* Affected bits are those not known zero, masked by those known zero. 
*/ + a_mask = t1->z_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) @@ -1486,8 +1530,8 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t tv = arg_info(op->args[2])->val; - uint64_t fv = arg_info(op->args[3])->val; + uint64_t tv = arg_const_val(op->args[2]); + uint64_t fv = arg_const_val(op->args[3]); if (tv == -1 && fv == 0) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); @@ -1504,7 +1548,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } } if (arg_is_const(op->args[2])) { - uint64_t tv = arg_info(op->args[2])->val; + uint64_t tv = arg_const_val(op->args[2]); if (tv == -1) { op->opc = INDEX_op_or_vec; op->args[2] = op->args[3]; @@ -1518,7 +1562,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op) } } if (arg_is_const(op->args[3])) { - uint64_t fv = arg_info(op->args[3])->val; + uint64_t fv = arg_const_val(op->args[3]); if (fv == 0) { op->opc = INDEX_op_and_vec; return fold_and(ctx, op); @@ -1649,53 +1693,52 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) static bool fold_bswap(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, sign; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1 = arg_info(op->args[1]); + int flags = op->args[2]; if (ti_is_const(t1)) { return tcg_opt_gen_movi(ctx, op, op->args[0], do_constant_folding(op->opc, ctx->type, - ti_const_val(t1), - op->args[2])); + ti_const_val(t1), flags)); } z_mask = t1->z_mask; + o_mask = t1->o_mask; + s_mask = 0; + switch (op->opc) { case INDEX_op_bswap16: z_mask = bswap16(z_mask); - sign = INT16_MIN; + o_mask = bswap16(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int16_t)z_mask; + o_mask = (int16_t)o_mask; + s_mask = INT16_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(16, 48); + } break; case INDEX_op_bswap32: z_mask = bswap32(z_mask); - sign = INT32_MIN; + o_mask = bswap32(o_mask); + if (flags & TCG_BSWAP_OS) { + z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; + s_mask = INT32_MIN; + } else if (!(flags & TCG_BSWAP_OZ)) { + z_mask |= MAKE_64BIT_MASK(32, 32); + } break; case INDEX_op_bswap64: z_mask = bswap64(z_mask); - sign = INT64_MIN; + o_mask = bswap64(o_mask); break; default: g_assert_not_reached(); } - s_mask = 0; - switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { - case TCG_BSWAP_OZ: - break; - case TCG_BSWAP_OS: - /* If the sign bit may be 1, force all the bits above to 1. */ - if (z_mask & sign) { - z_mask |= sign; - } - /* The value and therefore s_mask is explicitly sign-extended. */ - s_mask = sign; - break; - default: - /* The high bits are undefined: force all bits above the sign to 1. 
*/ - z_mask |= sign << 1; - break; - } - - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_call(OptContext *ctx, TCGOp *op) @@ -1826,7 +1869,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) int ofs = op->args[3]; int len = op->args[4]; int width = 8 * tcg_type_size(ctx->type); - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask; if (ti_is_const(t1) && ti_is_const(t2)) { return tcg_opt_gen_movi(ctx, op, op->args[0], @@ -1861,7 +1904,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) } z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask); - return fold_masks_zs(ctx, op, z_mask, s_mask); + o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_divide(OptContext *ctx, TCGOp *op) @@ -1876,7 +1921,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op) static bool fold_dup(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1])) { - uint64_t t = arg_info(op->args[1])->val; + uint64_t t = arg_const_val(op->args[1]); t = dup_const(TCGOP_VECE(op), t); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1886,8 +1931,8 @@ static bool fold_dup(OptContext *ctx, TCGOp *op) static bool fold_dup2(OptContext *ctx, TCGOp *op) { if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32, - arg_info(op->args[2])->val); + uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32, + arg_const_val(op->args[2])); return tcg_opt_gen_movi(ctx, op, op->args[0], t); } @@ -1900,12 +1945,10 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) static bool fold_eqv(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1, *t2; - if (fold_const2_commutative(ctx, op) || - fold_xi_to_x(ctx, op, -1) || - fold_xi_to_not(ctx, op, 0)) { + if (fold_const2_commutative(ctx, op)) { return true; } @@ -1930,13 +1973,17 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op) } t1 = arg_info(op->args[1]); + + z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask); s_mask = t1->s_mask & t2->s_mask; - return fold_masks_s(ctx, op, s_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; + uint64_t z_mask, o_mask, a_mask; TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; @@ -1946,37 +1993,41 @@ static bool fold_extract(OptContext *ctx, TCGOp *op) extract64(ti_const_val(t1), pos, len)); } - z_mask_old = t1->z_mask; - z_mask = extract64(z_mask_old, pos, len); - if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { - return true; - } + z_mask = extract64(t1->z_mask, pos, len); + o_mask = extract64(t1->o_mask, pos, len); + a_mask = pos ? 
-1 : t1->z_mask ^ z_mask; - return fold_masks_z(ctx, op, z_mask); + return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask); } static bool fold_extract2(OptContext *ctx, TCGOp *op) { - if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { - uint64_t v1 = arg_info(op->args[1])->val; - uint64_t v2 = arg_info(op->args[2])->val; - int shr = op->args[3]; + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + uint64_t z1 = t1->z_mask; + uint64_t z2 = t2->z_mask; + uint64_t o1 = t1->o_mask; + uint64_t o2 = t2->o_mask; + int shr = op->args[3]; - if (ctx->type == TCG_TYPE_I32) { - v1 = (uint32_t)v1 >> shr; - v2 = (uint64_t)((int32_t)v2 << (32 - shr)); - } else { - v1 >>= shr; - v2 <<= 64 - shr; - } - return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); + if (ctx->type == TCG_TYPE_I32) { + z1 = (uint32_t)z1 >> shr; + o1 = (uint32_t)o1 >> shr; + z2 = (uint64_t)((int32_t)z2 << (32 - shr)); + o2 = (uint64_t)((int32_t)o2 << (32 - shr)); + } else { + z1 >>= shr; + o1 >>= shr; + z2 <<= 64 - shr; + o2 <<= 64 - shr; } - return finish_folding(ctx, op); + + return fold_masks_zo(ctx, op, z1 | z2, o1 | o2); } static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask, z_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1; if (fold_const1(ctx, op)) { @@ -1985,40 +2036,48 @@ static bool fold_exts(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); z_mask = t1->z_mask; + o_mask = t1->o_mask; s_mask = t1->s_mask; switch (op->opc) { case INDEX_op_ext_i32_i64: s_mask |= INT32_MIN; z_mask = (int32_t)z_mask; + o_mask = (int32_t)o_mask; break; default: g_assert_not_reached(); } - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask; + uint64_t z_mask, o_mask; + TempOptInfo *t1; if (fold_const1(ctx, op)) { return true; } - z_mask = arg_info(op->args[1])->z_mask; + t1 = arg_info(op->args[1]); + z_mask = t1->z_mask; + o_mask = t1->o_mask; + switch (op->opc) { case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: z_mask = (uint32_t)z_mask; + o_mask = (uint32_t)o_mask; break; case INDEX_op_extrh_i64_i32: z_mask >>= 32; + o_mask >>= 32; break; default: g_assert_not_reached(); } - return fold_masks_z(ctx, op, z_mask); + return fold_masks_zo(ctx, op, z_mask, o_mask); } static bool fold_mb(OptContext *ctx, TCGOp *op) @@ -2052,7 +2111,7 @@ static bool fold_mov(OptContext *ctx, TCGOp *op) static bool fold_movcond(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *tt, *ft; int i; @@ -2078,6 +2137,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) tt = arg_info(op->args[3]); ft = arg_info(op->args[4]); z_mask = tt->z_mask | ft->z_mask; + o_mask = tt->o_mask & ft->o_mask; s_mask = tt->s_mask & ft->s_mask; if (ti_is_const(tt) && ti_is_const(ft)) { @@ -2100,7 +2160,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) } } - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_mul(OptContext *ctx, TCGOp *op) @@ -2127,8 +2187,8 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) swap_commutative(op->args[0], &op->args[2], &op->args[3]); if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { - uint64_t a = arg_info(op->args[2])->val; - uint64_t b = arg_info(op->args[3])->val; + uint64_t a = arg_const_val(op->args[2]); + uint64_t b = arg_const_val(op->args[3]); uint64_t h, l; TCGArg rl, rh; TCGOp *op2; 
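The z_mask/o_mask rules threaded through fold_and and fold_andc above, and through the fold_nand/fold_nor/fold_xor hunks below, are ordinary known-bits algebra. Here is a standalone C sketch of those rules; the KnownBits type and kb_* helper names are invented for illustration and are not QEMU API:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* z: bit may be 1 (a 0 bit is known zero); o: bit must be 1 (known one). */
typedef struct { uint64_t z, o; } KnownBits;

static KnownBits kb_and(KnownBits a, KnownBits b)
{
    /* Matches fold_and: known one only where both are, maybe one only where both may be. */
    return (KnownBits){ a.z & b.z, a.o & b.o };
}

static KnownBits kb_nand(KnownBits a, KnownBits b)
{
    /* Matches fold_nand: complementing the result swaps the roles of z and o. */
    return (KnownBits){ ~(a.o & b.o), ~(a.z & b.z) };
}

static KnownBits kb_xor(KnownBits a, KnownBits b)
{
    /* Matches fold_xor: known one where exactly one side is known one. */
    return (KnownBits){ (a.z | b.z) & ~(a.o & b.o),
                        (a.o & ~b.z) | (b.o & ~a.z) };
}

/* Matches ti_is_const: every maybe-one bit is also a must-one bit. */
static bool kb_is_const(KnownBits a, uint64_t *val)
{
    if (a.z == a.o) {
        *val = a.o;   /* for a constant, z and o both equal the value */
        return true;
    }
    return false;
}

int main(void)
{
    KnownBits x = { 0xff, 0x0f };  /* bits 0-3 known one, bits 4-7 unknown */
    KnownBits c = { 0x0f, 0x0f };  /* the constant 0x0f */
    uint64_t v;

    assert(kb_is_const(kb_and(x, c), &v) && v == 0x0f); /* x & 0x0f folds to 0x0f */
    assert(!kb_is_const(kb_nand(x, x), &v));            /* upper bits remain unknown */
    assert(kb_is_const(kb_xor(c, c), &v) && v == 0);    /* constant ^ constant folds */
    return 0;
}

The "affected" masks fall out of the same representation: for AND, only bits of arg1 that may be nonzero and are not forced to one by arg2 (t1->z_mask & ~t2->o_mask) can differ between input and result, so when that mask is zero the op devolves to a copy.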
@@ -2171,16 +2231,22 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) static bool fold_nand(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, -1)) { return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask & t2->o_mask); + o_mask = ~(t1->z_mask & t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_neg_no_const(OptContext *ctx, TCGOp *op) @@ -2199,29 +2265,39 @@ static bool fold_neg(OptContext *ctx, TCGOp *op) static bool fold_nor(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_not(ctx, op, 0)) { return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; - return fold_masks_s(ctx, op, s_mask); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + z_mask = ~(t1->o_mask | t2->o_mask); + o_mask = ~(t1->z_mask | t2->z_mask); + s_mask = t1->s_mask & t2->s_mask; + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } static bool fold_not(OptContext *ctx, TCGOp *op) { + TempOptInfo *t1; + if (fold_const1(ctx, op)) { return true; } - return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask); + + t1 = arg_info(op->args[1]); + return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask); } static bool fold_or(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || @@ -2232,20 +2308,23 @@ static bool fold_or(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); + z_mask = t1->z_mask | t2->z_mask; + o_mask = t1->o_mask | t2->o_mask; s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + /* Affected bits are those not known one, masked by those known zero. */ + a_mask = ~t1->o_mask & t2->z_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_orc(OptContext *ctx, TCGOp *op) { - uint64_t s_mask; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1, *t2; - if (fold_const2(ctx, op) || - fold_xx_to_i(ctx, op, -1) || - fold_xi_to_x(ctx, op, -1) || - fold_ix_to_not(ctx, op, 0)) { + if (fold_const2(ctx, op)) { return true; } @@ -2268,10 +2347,20 @@ static bool fold_orc(OptContext *ctx, TCGOp *op) op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); return fold_or(ctx, op); } - + if (fold_xx_to_i(ctx, op, -1) || + fold_ix_to_not(ctx, op, 0)) { + return true; + } t1 = arg_info(op->args[1]); + + z_mask = t1->z_mask | ~t2->o_mask; + o_mask = t1->o_mask | ~t2->z_mask; s_mask = t1->s_mask & t2->s_mask; - return fold_masks_s(ctx, op, s_mask); + + /* Affected bits are those not known one, masked by those known one. 
*/ + a_mask = ~t1->o_mask & t2->o_mask; + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op) @@ -2330,7 +2419,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) } a_zmask = arg_info(op->args[1])->z_mask; - b_val = arg_info(op->args[2])->val; + b_val = arg_const_val(op->args[2]); cond = op->args[3]; if (ctx->type == TCG_TYPE_I32) { @@ -2418,7 +2507,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } src2 = op->args[2]; - val = arg_info(src2)->val; + val = arg_const_val(src2); if (!is_power_of_2(val)) { return; } @@ -2594,7 +2683,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) static bool fold_sextract(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask, s_mask_old; + uint64_t z_mask, o_mask, s_mask, a_mask; TempOptInfo *t1 = arg_info(op->args[1]); int pos = op->args[2]; int len = op->args[3]; @@ -2604,21 +2693,19 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op) sextract64(ti_const_val(t1), pos, len)); } - s_mask_old = t1->s_mask; - s_mask = s_mask_old >> pos; + s_mask = t1->s_mask >> pos; s_mask |= -1ull << (len - 1); - - if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { - return true; - } + a_mask = pos ? -1 : s_mask & ~t1->s_mask; z_mask = sextract64(t1->z_mask, pos, len); - return fold_masks_zs(ctx, op, z_mask, s_mask); + o_mask = sextract64(t1->o_mask, pos, len); + + return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask); } static bool fold_shift(OptContext *ctx, TCGOp *op) { - uint64_t s_mask, z_mask; + uint64_t s_mask, z_mask, o_mask; TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || @@ -2631,14 +2718,16 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) t2 = arg_info(op->args[2]); s_mask = t1->s_mask; z_mask = t1->z_mask; + o_mask = t1->o_mask; if (ti_is_const(t2)) { int sh = ti_const_val(t2); z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); + o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh); s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); - return fold_masks_zs(ctx, op, z_mask, s_mask); + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } switch (op->opc) { @@ -2669,7 +2758,7 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) TCGOpcode neg_op; bool have_neg; - if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { + if (!arg_is_const_val(op->args[1], 0)) { return false; } @@ -2719,7 +2808,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) /* Fold sub r,x,i to add r,x,-i */ if (arg_is_const(op->args[2])) { - uint64_t val = arg_info(op->args[2])->val; + uint64_t val = arg_const_val(op->args[2]); op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -val); @@ -2984,7 +3073,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op) static bool fold_xor(OptContext *ctx, TCGOp *op) { - uint64_t z_mask, s_mask; + uint64_t z_mask, o_mask, s_mask; TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || @@ -2996,9 +3085,12 @@ static bool fold_xor(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); t2 = arg_info(op->args[2]); - z_mask = t1->z_mask | t2->z_mask; + + z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask); + o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask); s_mask = t1->s_mask & t2->s_mask; - return fold_masks_zs(ctx, op, z_mask, s_mask); + + return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask); } /* Propagate constants and copies, fold constant expressions. 
*/ diff --git a/tcg/perf.c b/tcg/perf.c --- a/tcg/perf.c +++ b/tcg/perf.c @@ -334,7 +334,7 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb, /* FIXME: This replicates the restore_state_to_opc() logic. */ q[insn].address = gen_insn_data[insn * INSN_START_WORDS + 0]; if (tb_cflags(tb) & CF_PCREL) { - q[insn].address |= (guest_pc & qemu_target_page_mask()); + q[insn].address |= guest_pc & TARGET_PAGE_MASK; } q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0); } diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 2e94778..b8b23d4 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2440,10 +2440,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 32) { tcg_out_shri32(s, TCG_REG_R0, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } else { tcg_out_shri64(s, TCG_REG_R0, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); @@ -2480,7 +2480,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, a_bits = s_bits; } tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0, - (32 - a_bits) & 31, 31 - s->page_bits); + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); } else { TCGReg t = addr; @@ -2501,13 +2501,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Mask the address for the requested alignment. */ if (addr_type == TCG_TYPE_I32) { tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, - (32 - a_bits) & 31, 31 - s->page_bits); + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); } else if (a_bits == 0) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); + tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); } else { tcg_out_rld(s, RLDICL, TCG_REG_R0, t, - 64 - s->page_bits, s->page_bits - a_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); } } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f9417d1..31b9f7d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1706,7 +1706,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); @@ -1722,7 +1722,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ?
OPC_ADDIW : OPC_ADDI, addr_adj, addr_reg, s_mask - a_mask); } - compare_mask = s->page_mask | a_mask; + compare_mask = TARGET_PAGE_MASK | a_mask; if (compare_mask == sextreg(compare_mask, 0, 12)) { tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask); } else { @@ -2502,7 +2502,7 @@ static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, } } if (ofs + len == 32) { - tgen_shli(s, TCG_TYPE_I32, a0, a1, ofs); + tgen_shri(s, TCG_TYPE_I32, a0, a1, ofs); return; } if (len == 1) { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 7ca0071..84a9e73 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2004,7 +2004,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->addr_reg = addr_reg; tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off); tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off); @@ -2016,7 +2016,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * byte of the access. */ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask); - tlb_mask = (uint64_t)s->page_mask | a_mask; + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; if (a_off == 0) { tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask); } else { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 9e004fb..5e5c3f1 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1120,7 +1120,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the page index, shifted into place for tlb index. */ tcg_out_arithi(s, TCG_REG_T1, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL); tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND); /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */ @@ -1136,7 +1136,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, h->base = TCG_REG_T1; /* Mask out the page offset, except for the required alignment. */ - compare_mask = s->page_mask | a_mask; + compare_mask = TARGET_PAGE_MASK | a_mask; if (check_fit_tl(compare_mask, 13)) { tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND); } else { diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index d32a4f1..2d18454 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -57,30 +57,39 @@ static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs) tcg_debug_assert((ofs & max_align) == 0); } -/* Verify vector overlap rules for two operands. */ -static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s) +/* + * Verify vector overlap rules for two operands. + * When dbase and abase are not the same pointer, we cannot check for + * overlap at compile-time, but the runtime restrictions remain. + */ +static void check_overlap_2(TCGv_ptr dbase, uint32_t d, + TCGv_ptr abase, uint32_t a, uint32_t s) { - tcg_debug_assert(d == a || d + s <= a || a + s <= d); + tcg_debug_assert(dbase != abase || d == a || d + s <= a || a + s <= d); } /* Verify vector overlap rules for three operands. 
*/ -static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s) +static void check_overlap_3(TCGv_ptr dbase, uint32_t d, + TCGv_ptr abase, uint32_t a, + TCGv_ptr bbase, uint32_t b, uint32_t s) { - check_overlap_2(d, a, s); - check_overlap_2(d, b, s); - check_overlap_2(a, b, s); + check_overlap_2(dbase, d, abase, a, s); + check_overlap_2(dbase, d, bbase, b, s); + check_overlap_2(abase, a, bbase, b, s); } /* Verify vector overlap rules for four operands. */ -static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b, - uint32_t c, uint32_t s) +static void check_overlap_4(TCGv_ptr dbase, uint32_t d, + TCGv_ptr abase, uint32_t a, + TCGv_ptr bbase, uint32_t b, + TCGv_ptr cbase, uint32_t c, uint32_t s) { - check_overlap_2(d, a, s); - check_overlap_2(d, b, s); - check_overlap_2(d, c, s); - check_overlap_2(a, b, s); - check_overlap_2(a, c, s); - check_overlap_2(b, c, s); + check_overlap_2(dbase, d, abase, a, s); + check_overlap_2(dbase, d, bbase, b, s); + check_overlap_2(dbase, d, cbase, c, s); + check_overlap_2(abase, a, bbase, b, s); + check_overlap_2(abase, a, cbase, c, s); + check_overlap_2(bbase, b, cbase, c, s); } /* Create a descriptor from components. */ @@ -124,9 +133,10 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data) } /* Generate a call to a gvec-style helper with two vector operands. */ -void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_2 *fn) +static void expand_2_ool(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_2 *fn) { TCGv_ptr a0, a1; TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data)); @@ -134,8 +144,8 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, a0 = tcg_temp_ebb_new_ptr(); a1 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, tcg_env, dofs); - tcg_gen_addi_ptr(a1, tcg_env, aofs); + tcg_gen_addi_ptr(a0, dbase, dofs); + tcg_gen_addi_ptr(a1, abase, aofs); fn(a0, a1, desc); @@ -143,6 +153,13 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, tcg_temp_free_ptr(a1); } +void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_2 *fn) +{ + expand_2_ool(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, data, fn); +} + /* Generate a call to a gvec-style helper with two vector operands and one scalar operand. */ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, @@ -165,9 +182,11 @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c, } /* Generate a call to a gvec-style helper with three vector operands. 
*/ -void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, int32_t data, - gen_helper_gvec_3 *fn) +static void expand_3_ool(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, + int32_t data, gen_helper_gvec_3 *fn) { TCGv_ptr a0, a1, a2; TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data)); @@ -176,9 +195,9 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, a1 = tcg_temp_ebb_new_ptr(); a2 = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(a0, tcg_env, dofs); - tcg_gen_addi_ptr(a1, tcg_env, aofs); - tcg_gen_addi_ptr(a2, tcg_env, bofs); + tcg_gen_addi_ptr(a0, dbase, dofs); + tcg_gen_addi_ptr(a1, abase, aofs); + tcg_gen_addi_ptr(a2, bbase, bofs); fn(a0, a1, a2, desc); @@ -187,6 +206,14 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, tcg_temp_free_ptr(a2); } +void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, int32_t data, + gen_helper_gvec_3 *fn) +{ + expand_3_ool(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, + oprsz, maxsz, data, fn); +} + /* Generate a call to a gvec-style helper with four vector operands. */ void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, @@ -380,7 +407,7 @@ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) return q <= MAX_UNROLL; } -static void expand_clr(uint32_t dofs, uint32_t maxsz); +static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz); /* Duplicate C as per VECE. */ uint64_t (dup_const)(unsigned vece, uint64_t c) @@ -483,8 +510,8 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece, return 0; } -static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, TCGv_vec t_vec) +static void do_dup_store(TCGType type, TCGv_ptr dbase, uint32_t dofs, + uint32_t oprsz, uint32_t maxsz, TCGv_vec t_vec) { uint32_t i = 0; @@ -496,7 +523,7 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, * are misaligned wrt the maximum vector size, so do that first. */ if (dofs & 8) { - tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64); i += 8; } @@ -508,17 +535,17 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ for (; i + 32 <= oprsz; i += 32) { - tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V256); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V256); } /* fallthru */ case TCG_TYPE_V128: for (; i + 16 <= oprsz; i += 16) { - tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V128); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V128); } break; case TCG_TYPE_V64: for (; i < oprsz; i += 8) { - tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64); + tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64); } break; default: @@ -526,17 +553,18 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } -/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. +/* + * Set OPRSZ bytes at DBASE + DOFS to replications of IN_32, IN_64 or IN_C. * Only one of IN_32 or IN_64 may be set; * IN_C is used if IN_32 and IN_64 are unset. 
*/ -static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, - uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64, - uint64_t in_c) +static void do_dup(unsigned vece, TCGv_ptr dbase, uint32_t dofs, + uint32_t oprsz, uint32_t maxsz, + TCGv_i32 in_32, TCGv_i64 in_64, uint64_t in_c) { TCGType type; TCGv_i64 t_64; @@ -574,7 +602,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, } else { tcg_gen_dupi_vec(vece, t_vec, in_c); } - do_dup_store(type, dofs, oprsz, maxsz, t_vec); + do_dup_store(type, dbase, dofs, oprsz, maxsz, t_vec); return; } @@ -618,14 +646,14 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, /* Implement inline if we picked an implementation size above. */ if (t_32) { for (i = 0; i < oprsz; i += 4) { - tcg_gen_st_i32(t_32, tcg_env, dofs + i); + tcg_gen_st_i32(t_32, dbase, dofs + i); } tcg_temp_free_i32(t_32); goto done; } if (t_64) { for (i = 0; i < oprsz; i += 8) { - tcg_gen_st_i64(t_64, tcg_env, dofs + i); + tcg_gen_st_i64(t_64, dbase, dofs + i); } tcg_temp_free_i64(t_64); goto done; @@ -634,7 +662,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, /* Otherwise implement out of line. */ t_ptr = tcg_temp_ebb_new_ptr(); - tcg_gen_addi_ptr(t_ptr, tcg_env, dofs); + tcg_gen_addi_ptr(t_ptr, dbase, dofs); /* * This may be expand_clr for the tail of an operation, e.g. @@ -703,31 +731,32 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, done: if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } /* Likewise, but with zero. */ -static void expand_clr(uint32_t dofs, uint32_t maxsz) +static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz) { - do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0); + do_dup(MO_8, dbase, dofs, maxsz, maxsz, NULL, NULL, 0); } /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ -static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, - bool load_dest, void (*fni)(TCGv_i32, TCGv_i32)) +static void expand_2_i32(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, + uint32_t aofs, uint32_t oprsz, bool load_dest, + void (*fni)(TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, tcg_env, aofs + i); + tcg_gen_ld_i32(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_i32(t1, tcg_env, dofs + i); + tcg_gen_ld_i32(t1, dbase, dofs + i); } fni(t1, t0); - tcg_gen_st_i32(t1, tcg_env, dofs + i); + tcg_gen_st_i32(t1, dbase, dofs + i); } tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); @@ -775,8 +804,10 @@ static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, } /* Expand OPSZ bytes worth of three-operand operations using i32 elements. 
*/ -static void expand_3_i32(uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, bool load_dest, +static void expand_3_i32(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(); @@ -785,13 +816,13 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs, uint32_t i; for (i = 0; i < oprsz; i += 4) { - tcg_gen_ld_i32(t0, tcg_env, aofs + i); - tcg_gen_ld_i32(t1, tcg_env, bofs + i); + tcg_gen_ld_i32(t0, abase, aofs + i); + tcg_gen_ld_i32(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_i32(t2, tcg_env, dofs + i); + tcg_gen_ld_i32(t2, dbase, dofs + i); } fni(t2, t0, t1); - tcg_gen_st_i32(t2, tcg_env, dofs + i); + tcg_gen_st_i32(t2, dbase, dofs + i); } tcg_temp_free_i32(t2); tcg_temp_free_i32(t1); @@ -877,20 +908,21 @@ static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, } /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ -static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, - bool load_dest, void (*fni)(TCGv_i64, TCGv_i64)) +static void expand_2_i64(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase, + uint32_t aofs, uint32_t oprsz, bool load_dest, + void (*fni)(TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, tcg_env, aofs + i); + tcg_gen_ld_i64(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_i64(t1, tcg_env, dofs + i); + tcg_gen_ld_i64(t1, dbase, dofs + i); } fni(t1, t0); - tcg_gen_st_i64(t1, tcg_env, dofs + i); + tcg_gen_st_i64(t1, dbase, dofs + i); } tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -938,8 +970,10 @@ static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, } /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */ -static void expand_3_i64(uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, bool load_dest, +static void expand_3_i64(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(); @@ -948,13 +982,13 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs, uint32_t i; for (i = 0; i < oprsz; i += 8) { - tcg_gen_ld_i64(t0, tcg_env, aofs + i); - tcg_gen_ld_i64(t1, tcg_env, bofs + i); + tcg_gen_ld_i64(t0, abase, aofs + i); + tcg_gen_ld_i64(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_i64(t2, tcg_env, dofs + i); + tcg_gen_ld_i64(t2, dbase, dofs + i); } fni(t2, t0, t1); - tcg_gen_st_i64(t2, tcg_env, dofs + i); + tcg_gen_st_i64(t2, dbase, dofs + i); } tcg_temp_free_i64(t2); tcg_temp_free_i64(t1); @@ -1040,7 +1074,8 @@ static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, } /* Expand OPSZ bytes worth of two-operand operations using host vectors. 
*/ -static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, +static void expand_2_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec)) @@ -1049,12 +1084,12 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_vec t0 = tcg_temp_new_vec(type); TCGv_vec t1 = tcg_temp_new_vec(type); - tcg_gen_ld_vec(t0, tcg_env, aofs + i); + tcg_gen_ld_vec(t0, abase, aofs + i); if (load_dest) { - tcg_gen_ld_vec(t1, tcg_env, dofs + i); + tcg_gen_ld_vec(t1, dbase, dofs + i); } fni(vece, t1, t0); - tcg_gen_st_vec(t1, tcg_env, dofs + i); + tcg_gen_st_vec(t1, dbase, dofs + i); } } @@ -1098,8 +1133,9 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs, } /* Expand OPSZ bytes worth of three-operand operations using host vectors. */ -static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, - uint32_t bofs, uint32_t oprsz, +static void expand_3_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + TCGv_ptr bbase, uint32_t bofs, uint32_t oprsz, uint32_t tysz, TCGType type, bool load_dest, void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) { @@ -1108,13 +1144,13 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_vec t1 = tcg_temp_new_vec(type); TCGv_vec t2 = tcg_temp_new_vec(type); - tcg_gen_ld_vec(t0, tcg_env, aofs + i); - tcg_gen_ld_vec(t1, tcg_env, bofs + i); + tcg_gen_ld_vec(t0, abase, aofs + i); + tcg_gen_ld_vec(t1, bbase, bofs + i); if (load_dest) { - tcg_gen_ld_vec(t2, tcg_env, dofs + i); + tcg_gen_ld_vec(t2, dbase, dofs + i); } fni(vece, t2, t0, t1); - tcg_gen_st_vec(t2, tcg_env, dofs + i); + tcg_gen_st_vec(t2, dbase, dofs + i); } } @@ -1196,8 +1232,9 @@ static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, } /* Expand a vector two-operand operation. */ -void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) +void tcg_gen_gvec_2_var(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) { const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); @@ -1205,7 +1242,7 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(dbase, dofs, abase, aofs, maxsz); type = 0; if (g->fniv) { @@ -1218,8 +1255,8 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, * that e.g. size == 80 would be expanded with 2x32 + 1x16. 
*/ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, - g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, some, 32, + TCG_TYPE_V256, g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1229,22 +1266,25 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, - g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 16, + TCG_TYPE_V128, g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, - g->load_dest, g->fniv); + expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 8, + TCG_TYPE_V64, g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8); + expand_2_i64(dbase, dofs, abase, aofs, + oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4); + expand_2_i32(dbase, dofs, abase, aofs, + oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); + expand_2_ool(dbase, dofs, abase, aofs, + oprsz, maxsz, g->data, g->fno); oprsz = maxsz; } break; @@ -1255,10 +1295,16 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(dbase, dofs + oprsz, maxsz - oprsz); } } +void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) +{ + tcg_gen_gvec_2_var(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, g); +} + /* Expand a vector operation with two vectors and an immediate. */ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, int64_t c, const GVecGen2i *g) @@ -1269,7 +1315,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t some; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); type = 0; if (g->fniv) { @@ -1324,7 +1370,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, tcg_swap_vecop_list(hold_list); if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } @@ -1335,7 +1381,7 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, TCGType type; check_size_align(oprsz, maxsz, dofs | aofs); - check_overlap_2(dofs, aofs, maxsz); + check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz); type = 0; if (g->fniv) { @@ -1401,13 +1447,15 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, } if (oprsz < maxsz) { - expand_clr(dofs + oprsz, maxsz - oprsz); + expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz); } } /* Expand a vector three-operand operation. */ -void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, - uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) +void tcg_gen_gvec_3_var(TCGv_ptr dbase, uint32_t dofs, + TCGv_ptr abase, uint32_t aofs, + TCGv_ptr bbase, uint32_t bofs, + uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) { const TCGOpcode *this_list = g->opt_opc ? 
 /* Expand a vector operation with two vectors and an immediate. */
 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
                      uint32_t maxsz, int64_t c, const GVecGen2i *g)
@@ -1269,7 +1315,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs);
-    check_overlap_2(dofs, aofs, maxsz);
+    check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1324,7 +1370,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -1335,7 +1381,7 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
     TCGType type;
 
     check_size_align(oprsz, maxsz, dofs | aofs);
-    check_overlap_2(dofs, aofs, maxsz);
+    check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1401,13 +1447,15 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
     }
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
 /* Expand a vector three-operand operation. */
-void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
-                    uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+void tcg_gen_gvec_3_var(TCGv_ptr dbase, uint32_t dofs,
+                        TCGv_ptr abase, uint32_t aofs,
+                        TCGv_ptr bbase, uint32_t bofs,
+                        uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
 {
     const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
     const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
@@ -1415,7 +1463,7 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs | bofs);
-    check_overlap_3(dofs, aofs, bofs, maxsz);
+    check_overlap_3(dbase, dofs, abase, aofs, bbase, bofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1428,8 +1476,8 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         */
        some = QEMU_ALIGN_DOWN(oprsz, 32);
-        expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
-                     g->load_dest, g->fniv);
+        expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+                     some, 32, TCG_TYPE_V256, g->load_dest, g->fniv);
         if (some == oprsz) {
             break;
         }
@@ -1440,23 +1488,25 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
         maxsz -= some;
         /* fallthru */
     case TCG_TYPE_V128:
-        expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
-                     g->load_dest, g->fniv);
+        expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+                     oprsz, 16, TCG_TYPE_V128, g->load_dest, g->fniv);
         break;
     case TCG_TYPE_V64:
-        expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
-                     g->load_dest, g->fniv);
+        expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+                     oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv);
         break;
     case 0:
         if (g->fni8 && check_size_impl(oprsz, 8)) {
-            expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
+            expand_3_i64(dbase, dofs, abase, aofs, bbase, bofs,
+                         oprsz, g->load_dest, g->fni8);
         } else if (g->fni4 && check_size_impl(oprsz, 4)) {
-            expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
+            expand_3_i32(dbase, dofs, abase, aofs, bbase, bofs,
+                         oprsz, g->load_dest, g->fni4);
         } else {
             assert(g->fno != NULL);
-            tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz,
-                               maxsz, g->data, g->fno);
+            expand_3_ool(dbase, dofs, abase, aofs, bbase, bofs,
+                         oprsz, maxsz, g->data, g->fno);
             oprsz = maxsz;
         }
         break;
@@ -1467,10 +1517,17 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
     }
 }
 
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+                    uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+{
+    tcg_gen_gvec_3_var(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+                       oprsz, maxsz, g);
+}
+
 /* Expand a vector operation with three vectors and an immediate. */
 void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                      uint32_t oprsz, uint32_t maxsz, int64_t c,
@@ -1482,7 +1539,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs | bofs);
-    check_overlap_3(dofs, aofs, bofs, maxsz);
+    check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1536,7 +1593,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -1550,7 +1607,8 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
-    check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+    check_overlap_4(tcg_env, dofs, tcg_env, aofs,
+                    tcg_env, bofs, tcg_env, cofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1605,7 +1663,7 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -1620,7 +1678,8 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
-    check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+    check_overlap_4(tcg_env, dofs, tcg_env, aofs,
+                    tcg_env, bofs, tcg_env, cofs, maxsz);
 
     type = 0;
     if (g->fniv) {
@@ -1674,7 +1733,7 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -1687,8 +1746,9 @@ static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
     tcg_gen_mov_vec(a, b);
 }
 
-void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
-                      uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_mov_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+                          TCGv_ptr abase, uint32_t aofs,
+                          uint32_t oprsz, uint32_t maxsz)
 {
     static const GVecGen2 g = {
         .fni8 = tcg_gen_mov_i64,
@@ -1696,14 +1756,22 @@ void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
         .fno = gen_helper_gvec_mov,
         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
-    if (dofs != aofs) {
-        tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
-    } else {
+
+    if (dofs == aofs && dbase == abase) {
         check_size_align(oprsz, maxsz, dofs);
         if (oprsz < maxsz) {
-            expand_clr(dofs + oprsz, maxsz - oprsz);
+            expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
         }
+        return;
     }
+
+    tcg_gen_gvec_2_var(dbase, dofs, abase, aofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_mov_var(vece, tcg_env, dofs, tcg_env, aofs, oprsz, maxsz);
 }
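Note the reworked short-circuit in tcg_gen_gvec_mov_var: a move is a no-op (modulo tail clearing) only when both base and offset match, so dofs == aofs alone no longer suffices once bases can differ. A hypothetical use of the new entry point (names invented for illustration):

    /* Hypothetical usage sketch: copy a 16-byte vector between regions
     * addressed by two different base pointers.  Because the bases differ,
     * this is a real copy even though dofs == aofs. */
    static void gen_copy_between_bases(TCGv_ptr dst_base, TCGv_ptr src_base)
    {
        tcg_gen_gvec_mov_var(MO_8, dst_base, 0, src_base, 0, 16, 16);
    }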
 void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
@@ -1711,7 +1779,7 @@
 {
     check_size_align(oprsz, maxsz, dofs);
     tcg_debug_assert(vece <= MO_32);
-    do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+    do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0);
 }
 
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
@@ -1719,7 +1787,7 @@
 {
     check_size_align(oprsz, maxsz, dofs);
     tcg_debug_assert(vece <= MO_64);
-    do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+    do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0);
 }
 
 void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -1731,7 +1799,7 @@
         if (type != 0) {
             TCGv_vec t_vec = tcg_temp_new_vec(type);
             tcg_gen_dup_mem_vec(vece, t_vec, tcg_env, aofs);
-            do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+            do_dup_store(type, tcg_env, dofs, oprsz, maxsz, t_vec);
         } else if (vece <= MO_32) {
             TCGv_i32 in = tcg_temp_ebb_new_i32();
             switch (vece) {
@@ -1745,12 +1813,12 @@
                 tcg_gen_ld_i32(in, tcg_env, aofs);
                 break;
             }
-            do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+            do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0);
             tcg_temp_free_i32(in);
         } else {
             TCGv_i64 in = tcg_temp_ebb_new_i64();
             tcg_gen_ld_i64(in, tcg_env, aofs);
-            do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+            do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0);
             tcg_temp_free_i64(in);
         }
     } else if (vece == 4) {
@@ -1779,7 +1847,7 @@
             tcg_temp_free_i64(in1);
         }
         if (oprsz < maxsz) {
-            expand_clr(dofs + oprsz, maxsz - oprsz);
+            expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
         }
     } else if (vece == 5) {
         /* 256-bit duplicate. */
@@ -1822,18 +1890,24 @@
             }
         }
         if (oprsz < maxsz) {
-            expand_clr(dofs + oprsz, maxsz - oprsz);
+            expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
         }
     } else {
         g_assert_not_reached();
     }
 }
 
+void tcg_gen_gvec_dup_imm_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+                              uint32_t oprsz, uint32_t maxsz, uint64_t x)
+{
+    check_size_align(oprsz, maxsz, dofs);
+    do_dup(vece, dbase, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
 void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
                           uint32_t maxsz, uint64_t x)
 {
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
+    tcg_gen_gvec_dup_imm_var(vece, tcg_env, dofs, oprsz, maxsz, x);
 }
 
 void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -1931,8 +2005,10 @@ void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 
 static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 };
 
-void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
-                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_add_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+                          TCGv_ptr abase, uint32_t aofs,
+                          TCGv_ptr bbase, uint32_t bofs,
+                          uint32_t oprsz, uint32_t maxsz)
 {
     static const GVecGen3 g[4] = {
         { .fni8 = tcg_gen_vec_add8_i64,
@@ -1959,7 +2035,15 @@ void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     tcg_debug_assert(vece <= MO_64);
-    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+    tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs,
+                       oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_add_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+                         oprsz, maxsz);
 }
 
 void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
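tcg_gen_gvec_add_var follows the same template as the mov conversion: the static GVecGen3 table is untouched and only the dispatch call gains base operands. A hypothetical caller mixing bases might look like this (sketch; all names and offsets invented):

    /* Hypothetical usage sketch: d[i] = a[i] + b[i] over 32-bit lanes,
     * with the destination and the two sources reachable from different
     * base pointers. */
    static void gen_add32_mixed_bases(TCGv_ptr dst_base, TCGv_ptr src_base,
                                      uint32_t dofs, uint32_t aofs,
                                      uint32_t bofs)
    {
        tcg_gen_gvec_add_var(MO_32, dst_base, dofs,
                             src_base, aofs, src_base, bofs,
                             16, 16);   /* oprsz = maxsz = 16 bytes */
    }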
@@ -2112,8 +2196,10 @@ void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     tcg_temp_free_i64(t2);
 }
 
-void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
-                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_sub_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+                          TCGv_ptr abase, uint32_t aofs,
+                          TCGv_ptr bbase, uint32_t bofs,
+                          uint32_t oprsz, uint32_t maxsz)
 {
     static const GVecGen3 g[4] = {
         { .fni8 = tcg_gen_vec_sub8_i64,
@@ -2140,7 +2226,15 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     tcg_debug_assert(vece <= MO_64);
-    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+    tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs,
+                       oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    tcg_gen_gvec_sub_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+                         oprsz, maxsz);
 }
 
 static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 };
@@ -3149,7 +3243,7 @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs);
-    check_overlap_2(dofs, aofs, maxsz);
+    check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
 
     /* If the backend has a scalar expansion, great. */
     type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64);
@@ -3255,7 +3349,7 @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
 
  clear_tail:
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -3769,10 +3863,10 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
     uint32_t some;
 
     check_size_align(oprsz, maxsz, dofs | aofs | bofs);
-    check_overlap_3(dofs, aofs, bofs, maxsz);
+    check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz);
 
     if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
-        do_dup(MO_8, dofs, oprsz, maxsz,
+        do_dup(MO_8, tcg_env, dofs, oprsz, maxsz,
                NULL, NULL, -(cond == TCG_COND_ALWAYS));
         return;
     }
@@ -3834,7 +3928,7 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
     tcg_swap_vecop_list(hold_list);
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
     }
 }
 
@@ -3889,10 +3983,10 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
     TCGType type;
 
     check_size_align(oprsz, maxsz, dofs | aofs);
-    check_overlap_2(dofs, aofs, maxsz);
+    check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
 
     if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
-        do_dup(MO_8, dofs, oprsz, maxsz,
+        do_dup(MO_8, tcg_env, dofs, oprsz, maxsz,
                NULL, NULL, -(cond == TCG_COND_ALWAYS));
         return;
     }
@@ -3975,7 +4069,7 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
     }
 
     if (oprsz < maxsz) {
-        expand_clr(dofs + oprsz, maxsz - oprsz);
+        expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
    }
 }
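A recurring detail above is that check_overlap_2/3/4 now receive the base pointers alongside the offsets: two operand regions can only be reasoned about as aliasing when they hang off the same base. The helpers' bodies fall outside this diff; the invariant they would enforce is roughly the following (sketch only, not the patch's code):

    /* Illustrative sketch of base-aware overlap checking.  When the bases
     * differ, offsets prove nothing; when they match, regions must either
     * coincide exactly or be fully disjoint. */
    static void check_overlap_2_sketch(TCGv_ptr dbase, uint32_t d,
                                       TCGv_ptr abase, uint32_t a,
                                       uint32_t s)
    {
        tcg_debug_assert(dbase != abase ||
                         d == a || d + s <= a || a + s <= d);
    }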
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index fa9e522..5484960 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -27,6 +27,7 @@
 #include "tcg/tcg-temp-internal.h"
 #include "tcg/tcg-op-common.h"
 #include "tcg/tcg-mo.h"
+#include "exec/target_page.h"
 #include "exec/translation-block.h"
 #include "exec/plugin-gen.h"
 #include "tcg-internal.h"
@@ -40,7 +41,7 @@ static void check_max_alignment(unsigned a_bits)
      * FIXME: Must keep the count up-to-date with "exec/tlb-flags.h".
      */
     if (tcg_use_softmmu) {
-        tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
+        tcg_debug_assert(a_bits + 5 <= TARGET_PAGE_BITS);
     }
 }
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -34,6 +34,7 @@
 #include "qemu/cacheflush.h"
 #include "qemu/cacheinfo.h"
 #include "qemu/timer.h"
+#include "exec/target_page.h"
 #include "exec/translation-block.h"
 #include "exec/tlb-common.h"
 #include "tcg/startup.h"
@@ -1330,8 +1331,9 @@ void *tcg_malloc_internal(TCGContext *s, int size)
     p = s->pool_current;
     if (!p) {
         p = s->pool_first;
-        if (!p)
+        if (!p) {
             goto new_pool;
+        }
     } else {
         if (!p->next) {
         new_pool:
@@ -1350,8 +1352,8 @@ void *tcg_malloc_internal(TCGContext *s, int size)
         }
     }
     s->pool_current = p;
-    s->pool_cur = p->data + size;
-    s->pool_end = p->data + p->size;
+    s->pool_cur = (uintptr_t)p->data + size;
+    s->pool_end = (uintptr_t)p->data + p->size;
     return p->data;
 }
 
@@ -1363,7 +1365,7 @@ void tcg_pool_reset(TCGContext *s)
         g_free(p);
     }
     s->pool_first_large = NULL;
-    s->pool_cur = s->pool_end = NULL;
+    s->pool_cur = s->pool_end = 0;
     s->pool_current = NULL;
 }
 
@@ -5153,7 +5155,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
 
     if (its->val_type == TEMP_VAL_CONST) {
         /* Propagate constant via movi -> dupi.  */
-        tcg_target_ulong val = its->val;
+        tcg_target_ulong val = dup_const(vece, its->val);
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, its);
         }
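The final hunk fixes constant propagation through dup: a TEMP_VAL_CONST scalar must be replicated across all element lanes before it can stand in for the dup result, which is what the existing dup_const helper computes. As a simplified, self-contained restatement of that lane replication (not QEMU's actual code):

    #include <stdint.h>
    #include <stdlib.h>

    /* Simplified restatement of lane replication as performed by dup_const.
     * vece is log2 of the element size in bytes: 0 = 8-bit lanes,
     * 1 = 16-bit, 2 = 32-bit, 3 = 64-bit. */
    static uint64_t dup_const_sketch(unsigned vece, uint64_t c)
    {
        switch (vece) {
        case 0: /* MO_8 */
            return 0x0101010101010101ull * (uint8_t)c;
        case 1: /* MO_16 */
            return 0x0001000100010001ull * (uint16_t)c;
        case 2: /* MO_32 */
            return 0x0000000100000001ull * (uint32_t)c;
        case 3: /* MO_64 */
            return c;
        default:
            abort();
        }
    }

Without the replication, duplicating e.g. the MO_8 constant 0x7f would materialize only the low byte of the lane value instead of 0x7f7f7f7f7f7f7f7f.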