Diffstat (limited to 'tcg')
-rw-r--r--  tcg/aarch64/tcg-target.c.inc       10
-rw-r--r--  tcg/arm/tcg-target.c.inc           10
-rw-r--r--  tcg/i386/tcg-target.c.inc          10
-rw-r--r--  tcg/loongarch64/tcg-target.c.inc    4
-rw-r--r--  tcg/meson.build                     4
-rw-r--r--  tcg/mips/tcg-target.c.inc           6
-rw-r--r--  tcg/optimize.c                    454
-rw-r--r--  tcg/perf.c                          2
-rw-r--r--  tcg/ppc/tcg-target.c.inc           14
-rw-r--r--  tcg/riscv/tcg-target.c.inc          6
-rw-r--r--  tcg/s390x/tcg-target.c.inc          4
-rw-r--r--  tcg/sparc64/tcg-target.c.inc        4
-rw-r--r--  tcg/tcg-op-gvec.c                 380
-rw-r--r--  tcg/tcg-op-ldst.c                   3
-rw-r--r--  tcg/tcg.c                          12
15 files changed, 553 insertions(+), 370 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 4cb647c..3b088b7 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1661,7 +1661,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
unsigned s_mask = (1u << s_bits) - 1;
unsigned mem_index = get_mmuidx(oi);
TCGReg addr_adj;
- TCGType mask_type;
uint64_t compare_mask;
ldst = new_ldst_label(s);
@@ -1669,9 +1668,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->oi = oi;
ldst->addr_reg = addr_reg;
- mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
-
/* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
@@ -1679,9 +1675,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tlb_mask_table_ofs(s, mem_index), 1, 0);
/* Extract the TLB index from the address into X0. */
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
+ tcg_out_insn(s, 3502S, AND_LSR, TCG_TYPE_I64,
TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
/* Add the tlb_table pointer, forming the CPUTLBEntry address. */
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
@@ -1707,7 +1703,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_insn(s, 3401, ADDI, addr_type,
addr_adj, addr_reg, s_mask - a_mask);
}
- compare_mask = (uint64_t)s->page_mask | a_mask;
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
/* Store the page mask part of the address into TMP2. */
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
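For orientation, here is a rough C model of the TLB fast path these backend hunks emit, now with the page shift folded in at compile time. This is a hedged sketch of the existing softmmu scheme, not code from the patch; the helper name and parameters are illustrative stand-ins for TARGET_PAGE_BITS, CPU_TLB_ENTRY_BITS, and CPUTLBDescFast.mask.

```c
#include <stdint.h>

/* fast_mask is (n_entries - 1) << CPU_TLB_ENTRY_BITS, so shifting the
   guest address down by (page_bits - entry_bits) and masking yields a
   byte offset into the TLB table for the page containing addr. */
static inline uint64_t tlb_entry_byte_ofs(uint64_t addr, uint64_t fast_mask,
                                          unsigned page_bits,
                                          unsigned entry_bits)
{
    return (addr >> (page_bits - entry_bits)) & fast_mask;
}
```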
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 447e435..836894b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1427,7 +1427,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the tlb index from the address into R0. */
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addr,
- SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
/*
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
@@ -1463,8 +1463,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
addr, s_mask - a_mask);
}
- if (use_armv7_instructions && s->page_bits <= 16) {
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
t_addr, TCG_REG_TMP, 0);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
@@ -1475,10 +1475,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addr, a_mask);
}
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
- SHIFT_IMM_LSR(s->page_bits));
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
0, TCG_REG_R2, TCG_REG_TMP,
- SHIFT_IMM_LSL(s->page_bits));
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
}
} else if (a_mask) {
ldst = new_ldst_label(s);
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 09fce27..088c6c9 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -2199,16 +2199,14 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
- if (s->page_bits + s->tlb_dyn_max_bits > 32) {
- tlbtype = TCG_TYPE_I64;
- tlbrexw = P_REXW;
- }
+ tlbtype = TCG_TYPE_I64;
+ tlbrexw = P_REXW;
}
}
tcg_out_mov(s, tlbtype, TCG_REG_L0, addr);
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
fast_ofs + offsetof(CPUTLBDescFast, mask));
@@ -2227,7 +2225,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
addr, s_mask - a_mask);
}
- tlb_mask = s->page_mask | a_mask;
+ tlb_mask = TARGET_PAGE_MASK | a_mask;
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
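Each backend builds the same comparison value from the (possibly alignment-adjusted) address: the page-number bits must match the TLB entry, and the low alignment bits must be zero. A minimal sketch, with page_mask standing in for TARGET_PAGE_MASK and a_mask for (alignment - 1):

```c
#include <stdint.h>

/* The value compared against the TLB entry's tag on the fast path. */
static inline uint64_t tlb_compare_value(uint64_t addr, uint64_t page_mask,
                                         uint64_t a_mask)
{
    return addr & (page_mask | a_mask);
}
```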
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e5580d6..10c6921 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1065,7 +1065,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
@@ -1091,7 +1091,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
}
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
- a_bits, s->page_bits - 1);
+ a_bits, TARGET_PAGE_BITS - 1);
/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
diff --git a/tcg/meson.build b/tcg/meson.build
index bd2821e..706a6eb 100644
--- a/tcg/meson.build
+++ b/tcg/meson.build
@@ -27,5 +27,5 @@ if host_os == 'linux'
tcg_ss.add(files('perf.c'))
endif
-libuser_ss.add_all(tcg_ss)
-libsystem_ss.add_all(tcg_ss)
+user_ss.add_all(tcg_ss)
+system_ss.add_all(tcg_ss)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 2c0457e..400eafb 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1199,9 +1199,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the TLB index from the address into TMP3. */
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addr,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
} else {
- tcg_out_dsrl(s, TCG_TMP3, addr, s->page_bits - CPU_TLB_ENTRY_BITS);
+ tcg_out_dsrl(s, TCG_TMP3, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
@@ -1224,7 +1224,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* For unaligned accesses, compare against the end of the access to
* verify that it does not cross a page boundary.
*/
- tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
+ tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
if (a_mask < s_mask) {
tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32
|| addr_type == TCG_TYPE_I32
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 10a76c5..62a128b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,12 +39,11 @@ typedef struct MemCopyInfo {
} MemCopyInfo;
typedef struct TempOptInfo {
- bool is_const;
TCGTemp *prev_copy;
TCGTemp *next_copy;
QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
- uint64_t val;
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
+ uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */
uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
@@ -73,12 +72,14 @@ static inline TempOptInfo *arg_info(TCGArg arg)
static inline bool ti_is_const(TempOptInfo *ti)
{
- return ti->is_const;
+ /* If all bits that are not known zeros are known ones, it's constant. */
+ return ti->z_mask == ti->o_mask;
}
static inline uint64_t ti_const_val(TempOptInfo *ti)
{
- return ti->val;
+ /* If constant, both z_mask and o_mask contain the value. */
+ return ti->z_mask;
}
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
@@ -101,6 +102,11 @@ static inline bool arg_is_const(TCGArg arg)
return ts_is_const(arg_temp(arg));
}
+static inline uint64_t arg_const_val(TCGArg arg)
+{
+ return ti_const_val(arg_info(arg));
+}
+
static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
return ts_is_const_val(arg_temp(arg), val);
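A minimal sketch of the invariant behind the new mask pair, assuming the definitions above: z_mask bounds which bits may be 1, o_mask which bits must be 1, and equality of the two pins down a single value.

```c
#include <stdint.h>
#include <stdbool.h>

/* A runtime value v is consistent with (z_mask, o_mask) iff every set
   bit of v is allowed by z_mask and every bit required by o_mask is
   set in v. When z_mask == o_mask, exactly one value qualifies, which
   is what ti_is_const()/ti_const_val() exploit. */
static bool masks_admit(uint64_t v, uint64_t z_mask, uint64_t o_mask)
{
    return (v & ~z_mask) == 0 && (o_mask & ~v) == 0;
}
```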
@@ -137,13 +143,12 @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
ti->prev_copy = ts;
QSIMPLEQ_INIT(&ti->mem_copy);
if (ts->kind == TEMP_CONST) {
- ti->is_const = true;
- ti->val = ts->val;
ti->z_mask = ts->val;
+ ti->o_mask = ts->val;
ti->s_mask = INT64_MIN >> clrsb64(ts->val);
} else {
- ti->is_const = false;
ti->z_mask = -1;
+ ti->o_mask = 0;
ti->s_mask = 0;
}
}
@@ -229,8 +234,8 @@ static void reset_ts(OptContext *ctx, TCGTemp *ts)
pi->next_copy = ti->next_copy;
ti->next_copy = ts;
ti->prev_copy = ts;
- ti->is_const = false;
ti->z_mask = -1;
+ ti->o_mask = 0;
ti->s_mask = 0;
if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
@@ -385,6 +390,7 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
op->args[1] = src;
di->z_mask = si->z_mask;
+ di->o_mask = si->o_mask;
di->s_mask = si->s_mask;
if (src_ts->type == dst_ts->type) {
@@ -394,13 +400,19 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
di->prev_copy = src_ts;
ni->prev_copy = dst_ts;
si->next_copy = dst_ts;
- di->is_const = si->is_const;
- di->val = si->val;
if (!QSIMPLEQ_EMPTY(&si->mem_copy)
&& cmp_better_copy(src_ts, dst_ts) == dst_ts) {
move_mem_copies(dst_ts, src_ts);
}
+ } else if (dst_ts->type == TCG_TYPE_I32) {
+ di->z_mask = (int32_t)di->z_mask;
+ di->o_mask = (int32_t)di->o_mask;
+ di->s_mask |= INT32_MIN;
+ } else {
+ di->z_mask |= MAKE_64BIT_MASK(32, 32);
+ di->o_mask = (uint32_t)di->o_mask;
+ di->s_mask = INT64_MIN;
}
return true;
}
@@ -687,8 +699,8 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
TCGArg y, TCGCond c)
{
if (arg_is_const(x) && arg_is_const(y)) {
- uint64_t xv = arg_info(x)->val;
- uint64_t yv = arg_info(y)->val;
+ uint64_t xv = arg_const_val(x);
+ uint64_t yv = arg_const_val(y);
switch (type) {
case TCG_TYPE_I32:
@@ -772,6 +784,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
* Return -1 if the condition can't be simplified,
* and the result of the condition (0 or 1) if it can.
*/
+static bool fold_and(OptContext *ctx, TCGOp *op);
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
@@ -801,14 +814,14 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
* TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
*/
if (args_are_copies(*p1, *p2) ||
- (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
+ (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
*p2 = arg_new_constant(ctx, 0);
*pcond = tcg_tst_eqne_cond(cond);
return -1;
}
/* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
- if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
+ if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
*p2 = arg_new_constant(ctx, 0);
*pcond = tcg_tst_ltge_cond(cond);
return -1;
@@ -822,6 +835,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
op2->args[0] = tmp;
op2->args[1] = *p1;
op2->args[2] = *p2;
+ fold_and(ctx, op2);
*p1 = tmp;
*p2 = arg_new_constant(ctx, 0);
@@ -849,13 +863,13 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
bh = args[3];
if (arg_is_const(bl) && arg_is_const(bh)) {
- tcg_target_ulong blv = arg_info(bl)->val;
- tcg_target_ulong bhv = arg_info(bh)->val;
+ tcg_target_ulong blv = arg_const_val(bl);
+ tcg_target_ulong bhv = arg_const_val(bh);
uint64_t b = deposit64(blv, 32, 32, bhv);
if (arg_is_const(al) && arg_is_const(ah)) {
- tcg_target_ulong alv = arg_info(al)->val;
- tcg_target_ulong ahv = arg_info(ah)->val;
+ tcg_target_ulong alv = arg_const_val(al);
+ tcg_target_ulong ahv = arg_const_val(ah);
uint64_t a = deposit64(alv, 32, 32, ahv);
r = do_constant_folding_cond_64(a, b, c);
@@ -917,9 +931,12 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
op1->args[0] = t1;
op1->args[1] = al;
op1->args[2] = bl;
+ fold_and(ctx, op1);
+
op2->args[0] = t2;
op2->args[1] = ah;
op2->args[2] = bh;
+ fold_and(ctx, op2);
args[0] = t1;
args[1] = t2;
@@ -989,9 +1006,8 @@ static bool finish_folding(OptContext *ctx, TCGOp *op)
static bool fold_const1(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1])) {
- uint64_t t;
+ uint64_t t = arg_const_val(op->args[1]);
- t = arg_info(op->args[1])->val;
t = do_constant_folding(op->opc, ctx->type, t, 0);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1001,8 +1017,8 @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
static bool fold_const2(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t t1 = arg_info(op->args[1])->val;
- uint64_t t2 = arg_info(op->args[2])->val;
+ uint64_t t1 = arg_const_val(op->args[1]);
+ uint64_t t2 = arg_const_val(op->args[2]);
t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
@@ -1028,8 +1044,9 @@ static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
* If z_mask allows, fold the output to constant zero.
* The passed s_mask may be augmented by z_mask.
*/
-static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
- uint64_t z_mask, int64_t s_mask)
+static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t o_mask,
+ int64_t s_mask, uint64_t a_mask)
{
const TCGOpDef *def = &tcg_op_defs[op->opc];
TCGTemp *ts;
@@ -1048,11 +1065,22 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
*/
if (ctx->type == TCG_TYPE_I32) {
z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
s_mask |= INT32_MIN;
+ a_mask = (uint32_t)a_mask;
+ }
+
+ /* Bits that are known 1 and bits that are known 0 must not overlap. */
+ tcg_debug_assert((o_mask & ~z_mask) == 0);
+
+ /* If all bits that are not known zero are known one, it's a constant. */
+ if (z_mask == o_mask) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
}
- if (z_mask == 0) {
- return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
+ /* If no bits are affected, the operation devolves to a copy. */
+ if (a_mask == 0) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
}
ts = arg_temp(op->args[0]);
@@ -1064,36 +1092,46 @@ static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
/* Canonicalize s_mask and incorporate data from z_mask. */
rep = clz64(~s_mask);
rep = MAX(rep, clz64(z_mask));
+ rep = MAX(rep, clz64(~o_mask));
rep = MAX(rep - 1, 0);
ti->s_mask = INT64_MIN >> rep;
+ return false;
+}
+
+static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
+ uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
+{
+ fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
return true;
}
-static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
+static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
- return fold_masks_zs(ctx, op, z_mask, 0);
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
}
-static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
+static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t o_mask)
{
- return fold_masks_zs(ctx, op, -1, s_mask);
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
}
-/*
- * An "affected" mask bit is 0 if and only if the result is identical
- * to the first input. Thus if the entire mask is 0, the operation
- * is equivalent to a copy.
- */
-static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
+static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
+ uint64_t z_mask, uint64_t s_mask)
{
- if (ctx->type == TCG_TYPE_I32) {
- a_mask = (uint32_t)a_mask;
- }
- if (a_mask == 0) {
- return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
- }
- return false;
+ return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
+}
+
+static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
+{
+ return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
+}
+
+static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
+{
+ return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
}
/*
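A hedged restatement of the decision order in fold_masks_zosa_int above: constant replacement is tried first, then devolution to a copy. The emit comments stand in for tcg_opt_gen_movi/tcg_opt_gen_mov.

```c
#include <stdint.h>
#include <stdbool.h>

/* Sketch only: returns true when the op can be replaced outright. */
static bool fold_masks_decision(uint64_t z_mask, uint64_t o_mask,
                                uint64_t a_mask)
{
    if (z_mask == o_mask) {
        /* Every not-known-zero bit is known-one: emit movi(o_mask). */
        return true;
    }
    if (a_mask == 0) {
        /* No bit of the first input can change: emit mov(arg1). */
        return true;
    }
    return false; /* keep the op; just record the refined masks */
}
```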
@@ -1393,30 +1431,18 @@ static bool fold_addco(OptContext *ctx, TCGOp *op)
static bool fold_and(OptContext *ctx, TCGOp *op)
{
- uint64_t z1, z2, z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask, a_mask;
TempOptInfo *t1, *t2;
- if (fold_const2_commutative(ctx, op) ||
- fold_xi_to_i(ctx, op, 0) ||
- fold_xi_to_x(ctx, op, -1) ||
- fold_xx_to_x(ctx, op)) {
+ if (fold_const2_commutative(ctx, op)) {
return true;
}
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z1 = t1->z_mask;
- z2 = t2->z_mask;
-
- /*
- * Known-zeros does not imply known-ones. Therefore unless
- * arg2 is constant, we can't infer affected bits from it.
- */
- if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
- return true;
- }
- z_mask = z1 & z2;
+ z_mask = t1->z_mask & t2->z_mask;
+ o_mask = t1->o_mask & t2->o_mask;
/*
* Sign repetitions are perforce all identical, whether they are 1 or 0.
@@ -1424,24 +1450,44 @@ static bool fold_and(OptContext *ctx, TCGOp *op)
*/
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ /* Affected bits are those not known zero, masked by those known one. */
+ a_mask = t1->z_mask & ~t2->o_mask;
+
+ if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
+ if (ti_is_const(t2)) {
+ /*
+ * Canonicalize on extract, if valid. This aids x86 with its
+ * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
+ * which does not require matching operands. Other backends can
+ * trivially expand the extract to AND during code generation.
+ */
+ uint64_t val = ti_const_val(t2);
+ if (!(val & (val + 1))) {
+ unsigned len = ctz64(~val);
+ if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
+ op->opc = INDEX_op_extract;
+ op->args[2] = 0;
+ op->args[3] = len;
+ }
+ }
+ } else {
+ fold_xx_to_x(ctx, op);
+ }
+ }
+ return true;
}
static bool fold_andc(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask, a_mask;
TempOptInfo *t1, *t2;
- if (fold_const2(ctx, op) ||
- fold_xx_to_i(ctx, op, 0) ||
- fold_xi_to_x(ctx, op, 0) ||
- fold_ix_to_not(ctx, op, -1)) {
+ if (fold_const2(ctx, op)) {
return true;
}
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z_mask = t1->z_mask;
if (ti_is_const(t2)) {
/* Fold andc r,x,i to and r,x,~i. */
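The bit trick behind fold_and's extract canonicalization above: a constant is a contiguous low mask (2^len - 1) exactly when val & (val + 1) == 0, and ctz64(~val) then recovers len. A standalone illustration:

```c
#include <stdint.h>
#include <stdbool.h>

/* True for 0x0, 0x1, 0x3, ..., 0xff, ..., i.e. masks of the form
   2^n - 1: adding 1 carries out of the low run of ones, leaving no
   bit in common with the original value. */
static bool is_low_mask(uint64_t val)
{
    return (val & (val + 1)) == 0;
}
```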
@@ -1461,21 +1507,19 @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
return fold_and(ctx, op);
}
-
- /*
- * Known-zeros does not imply known-ones. Therefore unless
- * arg2 is constant, we can't infer anything from it.
- */
- if (ti_is_const(t2)) {
- uint64_t v2 = ti_const_val(t2);
- if (fold_affected_mask(ctx, op, z_mask & v2)) {
- return true;
- }
- z_mask &= ~v2;
+ if (fold_xx_to_i(ctx, op, 0) ||
+ fold_ix_to_not(ctx, op, -1)) {
+ return true;
}
+ z_mask = t1->z_mask & ~t2->o_mask;
+ o_mask = t1->o_mask & ~t2->z_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ /* Affected bits are those not known zero, masked by those known zero. */
+ a_mask = t1->z_mask & t2->z_mask;
+
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
@@ -1486,8 +1530,8 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t tv = arg_info(op->args[2])->val;
- uint64_t fv = arg_info(op->args[3])->val;
+ uint64_t tv = arg_const_val(op->args[2]);
+ uint64_t fv = arg_const_val(op->args[3]);
if (tv == -1 && fv == 0) {
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
@@ -1504,7 +1548,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
}
if (arg_is_const(op->args[2])) {
- uint64_t tv = arg_info(op->args[2])->val;
+ uint64_t tv = arg_const_val(op->args[2]);
if (tv == -1) {
op->opc = INDEX_op_or_vec;
op->args[2] = op->args[3];
@@ -1518,7 +1562,7 @@ static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
}
}
if (arg_is_const(op->args[3])) {
- uint64_t fv = arg_info(op->args[3])->val;
+ uint64_t fv = arg_const_val(op->args[3]);
if (fv == 0) {
op->opc = INDEX_op_and_vec;
return fold_and(ctx, op);
@@ -1649,53 +1693,52 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask, sign;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1 = arg_info(op->args[1]);
+ int flags = op->args[2];
if (ti_is_const(t1)) {
return tcg_opt_gen_movi(ctx, op, op->args[0],
do_constant_folding(op->opc, ctx->type,
- ti_const_val(t1),
- op->args[2]));
+ ti_const_val(t1), flags));
}
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
+ s_mask = 0;
+
switch (op->opc) {
case INDEX_op_bswap16:
z_mask = bswap16(z_mask);
- sign = INT16_MIN;
+ o_mask = bswap16(o_mask);
+ if (flags & TCG_BSWAP_OS) {
+ z_mask = (int16_t)z_mask;
+ o_mask = (int16_t)o_mask;
+ s_mask = INT16_MIN;
+ } else if (!(flags & TCG_BSWAP_OZ)) {
+ z_mask |= MAKE_64BIT_MASK(16, 48);
+ }
break;
case INDEX_op_bswap32:
z_mask = bswap32(z_mask);
- sign = INT32_MIN;
+ o_mask = bswap32(o_mask);
+ if (flags & TCG_BSWAP_OS) {
+ z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
+ s_mask = INT32_MIN;
+ } else if (!(flags & TCG_BSWAP_OZ)) {
+ z_mask |= MAKE_64BIT_MASK(32, 32);
+ }
break;
case INDEX_op_bswap64:
z_mask = bswap64(z_mask);
- sign = INT64_MIN;
+ o_mask = bswap64(o_mask);
break;
default:
g_assert_not_reached();
}
- s_mask = 0;
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
- case TCG_BSWAP_OZ:
- break;
- case TCG_BSWAP_OS:
- /* If the sign bit may be 1, force all the bits above to 1. */
- if (z_mask & sign) {
- z_mask |= sign;
- }
- /* The value and therefore s_mask is explicitly sign-extended. */
- s_mask = sign;
- break;
- default:
- /* The high bits are undefined: force all bits above the sign to 1. */
- z_mask |= sign << 1;
- break;
- }
-
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
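A worked sketch of the bswap16 mask propagation above, using the host builtin in place of QEMU's bswap16() helper; the flag handling mirrors TCG_BSWAP_OS/TCG_BSWAP_OZ.

```c
#include <stdint.h>
#include <stdbool.h>

/* Swap the may-be-one mask like the value, then fix up the high bits:
   sign-extend for OS, keep known zeros for OZ, otherwise the high bits
   are undefined and may be 1. */
static uint64_t bswap16_z_mask(uint64_t z_mask, bool os, bool oz)
{
    uint64_t m = __builtin_bswap16((uint16_t)z_mask);
    if (os) {
        return (uint64_t)(int64_t)(int16_t)m;   /* TCG_BSWAP_OS */
    }
    return oz ? m : (m | ~(uint64_t)0xffff);
}

/* o_mask swaps and sign-extends the same way, but without OS it never
   gains the high bits: undefined bits may be 1, yet are not known 1. */
```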
static bool fold_call(OptContext *ctx, TCGOp *op)
@@ -1826,7 +1869,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
int ofs = op->args[3];
int len = op->args[4];
int width = 8 * tcg_type_size(ctx->type);
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
if (ti_is_const(t1) && ti_is_const(t2)) {
return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1861,7 +1904,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
}
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_divide(OptContext *ctx, TCGOp *op)
@@ -1876,7 +1921,7 @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
static bool fold_dup(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1])) {
- uint64_t t = arg_info(op->args[1])->val;
+ uint64_t t = arg_const_val(op->args[1]);
t = dup_const(TCGOP_VECE(op), t);
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1886,8 +1931,8 @@ static bool fold_dup(OptContext *ctx, TCGOp *op)
static bool fold_dup2(OptContext *ctx, TCGOp *op)
{
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
- arg_info(op->args[2])->val);
+ uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
+ arg_const_val(op->args[2]));
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
}
@@ -1900,12 +1945,10 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
- if (fold_const2_commutative(ctx, op) ||
- fold_xi_to_x(ctx, op, -1) ||
- fold_xi_to_not(ctx, op, 0)) {
+ if (fold_const2_commutative(ctx, op)) {
return true;
}
@@ -1930,13 +1973,17 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
}
t1 = arg_info(op->args[1]);
+
+ z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
+ o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_extract(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask_old, z_mask;
+ uint64_t z_mask, o_mask, a_mask;
TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
@@ -1946,37 +1993,41 @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
extract64(ti_const_val(t1), pos, len));
}
- z_mask_old = t1->z_mask;
- z_mask = extract64(z_mask_old, pos, len);
- if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
- return true;
- }
+ z_mask = extract64(t1->z_mask, pos, len);
+ o_mask = extract64(t1->o_mask, pos, len);
+ a_mask = pos ? -1 : t1->z_mask ^ z_mask;
- return fold_masks_z(ctx, op, z_mask);
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
}
static bool fold_extract2(OptContext *ctx, TCGOp *op)
{
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
- uint64_t v1 = arg_info(op->args[1])->val;
- uint64_t v2 = arg_info(op->args[2])->val;
- int shr = op->args[3];
+ TempOptInfo *t1 = arg_info(op->args[1]);
+ TempOptInfo *t2 = arg_info(op->args[2]);
+ uint64_t z1 = t1->z_mask;
+ uint64_t z2 = t2->z_mask;
+ uint64_t o1 = t1->o_mask;
+ uint64_t o2 = t2->o_mask;
+ int shr = op->args[3];
- if (ctx->type == TCG_TYPE_I32) {
- v1 = (uint32_t)v1 >> shr;
- v2 = (uint64_t)((int32_t)v2 << (32 - shr));
- } else {
- v1 >>= shr;
- v2 <<= 64 - shr;
- }
- return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
+ if (ctx->type == TCG_TYPE_I32) {
+ z1 = (uint32_t)z1 >> shr;
+ o1 = (uint32_t)o1 >> shr;
+ z2 = (uint64_t)((int32_t)z2 << (32 - shr));
+ o2 = (uint64_t)((int32_t)o2 << (32 - shr));
+ } else {
+ z1 >>= shr;
+ o1 >>= shr;
+ z2 <<= 64 - shr;
+ o2 <<= 64 - shr;
}
- return finish_folding(ctx, op);
+
+ return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
}
static bool fold_exts(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask, z_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1;
if (fold_const1(ctx, op)) {
@@ -1985,40 +2036,48 @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
s_mask = t1->s_mask;
switch (op->opc) {
case INDEX_op_ext_i32_i64:
s_mask |= INT32_MIN;
z_mask = (int32_t)z_mask;
+ o_mask = (int32_t)o_mask;
break;
default:
g_assert_not_reached();
}
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_extu(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask;
+ uint64_t z_mask, o_mask;
+ TempOptInfo *t1;
if (fold_const1(ctx, op)) {
return true;
}
- z_mask = arg_info(op->args[1])->z_mask;
+ t1 = arg_info(op->args[1]);
+ z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
+
switch (op->opc) {
case INDEX_op_extrl_i64_i32:
case INDEX_op_extu_i32_i64:
z_mask = (uint32_t)z_mask;
+ o_mask = (uint32_t)o_mask;
break;
case INDEX_op_extrh_i64_i32:
z_mask >>= 32;
+ o_mask >>= 32;
break;
default:
g_assert_not_reached();
}
- return fold_masks_z(ctx, op, z_mask);
+ return fold_masks_zo(ctx, op, z_mask, o_mask);
}
static bool fold_mb(OptContext *ctx, TCGOp *op)
@@ -2052,7 +2111,7 @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *tt, *ft;
int i;
@@ -2078,6 +2137,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
tt = arg_info(op->args[3]);
ft = arg_info(op->args[4]);
z_mask = tt->z_mask | ft->z_mask;
+ o_mask = tt->o_mask & ft->o_mask;
s_mask = tt->s_mask & ft->s_mask;
if (ti_is_const(tt) && ti_is_const(ft)) {
@@ -2100,7 +2160,7 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
}
}
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_mul(OptContext *ctx, TCGOp *op)
@@ -2127,8 +2187,8 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
swap_commutative(op->args[0], &op->args[2], &op->args[3]);
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint64_t a = arg_info(op->args[2])->val;
- uint64_t b = arg_info(op->args[3])->val;
+ uint64_t a = arg_const_val(op->args[2]);
+ uint64_t b = arg_const_val(op->args[3]);
uint64_t h, l;
TCGArg rl, rh;
TCGOp *op2;
@@ -2171,16 +2231,22 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, -1)) {
return true;
}
- s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+
+ z_mask = ~(t1->o_mask & t2->o_mask);
+ o_mask = ~(t1->z_mask & t2->z_mask);
+ s_mask = t1->s_mask & t2->s_mask;
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
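The mask identities used for nand above, as standalone helpers; nor (below) is the dual with | in place of &.

```c
#include <stdint.h>

/* nand = ~(a & b): the result may be 0 only where both inputs may be 1,
   and must be 1 wherever at least one input is known 0. */
static uint64_t nand_z_mask(uint64_t o1, uint64_t o2) { return ~(o1 & o2); }
static uint64_t nand_o_mask(uint64_t z1, uint64_t z2) { return ~(z1 & z2); }
```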
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
@@ -2199,29 +2265,39 @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask;
+ TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
fold_xi_to_not(ctx, op, 0)) {
return true;
}
- s_mask = arg_info(op->args[1])->s_mask
- & arg_info(op->args[2])->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+ t1 = arg_info(op->args[1]);
+ t2 = arg_info(op->args[2]);
+
+ z_mask = ~(t1->o_mask | t2->o_mask);
+ o_mask = ~(t1->z_mask | t2->z_mask);
+ s_mask = t1->s_mask & t2->s_mask;
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
static bool fold_not(OptContext *ctx, TCGOp *op)
{
+ TempOptInfo *t1;
+
if (fold_const1(ctx, op)) {
return true;
}
- return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
+
+ t1 = arg_info(op->args[1]);
+ return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
}
static bool fold_or(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask, a_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -2232,20 +2308,23 @@ static bool fold_or(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
+
z_mask = t1->z_mask | t2->z_mask;
+ o_mask = t1->o_mask | t2->o_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ /* Affected bits are those not known one, masked by those known zero. */
+ a_mask = ~t1->o_mask & t2->z_mask;
+
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
static bool fold_orc(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask;
+ uint64_t z_mask, o_mask, s_mask, a_mask;
TempOptInfo *t1, *t2;
- if (fold_const2(ctx, op) ||
- fold_xx_to_i(ctx, op, -1) ||
- fold_xi_to_x(ctx, op, -1) ||
- fold_ix_to_not(ctx, op, 0)) {
+ if (fold_const2(ctx, op)) {
return true;
}
@@ -2268,10 +2347,20 @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
return fold_or(ctx, op);
}
-
+ if (fold_xx_to_i(ctx, op, -1) ||
+ fold_ix_to_not(ctx, op, 0)) {
+ return true;
+ }
t1 = arg_info(op->args[1]);
+
+ z_mask = t1->z_mask | ~t2->o_mask;
+ o_mask = t1->o_mask | ~t2->z_mask;
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_s(ctx, op, s_mask);
+
+ /* Affected bits are those not known one, masked by those known one. */
+ a_mask = ~t1->o_mask & t2->o_mask;
+
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
@@ -2330,7 +2419,7 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
}
a_zmask = arg_info(op->args[1])->z_mask;
- b_val = arg_info(op->args[2])->val;
+ b_val = arg_const_val(op->args[2]);
cond = op->args[3];
if (ctx->type == TCG_TYPE_I32) {
@@ -2418,7 +2507,7 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
}
src2 = op->args[2];
- val = arg_info(src2)->val;
+ val = arg_const_val(src2);
if (!is_power_of_2(val)) {
return;
}
@@ -2594,7 +2683,7 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask, s_mask_old;
+ uint64_t z_mask, o_mask, s_mask, a_mask;
TempOptInfo *t1 = arg_info(op->args[1]);
int pos = op->args[2];
int len = op->args[3];
@@ -2604,21 +2693,19 @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
sextract64(ti_const_val(t1), pos, len));
}
- s_mask_old = t1->s_mask;
- s_mask = s_mask_old >> pos;
+ s_mask = t1->s_mask >> pos;
s_mask |= -1ull << (len - 1);
-
- if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
- return true;
- }
+ a_mask = pos ? -1 : s_mask & ~t1->s_mask;
z_mask = sextract64(t1->z_mask, pos, len);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ o_mask = sextract64(t1->o_mask, pos, len);
+
+ return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
- uint64_t s_mask, z_mask;
+ uint64_t s_mask, z_mask, o_mask;
TempOptInfo *t1, *t2;
if (fold_const2(ctx, op) ||
@@ -2631,14 +2718,16 @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
t2 = arg_info(op->args[2]);
s_mask = t1->s_mask;
z_mask = t1->z_mask;
+ o_mask = t1->o_mask;
if (ti_is_const(t2)) {
int sh = ti_const_val(t2);
z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+ o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
switch (op->opc) {
@@ -2669,7 +2758,7 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
TCGOpcode neg_op;
bool have_neg;
- if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
+ if (!arg_is_const_val(op->args[1], 0)) {
return false;
}
@@ -2719,7 +2808,7 @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
/* Fold sub r,x,i to add r,x,-i */
if (arg_is_const(op->args[2])) {
- uint64_t val = arg_info(op->args[2])->val;
+ uint64_t val = arg_const_val(op->args[2]);
op->opc = INDEX_op_add;
op->args[2] = arg_new_constant(ctx, -val);
@@ -2984,7 +3073,7 @@ static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
- uint64_t z_mask, s_mask;
+ uint64_t z_mask, o_mask, s_mask;
TempOptInfo *t1, *t2;
if (fold_const2_commutative(ctx, op) ||
@@ -2996,9 +3085,12 @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
t1 = arg_info(op->args[1]);
t2 = arg_info(op->args[2]);
- z_mask = t1->z_mask | t2->z_mask;
+
+ z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
+ o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
s_mask = t1->s_mask & t2->s_mask;
- return fold_masks_zs(ctx, op, z_mask, s_mask);
+
+ return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
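The xor identities above as standalone helpers: a result bit is known 0 where the inputs are both known 0 or both known 1, and known 1 where exactly one side is known 1 and the other known 0.

```c
#include <stdint.h>

static uint64_t xor_z_mask(uint64_t z1, uint64_t o1,
                           uint64_t z2, uint64_t o2)
{
    /* may be 1 unless both known 0 (~(z1|z2)) or both known 1 (o1&o2) */
    return (z1 | z2) & ~(o1 & o2);
}

static uint64_t xor_o_mask(uint64_t z1, uint64_t o1,
                           uint64_t z2, uint64_t o2)
{
    /* must be 1 where one side is known 1 and the other known 0 */
    return (o1 & ~z2) | (o2 & ~z1);
}
```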
/* Propagate constants and copies, fold constant expressions. */
diff --git a/tcg/perf.c b/tcg/perf.c
index 4e8d2c1..8fa5fa9 100644
--- a/tcg/perf.c
+++ b/tcg/perf.c
@@ -334,7 +334,7 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
/* FIXME: This replicates the restore_state_to_opc() logic. */
q[insn].address = gen_insn_data[insn * INSN_START_WORDS + 0];
if (tb_cflags(tb) & CF_PCREL) {
- q[insn].address |= (guest_pc & qemu_target_page_mask());
+ q[insn].address |= guest_pc & TARGET_PAGE_MASK;
}
q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0);
}
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 2e94778..b8b23d4 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2440,10 +2440,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_shri32(s, TCG_REG_R0, addr,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_shri64(s, TCG_REG_R0, addr,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
}
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
@@ -2480,7 +2480,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
a_bits = s_bits;
}
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
- (32 - a_bits) & 31, 31 - s->page_bits);
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else {
TCGReg t = addr;
@@ -2501,13 +2501,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Mask the address for the requested alignment. */
if (addr_type == TCG_TYPE_I32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
- (32 - a_bits) & 31, 31 - s->page_bits);
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else if (a_bits == 0) {
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
- 64 - s->page_bits, s->page_bits - a_bits);
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
}
}
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index f9417d1..31b9f7d 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1706,7 +1706,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
@@ -1722,7 +1722,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
addr_adj, addr_reg, s_mask - a_mask);
}
- compare_mask = s->page_mask | a_mask;
+ compare_mask = TARGET_PAGE_MASK | a_mask;
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
} else {
@@ -2502,7 +2502,7 @@ static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
}
}
if (ofs + len == 32) {
- tgen_shli(s, TCG_TYPE_I32, a0, a1, ofs);
+ tgen_shri(s, TCG_TYPE_I32, a0, a1, ofs);
return;
}
if (len == 1) {
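The identity behind the shli-to-shri fix above: when ofs + len == 32 the extracted field reaches the top bit of a 32-bit value, so the unsigned extract is exactly a logical right shift.

```c
#include <stdint.h>

/* extract32(x, ofs, 32 - ofs) == x >> ofs for a 32-bit x. */
static uint32_t extract_top_field(uint32_t x, unsigned ofs)
{
    return x >> ofs;
}
```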
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 7ca0071..84a9e73 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -2004,7 +2004,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addr_reg = addr_reg;
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
- s->page_bits - CPU_TLB_ENTRY_BITS);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
@@ -2016,7 +2016,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* byte of the access.
*/
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
- tlb_mask = (uint64_t)s->page_mask | a_mask;
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
} else {
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 9e004fb..5e5c3f1 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -1120,7 +1120,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the page index, shifted into place for tlb index. */
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
- s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
@@ -1136,7 +1136,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
h->base = TCG_REG_T1;
/* Mask out the page offset, except for the required alignment. */
- compare_mask = s->page_mask | a_mask;
+ compare_mask = TARGET_PAGE_MASK | a_mask;
if (check_fit_tl(compare_mask, 13)) {
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
} else {
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index d32a4f1..2d18454 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -57,30 +57,39 @@ static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
tcg_debug_assert((ofs & max_align) == 0);
}
-/* Verify vector overlap rules for two operands. */
-static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
+/*
+ * Verify vector overlap rules for two operands.
+ * When dbase and abase are not the same pointer, we cannot check for
+ * overlap at compile-time, but the runtime restrictions remain.
+ */
+static void check_overlap_2(TCGv_ptr dbase, uint32_t d,
+ TCGv_ptr abase, uint32_t a, uint32_t s)
{
- tcg_debug_assert(d == a || d + s <= a || a + s <= d);
+ tcg_debug_assert(dbase != abase || d == a || d + s <= a || a + s <= d);
}
/* Verify vector overlap rules for three operands. */
-static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
+static void check_overlap_3(TCGv_ptr dbase, uint32_t d,
+ TCGv_ptr abase, uint32_t a,
+ TCGv_ptr bbase, uint32_t b, uint32_t s)
{
- check_overlap_2(d, a, s);
- check_overlap_2(d, b, s);
- check_overlap_2(a, b, s);
+ check_overlap_2(dbase, d, abase, a, s);
+ check_overlap_2(dbase, d, bbase, b, s);
+ check_overlap_2(abase, a, bbase, b, s);
}
/* Verify vector overlap rules for four operands. */
-static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
- uint32_t c, uint32_t s)
+static void check_overlap_4(TCGv_ptr dbase, uint32_t d,
+ TCGv_ptr abase, uint32_t a,
+ TCGv_ptr bbase, uint32_t b,
+ TCGv_ptr cbase, uint32_t c, uint32_t s)
{
- check_overlap_2(d, a, s);
- check_overlap_2(d, b, s);
- check_overlap_2(d, c, s);
- check_overlap_2(a, b, s);
- check_overlap_2(a, c, s);
- check_overlap_2(b, c, s);
+ check_overlap_2(dbase, d, abase, a, s);
+ check_overlap_2(dbase, d, bbase, b, s);
+ check_overlap_2(dbase, d, cbase, c, s);
+ check_overlap_2(abase, a, bbase, b, s);
+ check_overlap_2(abase, a, cbase, c, s);
+ check_overlap_2(bbase, b, cbase, c, s);
}
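The overlap rule itself is unchanged: within a single base pointer, two operand regions must be either identical or disjoint. A sketch of that predicate follows; with distinct bases the offsets are not comparable at translation time, so the assert is skipped while the runtime restriction still applies.

```c
#include <stdint.h>
#include <stdbool.h>

/* Regions [d, d+s) and [a, a+s) off the same base: equal or disjoint. */
static bool regions_ok(uint32_t d, uint32_t a, uint32_t s)
{
    return d == a || d + s <= a || a + s <= d;
}
```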
/* Create a descriptor from components. */
@@ -124,9 +133,10 @@ uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
}
/* Generate a call to a gvec-style helper with two vector operands. */
-void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
- uint32_t oprsz, uint32_t maxsz, int32_t data,
- gen_helper_gvec_2 *fn)
+static void expand_2_ool(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_2 *fn)
{
TCGv_ptr a0, a1;
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
@@ -134,8 +144,8 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
a0 = tcg_temp_ebb_new_ptr();
a1 = tcg_temp_ebb_new_ptr();
- tcg_gen_addi_ptr(a0, tcg_env, dofs);
- tcg_gen_addi_ptr(a1, tcg_env, aofs);
+ tcg_gen_addi_ptr(a0, dbase, dofs);
+ tcg_gen_addi_ptr(a1, abase, aofs);
fn(a0, a1, desc);
@@ -143,6 +153,13 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
tcg_temp_free_ptr(a1);
}
+void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2 *fn)
+{
+ expand_2_ool(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, data, fn);
+}
+
/* Generate a call to a gvec-style helper with two vector operands
and one scalar operand. */
void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
@@ -165,9 +182,11 @@ void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
}
/* Generate a call to a gvec-style helper with three vector operands. */
-void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
- uint32_t oprsz, uint32_t maxsz, int32_t data,
- gen_helper_gvec_3 *fn)
+static void expand_3_ool(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_3 *fn)
{
TCGv_ptr a0, a1, a2;
TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
@@ -176,9 +195,9 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
a1 = tcg_temp_ebb_new_ptr();
a2 = tcg_temp_ebb_new_ptr();
- tcg_gen_addi_ptr(a0, tcg_env, dofs);
- tcg_gen_addi_ptr(a1, tcg_env, aofs);
- tcg_gen_addi_ptr(a2, tcg_env, bofs);
+ tcg_gen_addi_ptr(a0, dbase, dofs);
+ tcg_gen_addi_ptr(a1, abase, aofs);
+ tcg_gen_addi_ptr(a2, bbase, bofs);
fn(a0, a1, a2, desc);
@@ -187,6 +206,14 @@ void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
tcg_temp_free_ptr(a2);
}
+void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_3 *fn)
+{
+ expand_3_ool(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+ oprsz, maxsz, data, fn);
+}
+
/* Generate a call to a gvec-style helper with four vector operands. */
void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
@@ -380,7 +407,7 @@ static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
return q <= MAX_UNROLL;
}
-static void expand_clr(uint32_t dofs, uint32_t maxsz);
+static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz);
/* Duplicate C as per VECE. */
uint64_t (dup_const)(unsigned vece, uint64_t c)
@@ -483,8 +510,8 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
return 0;
}
-static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
- uint32_t maxsz, TCGv_vec t_vec)
+static void do_dup_store(TCGType type, TCGv_ptr dbase, uint32_t dofs,
+ uint32_t oprsz, uint32_t maxsz, TCGv_vec t_vec)
{
uint32_t i = 0;
@@ -496,7 +523,7 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
* are misaligned wrt the maximum vector size, so do that first.
*/
if (dofs & 8) {
- tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64);
+ tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64);
i += 8;
}
@@ -508,17 +535,17 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
* that e.g. size == 80 would be expanded with 2x32 + 1x16.
*/
for (; i + 32 <= oprsz; i += 32) {
- tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V256);
+ tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V256);
}
/* fallthru */
case TCG_TYPE_V128:
for (; i + 16 <= oprsz; i += 16) {
- tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V128);
+ tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V128);
}
break;
case TCG_TYPE_V64:
for (; i < oprsz; i += 8) {
- tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64);
+ tcg_gen_stl_vec(t_vec, dbase, dofs + i, TCG_TYPE_V64);
}
break;
default:
@@ -526,17 +553,18 @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
}
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
}
}
-/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
+/*
+ * Set OPRSZ bytes at DBASE + DOFS to replications of IN_32, IN_64 or IN_C.
* Only one of IN_32 or IN_64 may be set;
* IN_C is used if IN_32 and IN_64 are unset.
*/
-static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
- uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64,
- uint64_t in_c)
+static void do_dup(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ uint32_t oprsz, uint32_t maxsz,
+ TCGv_i32 in_32, TCGv_i64 in_64, uint64_t in_c)
{
TCGType type;
TCGv_i64 t_64;
@@ -574,7 +602,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
} else {
tcg_gen_dupi_vec(vece, t_vec, in_c);
}
- do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+ do_dup_store(type, dbase, dofs, oprsz, maxsz, t_vec);
return;
}
@@ -618,14 +646,14 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
/* Implement inline if we picked an implementation size above. */
if (t_32) {
for (i = 0; i < oprsz; i += 4) {
- tcg_gen_st_i32(t_32, tcg_env, dofs + i);
+ tcg_gen_st_i32(t_32, dbase, dofs + i);
}
tcg_temp_free_i32(t_32);
goto done;
}
if (t_64) {
for (i = 0; i < oprsz; i += 8) {
- tcg_gen_st_i64(t_64, tcg_env, dofs + i);
+ tcg_gen_st_i64(t_64, dbase, dofs + i);
}
tcg_temp_free_i64(t_64);
goto done;
@@ -634,7 +662,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
/* Otherwise implement out of line. */
t_ptr = tcg_temp_ebb_new_ptr();
- tcg_gen_addi_ptr(t_ptr, tcg_env, dofs);
+ tcg_gen_addi_ptr(t_ptr, dbase, dofs);
/*
* This may be expand_clr for the tail of an operation, e.g.
@@ -703,31 +731,32 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
done:
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
}
}
/* Likewise, but with zero. */
-static void expand_clr(uint32_t dofs, uint32_t maxsz)
+static void expand_clr(TCGv_ptr dbase, uint32_t dofs, uint32_t maxsz)
{
- do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0);
+ do_dup(MO_8, dbase, dofs, maxsz, maxsz, NULL, NULL, 0);
}
/* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
-static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
- bool load_dest, void (*fni)(TCGv_i32, TCGv_i32))
+static void expand_2_i32(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase,
+ uint32_t aofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32))
{
TCGv_i32 t0 = tcg_temp_new_i32();
TCGv_i32 t1 = tcg_temp_new_i32();
uint32_t i;
for (i = 0; i < oprsz; i += 4) {
- tcg_gen_ld_i32(t0, tcg_env, aofs + i);
+ tcg_gen_ld_i32(t0, abase, aofs + i);
if (load_dest) {
- tcg_gen_ld_i32(t1, tcg_env, dofs + i);
+ tcg_gen_ld_i32(t1, dbase, dofs + i);
}
fni(t1, t0);
- tcg_gen_st_i32(t1, tcg_env, dofs + i);
+ tcg_gen_st_i32(t1, dbase, dofs + i);
}
tcg_temp_free_i32(t0);
tcg_temp_free_i32(t1);
@@ -775,8 +804,10 @@ static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
}
/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
-static void expand_3_i32(uint32_t dofs, uint32_t aofs,
- uint32_t bofs, uint32_t oprsz, bool load_dest,
+static void expand_3_i32(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, bool load_dest,
void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
{
TCGv_i32 t0 = tcg_temp_new_i32();
@@ -785,13 +816,13 @@ static void expand_3_i32(uint32_t dofs, uint32_t aofs,
uint32_t i;
for (i = 0; i < oprsz; i += 4) {
- tcg_gen_ld_i32(t0, tcg_env, aofs + i);
- tcg_gen_ld_i32(t1, tcg_env, bofs + i);
+ tcg_gen_ld_i32(t0, abase, aofs + i);
+ tcg_gen_ld_i32(t1, bbase, bofs + i);
if (load_dest) {
- tcg_gen_ld_i32(t2, tcg_env, dofs + i);
+ tcg_gen_ld_i32(t2, dbase, dofs + i);
}
fni(t2, t0, t1);
- tcg_gen_st_i32(t2, tcg_env, dofs + i);
+ tcg_gen_st_i32(t2, dbase, dofs + i);
}
tcg_temp_free_i32(t2);
tcg_temp_free_i32(t1);
@@ -877,20 +908,21 @@ static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
}
/* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
-static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
- bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
+static void expand_2_i64(TCGv_ptr dbase, uint32_t dofs, TCGv_ptr abase,
+ uint32_t aofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64))
{
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
uint32_t i;
for (i = 0; i < oprsz; i += 8) {
- tcg_gen_ld_i64(t0, tcg_env, aofs + i);
+ tcg_gen_ld_i64(t0, abase, aofs + i);
if (load_dest) {
- tcg_gen_ld_i64(t1, tcg_env, dofs + i);
+ tcg_gen_ld_i64(t1, dbase, dofs + i);
}
fni(t1, t0);
- tcg_gen_st_i64(t1, tcg_env, dofs + i);
+ tcg_gen_st_i64(t1, dbase, dofs + i);
}
tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
@@ -938,8 +970,10 @@ static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
}
/* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
-static void expand_3_i64(uint32_t dofs, uint32_t aofs,
- uint32_t bofs, uint32_t oprsz, bool load_dest,
+static void expand_3_i64(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, bool load_dest,
void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
{
TCGv_i64 t0 = tcg_temp_new_i64();
@@ -948,13 +982,13 @@ static void expand_3_i64(uint32_t dofs, uint32_t aofs,
uint32_t i;
for (i = 0; i < oprsz; i += 8) {
- tcg_gen_ld_i64(t0, tcg_env, aofs + i);
- tcg_gen_ld_i64(t1, tcg_env, bofs + i);
+ tcg_gen_ld_i64(t0, abase, aofs + i);
+ tcg_gen_ld_i64(t1, bbase, bofs + i);
if (load_dest) {
- tcg_gen_ld_i64(t2, tcg_env, dofs + i);
+ tcg_gen_ld_i64(t2, dbase, dofs + i);
}
fni(t2, t0, t1);
- tcg_gen_st_i64(t2, tcg_env, dofs + i);
+ tcg_gen_st_i64(t2, dbase, dofs + i);
}
tcg_temp_free_i64(t2);
tcg_temp_free_i64(t1);
@@ -1040,7 +1074,8 @@ static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
}
/* Expand OPSZ bytes worth of two-operand operations using host vectors. */
-static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+static void expand_2_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
uint32_t oprsz, uint32_t tysz, TCGType type,
bool load_dest,
void (*fni)(unsigned, TCGv_vec, TCGv_vec))
@@ -1049,12 +1084,12 @@ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_vec t0 = tcg_temp_new_vec(type);
TCGv_vec t1 = tcg_temp_new_vec(type);
- tcg_gen_ld_vec(t0, tcg_env, aofs + i);
+ tcg_gen_ld_vec(t0, abase, aofs + i);
if (load_dest) {
- tcg_gen_ld_vec(t1, tcg_env, dofs + i);
+ tcg_gen_ld_vec(t1, dbase, dofs + i);
}
fni(vece, t1, t0);
- tcg_gen_st_vec(t1, tcg_env, dofs + i);
+ tcg_gen_st_vec(t1, dbase, dofs + i);
}
}
@@ -1098,8 +1133,9 @@ static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
}
/* Expand OPSZ bytes worth of three-operand operations using host vectors. */
-static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
- uint32_t bofs, uint32_t oprsz,
+static void expand_3_vec(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs, uint32_t oprsz,
uint32_t tysz, TCGType type, bool load_dest,
void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
{
@@ -1108,13 +1144,13 @@ static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_vec t1 = tcg_temp_new_vec(type);
TCGv_vec t2 = tcg_temp_new_vec(type);
- tcg_gen_ld_vec(t0, tcg_env, aofs + i);
- tcg_gen_ld_vec(t1, tcg_env, bofs + i);
+ tcg_gen_ld_vec(t0, abase, aofs + i);
+ tcg_gen_ld_vec(t1, bbase, bofs + i);
if (load_dest) {
- tcg_gen_ld_vec(t2, tcg_env, dofs + i);
+ tcg_gen_ld_vec(t2, dbase, dofs + i);
}
fni(vece, t2, t0, t1);
- tcg_gen_st_vec(t2, tcg_env, dofs + i);
+ tcg_gen_st_vec(t2, dbase, dofs + i);
}
}
@@ -1196,8 +1232,9 @@ static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
}
/* Expand a vector two-operand operation. */
-void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
- uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
+void tcg_gen_gvec_2_var(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
{
const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
@@ -1205,7 +1242,7 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs);
- check_overlap_2(dofs, aofs, maxsz);
+ check_overlap_2(dbase, dofs, abase, aofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1218,8 +1255,8 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
* that e.g. size == 80 would be expanded with 2x32 + 1x16.
*/
some = QEMU_ALIGN_DOWN(oprsz, 32);
- expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
- g->load_dest, g->fniv);
+ expand_2_vec(g->vece, dbase, dofs, abase, aofs, some, 32,
+ TCG_TYPE_V256, g->load_dest, g->fniv);
if (some == oprsz) {
break;
}
@@ -1229,22 +1266,25 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
maxsz -= some;
/* fallthru */
case TCG_TYPE_V128:
- expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
- g->load_dest, g->fniv);
+ expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 16,
+ TCG_TYPE_V128, g->load_dest, g->fniv);
break;
case TCG_TYPE_V64:
- expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
- g->load_dest, g->fniv);
+ expand_2_vec(g->vece, dbase, dofs, abase, aofs, oprsz, 8,
+ TCG_TYPE_V64, g->load_dest, g->fniv);
break;
case 0:
if (g->fni8 && check_size_impl(oprsz, 8)) {
- expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8);
+ expand_2_i64(dbase, dofs, abase, aofs,
+ oprsz, g->load_dest, g->fni8);
} else if (g->fni4 && check_size_impl(oprsz, 4)) {
- expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4);
+ expand_2_i32(dbase, dofs, abase, aofs,
+ oprsz, g->load_dest, g->fni4);
} else {
assert(g->fno != NULL);
- tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
+ expand_2_ool(dbase, dofs, abase, aofs,
+ oprsz, maxsz, g->data, g->fno);
oprsz = maxsz;
}
break;
@@ -1255,10 +1295,16 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
}
}
+void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
+{
+ tcg_gen_gvec_2_var(tcg_env, dofs, tcg_env, aofs, oprsz, maxsz, g);
+}
+
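
The overlap checks gain base-pointer arguments because two offsets are only
comparable when taken against the same base. A minimal sketch of what the
base-aware helper might look like (an assumption; the real check_overlap_2 is
defined earlier in this file, outside the hunks shown):

    static void check_overlap_2(TCGv_ptr d, uint32_t dofs,
                                TCGv_ptr a, uint32_t aofs, uint32_t s)
    {
        /* Distinct bases are taken not to alias; with a shared base,
         * the two regions must be identical or disjoint. */
        tcg_debug_assert(d != a || dofs == aofs ||
                         dofs + s <= aofs || aofs + s <= dofs);
    }
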
/* Expand a vector operation with two vectors and an immediate. */
void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
uint32_t maxsz, int64_t c, const GVecGen2i *g)
@@ -1269,7 +1315,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs);
- check_overlap_2(dofs, aofs, maxsz);
+ check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1324,7 +1370,7 @@ void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -1335,7 +1381,7 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
TCGType type;
check_size_align(oprsz, maxsz, dofs | aofs);
- check_overlap_2(dofs, aofs, maxsz);
+ check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1401,13 +1447,15 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
}
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
/* Expand a vector three-operand operation. */
-void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
- uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+void tcg_gen_gvec_3_var(TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
{
const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
@@ -1415,7 +1463,7 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs | bofs);
- check_overlap_3(dofs, aofs, bofs, maxsz);
+ check_overlap_3(dbase, dofs, abase, aofs, bbase, bofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1428,8 +1476,8 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
* that e.g. size == 80 would be expanded with 2x32 + 1x16.
*/
some = QEMU_ALIGN_DOWN(oprsz, 32);
- expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
- g->load_dest, g->fniv);
+ expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+ some, 32, TCG_TYPE_V256, g->load_dest, g->fniv);
if (some == oprsz) {
break;
}
@@ -1440,23 +1488,25 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
maxsz -= some;
/* fallthru */
case TCG_TYPE_V128:
- expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
- g->load_dest, g->fniv);
+ expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, 16, TCG_TYPE_V128, g->load_dest, g->fniv);
break;
case TCG_TYPE_V64:
- expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
- g->load_dest, g->fniv);
+ expand_3_vec(g->vece, dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, 8, TCG_TYPE_V64, g->load_dest, g->fniv);
break;
case 0:
if (g->fni8 && check_size_impl(oprsz, 8)) {
- expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
+ expand_3_i64(dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, g->load_dest, g->fni8);
} else if (g->fni4 && check_size_impl(oprsz, 4)) {
- expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
+ expand_3_i32(dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, g->load_dest, g->fni4);
} else {
assert(g->fno != NULL);
- tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz,
- maxsz, g->data, g->fno);
+ expand_3_ool(dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, maxsz, g->data, g->fno);
oprsz = maxsz;
}
break;
@@ -1467,10 +1517,17 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
}
}
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+{
+ tcg_gen_gvec_3_var(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+ oprsz, maxsz, g);
+}
+
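
The _var entry points let a front end address vector state through any
pointer, not just tcg_env. A hypothetical caller, with the base computation,
offsets, and GVecGen3 descriptor all assumed for illustration:

    /* Operate on a region reached via a base computed at translate time. */
    TCGv_ptr base = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(base, tcg_env, region_ofs);    /* region_ofs assumed */
    tcg_gen_gvec_3_var(base, dofs, base, aofs, base, bofs,
                       oprsz, maxsz, &g);
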
/* Expand a vector operation with three vectors and an immediate. */
void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz, int64_t c,
@@ -1482,7 +1539,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs | bofs);
- check_overlap_3(dofs, aofs, bofs, maxsz);
+ check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1536,7 +1593,7 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -1550,7 +1607,8 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
- check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+ check_overlap_4(tcg_env, dofs, tcg_env, aofs,
+ tcg_env, bofs, tcg_env, cofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1605,7 +1663,7 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -1620,7 +1678,8 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
- check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+ check_overlap_4(tcg_env, dofs, tcg_env, aofs,
+ tcg_env, bofs, tcg_env, cofs, maxsz);
type = 0;
if (g->fniv) {
@@ -1674,7 +1733,7 @@ void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -1687,8 +1746,9 @@ static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
tcg_gen_mov_vec(a, b);
}
-void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
- uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_mov_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
{
static const GVecGen2 g = {
.fni8 = tcg_gen_mov_i64,
@@ -1696,14 +1756,22 @@ void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
.fno = gen_helper_gvec_mov,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
};
- if (dofs != aofs) {
- tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
- } else {
+
+ if (dofs == aofs && dbase == abase) {
check_size_align(oprsz, maxsz, dofs);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(dbase, dofs + oprsz, maxsz - oprsz);
}
+ return;
}
+
+ tcg_gen_gvec_2_var(dbase, dofs, abase, aofs, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_mov_var(vece, tcg_env, dofs, tcg_env, aofs, oprsz, maxsz);
}
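
Note that the early-return path fires only when destination and source are
syntactically identical (same TCGv_ptr and same offset); equal addresses held
in two distinct temporaries are still expanded as a copy. A hypothetical use
copying between regions behind different bases (names assumed):

    TCGv_ptr src = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(src, tcg_env, src_region_ofs); /* src_region_ofs assumed */
    tcg_gen_gvec_mov_var(MO_8, tcg_env, dofs, src, aofs, 16, 16);
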
void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
@@ -1711,7 +1779,7 @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
{
check_size_align(oprsz, maxsz, dofs);
tcg_debug_assert(vece <= MO_32);
- do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+ do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0);
}
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
@@ -1719,7 +1787,7 @@ void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
{
check_size_align(oprsz, maxsz, dofs);
tcg_debug_assert(vece <= MO_64);
- do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+ do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0);
}
void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -1731,7 +1799,7 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
if (type != 0) {
TCGv_vec t_vec = tcg_temp_new_vec(type);
tcg_gen_dup_mem_vec(vece, t_vec, tcg_env, aofs);
- do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+ do_dup_store(type, tcg_env, dofs, oprsz, maxsz, t_vec);
} else if (vece <= MO_32) {
TCGv_i32 in = tcg_temp_ebb_new_i32();
switch (vece) {
@@ -1745,12 +1813,12 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_ld_i32(in, tcg_env, aofs);
break;
}
- do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+ do_dup(vece, tcg_env, dofs, oprsz, maxsz, in, NULL, 0);
tcg_temp_free_i32(in);
} else {
TCGv_i64 in = tcg_temp_ebb_new_i64();
tcg_gen_ld_i64(in, tcg_env, aofs);
- do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+ do_dup(vece, tcg_env, dofs, oprsz, maxsz, NULL, in, 0);
tcg_temp_free_i64(in);
}
} else if (vece == 4) {
@@ -1779,7 +1847,7 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_temp_free_i64(in1);
}
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
} else if (vece == 5) {
/* 256-bit duplicate. */
@@ -1822,18 +1890,24 @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
}
}
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
} else {
g_assert_not_reached();
}
}
+void tcg_gen_gvec_dup_imm_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ uint32_t oprsz, uint32_t maxsz, uint64_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(vece, dbase, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
uint32_t maxsz, uint64_t x)
{
- check_size_align(oprsz, maxsz, dofs);
- do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
+ tcg_gen_gvec_dup_imm_var(vece, tcg_env, dofs, oprsz, maxsz, x);
}
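
A small usage sketch of the immediate-dup entry point (offsets and sizes
illustrative): the constant is replicated across every vece-sized lane, and
the tail between oprsz and maxsz is cleared.

    /* Fill 16 bytes at dofs with 0xff in every byte lane, clear 16..31. */
    tcg_gen_gvec_dup_imm(MO_8, dofs, 16, 32, 0xff);
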
void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -1931,8 +2005,10 @@ void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 };
-void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
- uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_add_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz)
{
static const GVecGen3 g[4] = {
{ .fni8 = tcg_gen_vec_add8_i64,
@@ -1959,7 +2035,15 @@ void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
};
tcg_debug_assert(vece <= MO_64);
- tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+ tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_add_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+ oprsz, maxsz);
}
void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -2112,8 +2196,10 @@ void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
tcg_temp_free_i64(t2);
}
-void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
- uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+void tcg_gen_gvec_sub_var(unsigned vece, TCGv_ptr dbase, uint32_t dofs,
+ TCGv_ptr abase, uint32_t aofs,
+ TCGv_ptr bbase, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz)
{
static const GVecGen3 g[4] = {
{ .fni8 = tcg_gen_vec_sub8_i64,
@@ -2140,7 +2226,15 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
};
tcg_debug_assert(vece <= MO_64);
- tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+ tcg_gen_gvec_3_var(dbase, dofs, abase, aofs, bbase, bofs,
+ oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_sub_var(vece, tcg_env, dofs, tcg_env, aofs, tcg_env, bofs,
+ oprsz, maxsz);
}
static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 };
@@ -3149,7 +3243,7 @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs);
- check_overlap_2(dofs, aofs, maxsz);
+ check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
/* If the backend has a scalar expansion, great. */
type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64);
@@ -3255,7 +3349,7 @@ do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
clear_tail:
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -3769,10 +3863,10 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t some;
check_size_align(oprsz, maxsz, dofs | aofs | bofs);
- check_overlap_3(dofs, aofs, bofs, maxsz);
+ check_overlap_3(tcg_env, dofs, tcg_env, aofs, tcg_env, bofs, maxsz);
if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
- do_dup(MO_8, dofs, oprsz, maxsz,
+ do_dup(MO_8, tcg_env, dofs, oprsz, maxsz,
NULL, NULL, -(cond == TCG_COND_ALWAYS));
return;
}
@@ -3834,7 +3928,7 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
tcg_swap_vecop_list(hold_list);
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
@@ -3889,10 +3983,10 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
TCGType type;
check_size_align(oprsz, maxsz, dofs | aofs);
- check_overlap_2(dofs, aofs, maxsz);
+ check_overlap_2(tcg_env, dofs, tcg_env, aofs, maxsz);
if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
- do_dup(MO_8, dofs, oprsz, maxsz,
+ do_dup(MO_8, tcg_env, dofs, oprsz, maxsz,
NULL, NULL, -(cond == TCG_COND_ALWAYS));
return;
}
@@ -3975,7 +4069,7 @@ void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
}
if (oprsz < maxsz) {
- expand_clr(dofs + oprsz, maxsz - oprsz);
+ expand_clr(tcg_env, dofs + oprsz, maxsz - oprsz);
}
}
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index fa9e522..5484960 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -27,6 +27,7 @@
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
+#include "exec/target_page.h"
#include "exec/translation-block.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
@@ -40,7 +41,7 @@ static void check_max_alignment(unsigned a_bits)
* FIXME: Must keep the count up-to-date with "exec/tlb-flags.h".
*/
if (tcg_use_softmmu) {
- tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
+ tcg_debug_assert(a_bits + 5 <= TARGET_PAGE_BITS);
}
}
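
A worked instance of the bound, assuming 4 KiB pages (TARGET_PAGE_BITS == 12):
the low bits of a TLB comparator carry flag bits -- the count the FIXME above
ties to "exec/tlb-flags.h" -- so the enforced alignment may consume at most
TARGET_PAGE_BITS - 5 bits.

    /* With TARGET_PAGE_BITS == 12: a_bits <= 12 - 5 == 7, so the
     * strictest representable alignment is 1 << 7 == 128 bytes. */
    tcg_debug_assert(a_bits + 5 <= TARGET_PAGE_BITS);
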
diff --git a/tcg/tcg.c b/tcg/tcg.c
index ae27a26..afac55a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -34,6 +34,7 @@
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
+#include "exec/target_page.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
@@ -1330,8 +1331,9 @@ void *tcg_malloc_internal(TCGContext *s, int size)
p = s->pool_current;
if (!p) {
p = s->pool_first;
- if (!p)
+ if (!p) {
goto new_pool;
+ }
} else {
if (!p->next) {
new_pool:
@@ -1350,8 +1352,8 @@ void *tcg_malloc_internal(TCGContext *s, int size)
}
}
s->pool_current = p;
- s->pool_cur = p->data + size;
- s->pool_end = p->data + p->size;
+ s->pool_cur = (uintptr_t)p->data + size;
+ s->pool_end = (uintptr_t)p->data + p->size;
return p->data;
}
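
Switching pool_cur/pool_end from pointers to uintptr_t matches the inline fast
path, where the bump allocation becomes plain integer arithmetic. An
approximation of that fast path (paraphrased from tcg.h under the new
representation; not part of this patch):

    static inline void *tcg_malloc(int size)
    {
        TCGContext *s = tcg_ctx;
        uintptr_t ptr = s->pool_cur;

        size = QEMU_ALIGN_UP(size, 8);          /* minimum alignment */
        if (unlikely(ptr + size > s->pool_end)) {
            return tcg_malloc_internal(s, size);
        }
        s->pool_cur = ptr + size;
        return (void *)ptr;
    }
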
@@ -1363,7 +1365,7 @@ void tcg_pool_reset(TCGContext *s)
g_free(p);
}
s->pool_first_large = NULL;
- s->pool_cur = s->pool_end = NULL;
+ s->pool_cur = s->pool_end = 0;
s->pool_current = NULL;
}
@@ -5153,7 +5155,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
if (its->val_type == TEMP_VAL_CONST) {
/* Propagate constant via movi -> dupi. */
- tcg_target_ulong val = its->val;
+ tcg_target_ulong val = dup_const(vece, its->val);
if (IS_DEAD_ARG(1)) {
temp_dead(s, its);
}
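
The final hunk fixes constant propagation into dupi: the scalar constant must
be replicated across every vece-sized lane before it can stand in for a
vector, which is exactly what dup_const does. For example:

    uint64_t v8  = dup_const(MO_8,  0xab);   /* 0xabababababababab */
    uint64_t v16 = dup_const(MO_16, 0x1234); /* 0x1234123412341234 */
    uint64_t v32 = dup_const(MO_32, 0x1);    /* 0x0000000100000001 */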