Diffstat (limited to 'target/i386/tcg/emit.c.inc')
-rw-r--r-- | target/i386/tcg/emit.c.inc | 246
1 file changed, 244 insertions, 2 deletions
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 9b50419..fd17a9b 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -78,9 +78,26 @@ static void gen_NM_exception(DisasContext *s)
     gen_exception(s, EXCP07_PREX);
 }
 
-static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib)
+static void gen_lea_modrm(DisasContext *s, X86DecodedInsn *decode)
 {
-    TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib);
+    AddressParts *mem = &decode->mem;
+    TCGv ea;
+
+    ea = gen_lea_modrm_1(s, *mem, decode->e.vex_class == 12);
+    if (decode->e.special == X86_SPECIAL_BitTest) {
+        MemOp ot = decode->op[1].ot;
+        int poslen = 8 << ot;
+        int opn = decode->op[2].n;
+        TCGv ofs = tcg_temp_new();
+
+        /* Extract memory displacement from the second operand. */
+        assert(decode->op[2].unit == X86_OP_INT && decode->op[2].ot != MO_8);
+        tcg_gen_sextract_tl(ofs, cpu_regs[opn], 3, poslen - 3);
+        tcg_gen_andi_tl(ofs, ofs, -1 << ot);
+        tcg_gen_add_tl(s->A0, ea, ofs);
+        ea = s->A0;
+    }
+
     gen_lea_v_seg(s, ea, mem->def_seg, s->override);
 }
 
@@ -412,6 +429,32 @@ static void prepare_update3_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op,
     decode->cc_op = op;
 }
 
+/* Set up decode->cc_* to modify CF while keeping other flags unchanged. */
+static void prepare_update_cf(X86DecodedInsn *decode, DisasContext *s, TCGv cf)
+{
+    switch (s->cc_op) {
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
+        decode->cc_src2 = cpu_cc_src2;
+        decode->cc_src = cpu_cc_src;
+        decode->cc_op = CC_OP_ADCOX;
+        break;
+
+    case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+        decode->cc_src = cpu_cc_src;
+        decode->cc_op = CC_OP_ADCX;
+        break;
+
+    default:
+        decode->cc_src = tcg_temp_new();
+        gen_mov_eflags(s, decode->cc_src);
+        decode->cc_op = CC_OP_ADCX;
+        break;
+    }
+    decode->cc_dst = cf;
+}
+
 static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
 {
     MemOp ot = decode->op[0].ot;
@@ -1390,6 +1433,109 @@ static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
     tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
 }
 
+static TCGv gen_bt_mask(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+    TCGv mask = tcg_temp_new();
+
+    tcg_gen_andi_tl(s->T1, s->T1, (8 << ot) - 1);
+    tcg_gen_shl_tl(mask, tcg_constant_tl(1), s->T1);
+    return mask;
+}
+
+/* Expects truncated bit index in s->T1, 1 << s->T1 in MASK. */
+static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv mask)
+{
+    TCGv cf;
+
+    /*
+     * C is the result of the test, Z is unchanged, and the others
+     * are all undefined.
+     */
+    switch (s->cc_op) {
+    case CC_OP_DYNAMIC:
+    case CC_OP_CLR:
+    case CC_OP_EFLAGS:
+    case CC_OP_ADCX:
+    case CC_OP_ADOX:
+    case CC_OP_ADCOX:
+        /* Generate EFLAGS and replace the C bit. */
+        cf = tcg_temp_new();
+        tcg_gen_setcond_tl(TCG_COND_TSTNE, cf, src, mask);
+        prepare_update_cf(decode, s, cf);
+        break;
+    default:
+        /*
+         * Z was going to be computed from the non-zero status of CC_DST.
+         * We can get that same Z value (and the new C value) by leaving
+         * CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
+         * same width.
+         */
+        decode->cc_src = tcg_temp_new();
+        decode->cc_dst = cpu_cc_dst;
+        decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB;
+        tcg_gen_shr_tl(decode->cc_src, src, s->T1);
+        break;
+    }
+}
+
+static void gen_BT(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv mask = gen_bt_mask(s, decode);
+
+    gen_bt_flags(s, decode, s->T0, mask);
+}
+
+static void gen_BTC(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv old = tcg_temp_new();
+    TCGv mask = gen_bt_mask(s, decode);
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_fetch_xor_tl(old, s->A0, mask, s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_mov_tl(old, s->T0);
+        tcg_gen_xor_tl(s->T0, s->T0, mask);
+    }
+
+    gen_bt_flags(s, decode, old, mask);
+}
+
+static void gen_BTR(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv old = tcg_temp_new();
+    TCGv mask = gen_bt_mask(s, decode);
+
+    if (s->prefix & PREFIX_LOCK) {
+        TCGv maskc = tcg_temp_new();
+        tcg_gen_not_tl(maskc, mask);
+        tcg_gen_atomic_fetch_and_tl(old, s->A0, maskc, s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_mov_tl(old, s->T0);
+        tcg_gen_andc_tl(s->T0, s->T0, mask);
+    }
+
+    gen_bt_flags(s, decode, old, mask);
+}
+
+static void gen_BTS(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+    TCGv old = tcg_temp_new();
+    TCGv mask = gen_bt_mask(s, decode);
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_fetch_or_tl(old, s->A0, mask, s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_mov_tl(old, s->T0);
+        tcg_gen_or_tl(s->T0, s->T0, mask);
+    }
+
+    gen_bt_flags(s, decode, old, mask);
+}
+
 static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1642,6 +1788,102 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
+static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
+{
+#ifdef TARGET_X86_64
+    MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+    TCGv_i64 t0, t1;
+    TCGv_i128 cmp, val;
+
+    cmp = tcg_temp_new_i128();
+    val = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+    /* Only require atomic with LOCK; non-parallel handled in generator. */
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+    } else {
+        tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
+    }
+
+    tcg_gen_extr_i128_i64(s->T0, s->T1, val);
+
+    /* Determine success after the fact. */
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+    tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+    tcg_gen_or_i64(t0, t0, t1);
+
+    /* Update Z. */
+    gen_compute_eflags(s);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+
+    /*
+     * Extract the result values for the register pair.  We may do this
+     * unconditionally, because on success (Z=1), the old value matches
+     * the previous value in RDX:RAX.
+     */
+    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
+    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
+#else
+    abort();
+#endif
+}
+
+static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv_i64 cmp, val, old;
+    TCGv Z;
+
+    cmp = tcg_temp_new_i64();
+    val = tcg_temp_new_i64();
+    old = tcg_temp_new_i64();
+
+    /* Construct the comparison values from the register pair. */
+    tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+    /* Only require atomic with LOCK; non-parallel handled in generator. */
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
+    } else {
+        tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
+                                      s->mem_index, MO_TEUQ);
+    }
+
+    /* Set tmp0 to match the required value of Z. */
+    tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
+    Z = tcg_temp_new();
+    tcg_gen_trunc_i64_tl(Z, cmp);
+
+    /*
+     * Extract the result values for the register pair.
+     * For 32-bit, we may do this unconditionally, because on success (Z=1),
+     * the old value matches the previous value in EDX:EAX.  For x86_64,
+     * the store must be conditional, because we must leave the source
+     * registers unchanged on success, and zero-extend the writeback
+     * on failure (Z=0).
+     */
+    if (TARGET_LONG_BITS == 32) {
+        tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
+    } else {
+        TCGv zero = tcg_constant_tl(0);
+
+        tcg_gen_extr_i64_tl(s->T0, s->T1, old);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
+                           s->T0, cpu_regs[R_EAX]);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
+                           s->T1, cpu_regs[R_EDX]);
+    }
+
+    /* Update Z. */
+    gen_compute_eflags(s);
+    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
+}
+
 static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
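
For readers less familiar with the memory form of the bit-test instructions, the sextract/andi/add sequence added to gen_lea_modrm implements the x86 rule that a register bit index can address memory beyond (or before) the operand itself. Below is a minimal stand-alone sketch in plain C, not QEMU/TCG code; the helper name and the addresses used in main are made up for illustration. It mirrors the same arithmetic: sign-extend the bit index from the operand width, divide by 8 and align down to the operand size to get the byte displacement, and keep the low bits as the bit position inside the loaded word, as gen_bt_mask does.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustration only, not QEMU code: the BT/BTS/BTR/BTC memory-operand
 * arithmetic that gen_lea_modrm and gen_bt_mask generate above.
 * opsize_log2 plays the role of MemOp ot (1 = 16-bit, 2 = 32-bit, 3 = 64-bit).
 */
static void bt_mem_operand(int64_t base, int64_t bit_index, int opsize_log2,
                           int64_t *addr, int *bit_in_word)
{
    int poslen = 8 << opsize_log2;            /* operand size in bits */

    /* Sign-extend the bit index from poslen bits (the sextract step). */
    uint64_t shifted = (uint64_t)bit_index << (64 - poslen);
    int64_t idx = (int64_t)shifted >> (64 - poslen);

    /* Byte displacement: idx / 8, aligned down to the operand size. */
    int64_t ofs = (idx >> 3) & ~((1LL << opsize_log2) - 1);

    *addr = base + ofs;                       /* what ends up in s->A0 */
    *bit_in_word = bit_index & (poslen - 1);  /* what ends up in s->T1 */
}

int main(void)
{
    int64_t addr;
    int bit;

    /* BT WORD PTR [0x1000] with bit index 35: word at 0x1004, bit 3. */
    bt_mem_operand(0x1000, 35, 1, &addr, &bit);
    assert(addr == 0x1004 && bit == 3);

    /* A negative bit index reaches below the base address. */
    bt_mem_operand(0x1000, -1, 2, &addr, &bit);
    printf("addr=%#llx bit=%d\n", (long long)addr, bit);   /* 0xffc, 31 */
    return 0;
}

Under the same convention, 1 << bit_in_word corresponds to the mask that gen_bt_mask builds, which BT only tests and which BTS, BTR and BTC then OR, AND-NOT and XOR into the loaded word.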