diff options
Diffstat (limited to 'target/i386/tcg/emit.c.inc')
-rw-r--r-- | target/i386/tcg/emit.c.inc | 202 |
1 files changed, 135 insertions, 67 deletions
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664..1a7fab93 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. - */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv break; case X86_OP_SEG: /* Note that gen_movl_seg takes care of interrupt shadow and TF. */ - gen_movl_seg(s, op->n, s->T0); + gen_movl_seg(s, op->n, v, op->n == R_SS); break; case X86_OP_INT: if (op->has_ea) { @@ -1170,11 +1160,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) assume_cc_op(s, CC_OP_EFLAGS); } +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode); static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_ADC by transforming as follows: + * CC_ADC: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_ADD: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_ADD(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, c_in, s->T1); @@ -1693,22 +1700,22 @@ static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode) switch (jcc_op) { case JCC_O: /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */ + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); - tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); - tcg_gen_and_tl(s->tmp0, s->tmp0, newv); - tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + tcg_gen_xor_tl(cmp_lhs, s->cc_srcT, cmpv); + tcg_gen_and_tl(cmp_lhs, cmp_lhs, newv); + tcg_gen_sextract_tl(cmp_lhs, cmp_lhs, 0, 8 << ot); break; case JCC_P: - tcg_gen_ext8u_tl(s->tmp0, s->T0); - tcg_gen_ctpop_tl(s->tmp0, s->tmp0); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(1); + tcg_gen_ext8u_tl(cmp_lhs, s->T0); + tcg_gen_ctpop_tl(cmp_lhs, cmp_lhs); break; case JCC_S: - tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); + tcg_gen_sextract_tl(cmp_lhs, s->T0, 0, 8 << ot); break; default: @@ -1796,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode) static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode) { #ifdef TARGET_X86_64 - MemOp mop = MO_TE | MO_128 | MO_ALIGN; + MemOp mop = MO_LE | MO_128 | MO_ALIGN; TCGv_i64 t0, t1; TCGv_i128 cmp, val; @@ -1853,13 +1860,13 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode) /* Only require atomic with LOCK; non-parallel handled in generator. */ if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ); } else { tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, - s->mem_index, MO_TEUQ); + s->mem_index, MO_LEUQ); } - /* Set tmp0 to match the required value of Z. */ + /* Compute the required value of Z. */ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); Z = tcg_temp_new(); tcg_gen_trunc_i64_tl(Z, cmp); @@ -1899,9 +1906,10 @@ static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode) static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_crc32(s->T0, tmp, s->T1, tcg_constant_i32(8 << ot)); } static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode) @@ -2359,8 +2367,10 @@ static void gen_LAR(DisasContext *s, X86DecodedInsn *decode) static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_ldmxcsr(tcg_env, tmp); } static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) @@ -2372,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) gen_op_ld_v(s, MO_16, s->T1, s->A0); /* load the segment here to handle exceptions properly */ - gen_movl_seg(s, seg, s->T1); + gen_movl_seg(s, seg, s->T1, false); } static void gen_LDS(DisasContext *s, X86DecodedInsn *decode) @@ -2573,11 +2583,13 @@ static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode) static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode) { typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; + TCGv_i32 tmp = tcg_temp_new_i32(); + ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; fn = s->prefix & PREFIX_DATA ? pd : ps; - fn(s->tmp2_i32, tcg_env, OP_PTR2); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + fn(tmp, tcg_env, OP_PTR2); + tcg_gen_extu_i32_tl(s->T0, tmp); } static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode) @@ -2674,13 +2686,17 @@ static void gen_MULX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); - tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); - break; + { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, s->T0); + tcg_gen_trunc_tl_i32(t1, s->T1); + tcg_gen_mulu2_i32(t0, t1, t0, t1); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], t0); + tcg_gen_extu_i32_tl(s->T0, t1); + break; + } case MO_64: #endif @@ -2997,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. * TCG does a similar expansion of deposit to shl+extract2; by @@ -3724,10 +3740,14 @@ static void gen_RORX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - break; + { + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + tcg_gen_rotri_i32(tmp, tmp, b); + tcg_gen_extu_i32_tl(s->T0, tmp); + break; + } case MO_64: #endif @@ -3830,22 +3850,64 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SUB(DisasContext *s, X86DecodedInsn *decode); static void gen_SBB(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_SBB by transforming as follows: + * CC_SBB: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_SUB: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_SUB(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); + + /* + * Here the change is as follows: + * CC_SBB: src1 = T0, src2 = T0, src3 = c_in + * CC_SUB: src1 = 0, src2 = c_in (no src3) + * + * The difference also does not matter: + * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases + * therefore AF comes straight from dst (in fact it is c_in) + * - for OF, src1 and src2 have the same sign in both cases, meaning there + * can be no overflow + */ + if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) { + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_neg_tl(s->T0, c_in); + } else { + /* + * Do not negate c_in because it will often be dead and only the + * instruction generated by negsetcond will survive. + */ + gen_neg_setcc(s, JCC_B << 1, s->T0); + } + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_src = c_in; + decode->cc_dst = s->T0; + decode->cc_op = CC_OP_SUBB + ot; + return; + } + if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, s->T1, c_in); tcg_gen_neg_tl(s->T0, s->T0); tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - /* - * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by - * negsetcond, and CC_OP_SUBB as the cc_op. - */ tcg_gen_sub_tl(s->T0, s->T0, s->T1); tcg_gen_sub_tl(s->T0, s->T0, c_in); } @@ -3956,8 +4018,7 @@ static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, false, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, false, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot); } else { @@ -4009,8 +4070,7 @@ static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, true, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, true, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); } else { @@ -4277,7 +4337,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode) } return; } - in = s->tmp2_i32; + in = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(in, s->T1); #else in = s->T1; @@ -4307,7 +4367,7 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode, return; } - out = s->tmp2_i32; + out = tcg_temp_new_i32(); #else out = s->T0; #endif @@ -4359,7 +4419,7 @@ static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode) gen_pextr(s, decode, MO_32); } -static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) +static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode, TCGv_i32 tmp) { int val = decode->immediate; int dest_word = (val >> 4) & 3; @@ -4376,7 +4436,7 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) } if (new_mask != (val & 15)) { - tcg_gen_st_i32(s->tmp2_i32, tcg_env, + tcg_gen_st_i32(tmp, tcg_env, vector_elem_offset(&decode->op[0], MO_32, dest_word)); } @@ -4395,15 +4455,19 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode) { int val = decode->immediate; - tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, tcg_env, vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3)); - gen_vinsertps(s, decode); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_vinsertps(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode) @@ -4524,25 +4588,29 @@ static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode) static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_qemu_st_i32(tmp, s->A0, s->mem_index, MO_LEUL); } static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode) |