diff options
Diffstat (limited to 'target/i386/tcg')
-rw-r--r-- | target/i386/tcg/access.c | 5 | ||||
-rw-r--r-- | target/i386/tcg/cc_helper_template.h.inc | 90 | ||||
-rw-r--r-- | target/i386/tcg/decode-new.c.inc | 44 | ||||
-rw-r--r-- | target/i386/tcg/emit.c.inc | 202 | ||||
-rw-r--r-- | target/i386/tcg/excp_helper.c | 1 | ||||
-rw-r--r-- | target/i386/tcg/fpu_helper.c | 103 | ||||
-rw-r--r-- | target/i386/tcg/helper-tcg.h | 5 | ||||
-rw-r--r-- | target/i386/tcg/int_helper.c | 1 | ||||
-rw-r--r-- | target/i386/tcg/mem_helper.c | 3 | ||||
-rw-r--r-- | target/i386/tcg/mpx_helper.c | 4 | ||||
-rw-r--r-- | target/i386/tcg/seg_helper.c | 102 | ||||
-rw-r--r-- | target/i386/tcg/seg_helper.h | 10 | ||||
-rw-r--r-- | target/i386/tcg/system/bpt_helper.c | 2 | ||||
-rw-r--r-- | target/i386/tcg/system/excp_helper.c | 5 | ||||
-rw-r--r-- | target/i386/tcg/system/misc_helper.c | 5 | ||||
-rw-r--r-- | target/i386/tcg/system/seg_helper.c | 2 | ||||
-rw-r--r-- | target/i386/tcg/system/svm_helper.c | 2 | ||||
-rw-r--r-- | target/i386/tcg/system/tcg-cpu.c | 3 | ||||
-rw-r--r-- | target/i386/tcg/tcg-cpu.c | 82 | ||||
-rw-r--r-- | target/i386/tcg/tcg-cpu.h | 6 | ||||
-rw-r--r-- | target/i386/tcg/translate.c | 184 | ||||
-rw-r--r-- | target/i386/tcg/user/excp_helper.c | 1 | ||||
-rw-r--r-- | target/i386/tcg/user/seg_helper.c | 3 |
23 files changed, 521 insertions, 344 deletions
diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c index e68b73a..97e3f0e 100644 --- a/target/i386/tcg/access.c +++ b/target/i386/tcg/access.c @@ -3,8 +3,9 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" +#include "exec/target_page.h" #include "access.h" diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index 9aff16b..d8fd976 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -44,18 +44,32 @@ /* dynamic flags computation */ -static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries) { - uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2 = dst - src1; + uint32_t af_cf, pf, zf, sf, of; - cf = dst < src1; + /* PF, ZF, SF computed from result. */ pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + + /* + * AF, CF, OF computed from carry out vector. To compute AF and CF, rotate it + * left by one so cout(DATA_BITS - 1) is in bit 0 and cout(3) in bit 4. + * + * To compute OF, place the highest two carry bits into OF and the bit + * immediately to the right of it; then, adding CC_O / 2 XORs them. + */ + af_cf = ((carries << 1) | (carries >> (DATA_BITS - 1))) & (CC_A | CC_C); + of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O; + return pf + zf + sf + af_cf + of; +} + +static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +{ + DATA_TYPE src2 = dst - src1; + DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -66,25 +80,9 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src3) { - uint32_t cf, pf, af, zf, sf, of; - -#ifdef WIDER_TYPE - WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3; - DATA_TYPE src2 = dst - src13; - - cf = dst < src13; -#else DATA_TYPE src2 = dst - src1 - src3; - - cf = (src3 ? dst <= src1 : dst < src1); -#endif - - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & 0x10; - zf = (dst == 0) << 6; - sf = lshift(dst, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, @@ -101,16 +99,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { - uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src1 = dst + src2; - - cf = src1 < src2; - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; - zf = (dst == 0) * CC_Z; - sf = lshift(dst, 8 - DATA_BITS) & CC_S; - of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) @@ -123,25 +114,9 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src3) { - uint32_t cf, pf, af, zf, sf, of; - -#ifdef WIDER_TYPE - WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3; - DATA_TYPE src1 = dst + src23; - - cf = src1 < src23; -#else DATA_TYPE src1 = dst + src2 + src3; - - cf = (src3 ? src1 <= src2 : src1 < src2); -#endif - - pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & 0x10; - zf = (dst == 0) << 6; - sf = lshift(dst, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf + pf + af + zf + sf + of; + DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst); + return glue(compute_all_cout, SUFFIX)(dst, carries); } static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, @@ -175,13 +150,10 @@ static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2; cf = src1; - src1 = dst - 1; - src2 = 1; pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; + af = (dst ^ (dst - 1)) & CC_A; /* bits 0..3 are all clear */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK) * CC_O; @@ -191,13 +163,10 @@ static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { uint32_t cf, pf, af, zf, sf, of; - DATA_TYPE src2; cf = src1; - src1 = dst + 1; - src2 = 1; pf = compute_pf(dst); - af = (dst ^ src1 ^ src2) & CC_A; + af = (dst ^ (dst + 1)) & CC_A; /* bits 0..3 are all set */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK - 1) * CC_O; @@ -292,6 +261,5 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) #undef DATA_BITS #undef SIGN_MASK #undef DATA_TYPE -#undef DATA_MASK #undef SUFFIX #undef WIDER_TYPE diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index cda32ee..5103865 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -878,10 +878,10 @@ static const X86OpEntry opcodes_0F3A[256] = { [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), - [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), + [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66), [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), - [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66), [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), /* Listed incorrectly as type 4 */ @@ -2542,7 +2542,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) s->has_modrm = false; s->prefix = 0; - next_byte: + next_byte:; +#ifdef TARGET_X86_64 + /* clear any REX prefix followed by other prefixes. */ + int rex; + rex = -1; + next_byte_rex: +#endif b = x86_ldub_code(env, s); /* Collect prefixes. */ @@ -2585,13 +2591,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu) #ifdef TARGET_X86_64 case 0x40 ... 0x4f: if (CODE64(s)) { - /* REX prefix */ - s->prefix |= PREFIX_REX; - s->vex_w = (b >> 3) & 1; - s->rex_r = (b & 0x4) << 1; - s->rex_x = (b & 0x2) << 2; - s->rex_b = (b & 0x1) << 3; - goto next_byte; + /* + * REX prefix; ignored unless it is the last prefix, so + * for now just stash it + */ + rex = b; + goto next_byte_rex; } break; #endif @@ -2618,10 +2623,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ - | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { + | PREFIX_LOCK | PREFIX_DATA)) { goto illegal_op; } #ifdef TARGET_X86_64 + if (rex != -1) { + goto illegal_op; + } s->rex_r = (~vex2 >> 4) & 8; #endif if (b == 0xc5) { @@ -2661,6 +2669,16 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* Post-process prefixes. */ if (CODE64(s)) { +#ifdef TARGET_X86_64 + if (rex != -1) { + s->prefix |= PREFIX_REX; + s->vex_w = (rex >> 3) & 1; + s->rex_r = (rex & 0x4) << 1; + s->rex_x = (rex & 0x2) << 2; + s->rex_b = (rex & 0x1) << 3; + } +#endif + /* * In 64-bit mode, the default data size is 32-bit. Select 64-bit * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence @@ -2704,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (decode.e.check & X86_CHECK_i64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) { goto illegal_op; } } else { if (decode.e.check & X86_CHECK_o64) { goto illegal_op; } - if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) { goto illegal_op; } } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664..1a7fab93 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. - */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv break; case X86_OP_SEG: /* Note that gen_movl_seg takes care of interrupt shadow and TF. */ - gen_movl_seg(s, op->n, s->T0); + gen_movl_seg(s, op->n, v, op->n == R_SS); break; case X86_OP_INT: if (op->has_ea) { @@ -1170,11 +1160,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) assume_cc_op(s, CC_OP_EFLAGS); } +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode); static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_ADC by transforming as follows: + * CC_ADC: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_ADD: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_ADD(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, c_in, s->T1); @@ -1693,22 +1700,22 @@ static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode) switch (jcc_op) { case JCC_O: /* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */ + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); - tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); - tcg_gen_and_tl(s->tmp0, s->tmp0, newv); - tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + tcg_gen_xor_tl(cmp_lhs, s->cc_srcT, cmpv); + tcg_gen_and_tl(cmp_lhs, cmp_lhs, newv); + tcg_gen_sextract_tl(cmp_lhs, cmp_lhs, 0, 8 << ot); break; case JCC_P: - tcg_gen_ext8u_tl(s->tmp0, s->T0); - tcg_gen_ctpop_tl(s->tmp0, s->tmp0); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(1); + tcg_gen_ext8u_tl(cmp_lhs, s->T0); + tcg_gen_ctpop_tl(cmp_lhs, cmp_lhs); break; case JCC_S: - tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0); + tcg_gen_sextract_tl(cmp_lhs, s->T0, 0, 8 << ot); break; default: @@ -1796,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode) static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode) { #ifdef TARGET_X86_64 - MemOp mop = MO_TE | MO_128 | MO_ALIGN; + MemOp mop = MO_LE | MO_128 | MO_ALIGN; TCGv_i64 t0, t1; TCGv_i128 cmp, val; @@ -1853,13 +1860,13 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode) /* Only require atomic with LOCK; non-parallel handled in generator. */ if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ); } else { tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, - s->mem_index, MO_TEUQ); + s->mem_index, MO_LEUQ); } - /* Set tmp0 to match the required value of Z. */ + /* Compute the required value of Z. */ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); Z = tcg_temp_new(); tcg_gen_trunc_i64_tl(Z, cmp); @@ -1899,9 +1906,10 @@ static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode) static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_crc32(s->T0, tmp, s->T1, tcg_constant_i32(8 << ot)); } static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode) @@ -2359,8 +2367,10 @@ static void gen_LAR(DisasContext *s, X86DecodedInsn *decode) static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + gen_helper_ldmxcsr(tcg_env, tmp); } static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) @@ -2372,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) gen_op_ld_v(s, MO_16, s->T1, s->A0); /* load the segment here to handle exceptions properly */ - gen_movl_seg(s, seg, s->T1); + gen_movl_seg(s, seg, s->T1, false); } static void gen_LDS(DisasContext *s, X86DecodedInsn *decode) @@ -2573,11 +2583,13 @@ static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode) static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode) { typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; + TCGv_i32 tmp = tcg_temp_new_i32(); + ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; fn = s->prefix & PREFIX_DATA ? pd : ps; - fn(s->tmp2_i32, tcg_env, OP_PTR2); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + fn(tmp, tcg_env, OP_PTR2); + tcg_gen_extu_i32_tl(s->T0, tmp); } static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode) @@ -2674,13 +2686,17 @@ static void gen_MULX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); - tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); - break; + { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, s->T0); + tcg_gen_trunc_tl_i32(t1, s->T1); + tcg_gen_mulu2_i32(t0, t1, t0, t1); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], t0); + tcg_gen_extu_i32_tl(s->T0, t1); + break; + } case MO_64: #endif @@ -2997,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. * TCG does a similar expansion of deposit to shl+extract2; by @@ -3724,10 +3740,14 @@ static void gen_RORX(DisasContext *s, X86DecodedInsn *decode) switch (ot) { case MO_32: #ifdef TARGET_X86_64 - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - break; + { + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(tmp, s->T0); + tcg_gen_rotri_i32(tmp, tmp, b); + tcg_gen_extu_i32_tl(s->T0, tmp); + break; + } case MO_64: #endif @@ -3830,22 +3850,64 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SUB(DisasContext *s, X86DecodedInsn *decode); static void gen_SBB(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + + /* + * Try to avoid CC_OP_SBB by transforming as follows: + * CC_SBB: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_SUB: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_SUB(s, decode); + return; + } + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); + + /* + * Here the change is as follows: + * CC_SBB: src1 = T0, src2 = T0, src3 = c_in + * CC_SUB: src1 = 0, src2 = c_in (no src3) + * + * The difference also does not matter: + * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases + * therefore AF comes straight from dst (in fact it is c_in) + * - for OF, src1 and src2 have the same sign in both cases, meaning there + * can be no overflow + */ + if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) { + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_neg_tl(s->T0, c_in); + } else { + /* + * Do not negate c_in because it will often be dead and only the + * instruction generated by negsetcond will survive. + */ + gen_neg_setcc(s, JCC_B << 1, s->T0); + } + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_src = c_in; + decode->cc_dst = s->T0; + decode->cc_op = CC_OP_SUBB + ot; + return; + } + if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, s->T1, c_in); tcg_gen_neg_tl(s->T0, s->T0); tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - /* - * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by - * negsetcond, and CC_OP_SUBB as the cc_op. - */ tcg_gen_sub_tl(s->T0, s->T0, s->T1); tcg_gen_sub_tl(s->T0, s->T0, c_in); } @@ -3956,8 +4018,7 @@ static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, false, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, false, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot); } else { @@ -4009,8 +4070,7 @@ static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode) } decode->cc_dst = s->T0; - decode->cc_src = s->tmp0; - gen_shiftd_rm_T1(s, ot, true, count); + decode->cc_src = gen_shiftd_rm_T1(s, ot, true, count); if (can_be_zero) { gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); } else { @@ -4277,7 +4337,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode) } return; } - in = s->tmp2_i32; + in = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(in, s->T1); #else in = s->T1; @@ -4307,7 +4367,7 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode, return; } - out = s->tmp2_i32; + out = tcg_temp_new_i32(); #else out = s->T0; #endif @@ -4359,7 +4419,7 @@ static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode) gen_pextr(s, decode, MO_32); } -static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) +static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode, TCGv_i32 tmp) { int val = decode->immediate; int dest_word = (val >> 4) & 3; @@ -4376,7 +4436,7 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) } if (new_mask != (val & 15)) { - tcg_gen_st_i32(s->tmp2_i32, tcg_env, + tcg_gen_st_i32(tmp, tcg_env, vector_elem_offset(&decode->op[0], MO_32, dest_word)); } @@ -4395,15 +4455,19 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode) static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode) { int val = decode->immediate; - tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, tcg_env, vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3)); - gen_vinsertps(s, decode); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_vinsertps(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); + gen_vinsertps(s, decode, tmp); } static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode) @@ -4524,25 +4588,29 @@ static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode) static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL); tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); - tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode) { - tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + TCGv_i32 tmp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_qemu_st_i32(tmp, s->A0, s->mem_index, MO_LEUL); } static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode) diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index de71e68..6fb8036 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/log.h" #include "system/runstate.h" #include "exec/helper-proto.h" diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index c1184ca..b3b2382 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "tcg-cpu.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" @@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env) set_float_default_nan_pattern(0b11000000, &env->mmx_status); set_float_default_nan_pattern(0b11000000, &env->sse_status); /* - * TODO: x86 does flush-to-zero detection after rounding (the SDM + * x86 does flush-to-zero detection after rounding (the SDM * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush * when we detect underflow, which x86 does after rounding). */ - set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status); } -static inline uint8_t save_exception_flags(CPUX86State *env) +static inline int save_exception_flags(CPUX86State *env) { - uint8_t old_flags = get_float_exception_flags(&env->fp_status); + int old_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); return old_flags; } -static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +static void merge_exception_flags(CPUX86State *env, int old_flags) { - uint8_t new_flags = get_float_exception_flags(&env->fp_status); + int new_flags = get_float_exception_flags(&env->fp_status); float_raise(old_flags, &env->fp_status); fpu_set_exception(env, ((new_flags & float_flag_invalid ? FPUS_IE : 0) | @@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) (new_flags & float_flag_overflow ? FPUS_OE : 0) | (new_flags & float_flag_underflow ? FPUS_UE : 0) | (new_flags & float_flag_inexact ? FPUS_PE : 0) | - (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); + (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0))); } static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); floatx80 ret = floatx80_div(a, b, &env->fp_status); merge_exception_flags(env, old_flags); return ret; @@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) void helper_fldl_FT0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) void helper_fldl_ST0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env) uint64_t helper_fstl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env) int32_t helper_fist_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env) int32_t helper_fistl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env) int64_t helper_fistll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64(ST0, &env->fp_status); @@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env) int32_t helper_fistt_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env) int32_t helper_fisttl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env) int64_t helper_fisttll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); @@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); @@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env) void helper_fucom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); @@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) void helper_fucomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) void helper_fadd_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); merge_exception_flags(env, old_flags); } @@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); merge_exception_flags(env, old_flags); } @@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; @@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = { void helper_f2xm1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t sig = extractFloatx80Frac(ST0); int32_t exp = extractFloatx80Exp(ST0); bool sign = extractFloatx80Sign(ST0); @@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = { void helper_fpatan(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env) void helper_fxtract(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; @@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env) static void helper_fprem_common(CPUX86State *env, bool mod) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t quotient; CPU_LDoubleU temp0, temp1; int exp0, exp1, expdiff; @@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, void helper_fyl2xp1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env) void helper_fyl2x(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env) void helper_fsqrt(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; @@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_invalid_encoding(ST1, &env->fp_status) || floatx80_invalid_encoding(ST0, &env->fp_status)) { float_raise(float_flag_invalid, &env->fp_status); @@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env) } else { int n; FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; - uint8_t save_flags = get_float_exception_flags(&env->fp_status); + int save_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); set_float_exception_flags(save_flags, &env->fp_status); @@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env) /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) | (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | (mxcsr & FPUS_OE ? float_flag_overflow : 0) | (mxcsr & FPUS_UE ? float_flag_underflow : 0) | @@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env) void update_mxcsr_from_sse_status(CPUX86State *env) { - uint8_t flags = get_float_exception_flags(&env->sse_status); - /* - * The MXCSR denormal flag has opposite semantics to - * float_flag_input_denormal_flushed (the softfloat code sets that flag - * only when flushing input denormals to zero, but SSE sets it - * only when not flushing them to zero), so is not converted - * here. - */ + int flags = get_float_exception_flags(&env->sse_status); env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_input_denormal_used ? FPUS_DE : 0) | (flags & float_flag_divbyzero ? FPUS_ZE : 0) | (flags & float_flag_overflow ? FPUS_OE : 0) | (flags & float_flag_underflow ? FPUS_UE : 0) | diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 54d8453..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -20,7 +20,6 @@ #ifndef I386_HELPER_TCG_H #define I386_HELPER_TCG_H -#include "exec/exec-all.h" #include "qemu/host-utils.h" /* Maximum instruction code size */ @@ -98,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -116,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index 1a02e9d..46741d9 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" -#include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "qapi/error.h" diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c index 3ef84e9..9e7c2d8 100644 --- a/target/i386/tcg/mem_helper.c +++ b/target/i386/tcg/mem_helper.c @@ -20,8 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "qemu/int128.h" #include "qemu/atomic128.h" #include "tcg/tcg.h" diff --git a/target/i386/tcg/mpx_helper.c b/target/i386/tcg/mpx_helper.c index 22423eed..fa8abcc 100644 --- a/target/i386/tcg/mpx_helper.c +++ b/target/i386/tcg/mpx_helper.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-ldst.h" +#include "exec/target_page.h" #include "helper-tcg.h" diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 7196211..071f3fb 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -22,12 +22,13 @@ #include "cpu.h" #include "qemu/log.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/log.h" #include "helper-tcg.h" #include "seg_helper.h" #include "access.h" +#include "tcg-cpu.h" #ifdef TARGET_X86_64 #define SET_ESP(val, sp_mask) \ @@ -128,6 +129,22 @@ int get_pg_mode(CPUX86State *env) return pg_mode; } +static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; + int mmu_index_base = + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (pl < 3 && (env->eflags & AC_MASK) + ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX); + + return mmu_index_base + mmu_index_32; +} + +int cpu_mmu_index_kernel(CPUX86State *env) +{ + return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK); +} + /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, uint32_t *e2_ptr, int selector, @@ -309,10 +326,10 @@ static void tss_set_busy(CPUX86State *env, int tss_selector, bool value, #define SWITCH_TSS_IRET 1 #define SWITCH_TSS_CALL 2 -/* return 0 if switching to a 16-bit selector */ -static int switch_tss_ra(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip, uintptr_t retaddr) +static void switch_tss_ra(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + uint32_t error_code, uintptr_t retaddr) { int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, i; target_ulong tss_base; @@ -456,10 +473,6 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, new_segs[R_GS] = 0; new_trap = 0; } - /* XXX: avoid a compiler warning, see - http://support.amd.com/us/Processor_TechDocs/24593.pdf - chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */ - (void)new_trap; /* clear busy bit (it is restartable) */ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { @@ -582,14 +595,43 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector, cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK); } #endif - return type >> 3; + + if (has_error_code) { + int cpl = env->hflags & HF_CPL_MASK; + StackAccess sa; + + /* push the error code */ + sa.env = env; + sa.ra = retaddr; + sa.mmu_index = x86_mmu_index_pl(env, cpl); + sa.sp = env->regs[R_ESP]; + if (env->segs[R_SS].flags & DESC_B_MASK) { + sa.sp_mask = 0xffffffff; + } else { + sa.sp_mask = 0xffff; + } + sa.ss_base = env->segs[R_SS].base; + if (type & 8) { + pushl(&sa, error_code); + } else { + pushw(&sa, error_code); + } + SET_ESP(sa.sp, sa.sp_mask); + } + + if (new_trap) { + env->dr[6] |= DR6_BT; + raise_exception_ra(env, EXCP01_DB, retaddr); + } } -static int switch_tss(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip) +static void switch_tss(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, bool has_error_code, + int error_code) { - return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0); + switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, + has_error_code, error_code, 0); } static inline unsigned int get_sp_mask(unsigned int e2) @@ -702,25 +744,8 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, if (!(e2 & DESC_P_MASK)) { raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); } - shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); - if (has_error_code) { - /* push the error code on the destination stack */ - cpl = env->hflags & HF_CPL_MASK; - sa.mmu_index = x86_mmu_index_pl(env, cpl); - if (env->segs[R_SS].flags & DESC_B_MASK) { - sa.sp_mask = 0xffffffff; - } else { - sa.sp_mask = 0xffff; - } - sa.sp = env->regs[R_ESP]; - sa.ss_base = env->segs[R_SS].base; - if (shift) { - pushl(&sa, error_code); - } else { - pushw(&sa, error_code); - } - SET_ESP(sa.sp, sa.sp_mask); - } + switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip, + has_error_code, error_code); return; } @@ -1516,7 +1541,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, + false, 0, GETPC()); break; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -1728,7 +1754,8 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, if (dpl < cpl || dpl < rpl) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC()); } - switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, GETPC()); + switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, + false, 0, GETPC()); return; case 4: /* 286 call gate */ case 12: /* 386 call gate */ @@ -2239,7 +2266,8 @@ void helper_iret_protected(CPUX86State *env, int shift, int next_eip) if (type != 3) { raise_exception_err_ra(env, EXCP0A_TSS, tss_selector & 0xfffc, GETPC()); } - switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, GETPC()); + switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, + false, 0, GETPC()); } else { helper_ret_protected(env, shift, 1, 0, GETPC()); } diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h index ebf1035..ea98e1a 100644 --- a/target/i386/tcg/seg_helper.h +++ b/target/i386/tcg/seg_helper.h @@ -20,6 +20,8 @@ #ifndef SEG_HELPER_H #define SEG_HELPER_H +#include "cpu.h" + //#define DEBUG_PCALL #ifdef DEBUG_PCALL @@ -31,12 +33,12 @@ # define LOG_PCALL_STATE(cpu) do { } while (0) #endif +int cpu_mmu_index_kernel(CPUX86State *env); + /* * TODO: Convert callers to compute cpu_mmu_index_kernel once * and use *_mmuidx_ra directly. */ -#define cpu_ldub_kernel_ra(e, p, r) \ - cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_lduw_kernel_ra(e, p, r) \ cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) #define cpu_ldl_kernel_ra(e, p, r) \ @@ -44,8 +46,6 @@ #define cpu_ldq_kernel_ra(e, p, r) \ cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) -#define cpu_stb_kernel_ra(e, p, v, r) \ - cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stw_kernel_ra(e, p, v, r) \ cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) #define cpu_stl_kernel_ra(e, p, v, r) \ @@ -53,12 +53,10 @@ #define cpu_stq_kernel_ra(e, p, v, r) \ cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) -#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0) #define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0) #define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0) #define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0) -#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0) #define cpu_stw_kernel(e, p, v) cpu_stw_kernel_ra(e, p, v, 0) #define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0) #define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0) diff --git a/target/i386/tcg/system/bpt_helper.c b/target/i386/tcg/system/bpt_helper.c index be232c1..aebb5ca 100644 --- a/target/i386/tcg/system/bpt_helper.c +++ b/target/i386/tcg/system/bpt_helper.c @@ -19,8 +19,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "exec/watchpoint.h" #include "tcg/helper-tcg.h" diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c index 6876329..50040f6 100644 --- a/target/i386/tcg/system/excp_helper.c +++ b/target/i386/tcg/system/excp_helper.c @@ -19,9 +19,12 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" +#include "accel/tcg/probe.h" #include "exec/cputlb.h" #include "exec/page-protection.h" +#include "exec/target_page.h" +#include "exec/tlb-flags.h" #include "tcg/helper-tcg.h" typedef struct TranslateParams { diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c index ce18c75..9c3f5cc 100644 --- a/target/i386/tcg/system/misc_helper.c +++ b/target/i386/tcg/system/misc_helper.c @@ -21,8 +21,9 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" -#include "exec/address-spaces.h" +#include "accel/tcg/cpu-ldst.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "exec/cputlb.h" #include "tcg/helper-tcg.h" #include "hw/i386/apic.h" diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c index b07cc9f..d4ea890 100644 --- a/target/i386/tcg/system/seg_helper.c +++ b/target/i386/tcg/system/seg_helper.c @@ -23,7 +23,7 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" #include "../seg_helper.h" diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c index f9982b7..b27049b 100644 --- a/target/i386/tcg/system/svm_helper.c +++ b/target/i386/tcg/system/svm_helper.c @@ -22,7 +22,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "exec/cputlb.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" /* Secure Virtual Machine helpers */ diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c index 13a3507..0538a4f 100644 --- a/target/i386/tcg/system/tcg-cpu.c +++ b/target/i386/tcg/system/tcg-cpu.c @@ -23,7 +23,8 @@ #include "system/system.h" #include "qemu/units.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "tcg/tcg-cpu.h" diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index b8aff82..6f5dc06 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -23,7 +23,8 @@ #include "qemu/accel.h" #include "accel/accel-cpu-target.h" #include "exec/translation-block.h" - +#include "exec/target_page.h" +#include "accel/tcg/cpu-ops.h" #include "tcg-cpu.h" /* Frob eflags into and out of the CPU temporary format. */ @@ -47,6 +48,25 @@ static void x86_cpu_exec_exit(CPUState *cs) env->eflags = cpu_compute_eflags(env); } +static TCGTBCPUState x86_get_tb_cpu_state(CPUState *cs) +{ + CPUX86State *env = cpu_env(cs); + uint32_t flags, cs_base; + vaddr pc; + + flags = env->hflags | + (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); + if (env->hflags & HF_CS64_MASK) { + cs_base = 0; + pc = env->eip; + } else { + cs_base = env->segs[R_CS].base; + pc = (uint32_t)(cs_base + env->eip); + } + + return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base }; +} + static void x86_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { @@ -94,6 +114,23 @@ static void x86_restore_state_to_opc(CPUState *cs, } } +int x86_mmu_index_pl(CPUX86State *env, unsigned pl) +{ + int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; + int mmu_index_base = + pl == 3 ? MMU_USER64_IDX : + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; +} + +static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPUX86State *env = cpu_env(cs); + return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK); +} + #ifndef CONFIG_USER_ONLY static bool x86_debug_check_breakpoint(CPUState *cs) { @@ -103,15 +140,36 @@ static bool x86_debug_check_breakpoint(CPUState *cs) /* RF disables all architectural breakpoints. */ return !(env->eflags & RF_MASK); } -#endif -#include "accel/tcg/cpu-ops.h" +static void x86_cpu_exec_reset(CPUState *cs) +{ + CPUArchState *env = cpu_env(cs); + + cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); + do_cpu_init(env_archcpu(env)); + cs->exception_index = EXCP_HALTED; +} -static const TCGCPUOps x86_tcg_ops = { +static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result; +} +#endif + +const TCGCPUOps x86_tcg_ops = { + .mttcg_supported = true, + .precise_smc = true, + /* + * The x86 has a strong memory model with some store-after-load re-ordering + */ + .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD, .initialize = tcg_x86_init, .translate_code = x86_translate_code, + .get_tb_cpu_state = x86_get_tb_cpu_state, .synchronize_from_tb = x86_cpu_synchronize_from_tb, .restore_state_to_opc = x86_restore_state_to_opc, + .mmu_index = x86_cpu_mmu_index, .cpu_exec_enter = x86_cpu_exec_enter, .cpu_exec_exit = x86_cpu_exec_exit, #ifdef CONFIG_USER_ONLY @@ -120,9 +178,11 @@ static const TCGCPUOps x86_tcg_ops = { .record_sigbus = x86_cpu_record_sigbus, #else .tlb_fill = x86_cpu_tlb_fill, + .pointer_wrap = x86_pointer_wrap, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_halt = x86_cpu_exec_halt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, + .cpu_exec_reset = x86_cpu_exec_reset, .do_unaligned_access = x86_cpu_do_unaligned_access, .debug_excp_handler = breakpoint_handler, .debug_check_breakpoint = x86_debug_check_breakpoint, @@ -130,17 +190,6 @@ static const TCGCPUOps x86_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; -static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) -{ - /* for x86, all cpus use the same set of operations */ - cc->tcg_ops = &x86_tcg_ops; -} - -static void x86_tcg_cpu_class_init(CPUClass *cc) -{ - cc->init_accel_cpu = x86_tcg_cpu_init_ops; -} - static void x86_tcg_cpu_xsave_init(void) { #define XO(bit, field) \ @@ -181,7 +230,7 @@ static void x86_tcg_cpu_instance_init(CPUState *cs) x86_tcg_cpu_xsave_init(); } -static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data) { AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); @@ -189,7 +238,6 @@ static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) acc->cpu_target_realize = tcg_cpu_realizefn; #endif /* CONFIG_USER_ONLY */ - acc->cpu_class_init = x86_tcg_cpu_class_init; acc->cpu_instance_init = x86_tcg_cpu_instance_init; } static const TypeInfo x86_tcg_cpu_accel_type_info = { diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h index 53a8494..85bcd61 100644 --- a/target/i386/tcg/tcg-cpu.h +++ b/target/i386/tcg/tcg-cpu.h @@ -19,6 +19,8 @@ #ifndef TCG_CPU_H #define TCG_CPU_H +#include "cpu.h" + #define XSAVE_FCW_FSW_OFFSET 0x000 #define XSAVE_FTW_FOP_OFFSET 0x004 #define XSAVE_CWD_RIP_OFFSET 0x008 @@ -76,6 +78,10 @@ QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFF QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != XSAVE_HI16_ZMM_OFFSET); QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET); +extern const TCGCPUOps x86_tcg_ops; + bool tcg_cpu_realizefn(CPUState *cs, Error **errp); +int x86_mmu_index_pl(CPUX86State *env, unsigned pl); + #endif /* TCG_CPU_H */ diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a8935f4..0cb87d0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -20,11 +20,12 @@ #include "qemu/host-utils.h" #include "cpu.h" -#include "exec/exec-all.h" +#include "accel/tcg/cpu-mmu-index.h" #include "exec/translation-block.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "exec/translator.h" +#include "exec/target_page.h" #include "fpu/softfloat.h" #include "exec/helper-proto.h" @@ -134,10 +135,7 @@ typedef struct DisasContext { TCGv T1; /* TCG local register indexes (only used inside old micro ops) */ - TCGv tmp0; - TCGv tmp4; TCGv_i32 tmp2_i32; - TCGv_i32 tmp3_i32; TCGv_i64 tmp1_i64; sigjmp_buf jmpbuf; @@ -1183,6 +1181,26 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) return cc; } +static void gen_neg_setcc(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_addi_tl(reg, cc.reg, -1); + } else { + tcg_gen_neg_tl(reg, cc.reg); + } + return; + } + + if (cc.use_reg2) { + tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + static void gen_setcc(DisasContext *s, int b, TCGv reg) { CCPrepare cc = gen_prepare_cc(s, b, reg); @@ -1300,30 +1318,35 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift) { + TCGv_i32 port = tcg_temp_new_i32(); + gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. */ tcg_gen_movi_tl(s->T0, 0); gen_op_st_v(s, ot, s->T0, s->A0); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); - gen_helper_in_func(ot, s->T0, s->tmp2_i32); + tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]); + tcg_gen_andi_i32(port, port, 0xffff); + gen_helper_in_func(ot, s->T0, port); gen_op_st_v(s, ot, s->T0, s->A0); gen_op_add_reg(s, s->aflag, R_EDI, dshift); - gen_bpt_io(s, s->tmp2_i32, ot); + gen_bpt_io(s, port, ot); } static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift) { + TCGv_i32 port = tcg_temp_new_i32(); + TCGv_i32 value = tcg_temp_new_i32(); + gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); - gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); + tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]); + tcg_gen_andi_i32(port, port, 0xffff); + tcg_gen_trunc_tl_i32(value, s->T0); + gen_helper_out_func(ot, port, value); gen_op_add_reg(s, s->aflag, R_ESI, dshift); - gen_bpt_io(s, s->tmp2_i32, ot); + gen_bpt_io(s, port, ot); } #define REP_MAX 65535 @@ -1560,10 +1583,13 @@ static bool check_cpl0(DisasContext *s) } /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, +static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot, bool is_right, TCGv count) { target_ulong mask = (ot == MO_64 ? 63 : 31); + TCGv cc_src = tcg_temp_new(); + TCGv tmp = tcg_temp_new(); + TCGv hishift; switch (ot) { case MO_16: @@ -1571,9 +1597,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A portion by constructing it as a 32-bit value. */ if (is_right) { - tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16); + tcg_gen_deposit_tl(tmp, s->T0, s->T1, 16, 16); tcg_gen_mov_tl(s->T1, s->T0); - tcg_gen_mov_tl(s->T0, s->tmp0); + tcg_gen_mov_tl(s->T0, tmp); } else { tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16); } @@ -1584,47 +1610,52 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, case MO_32: #ifdef TARGET_X86_64 /* Concatenate the two 32-bit values and use a 64-bit shift. */ - tcg_gen_subi_tl(s->tmp0, count, 1); + tcg_gen_subi_tl(tmp, count, 1); if (is_right) { tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1); - tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_i64(cc_src, s->T0, tmp); tcg_gen_shr_i64(s->T0, s->T0, count); } else { tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0); - tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_i64(cc_src, s->T0, tmp); tcg_gen_shl_i64(s->T0, s->T0, count); - tcg_gen_shri_i64(s->tmp0, s->tmp0, 32); + tcg_gen_shri_i64(cc_src, cc_src, 32); tcg_gen_shri_i64(s->T0, s->T0, 32); } break; #endif default: - tcg_gen_subi_tl(s->tmp0, count, 1); + hishift = tcg_temp_new(); + tcg_gen_subi_tl(tmp, count, 1); if (is_right) { - tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shr_tl(cc_src, s->T0, tmp); - tcg_gen_subfi_tl(s->tmp4, mask + 1, count); + /* mask + 1 - count = mask - tmp = mask ^ tmp */ + tcg_gen_xori_tl(hishift, tmp, mask); tcg_gen_shr_tl(s->T0, s->T0, count); - tcg_gen_shl_tl(s->T1, s->T1, s->tmp4); + tcg_gen_shl_tl(s->T1, s->T1, hishift); } else { - tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0); + tcg_gen_shl_tl(cc_src, s->T0, tmp); + + /* mask + 1 - count = mask - tmp = mask ^ tmp */ + tcg_gen_xori_tl(hishift, tmp, mask); + tcg_gen_shl_tl(s->T0, s->T0, count); + tcg_gen_shr_tl(s->T1, s->T1, hishift); + if (ot == MO_16) { /* Only needed if count > 16, for Intel behaviour. */ - tcg_gen_subfi_tl(s->tmp4, 33, count); - tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4); - tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4); + tcg_gen_shri_tl(tmp, s->T1, 1); + tcg_gen_or_tl(cc_src, cc_src, tmp); } - - tcg_gen_subfi_tl(s->tmp4, mask + 1, count); - tcg_gen_shl_tl(s->T0, s->T0, count); - tcg_gen_shr_tl(s->T1, s->T1, s->tmp4); } - tcg_gen_movi_tl(s->tmp4, 0); - tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4, - s->tmp4, s->T1); + tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, + count, tcg_constant_tl(0), + tcg_constant_tl(0), s->T1); tcg_gen_or_tl(s->T0, s->T0, s->T1); break; } + + return cc_src; } #define X86_MAX_INSN_LENGTH 15 @@ -1843,14 +1874,16 @@ static void gen_bndck(DisasContext *s, X86DecodedInsn *decode, TCGCond cond, TCGv_i64 bndv) { TCGv ea = gen_lea_modrm_1(s, decode->mem, false); + TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i64 t64 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(s->tmp1_i64, ea); + tcg_gen_extu_tl_i64(t64, ea); if (!CODE64(s)) { - tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64); + tcg_gen_ext32u_i64(t64, t64); } - tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv); - tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64); - gen_helper_bndck(tcg_env, s->tmp2_i32); + tcg_gen_setcond_i64(cond, t64, t64, bndv); + tcg_gen_extrl_i64_i32(t32, t64); + gen_helper_bndck(tcg_env, t32); } /* generate modrm load of memory or register. */ @@ -1992,25 +2025,39 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg) /* move SRC to seg_reg and compute if the CPU state may change. Never call this function with seg_reg == R_CS */ -static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src) +static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq) { if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, src); - gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32); - /* abort translation because the addseg value may change or - because ss32 may change. For R_SS, translation must always - stop as a special handling must be done to disable hardware - interrupts for the next instruction */ - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } else if (CODE32(s) && seg_reg < R_FS) { + TCGv_i32 sel = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(sel, src); + gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); + + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. + */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { gen_op_movl_seg_real(s, seg_reg, src); - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } + } + + /* + * For MOV or POP to SS (but not LSS) translation must always + * stop as a special handling must be done to disable hardware + * interrupts for the next instruction. + * + * This is the last instruction, so it's okay to overwrite + * HF_TF_MASK; the next TB will start with the flag set. + * + * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which + * might have been set above. + */ + if (inhibit_irq) { + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; + s->flags &= ~HF_TF_MASK; } } @@ -2148,14 +2195,17 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) level &= 31; if (level != 0) { int i; + if (level > 1) { + TCGv fp = tcg_temp_new(); - /* Copy level-1 pointers from the previous frame. */ - for (i = 1; i < level; ++i) { - gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); - gen_op_ld_v(s, d_ot, s->tmp0, s->A0); + /* Copy level-1 pointers from the previous frame. */ + for (i = 1; i < level; ++i) { + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); + gen_op_ld_v(s, d_ot, fp, s->A0); - gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); - gen_op_st_v(s, d_ot, s->tmp0, s->A0); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); + gen_op_st_v(s, d_ot, fp, s->A0); + } } /* Push the current FrameTemp as the last level. */ @@ -2258,7 +2308,7 @@ gen_eob(DisasContext *s, int mode) if (mode == DISAS_EOB_RECHECK_TF) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) { + } else if (s->flags & HF_TF_MASK) { gen_helper_single_step(tcg_env); } else if (mode == DISAS_JUMP && /* give irqs a chance to happen */ @@ -2378,10 +2428,11 @@ static void gen_ldy_env_A0(DisasContext *s, int offset, bool align) int mem_index = s->mem_index; TCGv_i128 t0 = tcg_temp_new_i128(); TCGv_i128 t1 = tcg_temp_new_i128(); + TCGv a0_hi = tcg_temp_new(); tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0)); - tcg_gen_addi_tl(s->tmp0, s->A0, 16); - tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop); + tcg_gen_addi_tl(a0_hi, s->A0, 16); + tcg_gen_qemu_ld_i128(t1, a0_hi, mem_index, mop); tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); @@ -2392,12 +2443,13 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR; int mem_index = s->mem_index; TCGv_i128 t = tcg_temp_new_i128(); + TCGv a0_hi = tcg_temp_new(); tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0)); - tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_addi_tl(a0_hi, s->A0, 16); tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); - tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop); + tcg_gen_qemu_st_i128(t, a0_hi, mem_index, mop); } #include "emit.c.inc" @@ -3601,7 +3653,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode) return; illegal_op: gen_illegal_opcode(s); - return; } #include "decode-new.c.inc" @@ -3744,11 +3795,8 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->T1 = tcg_temp_new(); dc->A0 = tcg_temp_new(); - dc->tmp0 = tcg_temp_new(); dc->tmp1_i64 = tcg_temp_new_i64(); dc->tmp2_i32 = tcg_temp_new_i32(); - dc->tmp3_i32 = tcg_temp_new_i32(); - dc->tmp4 = tcg_temp_new(); dc->cc_srcT = tcg_temp_new(); } diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c index b3bdb78..98fab4cb 100644 --- a/target/i386/tcg/user/excp_helper.c +++ b/target/i386/tcg/user/excp_helper.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "exec/exec-all.h" #include "tcg/helper-tcg.h" void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c index c45f2ac..263f599 100644 --- a/target/i386/tcg/user/seg_helper.c +++ b/target/i386/tcg/user/seg_helper.c @@ -21,8 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" +#include "accel/tcg/cpu-ldst.h" #include "tcg/helper-tcg.h" #include "tcg/seg_helper.h" |