about | summary | refs | log | tree | commit | diff
path: root/target/i386/tcg
diff options
context:
space:
mode:
Diffstat (limited to 'target/i386/tcg')
-rw-r--r-- target/i386/tcg/access.c | 5
-rw-r--r-- target/i386/tcg/cc_helper_template.h.inc | 90
-rw-r--r-- target/i386/tcg/decode-new.c.inc | 44
-rw-r--r-- target/i386/tcg/emit.c.inc | 202
-rw-r--r-- target/i386/tcg/excp_helper.c | 1
-rw-r--r-- target/i386/tcg/fpu_helper.c | 103
-rw-r--r-- target/i386/tcg/helper-tcg.h | 5
-rw-r--r-- target/i386/tcg/int_helper.c | 1
-rw-r--r-- target/i386/tcg/mem_helper.c | 3
-rw-r--r-- target/i386/tcg/mpx_helper.c | 4
-rw-r--r-- target/i386/tcg/seg_helper.c | 102
-rw-r--r-- target/i386/tcg/seg_helper.h | 10
-rw-r--r-- target/i386/tcg/system/bpt_helper.c | 2
-rw-r--r-- target/i386/tcg/system/excp_helper.c | 5
-rw-r--r-- target/i386/tcg/system/misc_helper.c | 5
-rw-r--r-- target/i386/tcg/system/seg_helper.c | 2
-rw-r--r-- target/i386/tcg/system/svm_helper.c | 2
-rw-r--r-- target/i386/tcg/system/tcg-cpu.c | 3
-rw-r--r-- target/i386/tcg/tcg-cpu.c | 82
-rw-r--r-- target/i386/tcg/tcg-cpu.h | 6
-rw-r--r-- target/i386/tcg/translate.c | 184
-rw-r--r-- target/i386/tcg/user/excp_helper.c | 1
-rw-r--r-- target/i386/tcg/user/seg_helper.c | 3
23 files changed, 521 insertions, 344 deletions
diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c
index e68b73a..97e3f0e 100644
--- a/target/i386/tcg/access.c
+++ b/target/i386/tcg/access.c
@@ -3,8 +3,9 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
+#include "exec/target_page.h"
#include "access.h"
diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc
index 9aff16b..d8fd976 100644
--- a/target/i386/tcg/cc_helper_template.h.inc
+++ b/target/i386/tcg/cc_helper_template.h.inc
@@ -44,18 +44,32 @@
/* dynamic flags computation */
-static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries)
{
- uint32_t cf, pf, af, zf, sf, of;
- DATA_TYPE src2 = dst - src1;
+ uint32_t af_cf, pf, zf, sf, of;
- cf = dst < src1;
+ /* PF, ZF, SF computed from result. */
pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+
+ /*
+ * AF, CF, OF computed from carry out vector. To compute AF and CF, rotate it
+ * left by one so cout(DATA_BITS - 1) is in bit 0 and cout(3) in bit 4.
+ *
+ * To compute OF, place the highest two carry bits into OF and the bit
+ * immediately to the right of it; then, adding CC_O / 2 XORs them.
+ */
+ af_cf = ((carries << 1) | (carries >> (DATA_BITS - 1))) & (CC_A | CC_C);
+ of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O;
+ return pf + zf + sf + af_cf + of;
+}
+
+static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+ DATA_TYPE src2 = dst - src1;
+ DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
@@ -66,25 +80,9 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3;
- DATA_TYPE src2 = dst - src13;
-
- cf = dst < src13;
-#else
DATA_TYPE src2 = dst - src1 - src3;
-
- cf = (src3 ? dst <= src1 : dst < src1);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
@@ -101,16 +99,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
- uint32_t cf, pf, af, zf, sf, of;
DATA_TYPE src1 = dst + src2;
-
- cf = src1 < src2;
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
- zf = (dst == 0) * CC_Z;
- sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -123,25 +114,9 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3;
- DATA_TYPE src1 = dst + src23;
-
- cf = src1 < src23;
-#else
DATA_TYPE src1 = dst + src2 + src3;
-
- cf = (src3 ? src1 <= src2 : src1 < src2);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
@@ -175,13 +150,10 @@ static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
- DATA_TYPE src2;
cf = src1;
- src1 = dst - 1;
- src2 = 1;
pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
+ af = (dst ^ (dst - 1)) & CC_A; /* bits 0..3 are all clear */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = (dst == SIGN_MASK) * CC_O;
@@ -191,13 +163,10 @@ static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
uint32_t cf, pf, af, zf, sf, of;
- DATA_TYPE src2;
cf = src1;
- src1 = dst + 1;
- src2 = 1;
pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
+ af = (dst ^ (dst + 1)) & CC_A; /* bits 0..3 are all set */
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = (dst == SIGN_MASK - 1) * CC_O;
@@ -292,6 +261,5 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
#undef DATA_BITS
#undef SIGN_MASK
#undef DATA_TYPE
-#undef DATA_MASK
#undef SUFFIX
#undef WIDER_TYPE
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index cda32ee..5103865 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -878,10 +878,10 @@ static const X86OpEntry opcodes_0F3A[256] = {
[0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
[0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
- [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
+ [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX) p_66),
[0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66),
- [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
+ [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,dq, vex6 chk(W0) cpuid(AVX2) p_66),
[0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66),
/* Listed incorrectly as type 4 */
@@ -2542,7 +2542,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
s->has_modrm = false;
s->prefix = 0;
- next_byte:
+ next_byte:;
+#ifdef TARGET_X86_64
+ /* clear any REX prefix followed by other prefixes. */
+ int rex;
+ rex = -1;
+ next_byte_rex:
+#endif
b = x86_ldub_code(env, s);
/* Collect prefixes. */
@@ -2585,13 +2591,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
#ifdef TARGET_X86_64
case 0x40 ... 0x4f:
if (CODE64(s)) {
- /* REX prefix */
- s->prefix |= PREFIX_REX;
- s->vex_w = (b >> 3) & 1;
- s->rex_r = (b & 0x4) << 1;
- s->rex_x = (b & 0x2) << 2;
- s->rex_b = (b & 0x1) << 3;
- goto next_byte;
+ /*
+ * REX prefix; ignored unless it is the last prefix, so
+ * for now just stash it
+ */
+ rex = b;
+ goto next_byte_rex;
}
break;
#endif
@@ -2618,10 +2623,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
/* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
- | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
+ | PREFIX_LOCK | PREFIX_DATA)) {
goto illegal_op;
}
#ifdef TARGET_X86_64
+ if (rex != -1) {
+ goto illegal_op;
+ }
s->rex_r = (~vex2 >> 4) & 8;
#endif
if (b == 0xc5) {
@@ -2661,6 +2669,16 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
/* Post-process prefixes. */
if (CODE64(s)) {
+#ifdef TARGET_X86_64
+ if (rex != -1) {
+ s->prefix |= PREFIX_REX;
+ s->vex_w = (rex >> 3) & 1;
+ s->rex_r = (rex & 0x4) << 1;
+ s->rex_x = (rex & 0x2) << 2;
+ s->rex_b = (rex & 0x1) << 3;
+ }
+#endif
+
/*
* In 64-bit mode, the default data size is 32-bit. Select 64-bit
* data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
@@ -2704,14 +2722,14 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (decode.e.check & X86_CHECK_i64) {
goto illegal_op;
}
- if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
+ if ((decode.e.check & X86_CHECK_i64_amd) && !IS_INTEL_CPU(env)) {
goto illegal_op;
}
} else {
if (decode.e.check & X86_CHECK_o64) {
goto illegal_op;
}
- if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
+ if ((decode.e.check & X86_CHECK_o64_intel) && IS_INTEL_CPU(env)) {
goto illegal_op;
}
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 0fa1664..1a7fab93 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -19,16 +19,6 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-/*
- * Sometimes, knowing what the backend has can produce better code.
- * The exact opcode to check depends on 32- vs. 64-bit.
- */
-#ifdef TARGET_X86_64
-#define INDEX_op_extract2_tl INDEX_op_extract2_i64
-#else
-#define INDEX_op_extract2_tl INDEX_op_extract2_i32
-#endif
-
#define MMX_OFFSET(reg) \
({ assert((reg) >= 0 && (reg) <= 7); \
offsetof(CPUX86State, fpregs[reg].mmx); })
@@ -352,7 +342,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
break;
case X86_OP_SEG:
/* Note that gen_movl_seg takes care of interrupt shadow and TF. */
- gen_movl_seg(s, op->n, s->T0);
+ gen_movl_seg(s, op->n, v, op->n == R_SS);
break;
case X86_OP_INT:
if (op->has_ea) {
@@ -1170,11 +1160,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode)
assume_cc_op(s, CC_OP_EFLAGS);
}
+static void gen_ADD(DisasContext *s, X86DecodedInsn *decode);
static void gen_ADC(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
- TCGv c_in = tcg_temp_new();
+ TCGv c_in;
+
+ /*
+ * Try to avoid CC_OP_ADC by transforming as follows:
+ * CC_ADC: src1 = dst + c_in, src2 = 0, src3 = c_in
+ * CC_ADD: src1 = dst + c_in, src2 = c_in (no src3)
+ *
+ * In general src2 vs. src3 matters when computing AF and OF, but not here:
+ * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases
+ * - OF is a function of the two MSBs, and in both cases they are zero for src2
+ */
+ if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) {
+ gen_compute_eflags_c(s, s->T1);
+ gen_ADD(s, decode);
+ return;
+ }
+ c_in = tcg_temp_new();
gen_compute_eflags_c(s, c_in);
if (s->prefix & PREFIX_LOCK) {
tcg_gen_add_tl(s->T0, c_in, s->T1);
@@ -1693,22 +1700,22 @@ static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode)
switch (jcc_op) {
case JCC_O:
/* (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment */
+ cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0);
tcg_gen_xor_tl(newv, s->cc_srcT, s->T0);
- tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv);
- tcg_gen_and_tl(s->tmp0, s->tmp0, newv);
- tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot);
- cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ tcg_gen_xor_tl(cmp_lhs, s->cc_srcT, cmpv);
+ tcg_gen_and_tl(cmp_lhs, cmp_lhs, newv);
+ tcg_gen_sextract_tl(cmp_lhs, cmp_lhs, 0, 8 << ot);
break;
case JCC_P:
- tcg_gen_ext8u_tl(s->tmp0, s->T0);
- tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
- cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1);
+ cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(1);
+ tcg_gen_ext8u_tl(cmp_lhs, s->T0);
+ tcg_gen_ctpop_tl(cmp_lhs, cmp_lhs);
break;
case JCC_S:
- tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot);
- cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ cmp_lhs = tcg_temp_new(), cmp_rhs = tcg_constant_tl(0);
+ tcg_gen_sextract_tl(cmp_lhs, s->T0, 0, 8 << ot);
break;
default:
@@ -1796,7 +1803,7 @@ static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
static void gen_CMPXCHG16B(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
- MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+ MemOp mop = MO_LE | MO_128 | MO_ALIGN;
TCGv_i64 t0, t1;
TCGv_i128 cmp, val;
@@ -1853,13 +1860,13 @@ static void gen_CMPXCHG8B(DisasContext *s, X86DecodedInsn *decode)
/* Only require atomic with LOCK; non-parallel handled in generator. */
if (s->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_LEUQ);
} else {
tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
- s->mem_index, MO_TEUQ);
+ s->mem_index, MO_LEUQ);
}
- /* Set tmp0 to match the required value of Z. */
+ /* Compute the required value of Z. */
tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
Z = tcg_temp_new();
tcg_gen_trunc_i64_tl(Z, cmp);
@@ -1899,9 +1906,10 @@ static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
+ TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
+ tcg_gen_trunc_tl_i32(tmp, s->T0);
+ gen_helper_crc32(s->T0, tmp, s->T1, tcg_constant_i32(8 << ot));
}
static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode)
@@ -2359,8 +2367,10 @@ static void gen_LAR(DisasContext *s, X86DecodedInsn *decode)
static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
{
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(tmp, s->T0);
+ gen_helper_ldmxcsr(tcg_env, tmp);
}
static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
@@ -2372,7 +2382,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
gen_op_ld_v(s, MO_16, s->T1, s->A0);
/* load the segment here to handle exceptions properly */
- gen_movl_seg(s, seg, s->T1);
+ gen_movl_seg(s, seg, s->T1, false);
}
static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
@@ -2573,11 +2583,13 @@ static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode)
static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode)
{
typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn;
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm;
pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm;
fn = s->prefix & PREFIX_DATA ? pd : ps;
- fn(s->tmp2_i32, tcg_env, OP_PTR2);
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+ fn(tmp, tcg_env, OP_PTR2);
+ tcg_gen_extu_i32_tl(s->T0, tmp);
}
static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode)
@@ -2674,13 +2686,17 @@ static void gen_MULX(DisasContext *s, X86DecodedInsn *decode)
switch (ot) {
case MO_32:
#ifdef TARGET_X86_64
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
- s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
- tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32);
- break;
+ {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(t0, s->T0);
+ tcg_gen_trunc_tl_i32(t1, s->T1);
+ tcg_gen_mulu2_i32(t0, t1, t0, t1);
+ tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], t0);
+ tcg_gen_extu_i32_tl(s->T0, t1);
+ break;
+ }
case MO_64:
#endif
@@ -2997,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1)));
while (vec_len > 8) {
vec_len -= 8;
- if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) {
+ if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) {
/*
* Load the next byte of the result into the high byte of T.
* TCG does a similar expansion of deposit to shl+extract2; by
@@ -3724,10 +3740,14 @@ static void gen_RORX(DisasContext *s, X86DecodedInsn *decode)
switch (ot) {
case MO_32:
#ifdef TARGET_X86_64
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b);
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
- break;
+ {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(tmp, s->T0);
+ tcg_gen_rotri_i32(tmp, tmp, b);
+ tcg_gen_extu_i32_tl(s->T0, tmp);
+ break;
+ }
case MO_64:
#endif
@@ -3830,22 +3850,64 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}
+static void gen_SUB(DisasContext *s, X86DecodedInsn *decode);
static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- TCGv c_in = tcg_temp_new();
+ TCGv c_in;
+
+ /*
+ * Try to avoid CC_OP_SBB by transforming as follows:
+ * CC_SBB: src1 = dst + c_in, src2 = 0, src3 = c_in
+ * CC_SUB: src1 = dst + c_in, src2 = c_in (no src3)
+ *
+ * In general src2 vs. src3 matters when computing AF and OF, but not here:
+ * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases
+ * - OF is a function of the two MSBs, and in both cases they are zero for src2
+ */
+ if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) {
+ gen_compute_eflags_c(s, s->T1);
+ gen_SUB(s, decode);
+ return;
+ }
+ c_in = tcg_temp_new();
gen_compute_eflags_c(s, c_in);
+
+ /*
+ * Here the change is as follows:
+ * CC_SBB: src1 = T0, src2 = T0, src3 = c_in
+ * CC_SUB: src1 = 0, src2 = c_in (no src3)
+ *
+ * The difference also does not matter:
+ * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases
+ * therefore AF comes straight from dst (in fact it is c_in)
+ * - for OF, src1 and src2 have the same sign in both cases, meaning there
+ * can be no overflow
+ */
+ if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) {
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ tcg_gen_neg_tl(s->T0, c_in);
+ } else {
+ /*
+ * Do not negate c_in because it will often be dead and only the
+ * instruction generated by negsetcond will survive.
+ */
+ gen_neg_setcc(s, JCC_B << 1, s->T0);
+ }
+ tcg_gen_movi_tl(s->cc_srcT, 0);
+ decode->cc_src = c_in;
+ decode->cc_dst = s->T0;
+ decode->cc_op = CC_OP_SUBB + ot;
+ return;
+ }
+
if (s->prefix & PREFIX_LOCK) {
tcg_gen_add_tl(s->T0, s->T1, c_in);
tcg_gen_neg_tl(s->T0, s->T0);
tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
s->mem_index, ot | MO_LE);
} else {
- /*
- * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by
- * negsetcond, and CC_OP_SUBB as the cc_op.
- */
tcg_gen_sub_tl(s->T0, s->T0, s->T1);
tcg_gen_sub_tl(s->T0, s->T0, c_in);
}
@@ -3956,8 +4018,7 @@ static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode)
}
decode->cc_dst = s->T0;
- decode->cc_src = s->tmp0;
- gen_shiftd_rm_T1(s, ot, false, count);
+ decode->cc_src = gen_shiftd_rm_T1(s, ot, false, count);
if (can_be_zero) {
gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
} else {
@@ -4009,8 +4070,7 @@ static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode)
}
decode->cc_dst = s->T0;
- decode->cc_src = s->tmp0;
- gen_shiftd_rm_T1(s, ot, true, count);
+ decode->cc_src = gen_shiftd_rm_T1(s, ot, true, count);
if (can_be_zero) {
gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
} else {
@@ -4277,7 +4337,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode)
}
return;
}
- in = s->tmp2_i32;
+ in = tcg_temp_new_i32();
tcg_gen_trunc_tl_i32(in, s->T1);
#else
in = s->T1;
@@ -4307,7 +4367,7 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode,
return;
}
- out = s->tmp2_i32;
+ out = tcg_temp_new_i32();
#else
out = s->T0;
#endif
@@ -4359,7 +4419,7 @@ static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode)
gen_pextr(s, decode, MO_32);
}
-static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
+static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode, TCGv_i32 tmp)
{
int val = decode->immediate;
int dest_word = (val >> 4) & 3;
@@ -4376,7 +4436,7 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
}
if (new_mask != (val & 15)) {
- tcg_gen_st_i32(s->tmp2_i32, tcg_env,
+ tcg_gen_st_i32(tmp, tcg_env,
vector_elem_offset(&decode->op[0], MO_32, dest_word));
}
@@ -4395,15 +4455,19 @@ static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode)
{
int val = decode->immediate;
- tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_ld_i32(tmp, tcg_env,
vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3));
- gen_vinsertps(s, decode);
+ gen_vinsertps(s, decode, tmp);
}
static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode)
{
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
- gen_vinsertps(s, decode);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL);
+ gen_vinsertps(s, decode, tmp);
}
static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode)
@@ -4524,25 +4588,29 @@ static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode)
static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
+ TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
+ tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);
- tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
+ tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}
static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
+ TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+ tcg_gen_qemu_ld_i32(tmp, s->A0, s->mem_index, MO_LEUL);
tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
- tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
+ tcg_gen_st_i32(tmp, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}
static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode)
{
- tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ tcg_gen_ld_i32(tmp, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
+ tcg_gen_qemu_st_i32(tmp, s->A0, s->mem_index, MO_LEUL);
}
static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode)
diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index de71e68..6fb8036 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "qemu/log.h"
#include "system/runstate.h"
#include "exec/helper-proto.h"
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index c1184ca..b3b2382 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -22,7 +22,7 @@
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/cputlb.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
@@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env)
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
set_float_default_nan_pattern(0b11000000, &env->sse_status);
/*
- * TODO: x86 does flush-to-zero detection after rounding (the SDM
+ * x86 does flush-to-zero detection after rounding (the SDM
* section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
* when we detect underflow, which x86 does after rounding).
*/
- set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
- set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status);
+ set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status);
}
-static inline uint8_t save_exception_flags(CPUX86State *env)
+static inline int save_exception_flags(CPUX86State *env)
{
- uint8_t old_flags = get_float_exception_flags(&env->fp_status);
+ int old_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
return old_flags;
}
-static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
+static void merge_exception_flags(CPUX86State *env, int old_flags)
{
- uint8_t new_flags = get_float_exception_flags(&env->fp_status);
+ int new_flags = get_float_exception_flags(&env->fp_status);
float_raise(old_flags, &env->fp_status);
fpu_set_exception(env,
((new_flags & float_flag_invalid ? FPUS_IE : 0) |
@@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
(new_flags & float_flag_overflow ? FPUS_OE : 0) |
(new_flags & float_flag_underflow ? FPUS_UE : 0) |
(new_flags & float_flag_inexact ? FPUS_PE : 0) |
- (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0)));
+ (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0)));
}
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
floatx80 ret = floatx80_div(a, b, &env->fp_status);
merge_exception_flags(env, old_flags);
return ret;
@@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val)
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val)
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float32 f;
@@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val)
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int new_fpstt;
union {
float64 f;
@@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val)
uint32_t helper_fsts_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float32 f;
uint32_t i;
@@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env)
uint64_t helper_fstl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
union {
float64 f;
uint64_t i;
@@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env)
int32_t helper_fist_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env)
int32_t helper_fistl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32(ST0, &env->fp_status);
@@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env)
int64_t helper_fistll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64(ST0, &env->fp_status);
@@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env)
int32_t helper_fistt_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env)
int32_t helper_fisttl_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int32_t val;
val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
@@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env)
int64_t helper_fisttll_ST0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int64_t val;
val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
@@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
void helper_fcom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare(ST0, FT0, &env->fp_status);
@@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env)
void helper_fucom_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
FloatRelation ret;
ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
@@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env)
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int eflags;
FloatRelation ret;
@@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env)
void helper_fadd_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_add(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env)
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
merge_exception_flags(env, old_flags);
}
@@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
int v;
target_ulong mem_ref, mem_end;
int64_t val;
@@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = {
void helper_f2xm1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t sig = extractFloatx80Frac(ST0);
int32_t exp = extractFloatx80Exp(ST0);
bool sign = extractFloatx80Sign(ST0);
@@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = {
void helper_fpatan(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env)
void helper_fxtract(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
CPU_LDoubleU temp;
temp.d = ST0;
@@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env)
static void helper_fprem_common(CPUX86State *env, bool mod)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t quotient;
CPU_LDoubleU temp0, temp1;
int exp0, exp1, expdiff;
@@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
void helper_fyl2xp1(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env)
void helper_fyl2x(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
uint64_t arg0_sig = extractFloatx80Frac(ST0);
int32_t arg0_exp = extractFloatx80Exp(ST0);
bool arg0_sign = extractFloatx80Sign(ST0);
@@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env)
void helper_fsqrt(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_is_neg(ST0)) {
env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
env->fpus |= 0x400;
@@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env)
void helper_frndint(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
ST0 = floatx80_round_to_int(ST0, &env->fp_status);
merge_exception_flags(env, old_flags);
}
void helper_fscale(CPUX86State *env)
{
- uint8_t old_flags = save_exception_flags(env);
+ int old_flags = save_exception_flags(env);
if (floatx80_invalid_encoding(ST1, &env->fp_status) ||
floatx80_invalid_encoding(ST0, &env->fp_status)) {
float_raise(float_flag_invalid, &env->fp_status);
@@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env)
} else {
int n;
FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
- uint8_t save_flags = get_float_exception_flags(&env->fp_status);
+ int save_flags = get_float_exception_flags(&env->fp_status);
set_float_exception_flags(0, &env->fp_status);
n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
set_float_exception_flags(save_flags, &env->fp_status);
@@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env)
/* Set exception flags. */
set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
+ (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) |
(mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
(mxcsr & FPUS_OE ? float_flag_overflow : 0) |
(mxcsr & FPUS_UE ? float_flag_underflow : 0) |
@@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env)
void update_mxcsr_from_sse_status(CPUX86State *env)
{
- uint8_t flags = get_float_exception_flags(&env->sse_status);
- /*
- * The MXCSR denormal flag has opposite semantics to
- * float_flag_input_denormal_flushed (the softfloat code sets that flag
- * only when flushing input denormals to zero, but SSE sets it
- * only when not flushing them to zero), so is not converted
- * here.
- */
+ int flags = get_float_exception_flags(&env->sse_status);
env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
+ (flags & float_flag_input_denormal_used ? FPUS_DE : 0) |
(flags & float_flag_divbyzero ? FPUS_ZE : 0) |
(flags & float_flag_overflow ? FPUS_OE : 0) |
(flags & float_flag_underflow ? FPUS_UE : 0) |
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 54d8453..be011b0 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -20,7 +20,6 @@
#ifndef I386_HELPER_TCG_H
#define I386_HELPER_TCG_H
-#include "exec/exec-all.h"
#include "qemu/host-utils.h"
/* Maximum instruction code size */
@@ -98,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x)
/* misc_helper.c */
void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-/* sysemu/svm_helper.c */
+/* system/svm_helper.c */
#ifndef CONFIG_USER_ONLY
G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code,
uint64_t exit_info_1, uintptr_t retaddr);
@@ -116,7 +115,7 @@ int exception_has_error_code(int intno);
/* smm_helper.c */
void do_smm_enter(X86CPU *cpu);
-/* sysemu/bpt_helper.c */
+/* system/bpt_helper.c */
bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
/*
diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c
index 1a02e9d..46741d9 100644
--- a/target/i386/tcg/int_helper.c
+++ b/target/i386/tcg/int_helper.c
@@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "qapi/error.h"
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index 3ef84e9..9e7c2d8 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -20,8 +20,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"
diff --git a/target/i386/tcg/mpx_helper.c b/target/i386/tcg/mpx_helper.c
index 22423eed..fa8abcc 100644
--- a/target/i386/tcg/mpx_helper.c
+++ b/target/i386/tcg/mpx_helper.c
@@ -20,8 +20,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "exec/target_page.h"
#include "helper-tcg.h"
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 7196211..071f3fb 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -22,12 +22,13 @@
#include "cpu.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "exec/log.h"
#include "helper-tcg.h"
#include "seg_helper.h"
#include "access.h"
+#include "tcg-cpu.h"
#ifdef TARGET_X86_64
#define SET_ESP(val, sp_mask) \
@@ -128,6 +129,22 @@ int get_pg_mode(CPUX86State *env)
return pg_mode;
}
+static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl)
+{
+ int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1;
+ int mmu_index_base =
+ !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
+ (pl < 3 && (env->eflags & AC_MASK)
+ ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX);
+
+ return mmu_index_base + mmu_index_32;
+}
+
+int cpu_mmu_index_kernel(CPUX86State *env)
+{
+ return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK);
+}
+
/* return non zero if error */
static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr,
uint32_t *e2_ptr, int selector,
@@ -309,10 +326,10 @@ static void tss_set_busy(CPUX86State *env, int tss_selector, bool value,
#define SWITCH_TSS_IRET 1
#define SWITCH_TSS_CALL 2
-/* return 0 if switching to a 16-bit selector */
-static int switch_tss_ra(CPUX86State *env, int tss_selector,
- uint32_t e1, uint32_t e2, int source,
- uint32_t next_eip, uintptr_t retaddr)
+static void switch_tss_ra(CPUX86State *env, int tss_selector,
+ uint32_t e1, uint32_t e2, int source,
+ uint32_t next_eip, bool has_error_code,
+ uint32_t error_code, uintptr_t retaddr)
{
int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, i;
target_ulong tss_base;
@@ -456,10 +473,6 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector,
new_segs[R_GS] = 0;
new_trap = 0;
}
- /* XXX: avoid a compiler warning, see
- http://support.amd.com/us/Processor_TechDocs/24593.pdf
- chapters 12.2.5 and 13.2.4 on how to implement TSS Trap bit */
- (void)new_trap;
/* clear busy bit (it is restartable) */
if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) {
@@ -582,14 +595,43 @@ static int switch_tss_ra(CPUX86State *env, int tss_selector,
cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK);
}
#endif
- return type >> 3;
+
+ if (has_error_code) {
+ int cpl = env->hflags & HF_CPL_MASK;
+ StackAccess sa;
+
+ /* push the error code */
+ sa.env = env;
+ sa.ra = retaddr;
+ sa.mmu_index = x86_mmu_index_pl(env, cpl);
+ sa.sp = env->regs[R_ESP];
+ if (env->segs[R_SS].flags & DESC_B_MASK) {
+ sa.sp_mask = 0xffffffff;
+ } else {
+ sa.sp_mask = 0xffff;
+ }
+ sa.ss_base = env->segs[R_SS].base;
+ if (type & 8) {
+ pushl(&sa, error_code);
+ } else {
+ pushw(&sa, error_code);
+ }
+ SET_ESP(sa.sp, sa.sp_mask);
+ }
+
+ if (new_trap) {
+ env->dr[6] |= DR6_BT;
+ raise_exception_ra(env, EXCP01_DB, retaddr);
+ }
}
-static int switch_tss(CPUX86State *env, int tss_selector,
- uint32_t e1, uint32_t e2, int source,
- uint32_t next_eip)
+static void switch_tss(CPUX86State *env, int tss_selector,
+ uint32_t e1, uint32_t e2, int source,
+ uint32_t next_eip, bool has_error_code,
+ int error_code)
{
- return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0);
+ switch_tss_ra(env, tss_selector, e1, e2, source, next_eip,
+ has_error_code, error_code, 0);
}
static inline unsigned int get_sp_mask(unsigned int e2)
@@ -702,25 +744,8 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
if (!(e2 & DESC_P_MASK)) {
raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2);
}
- shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip);
- if (has_error_code) {
- /* push the error code on the destination stack */
- cpl = env->hflags & HF_CPL_MASK;
- sa.mmu_index = x86_mmu_index_pl(env, cpl);
- if (env->segs[R_SS].flags & DESC_B_MASK) {
- sa.sp_mask = 0xffffffff;
- } else {
- sa.sp_mask = 0xffff;
- }
- sa.sp = env->regs[R_ESP];
- sa.ss_base = env->segs[R_SS].base;
- if (shift) {
- pushl(&sa, error_code);
- } else {
- pushw(&sa, error_code);
- }
- SET_ESP(sa.sp, sa.sp_mask);
- }
+ switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip,
+ has_error_code, error_code);
return;
}
@@ -1516,7 +1541,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
if (dpl < cpl || dpl < rpl) {
raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
}
- switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip, GETPC());
+ switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_JMP, next_eip,
+ false, 0, GETPC());
break;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@@ -1728,7 +1754,8 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
if (dpl < cpl || dpl < rpl) {
raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
}
- switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip, GETPC());
+ switch_tss_ra(env, new_cs, e1, e2, SWITCH_TSS_CALL, next_eip,
+ false, 0, GETPC());
return;
case 4: /* 286 call gate */
case 12: /* 386 call gate */
@@ -2239,7 +2266,8 @@ void helper_iret_protected(CPUX86State *env, int shift, int next_eip)
if (type != 3) {
raise_exception_err_ra(env, EXCP0A_TSS, tss_selector & 0xfffc, GETPC());
}
- switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip, GETPC());
+ switch_tss_ra(env, tss_selector, e1, e2, SWITCH_TSS_IRET, next_eip,
+ false, 0, GETPC());
} else {
helper_ret_protected(env, shift, 1, 0, GETPC());
}
diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h
index ebf1035..ea98e1a 100644
--- a/target/i386/tcg/seg_helper.h
+++ b/target/i386/tcg/seg_helper.h
@@ -20,6 +20,8 @@
#ifndef SEG_HELPER_H
#define SEG_HELPER_H
+#include "cpu.h"
+
//#define DEBUG_PCALL
#ifdef DEBUG_PCALL
@@ -31,12 +33,12 @@
# define LOG_PCALL_STATE(cpu) do { } while (0)
#endif
+int cpu_mmu_index_kernel(CPUX86State *env);
+
/*
* TODO: Convert callers to compute cpu_mmu_index_kernel once
* and use *_mmuidx_ra directly.
*/
-#define cpu_ldub_kernel_ra(e, p, r) \
- cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
#define cpu_lduw_kernel_ra(e, p, r) \
cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
#define cpu_ldl_kernel_ra(e, p, r) \
@@ -44,8 +46,6 @@
#define cpu_ldq_kernel_ra(e, p, r) \
cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r)
-#define cpu_stb_kernel_ra(e, p, v, r) \
- cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
#define cpu_stw_kernel_ra(e, p, v, r) \
cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
#define cpu_stl_kernel_ra(e, p, v, r) \
@@ -53,12 +53,10 @@
#define cpu_stq_kernel_ra(e, p, v, r) \
cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r)
-#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0)
#define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0)
#define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0)
#define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0)
-#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0)
#define cpu_stw_kernel(e, p, v) cpu_stw_kernel_ra(e, p, v, 0)
#define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0)
#define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0)
diff --git a/target/i386/tcg/system/bpt_helper.c b/target/i386/tcg/system/bpt_helper.c
index be232c1..aebb5ca 100644
--- a/target/i386/tcg/system/bpt_helper.c
+++ b/target/i386/tcg/system/bpt_helper.c
@@ -19,8 +19,8 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "exec/helper-proto.h"
+#include "exec/watchpoint.h"
#include "tcg/helper-tcg.h"
diff --git a/target/i386/tcg/system/excp_helper.c b/target/i386/tcg/system/excp_helper.c
index 6876329..50040f6 100644
--- a/target/i386/tcg/system/excp_helper.c
+++ b/target/i386/tcg/system/excp_helper.c
@@ -19,9 +19,12 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "accel/tcg/probe.h"
#include "exec/cputlb.h"
#include "exec/page-protection.h"
+#include "exec/target_page.h"
+#include "exec/tlb-flags.h"
#include "tcg/helper-tcg.h"
typedef struct TranslateParams {
diff --git a/target/i386/tcg/system/misc_helper.c b/target/i386/tcg/system/misc_helper.c
index ce18c75..9c3f5cc 100644
--- a/target/i386/tcg/system/misc_helper.c
+++ b/target/i386/tcg/system/misc_helper.c
@@ -21,8 +21,9 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/address-spaces.h"
+#include "accel/tcg/cpu-ldst.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "exec/cputlb.h"
#include "tcg/helper-tcg.h"
#include "hw/i386/apic.h"
diff --git a/target/i386/tcg/system/seg_helper.c b/target/i386/tcg/system/seg_helper.c
index b07cc9f..d4ea890 100644
--- a/target/i386/tcg/system/seg_helper.c
+++ b/target/i386/tcg/system/seg_helper.c
@@ -23,7 +23,7 @@
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
#include "../seg_helper.h"
diff --git a/target/i386/tcg/system/svm_helper.c b/target/i386/tcg/system/svm_helper.c
index f9982b7..b27049b 100644
--- a/target/i386/tcg/system/svm_helper.c
+++ b/target/i386/tcg/system/svm_helper.c
@@ -22,7 +22,7 @@
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/cputlb.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
/* Secure Virtual Machine helpers */
diff --git a/target/i386/tcg/system/tcg-cpu.c b/target/i386/tcg/system/tcg-cpu.c
index 13a3507..0538a4f 100644
--- a/target/i386/tcg/system/tcg-cpu.c
+++ b/target/i386/tcg/system/tcg-cpu.c
@@ -23,7 +23,8 @@
#include "system/system.h"
#include "qemu/units.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "tcg/tcg-cpu.h"
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index b8aff82..6f5dc06 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -23,7 +23,8 @@
#include "qemu/accel.h"
#include "accel/accel-cpu-target.h"
#include "exec/translation-block.h"
-
+#include "exec/target_page.h"
+#include "accel/tcg/cpu-ops.h"
#include "tcg-cpu.h"
/* Frob eflags into and out of the CPU temporary format. */
@@ -47,6 +48,25 @@ static void x86_cpu_exec_exit(CPUState *cs)
env->eflags = cpu_compute_eflags(env);
}
+static TCGTBCPUState x86_get_tb_cpu_state(CPUState *cs)
+{
+ CPUX86State *env = cpu_env(cs);
+ uint32_t flags, cs_base;
+ vaddr pc;
+
+ flags = env->hflags |
+ (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
+ if (env->hflags & HF_CS64_MASK) {
+ cs_base = 0;
+ pc = env->eip;
+ } else {
+ cs_base = env->segs[R_CS].base;
+ pc = (uint32_t)(cs_base + env->eip);
+ }
+
+ return (TCGTBCPUState){ .pc = pc, .flags = flags, .cs_base = cs_base };
+}
+
static void x86_cpu_synchronize_from_tb(CPUState *cs,
const TranslationBlock *tb)
{
@@ -94,6 +114,23 @@ static void x86_restore_state_to_opc(CPUState *cs,
}
}
+int x86_mmu_index_pl(CPUX86State *env, unsigned pl)
+{
+ int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1;
+ int mmu_index_base =
+ pl == 3 ? MMU_USER64_IDX :
+ !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
+ (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;
+
+ return mmu_index_base + mmu_index_32;
+}
+
+static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+ CPUX86State *env = cpu_env(cs);
+ return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK);
+}
+
#ifndef CONFIG_USER_ONLY
static bool x86_debug_check_breakpoint(CPUState *cs)
{
@@ -103,15 +140,36 @@ static bool x86_debug_check_breakpoint(CPUState *cs)
/* RF disables all architectural breakpoints. */
return !(env->eflags & RF_MASK);
}
-#endif
-#include "accel/tcg/cpu-ops.h"
+static void x86_cpu_exec_reset(CPUState *cs)
+{
+ CPUArchState *env = cpu_env(cs);
+
+ cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
+ do_cpu_init(env_archcpu(env));
+ cs->exception_index = EXCP_HALTED;
+}
-static const TCGCPUOps x86_tcg_ops = {
+static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx,
+ vaddr result, vaddr base)
+{
+ return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result;
+}
+#endif
+
+const TCGCPUOps x86_tcg_ops = {
+ .mttcg_supported = true,
+ .precise_smc = true,
+ /*
+ * The x86 has a strong memory model with some store-after-load re-ordering
+ */
+ .guest_default_memory_order = TCG_MO_ALL & ~TCG_MO_ST_LD,
.initialize = tcg_x86_init,
.translate_code = x86_translate_code,
+ .get_tb_cpu_state = x86_get_tb_cpu_state,
.synchronize_from_tb = x86_cpu_synchronize_from_tb,
.restore_state_to_opc = x86_restore_state_to_opc,
+ .mmu_index = x86_cpu_mmu_index,
.cpu_exec_enter = x86_cpu_exec_enter,
.cpu_exec_exit = x86_cpu_exec_exit,
#ifdef CONFIG_USER_ONLY
@@ -120,9 +178,11 @@ static const TCGCPUOps x86_tcg_ops = {
.record_sigbus = x86_cpu_record_sigbus,
#else
.tlb_fill = x86_cpu_tlb_fill,
+ .pointer_wrap = x86_pointer_wrap,
.do_interrupt = x86_cpu_do_interrupt,
.cpu_exec_halt = x86_cpu_exec_halt,
.cpu_exec_interrupt = x86_cpu_exec_interrupt,
+ .cpu_exec_reset = x86_cpu_exec_reset,
.do_unaligned_access = x86_cpu_do_unaligned_access,
.debug_excp_handler = breakpoint_handler,
.debug_check_breakpoint = x86_debug_check_breakpoint,
@@ -130,17 +190,6 @@ static const TCGCPUOps x86_tcg_ops = {
#endif /* !CONFIG_USER_ONLY */
};
-static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc)
-{
- /* for x86, all cpus use the same set of operations */
- cc->tcg_ops = &x86_tcg_ops;
-}
-
-static void x86_tcg_cpu_class_init(CPUClass *cc)
-{
- cc->init_accel_cpu = x86_tcg_cpu_init_ops;
-}
-
static void x86_tcg_cpu_xsave_init(void)
{
#define XO(bit, field) \
@@ -181,7 +230,7 @@ static void x86_tcg_cpu_instance_init(CPUState *cs)
x86_tcg_cpu_xsave_init();
}
-static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data)
+static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, const void *data)
{
AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
@@ -189,7 +238,6 @@ static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data)
acc->cpu_target_realize = tcg_cpu_realizefn;
#endif /* CONFIG_USER_ONLY */
- acc->cpu_class_init = x86_tcg_cpu_class_init;
acc->cpu_instance_init = x86_tcg_cpu_instance_init;
}
static const TypeInfo x86_tcg_cpu_accel_type_info = {
diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h
index 53a8494..85bcd61 100644
--- a/target/i386/tcg/tcg-cpu.h
+++ b/target/i386/tcg/tcg-cpu.h
@@ -19,6 +19,8 @@
#ifndef TCG_CPU_H
#define TCG_CPU_H
+#include "cpu.h"
+
#define XSAVE_FCW_FSW_OFFSET 0x000
#define XSAVE_FTW_FOP_OFFSET 0x004
#define XSAVE_CWD_RIP_OFFSET 0x008
@@ -76,6 +78,10 @@ QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFF
QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != XSAVE_HI16_ZMM_OFFSET);
QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET);
+extern const TCGCPUOps x86_tcg_ops;
+
bool tcg_cpu_realizefn(CPUState *cs, Error **errp);
+int x86_mmu_index_pl(CPUX86State *env, unsigned pl);
+
#endif /* TCG_CPU_H */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a8935f4..0cb87d0 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -20,11 +20,12 @@
#include "qemu/host-utils.h"
#include "cpu.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-mmu-index.h"
#include "exec/translation-block.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/translator.h"
+#include "exec/target_page.h"
#include "fpu/softfloat.h"
#include "exec/helper-proto.h"
@@ -134,10 +135,7 @@ typedef struct DisasContext {
TCGv T1;
/* TCG local register indexes (only used inside old micro ops) */
- TCGv tmp0;
- TCGv tmp4;
TCGv_i32 tmp2_i32;
- TCGv_i32 tmp3_i32;
TCGv_i64 tmp1_i64;
sigjmp_buf jmpbuf;
@@ -1183,6 +1181,26 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
return cc;
}
+static void gen_neg_setcc(DisasContext *s, int b, TCGv reg)
+{
+ CCPrepare cc = gen_prepare_cc(s, b, reg);
+
+ if (cc.no_setcond) {
+ if (cc.cond == TCG_COND_EQ) {
+ tcg_gen_addi_tl(reg, cc.reg, -1);
+ } else {
+ tcg_gen_neg_tl(reg, cc.reg);
+ }
+ return;
+ }
+
+ if (cc.use_reg2) {
+ tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2);
+ } else {
+ tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm);
+ }
+}
+
static void gen_setcc(DisasContext *s, int b, TCGv reg)
{
CCPrepare cc = gen_prepare_cc(s, b, reg);
@@ -1300,30 +1318,35 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
{
+ TCGv_i32 port = tcg_temp_new_i32();
+
gen_string_movl_A0_EDI(s);
/* Note: we must do this dummy write first to be restartable in
case of page fault. */
tcg_gen_movi_tl(s->T0, 0);
gen_op_st_v(s, ot, s->T0, s->A0);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
- gen_helper_in_func(ot, s->T0, s->tmp2_i32);
+ tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]);
+ tcg_gen_andi_i32(port, port, 0xffff);
+ gen_helper_in_func(ot, s->T0, port);
gen_op_st_v(s, ot, s->T0, s->A0);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
- gen_bpt_io(s, s->tmp2_i32, ot);
+ gen_bpt_io(s, port, ot);
}
static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
{
+ TCGv_i32 port = tcg_temp_new_i32();
+ TCGv_i32 value = tcg_temp_new_i32();
+
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
- gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
+ tcg_gen_trunc_tl_i32(port, cpu_regs[R_EDX]);
+ tcg_gen_andi_i32(port, port, 0xffff);
+ tcg_gen_trunc_tl_i32(value, s->T0);
+ gen_helper_out_func(ot, port, value);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
- gen_bpt_io(s, s->tmp2_i32, ot);
+ gen_bpt_io(s, port, ot);
}
#define REP_MAX 65535
@@ -1560,10 +1583,13 @@ static bool check_cpl0(DisasContext *s)
}
/* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
+static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
bool is_right, TCGv count)
{
target_ulong mask = (ot == MO_64 ? 63 : 31);
+ TCGv cc_src = tcg_temp_new();
+ TCGv tmp = tcg_temp_new();
+ TCGv hishift;
switch (ot) {
case MO_16:
@@ -1571,9 +1597,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
portion by constructing it as a 32-bit value. */
if (is_right) {
- tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
+ tcg_gen_deposit_tl(tmp, s->T0, s->T1, 16, 16);
tcg_gen_mov_tl(s->T1, s->T0);
- tcg_gen_mov_tl(s->T0, s->tmp0);
+ tcg_gen_mov_tl(s->T0, tmp);
} else {
tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
}
@@ -1584,47 +1610,52 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
case MO_32:
#ifdef TARGET_X86_64
/* Concatenate the two 32-bit values and use a 64-bit shift. */
- tcg_gen_subi_tl(s->tmp0, count, 1);
+ tcg_gen_subi_tl(tmp, count, 1);
if (is_right) {
tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
- tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
+ tcg_gen_shr_i64(cc_src, s->T0, tmp);
tcg_gen_shr_i64(s->T0, s->T0, count);
} else {
tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
- tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
+ tcg_gen_shl_i64(cc_src, s->T0, tmp);
tcg_gen_shl_i64(s->T0, s->T0, count);
- tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
+ tcg_gen_shri_i64(cc_src, cc_src, 32);
tcg_gen_shri_i64(s->T0, s->T0, 32);
}
break;
#endif
default:
- tcg_gen_subi_tl(s->tmp0, count, 1);
+ hishift = tcg_temp_new();
+ tcg_gen_subi_tl(tmp, count, 1);
if (is_right) {
- tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
+ tcg_gen_shr_tl(cc_src, s->T0, tmp);
- tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
+ /* mask + 1 - count = mask - tmp = mask ^ tmp */
+ tcg_gen_xori_tl(hishift, tmp, mask);
tcg_gen_shr_tl(s->T0, s->T0, count);
- tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
+ tcg_gen_shl_tl(s->T1, s->T1, hishift);
} else {
- tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
+ tcg_gen_shl_tl(cc_src, s->T0, tmp);
+
+ /* mask + 1 - count = mask - tmp = mask ^ tmp */
+ tcg_gen_xori_tl(hishift, tmp, mask);
+ tcg_gen_shl_tl(s->T0, s->T0, count);
+ tcg_gen_shr_tl(s->T1, s->T1, hishift);
+
if (ot == MO_16) {
/* Only needed if count > 16, for Intel behaviour. */
- tcg_gen_subfi_tl(s->tmp4, 33, count);
- tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
- tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
+ tcg_gen_shri_tl(tmp, s->T1, 1);
+ tcg_gen_or_tl(cc_src, cc_src, tmp);
}
-
- tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
- tcg_gen_shl_tl(s->T0, s->T0, count);
- tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
}
- tcg_gen_movi_tl(s->tmp4, 0);
- tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
- s->tmp4, s->T1);
+ tcg_gen_movcond_tl(TCG_COND_EQ, s->T1,
+ count, tcg_constant_tl(0),
+ tcg_constant_tl(0), s->T1);
tcg_gen_or_tl(s->T0, s->T0, s->T1);
break;
}
+
+ return cc_src;
}
#define X86_MAX_INSN_LENGTH 15
@@ -1843,14 +1874,16 @@ static void gen_bndck(DisasContext *s, X86DecodedInsn *decode,
TCGCond cond, TCGv_i64 bndv)
{
TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
+ TCGv_i32 t32 = tcg_temp_new_i32();
+ TCGv_i64 t64 = tcg_temp_new_i64();
- tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
+ tcg_gen_extu_tl_i64(t64, ea);
if (!CODE64(s)) {
- tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
+ tcg_gen_ext32u_i64(t64, t64);
}
- tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
- tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
- gen_helper_bndck(tcg_env, s->tmp2_i32);
+ tcg_gen_setcond_i64(cond, t64, t64, bndv);
+ tcg_gen_extrl_i64_i32(t32, t64);
+ gen_helper_bndck(tcg_env, t32);
}
/* generate modrm load of memory or register. */
@@ -1992,25 +2025,39 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
/* move SRC to seg_reg and compute if the CPU state may change. Never
call this function with seg_reg == R_CS */
-static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src)
+static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq)
{
if (PE(s) && !VM86(s)) {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, src);
- gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32);
- /* abort translation because the addseg value may change or
- because ss32 may change. For R_SS, translation must always
- stop as a special handling must be done to disable hardware
- interrupts for the next instruction */
- if (seg_reg == R_SS) {
- s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
- } else if (CODE32(s) && seg_reg < R_FS) {
+ TCGv_i32 sel = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(sel, src);
+ gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel);
+
+ /*
+ * For moves to SS, the SS32 flag may change. For CODE32 only, changes
+ * to SS, DS and ES may change the ADDSEG flags.
+ */
+ if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
s->base.is_jmp = DISAS_EOB_NEXT;
}
} else {
gen_op_movl_seg_real(s, seg_reg, src);
- if (seg_reg == R_SS) {
- s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
- }
+ }
+
+ /*
+ * For MOV or POP to SS (but not LSS) translation must always
+ * stop as a special handling must be done to disable hardware
+ * interrupts for the next instruction.
+ *
+ * This is the last instruction, so it's okay to overwrite
+ * HF_TF_MASK; the next TB will start with the flag set.
+ *
+ * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which
+ * might have been set above.
+ */
+ if (inhibit_irq) {
+ s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
+ s->flags &= ~HF_TF_MASK;
}
}
@@ -2148,14 +2195,17 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
level &= 31;
if (level != 0) {
int i;
+ if (level > 1) {
+ TCGv fp = tcg_temp_new();
- /* Copy level-1 pointers from the previous frame. */
- for (i = 1; i < level; ++i) {
- gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i);
- gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
+ /* Copy level-1 pointers from the previous frame. */
+ for (i = 1; i < level; ++i) {
+ gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i);
+ gen_op_ld_v(s, d_ot, fp, s->A0);
- gen_lea_ss_ofs(s, s->A0, s->T1, -size * i);
- gen_op_st_v(s, d_ot, s->tmp0, s->A0);
+ gen_lea_ss_ofs(s, s->A0, s->T1, -size * i);
+ gen_op_st_v(s, d_ot, fp, s->A0);
+ }
}
/* Push the current FrameTemp as the last level. */
@@ -2258,7 +2308,7 @@ gen_eob(DisasContext *s, int mode)
if (mode == DISAS_EOB_RECHECK_TF) {
gen_helper_rechecking_single_step(tcg_env);
tcg_gen_exit_tb(NULL, 0);
- } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) {
+ } else if (s->flags & HF_TF_MASK) {
gen_helper_single_step(tcg_env);
} else if (mode == DISAS_JUMP &&
/* give irqs a chance to happen */
@@ -2378,10 +2428,11 @@ static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
int mem_index = s->mem_index;
TCGv_i128 t0 = tcg_temp_new_i128();
TCGv_i128 t1 = tcg_temp_new_i128();
+ TCGv a0_hi = tcg_temp_new();
tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 16);
- tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop);
+ tcg_gen_addi_tl(a0_hi, s->A0, 16);
+ tcg_gen_qemu_ld_i128(t1, a0_hi, mem_index, mop);
tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
@@ -2392,12 +2443,13 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
int mem_index = s->mem_index;
TCGv_i128 t = tcg_temp_new_i128();
+ TCGv a0_hi = tcg_temp_new();
tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 16);
+ tcg_gen_addi_tl(a0_hi, s->A0, 16);
tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
- tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
+ tcg_gen_qemu_st_i128(t, a0_hi, mem_index, mop);
}
#include "emit.c.inc"
@@ -3601,7 +3653,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
return;
illegal_op:
gen_illegal_opcode(s);
- return;
}
#include "decode-new.c.inc"
@@ -3744,11 +3795,8 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
dc->T1 = tcg_temp_new();
dc->A0 = tcg_temp_new();
- dc->tmp0 = tcg_temp_new();
dc->tmp1_i64 = tcg_temp_new_i64();
dc->tmp2_i32 = tcg_temp_new_i32();
- dc->tmp3_i32 = tcg_temp_new_i32();
- dc->tmp4 = tcg_temp_new();
dc->cc_srcT = tcg_temp_new();
}
diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c
index b3bdb78..98fab4cb 100644
--- a/target/i386/tcg/user/excp_helper.c
+++ b/target/i386/tcg/user/excp_helper.c
@@ -19,7 +19,6 @@
#include "qemu/osdep.h"
#include "cpu.h"
-#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr,
diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c
index c45f2ac..263f599 100644
--- a/target/i386/tcg/user/seg_helper.c
+++ b/target/i386/tcg/user/seg_helper.c
@@ -21,8 +21,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/helper-tcg.h"
#include "tcg/seg_helper.h"