aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBlue Swirl <blauwirbel@gmail.com>2013-02-23 17:21:41 +0000
committerBlue Swirl <blauwirbel@gmail.com>2013-02-23 17:21:41 +0000
commitf708e736d0dafc05f8b7e9e73d6440c930b94686 (patch)
treee9239dde2923de6c0c8cab983d93969d08480df3
parent6ab7e5465a4d6188e29398fb43a30dbab1015b75 (diff)
parentf437d0a3c24e471a855da33a086fe529e09a06af (diff)
downloadqemu-f708e736d0dafc05f8b7e9e73d6440c930b94686.zip
qemu-f708e736d0dafc05f8b7e9e73d6440c930b94686.tar.gz
qemu-f708e736d0dafc05f8b7e9e73d6440c930b94686.tar.bz2
Merge branch 'eflags3' of git://github.com/rth7680/qemu
* 'eflags3' of git://github.com/rth7680/qemu: (61 commits) target-i386: Use movcond to implement shiftd. target-i386: Discard CC_OP computation in set_cc_op also target-i386: Use movcond to implement rotate flags. target-i386: Use movcond to implement shift flags. target-i386: Add CC_OP_CLR target-i386: Implement tzcnt and fix lzcnt target-i386: Use clz/ctz for bsf/bsr helpers target-i386: Implement ADX extension target-i386: Implement RORX target-i386: Implement SHLX, SARX, SHRX target-i386: Implement PDEP, PEXT target-i386: Implement MULX target-i386: Implement BZHI target-i386: Implement BLSR, BLSMSK, BLSI target-i386: Implement BEXTR target-i386: Implement ANDN target-i386: Implement MOVBE target-i386: Decode the VEX prefixes target-i386: Tidy prefix parsing target-i386: Use CC_SRC2 for ADC and SBB ...
-rw-r--r--target-i386/cc_helper.c260
-rw-r--r--target-i386/cc_helper_template.h261
-rw-r--r--target-i386/cpu.c18
-rw-r--r--target-i386/cpu.h26
-rw-r--r--target-i386/helper.c13
-rw-r--r--target-i386/helper.h13
-rw-r--r--target-i386/int_helper.c69
-rw-r--r--target-i386/shift_helper_template.h12
-rw-r--r--target-i386/translate.c2637
-rw-r--r--tests/tcg/test-i386.c10
10 files changed, 1870 insertions, 1449 deletions
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 9422003..9daa1a0 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,243 +75,247 @@ const uint8_t parity_table[256] = {
#endif
-static int compute_all_eflags(CPUX86State *env)
+static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1,
+ target_ulong src2)
{
- return CC_SRC;
+ return (src1 & ~CC_C) | (dst * CC_C);
}
-static int compute_c_eflags(CPUX86State *env)
+static target_ulong compute_all_adox(target_ulong dst, target_ulong src1,
+ target_ulong src2)
{
- return CC_SRC & CC_C;
+ return (src1 & ~CC_O) | (src2 * CC_O);
}
-uint32_t helper_cc_compute_all(CPUX86State *env, int op)
+static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
+ target_ulong src2)
+{
+ return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
+}
+
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
+ target_ulong src2, int op)
{
switch (op) {
default: /* should never happen */
return 0;
case CC_OP_EFLAGS:
- return compute_all_eflags(env);
+ return src1;
+ case CC_OP_CLR:
+ return CC_Z;
case CC_OP_MULB:
- return compute_all_mulb(env);
+ return compute_all_mulb(dst, src1);
case CC_OP_MULW:
- return compute_all_mulw(env);
+ return compute_all_mulw(dst, src1);
case CC_OP_MULL:
- return compute_all_mull(env);
+ return compute_all_mull(dst, src1);
case CC_OP_ADDB:
- return compute_all_addb(env);
+ return compute_all_addb(dst, src1);
case CC_OP_ADDW:
- return compute_all_addw(env);
+ return compute_all_addw(dst, src1);
case CC_OP_ADDL:
- return compute_all_addl(env);
+ return compute_all_addl(dst, src1);
case CC_OP_ADCB:
- return compute_all_adcb(env);
+ return compute_all_adcb(dst, src1, src2);
case CC_OP_ADCW:
- return compute_all_adcw(env);
+ return compute_all_adcw(dst, src1, src2);
case CC_OP_ADCL:
- return compute_all_adcl(env);
+ return compute_all_adcl(dst, src1, src2);
case CC_OP_SUBB:
- return compute_all_subb(env);
+ return compute_all_subb(dst, src1);
case CC_OP_SUBW:
- return compute_all_subw(env);
+ return compute_all_subw(dst, src1);
case CC_OP_SUBL:
- return compute_all_subl(env);
+ return compute_all_subl(dst, src1);
case CC_OP_SBBB:
- return compute_all_sbbb(env);
+ return compute_all_sbbb(dst, src1, src2);
case CC_OP_SBBW:
- return compute_all_sbbw(env);
+ return compute_all_sbbw(dst, src1, src2);
case CC_OP_SBBL:
- return compute_all_sbbl(env);
+ return compute_all_sbbl(dst, src1, src2);
case CC_OP_LOGICB:
- return compute_all_logicb(env);
+ return compute_all_logicb(dst, src1);
case CC_OP_LOGICW:
- return compute_all_logicw(env);
+ return compute_all_logicw(dst, src1);
case CC_OP_LOGICL:
- return compute_all_logicl(env);
+ return compute_all_logicl(dst, src1);
case CC_OP_INCB:
- return compute_all_incb(env);
+ return compute_all_incb(dst, src1);
case CC_OP_INCW:
- return compute_all_incw(env);
+ return compute_all_incw(dst, src1);
case CC_OP_INCL:
- return compute_all_incl(env);
+ return compute_all_incl(dst, src1);
case CC_OP_DECB:
- return compute_all_decb(env);
+ return compute_all_decb(dst, src1);
case CC_OP_DECW:
- return compute_all_decw(env);
+ return compute_all_decw(dst, src1);
case CC_OP_DECL:
- return compute_all_decl(env);
+ return compute_all_decl(dst, src1);
case CC_OP_SHLB:
- return compute_all_shlb(env);
+ return compute_all_shlb(dst, src1);
case CC_OP_SHLW:
- return compute_all_shlw(env);
+ return compute_all_shlw(dst, src1);
case CC_OP_SHLL:
- return compute_all_shll(env);
+ return compute_all_shll(dst, src1);
case CC_OP_SARB:
- return compute_all_sarb(env);
+ return compute_all_sarb(dst, src1);
case CC_OP_SARW:
- return compute_all_sarw(env);
+ return compute_all_sarw(dst, src1);
case CC_OP_SARL:
- return compute_all_sarl(env);
+ return compute_all_sarl(dst, src1);
+
+ case CC_OP_BMILGB:
+ return compute_all_bmilgb(dst, src1);
+ case CC_OP_BMILGW:
+ return compute_all_bmilgw(dst, src1);
+ case CC_OP_BMILGL:
+ return compute_all_bmilgl(dst, src1);
+
+ case CC_OP_ADCX:
+ return compute_all_adcx(dst, src1, src2);
+ case CC_OP_ADOX:
+ return compute_all_adox(dst, src1, src2);
+ case CC_OP_ADCOX:
+ return compute_all_adcox(dst, src1, src2);
#ifdef TARGET_X86_64
case CC_OP_MULQ:
- return compute_all_mulq(env);
-
+ return compute_all_mulq(dst, src1);
case CC_OP_ADDQ:
- return compute_all_addq(env);
-
+ return compute_all_addq(dst, src1);
case CC_OP_ADCQ:
- return compute_all_adcq(env);
-
+ return compute_all_adcq(dst, src1, src2);
case CC_OP_SUBQ:
- return compute_all_subq(env);
-
+ return compute_all_subq(dst, src1);
case CC_OP_SBBQ:
- return compute_all_sbbq(env);
-
+ return compute_all_sbbq(dst, src1, src2);
case CC_OP_LOGICQ:
- return compute_all_logicq(env);
-
+ return compute_all_logicq(dst, src1);
case CC_OP_INCQ:
- return compute_all_incq(env);
-
+ return compute_all_incq(dst, src1);
case CC_OP_DECQ:
- return compute_all_decq(env);
-
+ return compute_all_decq(dst, src1);
case CC_OP_SHLQ:
- return compute_all_shlq(env);
-
+ return compute_all_shlq(dst, src1);
case CC_OP_SARQ:
- return compute_all_sarq(env);
+ return compute_all_sarq(dst, src1);
+ case CC_OP_BMILGQ:
+ return compute_all_bmilgq(dst, src1);
#endif
}
}
uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
{
- return helper_cc_compute_all(env, op);
+ return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
}
-uint32_t helper_cc_compute_c(CPUX86State *env, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
+ target_ulong src2, int op)
{
switch (op) {
default: /* should never happen */
+ case CC_OP_LOGICB:
+ case CC_OP_LOGICW:
+ case CC_OP_LOGICL:
+ case CC_OP_LOGICQ:
+ case CC_OP_CLR:
return 0;
case CC_OP_EFLAGS:
- return compute_c_eflags(env);
+ case CC_OP_SARB:
+ case CC_OP_SARW:
+ case CC_OP_SARL:
+ case CC_OP_SARQ:
+ case CC_OP_ADOX:
+ return src1 & 1;
+
+ case CC_OP_INCB:
+ case CC_OP_INCW:
+ case CC_OP_INCL:
+ case CC_OP_INCQ:
+ case CC_OP_DECB:
+ case CC_OP_DECW:
+ case CC_OP_DECL:
+ case CC_OP_DECQ:
+ return src1;
case CC_OP_MULB:
- return compute_c_mull(env);
case CC_OP_MULW:
- return compute_c_mull(env);
case CC_OP_MULL:
- return compute_c_mull(env);
+ case CC_OP_MULQ:
+ return src1 != 0;
+
+ case CC_OP_ADCX:
+ case CC_OP_ADCOX:
+ return dst;
case CC_OP_ADDB:
- return compute_c_addb(env);
+ return compute_c_addb(dst, src1);
case CC_OP_ADDW:
- return compute_c_addw(env);
+ return compute_c_addw(dst, src1);
case CC_OP_ADDL:
- return compute_c_addl(env);
+ return compute_c_addl(dst, src1);
case CC_OP_ADCB:
- return compute_c_adcb(env);
+ return compute_c_adcb(dst, src1, src2);
case CC_OP_ADCW:
- return compute_c_adcw(env);
+ return compute_c_adcw(dst, src1, src2);
case CC_OP_ADCL:
- return compute_c_adcl(env);
+ return compute_c_adcl(dst, src1, src2);
case CC_OP_SUBB:
- return compute_c_subb(env);
+ return compute_c_subb(dst, src1);
case CC_OP_SUBW:
- return compute_c_subw(env);
+ return compute_c_subw(dst, src1);
case CC_OP_SUBL:
- return compute_c_subl(env);
+ return compute_c_subl(dst, src1);
case CC_OP_SBBB:
- return compute_c_sbbb(env);
+ return compute_c_sbbb(dst, src1, src2);
case CC_OP_SBBW:
- return compute_c_sbbw(env);
+ return compute_c_sbbw(dst, src1, src2);
case CC_OP_SBBL:
- return compute_c_sbbl(env);
-
- case CC_OP_LOGICB:
- return compute_c_logicb();
- case CC_OP_LOGICW:
- return compute_c_logicw();
- case CC_OP_LOGICL:
- return compute_c_logicl();
-
- case CC_OP_INCB:
- return compute_c_incl(env);
- case CC_OP_INCW:
- return compute_c_incl(env);
- case CC_OP_INCL:
- return compute_c_incl(env);
-
- case CC_OP_DECB:
- return compute_c_incl(env);
- case CC_OP_DECW:
- return compute_c_incl(env);
- case CC_OP_DECL:
- return compute_c_incl(env);
+ return compute_c_sbbl(dst, src1, src2);
case CC_OP_SHLB:
- return compute_c_shlb(env);
+ return compute_c_shlb(dst, src1);
case CC_OP_SHLW:
- return compute_c_shlw(env);
+ return compute_c_shlw(dst, src1);
case CC_OP_SHLL:
- return compute_c_shll(env);
+ return compute_c_shll(dst, src1);
- case CC_OP_SARB:
- return compute_c_sarl(env);
- case CC_OP_SARW:
- return compute_c_sarl(env);
- case CC_OP_SARL:
- return compute_c_sarl(env);
+ case CC_OP_BMILGB:
+ return compute_c_bmilgb(dst, src1);
+ case CC_OP_BMILGW:
+ return compute_c_bmilgw(dst, src1);
+ case CC_OP_BMILGL:
+ return compute_c_bmilgl(dst, src1);
#ifdef TARGET_X86_64
- case CC_OP_MULQ:
- return compute_c_mull(env);
-
case CC_OP_ADDQ:
- return compute_c_addq(env);
-
+ return compute_c_addq(dst, src1);
case CC_OP_ADCQ:
- return compute_c_adcq(env);
-
+ return compute_c_adcq(dst, src1, src2);
case CC_OP_SUBQ:
- return compute_c_subq(env);
-
+ return compute_c_subq(dst, src1);
case CC_OP_SBBQ:
- return compute_c_sbbq(env);
-
- case CC_OP_LOGICQ:
- return compute_c_logicq();
-
- case CC_OP_INCQ:
- return compute_c_incl(env);
-
- case CC_OP_DECQ:
- return compute_c_incl(env);
-
+ return compute_c_sbbq(dst, src1, src2);
case CC_OP_SHLQ:
- return compute_c_shlq(env);
-
- case CC_OP_SARQ:
- return compute_c_sarl(env);
+ return compute_c_shlq(dst, src1);
+ case CC_OP_BMILGQ:
+ return compute_c_bmilgq(dst, src1);
#endif
}
}
@@ -326,7 +330,7 @@ target_ulong helper_read_eflags(CPUX86State *env)
{
uint32_t eflags;
- eflags = helper_cc_compute_all(env, CC_OP);
+ eflags = cpu_cc_compute_all(env, CC_OP);
eflags |= (DF & DF_MASK);
eflags |= env->eflags & ~(VM_MASK | RF_MASK);
return eflags;
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 1f94e11..607311f 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -18,258 +18,223 @@
*/
#define DATA_BITS (1 << (3 + SHIFT))
-#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1))
#if DATA_BITS == 8
#define SUFFIX b
#define DATA_TYPE uint8_t
-#define DATA_MASK 0xff
#elif DATA_BITS == 16
#define SUFFIX w
#define DATA_TYPE uint16_t
-#define DATA_MASK 0xffff
#elif DATA_BITS == 32
#define SUFFIX l
#define DATA_TYPE uint32_t
-#define DATA_MASK 0xffffffff
#elif DATA_BITS == 64
#define SUFFIX q
#define DATA_TYPE uint64_t
-#define DATA_MASK 0xffffffffffffffffULL
#else
#error unhandled operand size
#endif
+#define SIGN_MASK (((DATA_TYPE)1) << (DATA_BITS - 1))
+
/* dynamic flags computation */
-static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
-
- src1 = CC_SRC;
- src2 = CC_DST - CC_SRC;
- cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ DATA_TYPE src2 = dst - src1;
+
+ cf = dst < src1;
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & CC_A;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
- int cf;
- target_long src1;
-
- src1 = CC_SRC;
- cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
- return cf;
+ return dst < src1;
}
-static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+ DATA_TYPE src3)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
-
- src1 = CC_SRC;
- src2 = CC_DST - CC_SRC - 1;
- cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ DATA_TYPE src2 = dst - src1 - src3;
+
+ cf = (src3 ? dst <= src1 : dst < src1);
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & 0x10;
+ zf = (dst == 0) << 6;
+ sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+ DATA_TYPE src3)
{
- int cf;
- target_long src1;
-
- src1 = CC_SRC;
- cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
- return cf;
+ return src3 ? dst <= src1 : dst < src1;
}
-static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
-
- src1 = CC_DST + CC_SRC;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ DATA_TYPE src1 = dst + src2;
+
+ cf = src1 < src2;
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & CC_A;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
- int cf;
- target_long src1, src2;
+ DATA_TYPE src1 = dst + src2;
- src1 = CC_DST + CC_SRC;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
- return cf;
+ return src1 < src2;
}
-static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+ DATA_TYPE src3)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
-
- src1 = CC_DST + CC_SRC + 1;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+ DATA_TYPE src1 = dst + src2 + src3;
+
+ cf = (src3 ? src1 <= src2 : src1 < src2);
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & 0x10;
+ zf = (dst == 0) << 6;
+ sf = lshift(dst, 8 - DATA_BITS) & 0x80;
+ of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+ DATA_TYPE src3)
{
- int cf;
- target_long src1, src2;
+ DATA_TYPE src1 = dst + src2 + src3;
- src1 = CC_DST + CC_SRC + 1;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
- return cf;
+ return (src3 ? src1 <= src2 : src1 < src2);
}
-static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
cf = 0;
- pf = parity_table[(uint8_t)CC_DST];
+ pf = parity_table[(uint8_t)dst];
af = 0;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
of = 0;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_logic, SUFFIX)(void)
-{
- return 0;
-}
-
-static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
+ DATA_TYPE src2;
- src1 = CC_DST - 1;
+ cf = src1;
+ src1 = dst - 1;
src2 = 1;
- cf = CC_SRC;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & CC_A;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = (dst == SIGN_MASK) * CC_O;
return cf | pf | af | zf | sf | of;
}
-#if DATA_BITS == 32
-static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
-{
- return CC_SRC;
-}
-#endif
-
-static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- target_long src1, src2;
+ DATA_TYPE src2;
- src1 = CC_DST + 1;
+ cf = src1;
+ src1 = dst + 1;
src2 = 1;
- cf = CC_SRC;
- pf = parity_table[(uint8_t)CC_DST];
- af = (CC_DST ^ src1 ^ src2) & 0x10;
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11;
+ pf = parity_table[(uint8_t)dst];
+ af = (dst ^ src1 ^ src2) & CC_A;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = (dst == SIGN_MASK - 1) * CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
- pf = parity_table[(uint8_t)CC_DST];
+ cf = (src1 >> (DATA_BITS - 1)) & CC_C;
+ pf = parity_table[(uint8_t)dst];
af = 0; /* undefined */
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- /* of is defined if shift count == 1 */
- of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ /* of is defined iff shift count == 1 */
+ of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
-{
- return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
-}
-
-#if DATA_BITS == 32
-static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
+static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
- return CC_SRC & 1;
+ return (src1 >> (DATA_BITS - 1)) & CC_C;
}
-#endif
-static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- cf = CC_SRC & 1;
- pf = parity_table[(uint8_t)CC_DST];
+ cf = src1 & 1;
+ pf = parity_table[(uint8_t)dst];
af = 0; /* undefined */
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- /* of is defined if shift count == 1 */
- of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ /* of is defined iff shift count == 1 */
+ of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O;
return cf | pf | af | zf | sf | of;
}
-#if DATA_BITS == 32
-static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
+/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
+ CF are modified and it is slower to do that. Note as well that we
+ don't truncate SRC1 for computing carry to DATA_TYPE. */
+static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1)
{
- int cf;
+ int cf, pf, af, zf, sf, of;
- cf = (CC_SRC != 0);
- return cf;
+ cf = (src1 != 0);
+ pf = parity_table[(uint8_t)dst];
+ af = 0; /* undefined */
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = cf * CC_O;
+ return cf | pf | af | zf | sf | of;
}
-#endif
-/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
- CF are modified and it is slower to do that. */
-static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
+static int glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
{
int cf, pf, af, zf, sf, of;
- cf = (CC_SRC != 0);
- pf = parity_table[(uint8_t)CC_DST];
+ cf = (src1 == 0);
+ pf = 0; /* undefined */
af = 0; /* undefined */
- zf = ((DATA_TYPE)CC_DST == 0) << 6;
- sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
- of = cf << 11;
+ zf = (dst == 0) * CC_Z;
+ sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+ of = 0;
return cf | pf | af | zf | sf | of;
}
+static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+ return src1 == 0;
+}
+
#undef DATA_BITS
#undef SIGN_MASK
#undef DATA_TYPE
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index dfcf86e..5582e5f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -389,10 +389,15 @@ typedef struct x86_def_t {
CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \
- CPUID_EXT_HYPERVISOR)
+ CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
/* missing:
- CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
- CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
+ CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
+ CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
+ CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
+ CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_SSE41, CPUID_EXT_SSE42,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
+ CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX,
+ CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
@@ -401,7 +406,12 @@ typedef struct x86_def_t {
#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
#define TCG_SVM_FEATURES 0
-#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \
+ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
+ /* missing:
+ CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
+ CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
+ CPUID_7_0_EBX_RDSEED */
/* built-in CPU model definitions
*/
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 7577e4f..493dda8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -582,7 +582,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPU_INTERRUPT_TPR CPU_INTERRUPT_TGT_INT_3
-enum {
+typedef enum {
CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
@@ -636,8 +636,19 @@ enum {
CC_OP_SARL,
CC_OP_SARQ,
+ CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
+ CC_OP_BMILGW,
+ CC_OP_BMILGL,
+ CC_OP_BMILGQ,
+
+ CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
+ CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */
+ CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
+
+ CC_OP_CLR, /* Z set, all other flags clear. */
+
CC_OP_NB,
-};
+} CCOp;
typedef struct SegmentCache {
uint32_t selector;
@@ -725,8 +736,9 @@ typedef struct CPUX86State {
stored elsewhere */
/* emulator internal eflags handling */
- target_ulong cc_src;
target_ulong cc_dst;
+ target_ulong cc_src;
+ target_ulong cc_src2;
uint32_t cc_op;
int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
@@ -764,7 +776,6 @@ typedef struct CPUX86State {
XMMReg xmm_regs[CPU_NB_REGS];
XMMReg xmm_t0;
MMXReg mmx_t0;
- target_ulong cc_tmp; /* temporary for rcr/rcl */
/* sysenter registers */
uint32_t sysenter_cs;
@@ -1117,9 +1128,10 @@ static inline int cpu_mmu_index (CPUX86State *env)
#define EIP (env->eip)
#define DF (env->df)
-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP (env->cc_op)
+#define CC_DST (env->cc_dst)
+#define CC_SRC (env->cc_src)
+#define CC_SRC2 (env->cc_src2)
+#define CC_OP (env->cc_op)
/* n must be a constant to be efficient */
static inline target_long lshift(target_long x, int n)
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 4bf9db7..82a731c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -55,7 +55,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env)
/***********************************************************/
/* x86 debug */
-static const char *cc_op_str[] = {
+static const char *cc_op_str[CC_OP_NB] = {
"DYNAMIC",
"EFLAGS",
@@ -108,6 +108,17 @@ static const char *cc_op_str[] = {
"SARW",
"SARL",
"SARQ",
+
+ "BMILGB",
+ "BMILGW",
+ "BMILGL",
+ "BMILGQ",
+
+ "ADCX",
+ "ADOX",
+ "ADCOX",
+
+ "CLR",
};
static void
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 9ed720d..26a0cc8 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
#include "exec/def-helper.h"
-DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int)
-DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int)
+DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_0(lock, void)
DEF_HELPER_0(unlock, void)
@@ -19,6 +19,7 @@ DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl)
DEF_HELPER_2(divq_EAX, void, env, tl)
DEF_HELPER_2(idivq_EAX, void, env, tl)
+DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
#endif
DEF_HELPER_2(aam, void, env, int)
@@ -193,9 +194,11 @@ DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_3(fxsave, void, env, tl, int)
DEF_HELPER_3(fxrstor, void, env, tl, int)
-DEF_HELPER_1(bsf, tl, tl)
-DEF_HELPER_1(bsr, tl, tl)
-DEF_HELPER_2(lzcnt, tl, tl, int)
+
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
/* MMX/SSE */
diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
index 84b812d..3b56075 100644
--- a/target-i386/int_helper.c
+++ b/target-i386/int_helper.c
@@ -385,6 +385,13 @@ void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0)
CC_SRC = r1;
}
+target_ulong helper_umulh(target_ulong t0, target_ulong t1)
+{
+ uint64_t h, l;
+ mulu64(&l, &h, t0, t1);
+ return h;
+}
+
void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0)
{
uint64_t r0, r1;
@@ -440,45 +447,49 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
}
#endif
+#if TARGET_LONG_BITS == 32
+# define ctztl ctz32
+# define clztl clz32
+#else
+# define ctztl ctz64
+# define clztl clz64
+#endif
+
/* bit operations */
-target_ulong helper_bsf(target_ulong t0)
+target_ulong helper_ctz(target_ulong t0)
{
- int count;
- target_ulong res;
-
- res = t0;
- count = 0;
- while ((res & 1) == 0) {
- count++;
- res >>= 1;
- }
- return count;
+ return ctztl(t0);
}
-target_ulong helper_lzcnt(target_ulong t0, int wordsize)
+target_ulong helper_clz(target_ulong t0)
{
- int count;
- target_ulong res, mask;
+ return clztl(t0);
+}
- if (wordsize > 0 && t0 == 0) {
- return wordsize;
- }
- res = t0;
- count = TARGET_LONG_BITS - 1;
- mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
- while ((res & mask) == 0) {
- count--;
- res <<= 1;
- }
- if (wordsize > 0) {
- return wordsize - 1 - count;
+target_ulong helper_pdep(target_ulong src, target_ulong mask)
+{
+ target_ulong dest = 0;
+ int i, o;
+
+ for (i = 0; mask != 0; i++) {
+ o = ctztl(mask);
+ mask &= mask - 1;
+ dest |= ((src >> i) & 1) << o;
}
- return count;
+ return dest;
}
-target_ulong helper_bsr(target_ulong t0)
+target_ulong helper_pext(target_ulong src, target_ulong mask)
{
- return helper_lzcnt(t0, 0);
+ target_ulong dest = 0;
+ int i, o;
+
+ for (o = 0; mask != 0; o++) {
+ i = ctztl(mask);
+ mask &= mask - 1;
+ dest |= ((src >> i) & 1) << o;
+ }
+ return dest;
}
#define SHIFT 0
diff --git a/target-i386/shift_helper_template.h b/target-i386/shift_helper_template.h
index dda0da3..cf91a2d 100644
--- a/target-i386/shift_helper_template.h
+++ b/target-i386/shift_helper_template.h
@@ -55,7 +55,7 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
count = rclb_table[count];
#endif
if (count) {
- eflags = helper_cc_compute_all(env, CC_OP);
+ eflags = env->cc_src;
t0 &= DATA_MASK;
src = t0;
res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
@@ -63,11 +63,9 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
res |= t0 >> (DATA_BITS + 1 - count);
}
t0 = res;
- env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ env->cc_src = (eflags & ~(CC_C | CC_O)) |
(lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
((src >> (DATA_BITS - count)) & CC_C);
- } else {
- env->cc_tmp = -1;
}
return t0;
}
@@ -86,7 +84,7 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
count = rclb_table[count];
#endif
if (count) {
- eflags = helper_cc_compute_all(env, CC_OP);
+ eflags = env->cc_src;
t0 &= DATA_MASK;
src = t0;
res = (t0 >> count) |
@@ -95,11 +93,9 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
res |= t0 << (DATA_BITS + 1 - count);
}
t0 = res;
- env->cc_tmp = (eflags & ~(CC_C | CC_O)) |
+ env->cc_src = (eflags & ~(CC_C | CC_O)) |
(lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
((src >> (count - 1)) & CC_C);
- } else {
- env->cc_tmp = -1;
}
return t0;
}
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 112c310..439d19e 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -23,6 +23,7 @@
#include <inttypes.h>
#include <signal.h>
+#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "tcg-op.h"
@@ -36,6 +37,7 @@
#define PREFIX_LOCK 0x04
#define PREFIX_DATA 0x08
#define PREFIX_ADR 0x10
+#define PREFIX_VEX 0x20
#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
@@ -47,21 +49,29 @@
#define REX_B(s) 0
#endif
+#ifdef TARGET_X86_64
+# define ctztl ctz64
+# define clztl clz64
+#else
+# define ctztl ctz32
+# define clztl clz32
+#endif
+
//#define MACRO_TEST 1
/* global register indexes */
static TCGv_ptr cpu_env;
-static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
+static TCGv cpu_A0;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
/* local temps */
-static TCGv cpu_T[2], cpu_T3;
+static TCGv cpu_T[2];
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;
-static TCGv cpu_tmp5;
static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
@@ -88,8 +98,11 @@ typedef struct DisasContext {
int code64; /* 64 bit code segment */
int rex_x, rex_b;
#endif
+ int vex_l; /* vex vector length */
+ int vex_v; /* vex vvvv register, without 1's compliment. */
int ss32; /* 32 bit stack segment */
- int cc_op; /* current CC operation */
+ CCOp cc_op; /* current CC operation */
+ bool cc_op_dirty;
int addseg; /* non zero if either DS/ES/SS have a non zero base */
int f_st; /* currently unused */
int vm86; /* vm86 mode */
@@ -113,6 +126,7 @@ typedef struct DisasContext {
static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
+static void gen_op(DisasContext *s1, int op, int ot, int d);
/* i386 arith/logic operations */
enum {
@@ -173,6 +187,79 @@ enum {
OR_A0, /* temporary register used when doing address evaluation */
};
+enum {
+ USES_CC_DST = 1,
+ USES_CC_SRC = 2,
+ USES_CC_SRC2 = 4,
+ USES_CC_SRCT = 8,
+};
+
+/* Bit set if the global variable is live after setting CC_OP to X. */
+static const uint8_t cc_op_live[CC_OP_NB] = {
+ [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_EFLAGS] = USES_CC_SRC,
+ [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
+ [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
+ [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_CLR] = 0,
+};
+
+static void set_cc_op(DisasContext *s, CCOp op)
+{
+ int dead;
+
+ if (s->cc_op == op) {
+ return;
+ }
+
+ /* Discard CC computation that will no longer be used. */
+ dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
+ if (dead & USES_CC_DST) {
+ tcg_gen_discard_tl(cpu_cc_dst);
+ }
+ if (dead & USES_CC_SRC) {
+ tcg_gen_discard_tl(cpu_cc_src);
+ }
+ if (dead & USES_CC_SRC2) {
+ tcg_gen_discard_tl(cpu_cc_src2);
+ }
+ if (dead & USES_CC_SRCT) {
+ tcg_gen_discard_tl(cpu_cc_srcT);
+ }
+
+ if (op == CC_OP_DYNAMIC) {
+ /* The DYNAMIC setting is translator only, and should never be
+ stored. Thus we always consider it clean. */
+ s->cc_op_dirty = false;
+ } else {
+ /* Discard any computed CC_OP value (see shifts). */
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ tcg_gen_discard_i32(cpu_cc_op);
+ }
+ s->cc_op_dirty = true;
+ }
+ s->cc_op = op;
+}
+
+static void gen_update_cc_op(DisasContext *s)
+{
+ if (s->cc_op_dirty) {
+ tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
+ s->cc_op_dirty = false;
+ }
+}
+
static inline void gen_op_movl_T0_0(void)
{
tcg_gen_movi_tl(cpu_T[0], 0);
@@ -323,17 +410,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg)
static inline void gen_op_mov_reg_A0(int size, int reg)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
break;
default: /* XXX this shouldn't be reached; abort? */
- case 1:
+ case OT_WORD:
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a mov. */
tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
break;
#endif
@@ -398,11 +485,11 @@ static inline void gen_op_jmp_T0(void)
static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
break;
- case 1:
+ case OT_WORD:
tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a nop. */
@@ -410,7 +497,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
break;
#endif
@@ -420,11 +507,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
static inline void gen_op_add_reg_T0(int size, int reg)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
break;
- case 1:
+ case OT_WORD:
tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a nop. */
@@ -432,18 +519,13 @@ static inline void gen_op_add_reg_T0(int size, int reg)
tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
break;
#endif
}
}
-static inline void gen_op_set_cc_op(int32_t val)
-{
- tcg_gen_movi_i32(cpu_cc_op, val);
-}
-
static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
@@ -506,14 +588,14 @@ static inline void gen_op_lds_T0_A0(int idx)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
break;
default:
- case 2:
+ case OT_LONG:
tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
break;
}
@@ -523,17 +605,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_ld8u(t0, a0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_ld16u(t0, a0, mem_index);
break;
- case 2:
+ case OT_LONG:
tcg_gen_qemu_ld32u(t0, a0, mem_index);
break;
default:
- case 3:
+ case OT_QUAD:
/* Should never happen on 32-bit targets. */
#ifdef TARGET_X86_64
tcg_gen_qemu_ld64(t0, a0, mem_index);
@@ -562,17 +644,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_st8(t0, a0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_st16(t0, a0, mem_index);
break;
- case 2:
+ case OT_LONG:
tcg_gen_qemu_st32(t0, a0, mem_index);
break;
default:
- case 3:
+ case OT_QUAD:
/* Should never happen on 32-bit targets. */
#ifdef TARGET_X86_64
tcg_gen_qemu_st64(t0, a0, mem_index);
@@ -659,38 +741,45 @@ static inline void gen_op_movl_T0_Dshift(int ot)
tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
};
-static void gen_extu(int ot, TCGv reg)
+static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
{
- switch(ot) {
+ switch (size) {
case OT_BYTE:
- tcg_gen_ext8u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext8s_tl(dst, src);
+ } else {
+ tcg_gen_ext8u_tl(dst, src);
+ }
+ return dst;
case OT_WORD:
- tcg_gen_ext16u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext16s_tl(dst, src);
+ } else {
+ tcg_gen_ext16u_tl(dst, src);
+ }
+ return dst;
+#ifdef TARGET_X86_64
case OT_LONG:
- tcg_gen_ext32u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext32s_tl(dst, src);
+ } else {
+ tcg_gen_ext32u_tl(dst, src);
+ }
+ return dst;
+#endif
default:
- break;
+ return src;
}
}
+static void gen_extu(int ot, TCGv reg)
+{
+ gen_ext_tl(reg, reg, ot, false);
+}
+
static void gen_exts(int ot, TCGv reg)
{
- switch(ot) {
- case OT_BYTE:
- tcg_gen_ext8s_tl(reg, reg);
- break;
- case OT_WORD:
- tcg_gen_ext16s_tl(reg, reg);
- break;
- case OT_LONG:
- tcg_gen_ext32s_tl(reg, reg);
- break;
- default:
- break;
- }
+ gen_ext_tl(reg, reg, ot, true);
}
static inline void gen_op_jnz_ecx(int size, int label1)
@@ -710,21 +799,31 @@ static inline void gen_op_jz_ecx(int size, int label1)
static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
{
switch (ot) {
- case 0: gen_helper_inb(v, n); break;
- case 1: gen_helper_inw(v, n); break;
- case 2: gen_helper_inl(v, n); break;
+ case OT_BYTE:
+ gen_helper_inb(v, n);
+ break;
+ case OT_WORD:
+ gen_helper_inw(v, n);
+ break;
+ case OT_LONG:
+ gen_helper_inl(v, n);
+ break;
}
-
}
static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
{
switch (ot) {
- case 0: gen_helper_outb(v, n); break;
- case 1: gen_helper_outw(v, n); break;
- case 2: gen_helper_outl(v, n); break;
+ case OT_BYTE:
+ gen_helper_outb(v, n);
+ break;
+ case OT_WORD:
+ gen_helper_outw(v, n);
+ break;
+ case OT_LONG:
+ gen_helper_outl(v, n);
+ break;
}
-
}
static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
@@ -735,27 +834,25 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
state_saved = 0;
if (s->pe && (s->cpl > s->iopl || s->vm86)) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
state_saved = 1;
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
break;
- case 1:
+ case OT_WORD:
gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
break;
- case 2:
+ case OT_LONG:
gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
break;
}
}
if(s->flags & HF_SVMI_MASK) {
if (!state_saved) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
}
svm_flags |= (1 << (4 + ot));
@@ -778,17 +875,8 @@ static inline void gen_movs(DisasContext *s, int ot)
gen_op_add_reg_T0(s->aflag, R_EDI);
}
-static inline void gen_update_cc_op(DisasContext *s)
-{
- if (s->cc_op != CC_OP_DYNAMIC) {
- gen_op_set_cc_op(s->cc_op);
- s->cc_op = CC_OP_DYNAMIC;
- }
-}
-
static void gen_op_update1_cc(void)
{
- tcg_gen_discard_tl(cpu_cc_src);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
@@ -798,338 +886,392 @@ static void gen_op_update2_cc(void)
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
-static inline void gen_op_cmpl_T0_T1_cc(void)
+static void gen_op_update3_cc(TCGv reg)
{
+ tcg_gen_mov_tl(cpu_cc_src2, reg);
tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
- tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
static inline void gen_op_testl_T0_T1_cc(void)
{
- tcg_gen_discard_tl(cpu_cc_src);
tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}
static void gen_op_update_neg_cc(void)
{
- tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+ tcg_gen_movi_tl(cpu_cc_srcT, 0);
}
-/* compute eflags.C to reg */
-static void gen_compute_eflags_c(TCGv reg)
+/* compute all eflags to cc_src */
+static void gen_compute_eflags(DisasContext *s)
{
- gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
- tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+ TCGv zero, dst, src1, src2;
+ int live, dead;
+
+ if (s->cc_op == CC_OP_EFLAGS) {
+ return;
+ }
+ if (s->cc_op == CC_OP_CLR) {
+ tcg_gen_movi_tl(cpu_cc_src, CC_Z);
+ set_cc_op(s, CC_OP_EFLAGS);
+ return;
+ }
+
+ TCGV_UNUSED(zero);
+ dst = cpu_cc_dst;
+ src1 = cpu_cc_src;
+ src2 = cpu_cc_src2;
+
+ /* Take care to not read values that are not live. */
+ live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
+ dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
+ if (dead) {
+ zero = tcg_const_tl(0);
+ if (dead & USES_CC_DST) {
+ dst = zero;
+ }
+ if (dead & USES_CC_SRC) {
+ src1 = zero;
+ }
+ if (dead & USES_CC_SRC2) {
+ src2 = zero;
+ }
+ }
+
+ gen_update_cc_op(s);
+ gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
+ set_cc_op(s, CC_OP_EFLAGS);
+
+ if (dead) {
+ tcg_temp_free(zero);
+ }
}
-/* compute all eflags to cc_src */
-static void gen_compute_eflags(TCGv reg)
+typedef struct CCPrepare {
+ TCGCond cond;
+ TCGv reg;
+ TCGv reg2;
+ target_ulong imm;
+ target_ulong mask;
+ bool use_reg2;
+ bool no_setcond;
+} CCPrepare;
+
+/* compute eflags.C to reg */
+static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
+{
+ TCGv t0, t1;
+ int size, shift;
+
+ switch (s->cc_op) {
+ case CC_OP_SUBB ... CC_OP_SUBQ:
+ /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
+ size = s->cc_op - CC_OP_SUBB;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ /* If no temporary was used, be careful not to alias t1 and t0. */
+ t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
+ tcg_gen_mov_tl(t0, cpu_cc_srcT);
+ gen_extu(size, t0);
+ goto add_sub;
+
+ case CC_OP_ADDB ... CC_OP_ADDQ:
+ /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
+ size = s->cc_op - CC_OP_ADDB;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+ add_sub:
+ return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
+ .reg2 = t1, .mask = -1, .use_reg2 = true };
+
+ case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+
+ case CC_OP_INCB ... CC_OP_INCQ:
+ case CC_OP_DECB ... CC_OP_DECQ:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = -1, .no_setcond = true };
+
+ case CC_OP_SHLB ... CC_OP_SHLQ:
+ /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
+ size = s->cc_op - CC_OP_SHLB;
+ shift = (8 << size) - 1;
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = (target_ulong)1 << shift };
+
+ case CC_OP_MULB ... CC_OP_MULQ:
+ return (CCPrepare) { .cond = TCG_COND_NE,
+ .reg = cpu_cc_src, .mask = -1 };
+
+ case CC_OP_BMILGB ... CC_OP_BMILGQ:
+ size = s->cc_op - CC_OP_BMILGB;
+ t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+
+ case CC_OP_ADCX:
+ case CC_OP_ADCOX:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
+ .mask = -1, .no_setcond = true };
+
+ case CC_OP_EFLAGS:
+ case CC_OP_SARB ... CC_OP_SARQ:
+ /* CC_SRC & 1 */
+ return (CCPrepare) { .cond = TCG_COND_NE,
+ .reg = cpu_cc_src, .mask = CC_C };
+
+ default:
+ /* The need to compute only C from CC_OP_DYNAMIC is important
+ in efficiently implementing e.g. INC at the start of a TB. */
+ gen_update_cc_op(s);
+ gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
+ cpu_cc_src2, cpu_cc_op);
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+ .mask = -1, .no_setcond = true };
+ }
+}
+
+/* compute eflags.P to reg */
+static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
- gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
- tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+ gen_compute_eflags(s);
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_P };
}
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+/* compute eflags.S to reg */
+static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- switch(jcc_op) {
- case JCC_O:
- gen_compute_eflags(cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
- case JCC_B:
- gen_compute_eflags_c(cpu_T[0]);
- break;
- case JCC_Z:
- gen_compute_eflags(cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
- case JCC_BE:
- gen_compute_eflags(cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
- tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
- case JCC_S:
- gen_compute_eflags(cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
- case JCC_P:
- gen_compute_eflags(cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
- case JCC_L:
- gen_compute_eflags(cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
- tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
- tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
+ switch (s->cc_op) {
+ case CC_OP_DYNAMIC:
+ gen_compute_eflags(s);
+ /* FALLTHRU */
+ case CC_OP_EFLAGS:
+ case CC_OP_ADCX:
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_S };
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
default:
- case JCC_LE:
- gen_compute_eflags(cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
- tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
- tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
- tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
- tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
- break;
+ {
+ int size = (s->cc_op - CC_OP_ADDB) & 3;
+ TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
+ return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
+ }
}
}
-/* return true if setcc_slow is not needed (WARNING: must be kept in
- sync with gen_jcc1) */
-static int is_fast_jcc_case(DisasContext *s, int b)
+/* compute eflags.O to reg */
+static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
- int jcc_op;
- jcc_op = (b >> 1) & 7;
- switch(s->cc_op) {
- /* we optimize the cmp/jcc case */
- case CC_OP_SUBB:
- case CC_OP_SUBW:
- case CC_OP_SUBL:
- case CC_OP_SUBQ:
- if (jcc_op == JCC_O || jcc_op == JCC_P)
- goto slow_jcc;
- break;
+ switch (s->cc_op) {
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
+ .mask = -1, .no_setcond = true };
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+ default:
+ gen_compute_eflags(s);
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_O };
+ }
+}
- /* some jumps are easy to compute */
- case CC_OP_ADDB:
- case CC_OP_ADDW:
- case CC_OP_ADDL:
- case CC_OP_ADDQ:
-
- case CC_OP_LOGICB:
- case CC_OP_LOGICW:
- case CC_OP_LOGICL:
- case CC_OP_LOGICQ:
-
- case CC_OP_INCB:
- case CC_OP_INCW:
- case CC_OP_INCL:
- case CC_OP_INCQ:
-
- case CC_OP_DECB:
- case CC_OP_DECW:
- case CC_OP_DECL:
- case CC_OP_DECQ:
-
- case CC_OP_SHLB:
- case CC_OP_SHLW:
- case CC_OP_SHLL:
- case CC_OP_SHLQ:
- if (jcc_op != JCC_Z && jcc_op != JCC_S)
- goto slow_jcc;
- break;
+/* compute eflags.Z to reg */
+static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
+{
+ switch (s->cc_op) {
+ case CC_OP_DYNAMIC:
+ gen_compute_eflags(s);
+ /* FALLTHRU */
+ case CC_OP_EFLAGS:
+ case CC_OP_ADCX:
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_Z };
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
default:
- slow_jcc:
- return 0;
+ {
+ int size = (s->cc_op - CC_OP_ADDB) & 3;
+ TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+ }
}
- return 1;
}
-/* generate a conditional jump to label 'l1' according to jump opcode
+/* perform a conditional store into register 'reg' according to jump opcode
value 'b'. In the fast case, T0 is guaranted not to be used. */
-static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
int inv, jcc_op, size, cond;
+ CCPrepare cc;
TCGv t0;
inv = b & 1;
jcc_op = (b >> 1) & 7;
- switch(cc_op) {
- /* we optimize the cmp/jcc case */
- case CC_OP_SUBB:
- case CC_OP_SUBW:
- case CC_OP_SUBL:
- case CC_OP_SUBQ:
-
- size = cc_op - CC_OP_SUBB;
- switch(jcc_op) {
- case JCC_Z:
- fast_jcc_z:
- switch(size) {
- case 0:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
- t0 = cpu_tmp0;
- break;
- case 1:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
- t0 = cpu_tmp0;
- break;
-#ifdef TARGET_X86_64
- case 2:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
- t0 = cpu_tmp0;
- break;
-#endif
- default:
- t0 = cpu_cc_dst;
- break;
- }
- tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
- break;
- case JCC_S:
- fast_jcc_s:
- switch(size) {
- case 0:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
- case 1:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
-#ifdef TARGET_X86_64
- case 2:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
-#endif
- default:
- tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
- 0, l1);
- break;
- }
- break;
-
- case JCC_B:
- cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
- goto fast_jcc_b;
+ switch (s->cc_op) {
+ case CC_OP_SUBB ... CC_OP_SUBQ:
+ /* We optimize relational operators for the cmp/jcc case. */
+ size = s->cc_op - CC_OP_SUBB;
+ switch (jcc_op) {
case JCC_BE:
- cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
- fast_jcc_b:
- tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
- switch(size) {
- case 0:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
- break;
- case 1:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
- break;
-#ifdef TARGET_X86_64
- case 2:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
- break;
-#endif
- default:
- t0 = cpu_cc_src;
- break;
- }
- tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+ gen_extu(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
+ .reg2 = t0, .mask = -1, .use_reg2 = true };
break;
-
+
case JCC_L:
- cond = inv ? TCG_COND_GE : TCG_COND_LT;
+ cond = TCG_COND_LT;
goto fast_jcc_l;
case JCC_LE:
- cond = inv ? TCG_COND_GT : TCG_COND_LE;
+ cond = TCG_COND_LE;
fast_jcc_l:
- tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
- switch(size) {
- case 0:
- t0 = cpu_tmp0;
- tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext8s_tl(t0, cpu_cc_src);
- break;
- case 1:
- t0 = cpu_tmp0;
- tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext16s_tl(t0, cpu_cc_src);
- break;
-#ifdef TARGET_X86_64
- case 2:
- t0 = cpu_tmp0;
- tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext32s_tl(t0, cpu_cc_src);
- break;
-#endif
- default:
- t0 = cpu_cc_src;
- break;
- }
- tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
+ tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
+ gen_exts(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
+ cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
+ .reg2 = t0, .mask = -1, .use_reg2 = true };
break;
-
+
default:
goto slow_jcc;
}
break;
-
- /* some jumps are easy to compute */
- case CC_OP_ADDB:
- case CC_OP_ADDW:
- case CC_OP_ADDL:
- case CC_OP_ADDQ:
-
- case CC_OP_ADCB:
- case CC_OP_ADCW:
- case CC_OP_ADCL:
- case CC_OP_ADCQ:
-
- case CC_OP_SBBB:
- case CC_OP_SBBW:
- case CC_OP_SBBL:
- case CC_OP_SBBQ:
-
- case CC_OP_LOGICB:
- case CC_OP_LOGICW:
- case CC_OP_LOGICL:
- case CC_OP_LOGICQ:
-
- case CC_OP_INCB:
- case CC_OP_INCW:
- case CC_OP_INCL:
- case CC_OP_INCQ:
-
- case CC_OP_DECB:
- case CC_OP_DECW:
- case CC_OP_DECL:
- case CC_OP_DECQ:
-
- case CC_OP_SHLB:
- case CC_OP_SHLW:
- case CC_OP_SHLL:
- case CC_OP_SHLQ:
-
- case CC_OP_SARB:
- case CC_OP_SARW:
- case CC_OP_SARL:
- case CC_OP_SARQ:
- switch(jcc_op) {
+
+ default:
+ slow_jcc:
+ /* This actually generates good code for JC, JZ and JS. */
+ switch (jcc_op) {
+ case JCC_O:
+ cc = gen_prepare_eflags_o(s, reg);
+ break;
+ case JCC_B:
+ cc = gen_prepare_eflags_c(s, reg);
+ break;
case JCC_Z:
- size = (cc_op - CC_OP_ADDB) & 3;
- goto fast_jcc_z;
+ cc = gen_prepare_eflags_z(s, reg);
+ break;
+ case JCC_BE:
+ gen_compute_eflags(s);
+ cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_Z | CC_C };
+ break;
case JCC_S:
- size = (cc_op - CC_OP_ADDB) & 3;
- goto fast_jcc_s;
+ cc = gen_prepare_eflags_s(s, reg);
+ break;
+ case JCC_P:
+ cc = gen_prepare_eflags_p(s, reg);
+ break;
+ case JCC_L:
+ gen_compute_eflags(s);
+ if (TCGV_EQUAL(reg, cpu_cc_src)) {
+ reg = cpu_tmp0;
+ }
+ tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+ tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+ cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+ .mask = CC_S };
+ break;
default:
- goto slow_jcc;
+ case JCC_LE:
+ gen_compute_eflags(s);
+ if (TCGV_EQUAL(reg, cpu_cc_src)) {
+ reg = cpu_tmp0;
+ }
+ tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
+ tcg_gen_xor_tl(reg, reg, cpu_cc_src);
+ cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
+ .mask = CC_S | CC_Z };
+ break;
}
break;
- default:
- slow_jcc:
- gen_setcc_slow_T0(s, jcc_op);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
- cpu_T[0], 0, l1);
- break;
+ }
+
+ if (inv) {
+ cc.cond = tcg_invert_cond(cc.cond);
+ }
+ return cc;
+}
+
+static void gen_setcc1(DisasContext *s, int b, TCGv reg)
+{
+ CCPrepare cc = gen_prepare_cc(s, b, reg);
+
+ if (cc.no_setcond) {
+ if (cc.cond == TCG_COND_EQ) {
+ tcg_gen_xori_tl(reg, cc.reg, 1);
+ } else {
+ tcg_gen_mov_tl(reg, cc.reg);
+ }
+ return;
+ }
+
+ if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
+ cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
+ tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
+ tcg_gen_andi_tl(reg, reg, 1);
+ return;
+ }
+ if (cc.mask != -1) {
+ tcg_gen_andi_tl(reg, cc.reg, cc.mask);
+ cc.reg = reg;
+ }
+ if (cc.use_reg2) {
+ tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
+ } else {
+ tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
+ }
+}
+
+static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
+{
+ gen_setcc1(s, JCC_B << 1, reg);
+}
+
+/* generate a conditional jump to label 'l1' according to jump opcode
+ value 'b'. In the fast case, T0 is guaranted not to be used. */
+static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1)
+{
+ CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
+
+ if (cc.mask != -1) {
+ tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
+ cc.reg = cpu_T[0];
+ }
+ if (cc.use_reg2) {
+ tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
+ } else {
+ tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
+ }
+}
+
+/* Generate a conditional jump to label 'l1' according to jump opcode
+ value 'b'. In the fast case, T0 is guaranted not to be used.
+ A translation block must end soon. */
+static inline void gen_jcc1(DisasContext *s, int b, int l1)
+{
+ CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
+
+ gen_update_cc_op(s);
+ if (cc.mask != -1) {
+ tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
+ cc.reg = cpu_T[0];
+ }
+ set_cc_op(s, CC_OP_DYNAMIC);
+ if (cc.use_reg2) {
+ tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
+ } else {
+ tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
}
}
@@ -1168,21 +1310,19 @@ static inline void gen_lods(DisasContext *s, int ot)
static inline void gen_scas(DisasContext *s, int ot)
{
- gen_op_mov_TN_reg(OT_LONG, 0, R_EAX);
gen_string_movl_A0_EDI(s);
gen_op_ld_T1_A0(ot + s->mem_index);
- gen_op_cmpl_T0_T1_cc();
+ gen_op(s, OP_CMPL, ot, R_EAX);
gen_op_movl_T0_Dshift(ot);
gen_op_add_reg_T0(s->aflag, R_EDI);
}
static inline void gen_cmps(DisasContext *s, int ot)
{
- gen_string_movl_A0_ESI(s);
- gen_op_ld_T0_A0(ot + s->mem_index);
gen_string_movl_A0_EDI(s);
gen_op_ld_T1_A0(ot + s->mem_index);
- gen_op_cmpl_T0_T1_cc();
+ gen_string_movl_A0_ESI(s);
+ gen_op(s, OP_CMPL, ot, OR_TMP0);
gen_op_movl_T0_Dshift(ot);
gen_op_add_reg_T0(s->aflag, R_ESI);
gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1256,8 +1396,8 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \
l2 = gen_jz_ecx_string(s, next_eip); \
gen_ ## op(s, ot); \
gen_op_add_reg_im(s->aflag, R_ECX, -1); \
- gen_op_set_cc_op(CC_OP_SUBB + ot); \
- gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \
+ gen_update_cc_op(s); \
+ gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
if (!s->jmp_opt) \
gen_op_jz_ecx(s->aflag, l2); \
gen_jmp(s, cur_eip); \
@@ -1337,38 +1477,26 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
}
switch(op) {
case OP_ADCL:
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
- gen_compute_eflags_c(cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
- tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
- tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
- s1->cc_op = CC_OP_DYNAMIC;
+ gen_op_update3_cc(cpu_tmp4);
+ set_cc_op(s1, CC_OP_ADCB + ot);
break;
case OP_SBBL:
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
- gen_compute_eflags_c(cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
- tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
- tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
- s1->cc_op = CC_OP_DYNAMIC;
+ gen_op_update3_cc(cpu_tmp4);
+ set_cc_op(s1, CC_OP_SBBB + ot);
break;
case OP_ADDL:
gen_op_addl_T0_T1();
@@ -1377,16 +1505,17 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
else
gen_op_st_T0_A0(ot + s1->mem_index);
gen_op_update2_cc();
- s1->cc_op = CC_OP_ADDB + ot;
+ set_cc_op(s1, CC_OP_ADDB + ot);
break;
case OP_SUBL:
+ tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
if (d != OR_TMP0)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
gen_op_update2_cc();
- s1->cc_op = CC_OP_SUBB + ot;
+ set_cc_op(s1, CC_OP_SUBB + ot);
break;
default:
case OP_ANDL:
@@ -1396,7 +1525,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
else
gen_op_st_T0_A0(ot + s1->mem_index);
gen_op_update1_cc();
- s1->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s1, CC_OP_LOGICB + ot);
break;
case OP_ORL:
tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1405,7 +1534,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
else
gen_op_st_T0_A0(ot + s1->mem_index);
gen_op_update1_cc();
- s1->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s1, CC_OP_LOGICB + ot);
break;
case OP_XORL:
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -1414,11 +1543,13 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
else
gen_op_st_T0_A0(ot + s1->mem_index);
gen_op_update1_cc();
- s1->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s1, CC_OP_LOGICB + ot);
break;
case OP_CMPL:
- gen_op_cmpl_T0_T1_cc();
- s1->cc_op = CC_OP_SUBB + ot;
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
+ tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+ set_cc_op(s1, CC_OP_SUBB + ot);
break;
}
}
@@ -1430,36 +1561,71 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
gen_op_mov_TN_reg(ot, 0, d);
else
gen_op_ld_T0_A0(ot + s1->mem_index);
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
+ gen_compute_eflags_c(s1, cpu_cc_src);
if (c > 0) {
tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
- s1->cc_op = CC_OP_INCB + ot;
+ set_cc_op(s1, CC_OP_INCB + ot);
} else {
tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
- s1->cc_op = CC_OP_DECB + ot;
+ set_cc_op(s1, CC_OP_DECB + ot);
}
if (d != OR_TMP0)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- gen_compute_eflags_c(cpu_cc_src);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
-static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
- int is_right, int is_arith)
+static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
+ TCGv count, bool is_right)
{
- target_ulong mask;
- int shift_label;
- TCGv t0, t1, t2;
+ TCGv_i32 z32, s32, oldop;
+ TCGv z_tl;
+
+ /* Store the results into the CC variables. If we know that the
+ variable must be dead, store unconditionally. Otherwise we'll
+ need to not disrupt the current contents. */
+ z_tl = tcg_const_tl(0);
+ if (cc_op_live[s->cc_op] & USES_CC_DST) {
+ tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
+ result, cpu_cc_dst);
+ } else {
+ tcg_gen_mov_tl(cpu_cc_dst, result);
+ }
+ if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+ tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
+ shm1, cpu_cc_src);
+ } else {
+ tcg_gen_mov_tl(cpu_cc_src, shm1);
+ }
+ tcg_temp_free(z_tl);
- if (ot == OT_QUAD) {
- mask = 0x3f;
+ /* Get the two potential CC_OP values into temporaries. */
+ tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ oldop = cpu_cc_op;
} else {
- mask = 0x1f;
+ tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
+ oldop = cpu_tmp3_i32;
}
+ /* Conditionally store the CC_OP value. */
+ z32 = tcg_const_i32(0);
+ s32 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(s32, count);
+ tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
+ tcg_temp_free_i32(z32);
+ tcg_temp_free_i32(s32);
+
+ /* The CC_OP value is no longer predictable. */
+ set_cc_op(s, CC_OP_DYNAMIC);
+}
+
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right, int is_arith)
+{
+ target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+
/* load */
if (op1 == OR_TMP0) {
gen_op_ld_T0_A0(ot + s->mem_index);
@@ -1467,25 +1633,22 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
gen_op_mov_TN_reg(ot, 0, op1);
}
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
-
- tcg_gen_andi_tl(t2, cpu_T[1], mask);
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+ tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);
if (is_right) {
if (is_arith) {
gen_exts(ot, cpu_T[0]);
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
} else {
gen_extu(ot, cpu_T[0]);
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}
} else {
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}
/* store */
@@ -1495,52 +1658,13 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
gen_op_mov_reg_T0(ot, op1);
}
- /* update eflags if non zero shift */
- if (s->cc_op != CC_OP_DYNAMIC) {
- gen_op_set_cc_op(s->cc_op);
- }
-
- tcg_gen_mov_tl(t1, cpu_T[0]);
-
- shift_label = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label);
-
- tcg_gen_addi_tl(t2, t2, -1);
- tcg_gen_mov_tl(cpu_cc_dst, t1);
-
- if (is_right) {
- if (is_arith) {
- tcg_gen_sar_tl(cpu_cc_src, t0, t2);
- } else {
- tcg_gen_shr_tl(cpu_cc_src, t0, t2);
- }
- } else {
- tcg_gen_shl_tl(cpu_cc_src, t0, t2);
- }
-
- if (is_right) {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
- } else {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
- }
-
- gen_set_label(shift_label);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
+ gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
}
static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
int is_right, int is_arith)
{
- int mask;
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
/* load */
if (op1 == OR_TMP0)
@@ -1576,10 +1700,7 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
if (op2 != 0) {
tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- if (is_right)
- s->cc_op = CC_OP_SARB + ot;
- else
- s->cc_op = CC_OP_SHLB + ot;
+ set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
}
}
@@ -1591,187 +1712,180 @@ static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
tcg_gen_shri_tl(ret, arg1, -arg2);
}
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
- int is_right)
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
{
- target_ulong mask;
- int label1, label2, data_bits;
- TCGv t0, t1, t2, a0;
-
- /* XXX: inefficient, but we must use local temps */
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+ TCGv_i32 t0, t1;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- tcg_gen_mov_tl(t1, cpu_T[1]);
-
- tcg_gen_andi_tl(t1, t1, mask);
-
- /* Must test zero case to avoid using undefined behaviour in TCG
- shifts. */
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
-
- if (ot <= OT_WORD)
- tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
- else
- tcg_gen_mov_tl(cpu_tmp0, t1);
-
- gen_extu(ot, t0);
- tcg_gen_mov_tl(t2, t0);
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
- data_bits = 8 << ot;
- /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
- fix TCG definition) */
- if (is_right) {
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
- tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
- tcg_gen_shl_tl(t0, t0, cpu_tmp0);
- } else {
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
- tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
- tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+ switch (ot) {
+ case OT_BYTE:
+ /* Replicate the 8-bit input so that a 32-bit rotate works. */
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
+ goto do_long;
+ case OT_WORD:
+ /* Replicate the 16-bit input so that a 32-bit rotate works. */
+ tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
+ goto do_long;
+ do_long:
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ if (is_right) {
+ tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ } else {
+ tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ break;
+#endif
+ default:
+ if (is_right) {
+ tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else {
+ tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+ break;
}
- tcg_gen_or_tl(t0, t0, cpu_tmp4);
- gen_set_label(label1);
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_reg_v(ot, op1, t0);
+ gen_op_mov_reg_T0(ot, op1);
}
-
- /* update eflags */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
-
- label2 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
-
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
- tcg_gen_xor_tl(cpu_tmp0, t2, t0);
- tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
- tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+
+ /* We'll need the flags computed into CC_SRC. */
+ gen_compute_eflags(s);
+
+ /* The value that was "rotated out" is now present at the other end
+ of the word. Compute C into CC_DST and O into CC_SRC2. Note that
+ since we've computed the flags into CC_SRC, these variables are
+ currently dead. */
if (is_right) {
- tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+ tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+ } else {
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
}
- tcg_gen_andi_tl(t0, t0, CC_C);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
- gen_set_label(label2);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
- tcg_temp_free(a0);
+ tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+ tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
+
+ /* Now conditionally store the new CC_OP value. If the shift count
+ is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
+ Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
+ exactly as we computed above. */
+ t0 = tcg_const_i32(0);
+ t1 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
+ tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+ tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+ tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+
+ /* The CC_OP value is no longer predictable. */
+ set_cc_op(s, CC_OP_DYNAMIC);
}
static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
int is_right)
{
- int mask;
- int data_bits;
- TCGv t0, t1, a0;
-
- /* XXX: inefficient, but we must use local temps */
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+ int shift;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- gen_extu(ot, t0);
- tcg_gen_mov_tl(t1, t0);
-
op2 &= mask;
- data_bits = 8 << ot;
if (op2 != 0) {
- int shift = op2 & ((1 << (3 + ot)) - 1);
- if (is_right) {
- tcg_gen_shri_tl(cpu_tmp4, t0, shift);
- tcg_gen_shli_tl(t0, t0, data_bits - shift);
- }
- else {
- tcg_gen_shli_tl(cpu_tmp4, t0, shift);
- tcg_gen_shri_tl(t0, t0, data_bits - shift);
+ switch (ot) {
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ if (is_right) {
+ tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+ } else {
+ tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ break;
+#endif
+ default:
+ if (is_right) {
+ tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
+ } else {
+ tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
+ }
+ break;
+ case OT_BYTE:
+ mask = 7;
+ goto do_shifts;
+ case OT_WORD:
+ mask = 15;
+ do_shifts:
+ shift = op2 & mask;
+ if (is_right) {
+ shift = mask + 1 - shift;
+ }
+ gen_extu(ot, cpu_T[0]);
+ tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
}
- tcg_gen_or_tl(t0, t0, cpu_tmp4);
}
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_reg_v(ot, op1, t0);
+ gen_op_mov_reg_T0(ot, op1);
}
if (op2 != 0) {
- /* update eflags */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
-
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
- tcg_gen_xor_tl(cpu_tmp0, t1, t0);
- tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
- tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ /* Compute the flags into CC_SRC. */
+ gen_compute_eflags(s);
+
+ /* The value that was "rotated out" is now present at the other end
+ of the word. Compute C into CC_DST and O into CC_SRC2. Note that
+ since we've computed the flags into CC_SRC, these variables are
+ currently dead. */
if (is_right) {
- tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+ tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+ } else {
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
}
- tcg_gen_andi_tl(t0, t0, CC_C);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
- s->cc_op = CC_OP_EFLAGS;
+ tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+ tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
+ set_cc_op(s, CC_OP_ADCOX);
}
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(a0);
}
/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
int is_right)
{
- int label1;
-
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(s);
+ assert(s->cc_op == CC_OP_EFLAGS);
/* load */
if (op1 == OR_TMP0)
@@ -1781,34 +1895,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
if (is_right) {
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 1:
+ case OT_WORD:
gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 2:
+ case OT_LONG:
gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#ifdef TARGET_X86_64
- case 3:
+ case OT_QUAD:
gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#endif
}
} else {
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 1:
+ case OT_WORD:
gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 2:
+ case OT_LONG:
gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#ifdef TARGET_X86_64
- case 3:
+ case OT_QUAD:
gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#endif
@@ -1819,146 +1933,92 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
gen_op_st_T0_A0(ot + s->mem_index);
else
gen_op_mov_reg_T0(ot, op1);
-
- /* update eflags */
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
-
- tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
- gen_set_label(label1);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}
/* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
- int is_right)
+static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
+ bool is_right, TCGv count_in)
{
- int label1, label2, data_bits;
- target_ulong mask;
- TCGv t0, t1, t2, a0;
-
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ target_ulong mask = (ot == OT_QUAD ? 63 : 31);
+ TCGv count;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- tcg_gen_andi_tl(cpu_T3, cpu_T3, mask);
-
- tcg_gen_mov_tl(t1, cpu_T[1]);
- tcg_gen_mov_tl(t2, cpu_T3);
+ count = tcg_temp_new();
+ tcg_gen_andi_tl(count, count_in, mask);
- /* Must test zero case to avoid using undefined behaviour in TCG
- shifts. */
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-
- tcg_gen_addi_tl(cpu_tmp5, t2, -1);
- if (ot == OT_WORD) {
- /* Note: we implement the Intel behaviour for shift count > 16 */
+ switch (ot) {
+ case OT_WORD:
+ /* Note: we implement the Intel behaviour for shift count > 16.
+ This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
+ portion by constructing it as a 32-bit value. */
if (is_right) {
- tcg_gen_andi_tl(t0, t0, 0xffff);
- tcg_gen_shli_tl(cpu_tmp0, t1, 16);
- tcg_gen_or_tl(t0, t0, cpu_tmp0);
- tcg_gen_ext32u_tl(t0, t0);
-
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-
- /* only needed if count > 16, but a test would complicate */
- tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
- tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
-
- tcg_gen_shr_tl(t0, t0, t2);
-
- tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
+ tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
+ tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
} else {
- /* XXX: not optimal */
- tcg_gen_andi_tl(t0, t0, 0xffff);
- tcg_gen_shli_tl(t1, t1, 16);
- tcg_gen_or_tl(t1, t1, t0);
- tcg_gen_ext32u_tl(t1, t1);
-
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
- tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
- tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
- tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
-
- tcg_gen_shl_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
- tcg_gen_shr_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
}
- } else {
- data_bits = 8 << ot;
+ /* FALLTHRU */
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ /* Concatenate the two 32-bit values and use a 64-bit shift. */
+ tcg_gen_subi_tl(cpu_tmp0, count, 1);
if (is_right) {
- if (ot == OT_LONG)
- tcg_gen_ext32u_tl(t0, t0);
-
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+ tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
+ } else {
+ tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
+ tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
+ tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+ tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
+ }
+ break;
+#endif
+ default:
+ tcg_gen_subi_tl(cpu_tmp0, count, 1);
+ if (is_right) {
+ tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
- tcg_gen_shr_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
- tcg_gen_shl_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
-
+ tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
+ tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
} else {
- if (ot == OT_LONG)
- tcg_gen_ext32u_tl(t1, t1);
-
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-
- tcg_gen_shl_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
- tcg_gen_shr_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ if (ot == OT_WORD) {
+ /* Only needed if count > 16, for Intel behaviour. */
+ tcg_gen_subfi_tl(cpu_tmp4, 33, count);
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
+ tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+ }
+
+ tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
+ tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
}
+ tcg_gen_movi_tl(cpu_tmp4, 0);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
+ cpu_tmp4, cpu_T[1]);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ break;
}
- tcg_gen_mov_tl(t1, cpu_tmp4);
- gen_set_label(label1);
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
- } else {
- gen_op_mov_reg_v(ot, op1, t0);
- }
-
- /* update eflags */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
-
- label2 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
-
- tcg_gen_mov_tl(cpu_cc_src, t1);
- tcg_gen_mov_tl(cpu_cc_dst, t0);
- if (is_right) {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+ gen_op_mov_reg_T0(ot, op1);
}
- gen_set_label(label2);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
- tcg_temp_free(a0);
+ gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
+ tcg_temp_free(count);
}
static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
@@ -2362,24 +2422,21 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
static inline void gen_jcc(DisasContext *s, int b,
target_ulong val, target_ulong next_eip)
{
- int l1, l2, cc_op;
+ int l1, l2;
- cc_op = s->cc_op;
- gen_update_cc_op(s);
if (s->jmp_opt) {
l1 = gen_new_label();
- gen_jcc1(s, cc_op, b, l1);
-
+ gen_jcc1(s, b, l1);
+
gen_goto_tb(s, 0, next_eip);
gen_set_label(l1);
gen_goto_tb(s, 1, val);
s->is_jmp = DISAS_TB_JUMP;
} else {
-
l1 = gen_new_label();
l2 = gen_new_label();
- gen_jcc1(s, cc_op, b, l1);
+ gen_jcc1(s, b, l1);
gen_jmp_im(next_eip);
tcg_gen_br(l2);
@@ -2391,32 +2448,32 @@ static inline void gen_jcc(DisasContext *s, int b,
}
}
-static void gen_setcc(DisasContext *s, int b)
+static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b,
+ int modrm, int reg)
{
- int inv, jcc_op, l1;
- TCGv t0;
+ CCPrepare cc;
- if (is_fast_jcc_case(s, b)) {
- /* nominal case: we use a jump */
- /* XXX: make it faster by adding new instructions in TCG */
- t0 = tcg_temp_local_new();
- tcg_gen_movi_tl(t0, 0);
- l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
- tcg_gen_movi_tl(t0, 1);
- gen_set_label(l1);
- tcg_gen_mov_tl(cpu_T[0], t0);
- tcg_temp_free(t0);
- } else {
- /* slow case: it is more efficient not to generate a jump,
- although it is questionnable whether this optimization is
- worth to */
- inv = b & 1;
- jcc_op = (b >> 1) & 7;
- gen_setcc_slow_T0(s, jcc_op);
- if (inv) {
- tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
- }
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+ cc = gen_prepare_cc(s, b, cpu_T[1]);
+ if (cc.mask != -1) {
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, cc.reg, cc.mask);
+ cc.reg = t0;
+ }
+ if (!cc.use_reg2) {
+ cc.reg2 = tcg_const_tl(cc.imm);
+ }
+
+ tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
+ cpu_T[0], cpu_regs[reg]);
+ gen_op_mov_reg_T0(ot, reg);
+
+ if (cc.mask != -1) {
+ tcg_temp_free(cc.reg);
+ }
+ if (!cc.use_reg2) {
+ tcg_temp_free(cc.reg2);
}
}
@@ -2442,8 +2499,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
{
if (s->pe && !s->vm86) {
/* XXX: optimize by finding processor state dynamically */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2472,8 +2528,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
/* no SVM activated; fast case */
if (likely(!(s->flags & HF_SVMI_MASK)))
return;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
tcg_const_i64(param));
@@ -2720,8 +2775,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
s->is_jmp = DISAS_TB_JUMP;
@@ -2732,8 +2786,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
static void gen_interrupt(DisasContext *s, int intno,
target_ulong cur_eip, target_ulong next_eip)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
tcg_const_i32(next_eip - cur_eip));
@@ -2742,8 +2795,7 @@ static void gen_interrupt(DisasContext *s, int intno,
static void gen_debug(DisasContext *s, target_ulong cur_eip)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(cur_eip);
gen_helper_debug(cpu_env);
s->is_jmp = DISAS_TB_JUMP;
@@ -2753,8 +2805,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
if needed */
static void gen_eob(DisasContext *s)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
gen_helper_reset_inhibit_irq(cpu_env);
}
@@ -2775,8 +2826,9 @@ static void gen_eob(DisasContext *s)
direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
+ gen_update_cc_op(s);
+ set_cc_op(s, CC_OP_DYNAMIC);
if (s->jmp_opt) {
- gen_update_cc_op(s);
gen_goto_tb(s, tb_num, eip);
s->is_jmp = DISAS_TB_JUMP;
} else {
@@ -2912,8 +2964,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
[0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
(SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
- [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
- [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
+ /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
+ [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
+ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
/* MMX ops and their SSE extensions */
[0x60] = MMX_OP2(punpcklbw),
@@ -3794,11 +3847,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
reg = ((modrm >> 3) & 7) | rex_r;
gen_op_mov_reg_T0(OT_LONG, reg);
break;
+
case 0x138:
- if (s->prefix & PREFIX_REPNZ)
- goto crc32;
case 0x038:
b = modrm;
+ if ((b & 0xf0) == 0xf0) {
+ goto do_0f_38_fx;
+ }
modrm = cpu_ldub_code(env, s->pc++);
rm = modrm & 7;
reg = ((modrm >> 3) & 7) | rex_r;
@@ -3867,39 +3922,420 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
- if (b == 0x17)
- s->cc_op = CC_OP_EFLAGS;
+ if (b == 0x17) {
+ set_cc_op(s, CC_OP_EFLAGS);
+ }
break;
- case 0x338: /* crc32 */
- crc32:
- b = modrm;
+
+ case 0x238:
+ case 0x338:
+ do_0f_38_fx:
+ /* Various integer extensions at 0f 38 f[0-f]. */
+ b = modrm | (b1 << 8);
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
- if (b != 0xf0 && b != 0xf1)
- goto illegal_op;
- if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
- goto illegal_op;
+ switch (b) {
+ case 0x3f0: /* crc32 Gd,Eb */
+ case 0x3f1: /* crc32 Gd,Ey */
+ do_crc32:
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
+ goto illegal_op;
+ }
+ if ((b & 0xff) == 0xf0) {
+ ot = OT_BYTE;
+ } else if (s->dflag != 2) {
+ ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+ } else {
+ ot = OT_QUAD;
+ }
- if (b == 0xf0)
- ot = OT_BYTE;
- else if (b == 0xf1 && s->dflag != 2)
- if (s->prefix & PREFIX_DATA)
- ot = OT_WORD;
- else
- ot = OT_LONG;
- else
- ot = OT_QUAD;
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+ cpu_T[0], tcg_const_i32(8 << ot));
- gen_op_mov_TN_reg(OT_LONG, 0, reg);
- tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
- cpu_T[0], tcg_const_i32(8 << ot));
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
- ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
- gen_op_mov_reg_T0(ot, reg);
+ case 0x1f0: /* crc32 or movbe */
+ case 0x1f1:
+ /* For these insns, the f3 prefix is supposed to have priority
+ over the 66 prefix, but that's not what we implement above
+ setting b1. */
+ if (s->prefix & PREFIX_REPNZ) {
+ goto do_crc32;
+ }
+ /* FALLTHRU */
+ case 0x0f0: /* movbe Gy,My */
+ case 0x0f1: /* movbe My,Gy */
+ if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
+ goto illegal_op;
+ }
+ if (s->dflag != 2) {
+ ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG);
+ } else {
+ ot = OT_QUAD;
+ }
+
+ /* Load the data incoming to the bswap. Note that the TCG
+ implementation of bswap requires the input be zero
+ extended. In the case of the loads, we simply know that
+ gen_op_ld_v via gen_ldst_modrm does that already. */
+ if ((b & 1) == 0) {
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ } else {
+ switch (ot) {
+ case OT_WORD:
+ tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ default:
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ case OT_QUAD:
+ tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]);
+ break;
+ }
+ }
+
+ switch (ot) {
+ case OT_WORD:
+ tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]);
+ break;
+ default:
+ tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]);
+ break;
+#endif
+ }
+
+ if ((b & 1) == 0) {
+ gen_op_mov_reg_T0(ot, reg);
+ } else {
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+ }
+ break;
+
+ case 0x0f2: /* andn Gy, By, Ey */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
+ gen_op_mov_reg_T0(ot, reg);
+ gen_op_update1_cc();
+ set_cc_op(s, CC_OP_LOGICB + ot);
+ break;
+
+ case 0x0f7: /* bextr Gy, Ey, By */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ {
+ TCGv bound, zero;
+
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ /* Extract START, and shift the operand.
+ Shifts larger than operand size get zeros. */
+ tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
+
+ bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+ zero = tcg_const_tl(0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
+ cpu_T[0], zero);
+ tcg_temp_free(zero);
+
+ /* Extract the LEN into a mask. Lengths larger than
+ operand size get all ones. */
+ tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
+ tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
+ cpu_A0, bound);
+ tcg_temp_free(bound);
+ tcg_gen_movi_tl(cpu_T[1], 1);
+ tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0);
+ tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+
+ gen_op_mov_reg_T0(ot, reg);
+ gen_op_update1_cc();
+ set_cc_op(s, CC_OP_LOGICB + ot);
+ }
+ break;
+
+ case 0x0f5: /* bzhi Gy, Ey, By */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+ {
+ TCGv bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31);
+ /* Note that since we're using BMILG (in order to get O
+ cleared) we need to store the inverse into C. */
+ tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
+ cpu_T[1], bound);
+ tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1],
+ bound, bound, cpu_T[1]);
+ tcg_temp_free(bound);
+ }
+ tcg_gen_movi_tl(cpu_A0, -1);
+ tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
+ tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
+ gen_op_mov_reg_T0(ot, reg);
+ gen_op_update1_cc();
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ break;
+
+ case 0x3f6: /* mulx By, Gy, rdx, Ey */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ switch (ot) {
+ TCGv_i64 t0, t1;
+ default:
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+#ifdef TARGET_X86_64
+ tcg_gen_ext32u_i64(t0, cpu_T[0]);
+ tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
+#else
+ tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+ tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]);
+#endif
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_trunc_i64_tl(cpu_T[0], t0);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_trunc_i64_tl(cpu_T[1], t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ gen_op_mov_reg_T0(OT_LONG, s->vex_v);
+ gen_op_mov_reg_T1(OT_LONG, reg);
+ break;
+#ifdef TARGET_X86_64
+ case OT_QUAD:
+ tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
+ tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
+ gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+ break;
+#endif
+ }
+ break;
+
+ case 0x3f5: /* pdep Gy, By, Ey */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ /* Note that by zero-extending the mask operand, we
+ automatically handle zero-extending the result. */
+ if (s->dflag == 2) {
+ tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+ } else {
+ tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+ }
+ gen_helper_pdep(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+ break;
+
+ case 0x2f5: /* pext Gy, By, Ey */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ /* Note that by zero-extending the mask operand, we
+ automatically handle zero-extending the result. */
+ if (s->dflag == 2) {
+ tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+ } else {
+ tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+ }
+ gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+ break;
+
+ case 0x1f6: /* adcx Gy, Ey */
+ case 0x2f6: /* adox Gy, Ey */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
+ goto illegal_op;
+ } else {
+ TCGv carry_in, carry_out;
+ int end_op;
+
+ ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+ /* Re-use the carry-out from a previous round. */
+ TCGV_UNUSED(carry_in);
+ carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
+ switch (s->cc_op) {
+ case CC_OP_ADCX:
+ if (b == 0x1f6) {
+ carry_in = cpu_cc_dst;
+ end_op = CC_OP_ADCX;
+ } else {
+ end_op = CC_OP_ADCOX;
+ }
+ break;
+ case CC_OP_ADOX:
+ if (b == 0x1f6) {
+ end_op = CC_OP_ADCOX;
+ } else {
+ carry_in = cpu_cc_src2;
+ end_op = CC_OP_ADOX;
+ }
+ break;
+ case CC_OP_ADCOX:
+ end_op = CC_OP_ADCOX;
+ carry_in = carry_out;
+ break;
+ default:
+ end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX);
+ break;
+ }
+ /* If we can't reuse carry-out, get it out of EFLAGS. */
+ if (TCGV_IS_UNUSED(carry_in)) {
+ if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+ gen_compute_eflags(s);
+ }
+ carry_in = cpu_tmp0;
+ tcg_gen_shri_tl(carry_in, cpu_cc_src,
+ ctz32(b == 0x1f6 ? CC_C : CC_O));
+ tcg_gen_andi_tl(carry_in, carry_in, 1);
+ }
+
+ switch (ot) {
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ /* If we know TL is 64-bit, and we want a 32-bit
+ result, just do everything in 64-bit arithmetic. */
+ tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
+ tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
+ tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+ tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
+ tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
+ tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
+ break;
+#endif
+ default:
+ /* Otherwise compute the carry-out in two steps. */
+ tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+ tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
+ cpu_T[0], cpu_regs[reg]);
+ tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
+ tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
+ cpu_regs[reg], cpu_T[0]);
+ tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+ break;
+ }
+ /* We began with all flags computed to CC_SRC, and we
+ have now placed the carry-out in CC_DST. All that
+ is left is to record the CC_OP. */
+ set_cc_op(s, end_op);
+ }
+ break;
+
+ case 0x1f7: /* shlx Gy, Ey, By */
+ case 0x2f7: /* sarx Gy, Ey, By */
+ case 0x3f7: /* shrx Gy, Ey, By */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ if (ot == OT_QUAD) {
+ tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
+ } else {
+ tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
+ }
+ if (b == 0x1f7) {
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else if (b == 0x2f7) {
+ if (ot != OT_QUAD) {
+ tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+ }
+ tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else {
+ if (ot != OT_QUAD) {
+ tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+ }
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+
+ case 0x0f3:
+ case 0x1f3:
+ case 0x2f3:
+ case 0x3f3: /* Group 17 */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+ switch (reg & 7) {
+ case 1: /* blsr By,Ey */
+ tcg_gen_neg_tl(cpu_T[1], cpu_T[0]);
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ gen_op_mov_reg_T0(ot, s->vex_v);
+ gen_op_update2_cc();
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ break;
+
+ case 2: /* blsmsk By,Ey */
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ break;
+
+ case 3: /* blsi By, Ey */
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ break;
+
+ default:
+ goto illegal_op;
+ }
+ break;
+
+ default:
+ goto illegal_op;
+ }
break;
+
case 0x03a:
case 0x13a:
b = modrm;
@@ -4070,7 +4506,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
val = cpu_ldub_code(env, s->pc++);
if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
if (s->dflag == 2)
/* The helper must use entire 64-bit gp registers */
@@ -4081,6 +4517,38 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
break;
+
+ case 0x33a:
+ /* Various integer extensions at 0f 3a f[0-f]. */
+ b = modrm | (b1 << 8);
+ modrm = cpu_ldub_code(env, s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ switch (b) {
+ case 0x3f0: /* rorx Gy,Ey, Ib */
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
+ || !(s->prefix & PREFIX_VEX)
+ || s->vex_l != 0) {
+ goto illegal_op;
+ }
+ ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ b = cpu_ldub_code(env, s->pc++);
+ if (ot == OT_QUAD) {
+ tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
+ } else {
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ }
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+
+ default:
+ goto illegal_op;
+ }
+ break;
+
default:
goto illegal_op;
}
@@ -4191,7 +4659,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
break;
}
if (b == 0x2e || b == 0x2f) {
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
}
}
}
@@ -4223,47 +4691,49 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
x86_64_hregs = 0;
#endif
s->rip_offset = 0; /* for relative ip address */
+ s->vex_l = 0;
+ s->vex_v = 0;
next_byte:
b = cpu_ldub_code(env, s->pc);
s->pc++;
- /* check prefixes */
+ /* Collect prefixes. */
+ switch (b) {
+ case 0xf3:
+ prefixes |= PREFIX_REPZ;
+ goto next_byte;
+ case 0xf2:
+ prefixes |= PREFIX_REPNZ;
+ goto next_byte;
+ case 0xf0:
+ prefixes |= PREFIX_LOCK;
+ goto next_byte;
+ case 0x2e:
+ s->override = R_CS;
+ goto next_byte;
+ case 0x36:
+ s->override = R_SS;
+ goto next_byte;
+ case 0x3e:
+ s->override = R_DS;
+ goto next_byte;
+ case 0x26:
+ s->override = R_ES;
+ goto next_byte;
+ case 0x64:
+ s->override = R_FS;
+ goto next_byte;
+ case 0x65:
+ s->override = R_GS;
+ goto next_byte;
+ case 0x66:
+ prefixes |= PREFIX_DATA;
+ goto next_byte;
+ case 0x67:
+ prefixes |= PREFIX_ADR;
+ goto next_byte;
#ifdef TARGET_X86_64
- if (CODE64(s)) {
- switch (b) {
- case 0xf3:
- prefixes |= PREFIX_REPZ;
- goto next_byte;
- case 0xf2:
- prefixes |= PREFIX_REPNZ;
- goto next_byte;
- case 0xf0:
- prefixes |= PREFIX_LOCK;
- goto next_byte;
- case 0x2e:
- s->override = R_CS;
- goto next_byte;
- case 0x36:
- s->override = R_SS;
- goto next_byte;
- case 0x3e:
- s->override = R_DS;
- goto next_byte;
- case 0x26:
- s->override = R_ES;
- goto next_byte;
- case 0x64:
- s->override = R_FS;
- goto next_byte;
- case 0x65:
- s->override = R_GS;
- goto next_byte;
- case 0x66:
- prefixes |= PREFIX_DATA;
- goto next_byte;
- case 0x67:
- prefixes |= PREFIX_ADR;
- goto next_byte;
- case 0x40 ... 0x4f:
+ case 0x40 ... 0x4f:
+ if (CODE64(s)) {
/* REX prefix */
rex_w = (b >> 3) & 1;
rex_r = (b & 0x4) << 1;
@@ -4272,58 +4742,85 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
x86_64_hregs = 1; /* select uniform byte register addressing */
goto next_byte;
}
+ break;
+#endif
+ case 0xc5: /* 2-byte VEX */
+ case 0xc4: /* 3-byte VEX */
+ /* VEX prefixes cannot be used except in 32-bit mode.
+ Otherwise the instruction is LES or LDS. */
+ if (s->code32 && !s->vm86) {
+ static const int pp_prefix[4] = {
+ 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
+ };
+ int vex3, vex2 = cpu_ldub_code(env, s->pc);
+
+ if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
+ /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
+ otherwise the instruction is LES or LDS. */
+ break;
+ }
+ s->pc++;
+
+ /* 4.1.1-4.1.3: No preceeding lock, 66, f2, f3, or rex prefixes. */
+ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
+ | PREFIX_LOCK | PREFIX_DATA)) {
+ goto illegal_op;
+ }
+#ifdef TARGET_X86_64
+ if (x86_64_hregs) {
+ goto illegal_op;
+ }
+#endif
+ rex_r = (~vex2 >> 4) & 8;
+ if (b == 0xc5) {
+ vex3 = vex2;
+ b = cpu_ldub_code(env, s->pc++);
+ } else {
+#ifdef TARGET_X86_64
+ s->rex_x = (~vex2 >> 3) & 8;
+ s->rex_b = (~vex2 >> 2) & 8;
+#endif
+ vex3 = cpu_ldub_code(env, s->pc++);
+ rex_w = (vex3 >> 7) & 1;
+ switch (vex2 & 0x1f) {
+ case 0x01: /* Implied 0f leading opcode bytes. */
+ b = cpu_ldub_code(env, s->pc++) | 0x100;
+ break;
+ case 0x02: /* Implied 0f 38 leading opcode bytes. */
+ b = 0x138;
+ break;
+ case 0x03: /* Implied 0f 3a leading opcode bytes. */
+ b = 0x13a;
+ break;
+ default: /* Reserved for future use. */
+ goto illegal_op;
+ }
+ }
+ s->vex_v = (~vex3 >> 3) & 0xf;
+ s->vex_l = (vex3 >> 2) & 1;
+ prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
+ }
+ break;
+ }
+
+ /* Post-process prefixes. */
+ if (prefixes & PREFIX_DATA) {
+ dflag ^= 1;
+ }
+ if (prefixes & PREFIX_ADR) {
+ aflag ^= 1;
+ }
+#ifdef TARGET_X86_64
+ if (CODE64(s)) {
if (rex_w == 1) {
/* 0x66 is ignored if rex.w is set */
dflag = 2;
- } else {
- if (prefixes & PREFIX_DATA)
- dflag ^= 1;
}
- if (!(prefixes & PREFIX_ADR))
+ if (!(prefixes & PREFIX_ADR)) {
aflag = 2;
- } else
-#endif
- {
- switch (b) {
- case 0xf3:
- prefixes |= PREFIX_REPZ;
- goto next_byte;
- case 0xf2:
- prefixes |= PREFIX_REPNZ;
- goto next_byte;
- case 0xf0:
- prefixes |= PREFIX_LOCK;
- goto next_byte;
- case 0x2e:
- s->override = R_CS;
- goto next_byte;
- case 0x36:
- s->override = R_SS;
- goto next_byte;
- case 0x3e:
- s->override = R_DS;
- goto next_byte;
- case 0x26:
- s->override = R_ES;
- goto next_byte;
- case 0x64:
- s->override = R_FS;
- goto next_byte;
- case 0x65:
- s->override = R_GS;
- goto next_byte;
- case 0x66:
- prefixes |= PREFIX_DATA;
- goto next_byte;
- case 0x67:
- prefixes |= PREFIX_ADR;
- goto next_byte;
}
- if (prefixes & PREFIX_DATA)
- dflag ^= 1;
- if (prefixes & PREFIX_ADR)
- aflag ^= 1;
}
+#endif
s->prefix = prefixes;
s->aflag = aflag;
@@ -4374,10 +4871,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
} else if (op == OP_XORL && rm == reg) {
xor_zero:
/* xor reg, reg optimisation */
+ set_cc_op(s, CC_OP_CLR);
gen_op_movl_T0_0();
- s->cc_op = CC_OP_LOGICB + ot;
gen_op_mov_reg_T0(ot, reg);
- gen_op_update1_cc();
break;
} else {
opreg = rm;
@@ -4490,7 +4986,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
val = insn_get(env, s, ot);
gen_op_movl_T1_im(val);
gen_op_testl_T0_T1_cc();
- s->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 2: /* not */
tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
@@ -4508,7 +5004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_reg_T0(ot, rm);
}
gen_op_update_neg_cc();
- s->cc_op = CC_OP_SUBB + ot;
+ set_cc_op(s, CC_OP_SUBB + ot);
break;
case 4: /* mul */
switch(ot) {
@@ -4521,7 +5017,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_reg_T0(OT_WORD, R_EAX);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
- s->cc_op = CC_OP_MULB;
+ set_cc_op(s, CC_OP_MULB);
break;
case OT_WORD:
gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4534,7 +5030,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
gen_op_mov_reg_T0(OT_WORD, R_EDX);
tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
- s->cc_op = CC_OP_MULW;
+ set_cc_op(s, CC_OP_MULW);
break;
default:
case OT_LONG:
@@ -4566,12 +5062,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
}
#endif
- s->cc_op = CC_OP_MULL;
+ set_cc_op(s, CC_OP_MULL);
break;
#ifdef TARGET_X86_64
case OT_QUAD:
gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
- s->cc_op = CC_OP_MULQ;
+ set_cc_op(s, CC_OP_MULQ);
break;
#endif
}
@@ -4588,7 +5084,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
- s->cc_op = CC_OP_MULB;
+ set_cc_op(s, CC_OP_MULB);
break;
case OT_WORD:
gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
@@ -4602,7 +5098,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
gen_op_mov_reg_T0(OT_WORD, R_EDX);
- s->cc_op = CC_OP_MULW;
+ set_cc_op(s, CC_OP_MULW);
break;
default:
case OT_LONG:
@@ -4636,12 +5132,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
}
#endif
- s->cc_op = CC_OP_MULL;
+ set_cc_op(s, CC_OP_MULL);
break;
#ifdef TARGET_X86_64
case OT_QUAD:
gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
- s->cc_op = CC_OP_MULQ;
+ set_cc_op(s, CC_OP_MULQ);
break;
#endif
}
@@ -4761,8 +5257,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
do_lcall:
if (s->pe && !s->vm86) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4788,8 +5283,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_ldu_T0_A0(OT_WORD + s->mem_index);
do_ljmp:
if (s->pe && !s->vm86) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4822,7 +5316,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
gen_op_mov_TN_reg(ot, 1, reg);
gen_op_testl_T0_T1_cc();
- s->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 0xa8: /* test eAX, Iv */
@@ -4836,7 +5330,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_TN_reg(ot, 0, OR_EAX);
gen_op_movl_T1_im(val);
gen_op_testl_T0_T1_cc();
- s->cc_op = CC_OP_LOGICB + ot;
+ set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 0x98: /* CWDE/CBW */
@@ -4937,7 +5431,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
}
gen_op_mov_reg_T0(ot, reg);
- s->cc_op = CC_OP_MULB + ot;
+ set_cc_op(s, CC_OP_MULB + ot);
break;
case 0x1c0:
case 0x1c1: /* xadd Ev, Gv */
@@ -4964,7 +5458,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_reg_T1(ot, reg);
}
gen_op_update2_cc();
- s->cc_op = CC_OP_ADDB + ot;
+ set_cc_op(s, CC_OP_ADDB + ot);
break;
case 0x1b0:
case 0x1b1: /* cmpxchg Ev, Gv */
@@ -4994,9 +5488,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
rm = 0; /* avoid warning */
}
label1 = gen_new_label();
- tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0);
+ tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
+ gen_extu(ot, t0);
gen_extu(ot, t2);
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+ tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
label2 = gen_new_label();
if (mod == 3) {
gen_op_mov_reg_v(ot, R_EAX, t0);
@@ -5015,8 +5510,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
gen_set_label(label2);
tcg_gen_mov_tl(cpu_cc_src, t0);
- tcg_gen_mov_tl(cpu_cc_dst, t2);
- s->cc_op = CC_OP_SUBB + ot;
+ tcg_gen_mov_tl(cpu_cc_srcT, t2);
+ tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
+ set_cc_op(s, CC_OP_SUBB + ot);
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
@@ -5033,8 +5529,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
goto illegal_op;
gen_jmp_im(pc_start - s->cs_base);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
gen_helper_cmpxchg16b(cpu_env, cpu_A0);
} else
@@ -5043,12 +5538,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_features & CPUID_CX8))
goto illegal_op;
gen_jmp_im(pc_start - s->cs_base);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
gen_helper_cmpxchg8b(cpu_env, cpu_A0);
}
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
/**************************/
@@ -5452,13 +5946,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0xc4: /* les Gv */
- if (CODE64(s))
- goto illegal_op;
+ /* In CODE64 this is VEX3; see above. */
op = R_ES;
goto do_lxx;
case 0xc5: /* lds Gv */
- if (CODE64(s))
- goto illegal_op;
+ /* In CODE64 this is VEX2; see above. */
op = R_DS;
goto do_lxx;
case 0x1b2: /* lss Gv */
@@ -5569,12 +6061,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_TN_reg(ot, 1, reg);
if (shift) {
- val = cpu_ldub_code(env, s->pc++);
- tcg_gen_movi_tl(cpu_T3, val);
+ TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
+ gen_shiftd_rm_T1(s, ot, opreg, op, imm);
+ tcg_temp_free(imm);
} else {
- tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
+ gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
}
- gen_shiftd_rm_T1_T3(s, ot, opreg, op);
break;
/************************/
@@ -5717,8 +6209,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0x0c: /* fldenv mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
break;
@@ -5728,8 +6219,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
break;
case 0x0e: /* fnstenv mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
break;
@@ -5739,27 +6229,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_T0_A0(OT_WORD + s->mem_index);
break;
case 0x1d: /* fldt mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fldt_ST0(cpu_env, cpu_A0);
break;
case 0x1f: /* fstpt mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fstt_ST0(cpu_env, cpu_A0);
gen_helper_fpop(cpu_env);
break;
case 0x2c: /* frstor mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
break;
case 0x2e: /* fnsave mem */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
break;
@@ -5769,14 +6255,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_st_T0_A0(OT_WORD + s->mem_index);
break;
case 0x3c: /* fbld */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fbld_ST0(cpu_env, cpu_A0);
break;
case 0x3e: /* fbstp */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fbst_ST0(cpu_env, cpu_A0);
gen_helper_fpop(cpu_env);
@@ -5814,8 +6298,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
switch(rm) {
case 0: /* fnop */
/* check exceptions (FreeBSD FPU probe) */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fwait(cpu_env);
break;
@@ -5996,18 +6479,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0x1d: /* fucomi */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
gen_helper_fucomi_ST0_FT0(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x1e: /* fcomi */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
gen_helper_fcomi_ST0_FT0(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x28: /* ffree sti */
gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
@@ -6059,20 +6540,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0x3d: /* fucomip */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
gen_helper_fucomi_ST0_FT0(cpu_env);
gen_helper_fpop(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x3e: /* fcomip */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
gen_helper_fcomi_ST0_FT0(cpu_env);
gen_helper_fpop(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x10 ... 0x13: /* fcmovxx */
case 0x18 ... 0x1b:
@@ -6086,7 +6565,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
};
op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, op1, l1);
+ gen_jcc1_noeob(s, op1, l1);
gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
gen_set_label(l1);
}
@@ -6150,7 +6629,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
} else {
gen_scas(s, ot);
- s->cc_op = CC_OP_SUBB + ot;
}
break;
@@ -6166,7 +6644,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
} else {
gen_cmps(s, ot);
- s->cc_op = CC_OP_SUBB + ot;
}
break;
case 0x6c: /* insS */
@@ -6323,8 +6800,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
s->pc += 2;
do_lret:
if (s->pe && !s->vm86) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag),
tcg_const_i32(val));
@@ -6354,21 +6830,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!s->pe) {
/* real mode */
gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
} else if (s->vm86) {
if (s->iopl != 3) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
} else {
gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
}
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
tcg_const_i32(s->pc - s->cs_base));
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
}
gen_eob(s);
break;
@@ -6455,44 +6930,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 0x190 ... 0x19f: /* setcc Gv */
modrm = cpu_ldub_code(env, s->pc++);
- gen_setcc(s, b);
+ gen_setcc1(s, b, cpu_T[0]);
gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
break;
case 0x140 ... 0x14f: /* cmov Gv, Ev */
- {
- int l1;
- TCGv t0;
-
- ot = dflag + OT_WORD;
- modrm = cpu_ldub_code(env, s->pc++);
- reg = ((modrm >> 3) & 7) | rex_r;
- mod = (modrm >> 6) & 3;
- t0 = tcg_temp_local_new();
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
- gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
- } else {
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_v_reg(ot, t0, rm);
- }
-#ifdef TARGET_X86_64
- if (ot == OT_LONG) {
- /* XXX: specific Intel behaviour ? */
- l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
- tcg_gen_mov_tl(cpu_regs[reg], t0);
- gen_set_label(l1);
- tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
- } else
-#endif
- {
- l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
- gen_op_mov_reg_v(ot, reg, t0);
- gen_set_label(l1);
- }
- tcg_temp_free(t0);
- }
+ ot = dflag + OT_WORD;
+ modrm = cpu_ldub_code(env, s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_cmovcc1(env, s, ot, b, modrm, reg);
break;
/************************/
@@ -6502,8 +6947,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (s->vm86 && s->iopl != 3) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_read_eflags(cpu_T[0], cpu_env);
gen_push_T0(s);
}
@@ -6560,7 +7004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
}
gen_pop_update(s);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
/* abort translation because TF/AC flag may change */
gen_jmp_im(s->pc - s->cs_base);
gen_eob(s);
@@ -6570,44 +7014,30 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0x9f: /* lahf */
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s);
/* Note: gen_compute_eflags() only gives the condition codes */
- tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+ tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
gen_op_mov_reg_T0(OT_BYTE, R_AH);
break;
case 0xf5: /* cmc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xf8: /* clc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xf9: /* stc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xfc: /* cld */
tcg_gen_movi_i32(cpu_tmp2_i32, 1);
@@ -6697,7 +7127,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
break;
}
- s->cc_op = CC_OP_SARB + ot;
+ set_cc_op(s, CC_OP_SARB + ot);
if (op != 0) {
if (mod != 3)
gen_op_st_T0_A0(ot + s->mem_index);
@@ -6707,81 +7137,88 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_movi_tl(cpu_cc_dst, 0);
}
break;
- case 0x1bc: /* bsf */
- case 0x1bd: /* bsr */
- {
- int label1;
- TCGv t0;
-
- ot = dflag + OT_WORD;
- modrm = cpu_ldub_code(env, s->pc++);
- reg = ((modrm >> 3) & 7) | rex_r;
- gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0);
- gen_extu(ot, cpu_T[0]);
- t0 = tcg_temp_local_new();
- tcg_gen_mov_tl(t0, cpu_T[0]);
- if ((b & 1) && (prefixes & PREFIX_REPZ) &&
- (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
- switch(ot) {
- case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(16)); break;
- case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(32)); break;
- case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(64)); break;
- }
- gen_op_mov_reg_T0(ot, reg);
+ case 0x1bc: /* bsf / tzcnt */
+ case 0x1bd: /* bsr / lzcnt */
+ ot = dflag + OT_WORD;
+ modrm = cpu_ldub_code(env, s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ gen_extu(ot, cpu_T[0]);
+
+ /* Note that lzcnt and tzcnt are in different extensions. */
+ if ((prefixes & PREFIX_REPZ)
+ && (b & 1
+ ? s->cpuid_ext3_features & CPUID_EXT3_ABM
+ : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+ int size = 8 << ot;
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ if (b & 1) {
+ /* For lzcnt, reduce the target_ulong result by the
+ number of zeros that we expect to find at the top. */
+ gen_helper_clz(cpu_T[0], cpu_T[0]);
+ tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
} else {
- label1 = gen_new_label();
- tcg_gen_movi_tl(cpu_cc_dst, 0);
- tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
- if (b & 1) {
- gen_helper_bsr(cpu_T[0], t0);
- } else {
- gen_helper_bsf(cpu_T[0], t0);
- }
- gen_op_mov_reg_T0(ot, reg);
- tcg_gen_movi_tl(cpu_cc_dst, 1);
- gen_set_label(label1);
- tcg_gen_discard_tl(cpu_cc_src);
- s->cc_op = CC_OP_LOGICB + ot;
+ /* For tzcnt, a zero input must return the operand size:
+ force all bits outside the operand size to 1. */
+ target_ulong mask = (target_ulong)-2 << (size - 1);
+ tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
+ gen_helper_ctz(cpu_T[0], cpu_T[0]);
}
- tcg_temp_free(t0);
+ /* For lzcnt/tzcnt, C and Z bits are defined and are
+ related to the result. */
+ gen_op_update1_cc();
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ } else {
+ /* For bsr/bsf, only the Z bit is defined and it is related
+ to the input and not the result. */
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+ if (b & 1) {
+ /* For bsr, return the bit index of the first 1 bit,
+ not the count of leading zeros. */
+ gen_helper_clz(cpu_T[0], cpu_T[0]);
+ tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
+ } else {
+ gen_helper_ctz(cpu_T[0], cpu_T[0]);
+ }
+ /* ??? The manual says that the output is undefined when the
+ input is zero, but real hardware leaves it unchanged, and
+ real programs appear to depend on that. */
+ tcg_gen_movi_tl(cpu_tmp0, 0);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
+ cpu_regs[reg], cpu_T[0]);
}
+ gen_op_mov_reg_T0(ot, reg);
break;
/************************/
/* bcd */
case 0x27: /* daa */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_daa(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x2f: /* das */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_das(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x37: /* aaa */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_aaa(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x3f: /* aas */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_helper_aas(cpu_env);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0xd4: /* aam */
if (CODE64(s))
@@ -6791,7 +7228,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
} else {
gen_helper_aam(cpu_env, tcg_const_i32(val));
- s->cc_op = CC_OP_LOGICB;
+ set_cc_op(s, CC_OP_LOGICB);
}
break;
case 0xd5: /* aad */
@@ -6799,7 +7236,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
goto illegal_op;
val = cpu_ldub_code(env, s->pc++);
gen_helper_aad(cpu_env, tcg_const_i32(val));
- s->cc_op = CC_OP_LOGICB;
+ set_cc_op(s, CC_OP_LOGICB);
break;
/************************/
/* misc */
@@ -6821,8 +7258,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
(HF_MP_MASK | HF_TS_MASK)) {
gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fwait(cpu_env);
}
@@ -6841,8 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 0xce: /* into */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
break;
@@ -6935,9 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 0xd6: /* salc */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags_c(cpu_T[0]);
+ gen_compute_eflags_c(s, cpu_T[0]);
tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
gen_op_mov_reg_T0(OT_BYTE, R_EAX);
break;
@@ -6961,17 +7394,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
switch(b) {
case 0: /* loopnz */
case 1: /* loopz */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
gen_op_add_reg_im(s->aflag, R_ECX, -1);
gen_op_jz_ecx(s->aflag, l3);
- gen_compute_eflags(cpu_tmp0);
- tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
- if (b == 0) {
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
- } else {
- tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1);
- }
+ gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
break;
case 2: /* loop */
gen_op_add_reg_im(s->aflag, R_ECX, -1);
@@ -6998,8 +7423,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
if (b & 2) {
gen_helper_rdmsr(cpu_env);
@@ -7009,8 +7433,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0x131: /* rdtsc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
if (use_icount)
gen_io_start();
@@ -7021,8 +7444,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 0x133: /* rdpmc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_rdpmc(cpu_env);
break;
@@ -7068,15 +7490,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_jmp_im(pc_start - s->cs_base);
gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag));
/* condition codes are modified only in long mode */
- if (s->lma)
- s->cc_op = CC_OP_EFLAGS;
+ if (s->lma) {
+ set_cc_op(s, CC_OP_EFLAGS);
+ }
gen_eob(s);
}
break;
#endif
case 0x1a2: /* cpuid */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_cpuid(cpu_env);
break;
@@ -7084,8 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
s->is_jmp = DISAS_TB_JUMP;
@@ -7147,14 +7568,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!s->pe || s->vm86)
goto illegal_op;
gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
if (op == 4) {
gen_helper_verr(cpu_env, cpu_T[0]);
} else {
gen_helper_verw(cpu_env, cpu_T[0]);
}
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
default:
goto illegal_op;
@@ -7186,8 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
s->cpl != 0)
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
#ifdef TARGET_X86_64
if (s->aflag == 2) {
@@ -7247,8 +7666,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 2: /* lgdt */
case 3: /* lidt */
if (mod == 3) {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
switch(rm) {
case 0: /* VMRUN */
@@ -7376,8 +7794,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
} else {
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
gen_helper_invlpg(cpu_env, cpu_A0);
@@ -7410,8 +7827,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 1: /* rdtscp */
if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
if (use_icount)
gen_io_start();
@@ -7507,12 +7923,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
} else {
gen_op_mov_reg_v(ot, rm, t0);
}
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
- s->cc_op = CC_OP_EFLAGS;
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
@@ -7530,8 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
reg = ((modrm >> 3) & 7) | rex_r;
gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
t0 = tcg_temp_local_new();
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
if (b == 0x102) {
gen_helper_lar(t0, cpu_env, cpu_T[0]);
} else {
@@ -7542,7 +7954,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
gen_op_mov_reg_v(ot, reg, t0);
gen_set_label(label1);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
tcg_temp_free(t0);
}
break;
@@ -7596,8 +8008,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
case 3:
case 4:
case 8:
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
if (b & 2) {
gen_op_mov_TN_reg(ot, 0, rm);
@@ -7686,8 +8097,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
break;
}
gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
break;
@@ -7700,8 +8110,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
break;
}
gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
+ gen_update_cc_op(s);
gen_jmp_im(pc_start - s->cs_base);
gen_helper_fxrstor(cpu_env, cpu_A0,
tcg_const_i32((s->dflag == 2)));
@@ -7785,7 +8194,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
gen_op_mov_reg_T0(ot, reg);
- s->cc_op = CC_OP_EFLAGS;
+ set_cc_op(s, CC_OP_EFLAGS);
break;
case 0x10e ... 0x10f:
/* 3DNow! instructions, ignore prefixes */
@@ -7820,12 +8229,12 @@ void optimize_flags_init(void)
cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUX86State, cc_op), "cc_op");
- cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
- "cc_src");
cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
"cc_dst");
- cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp),
- "cc_tmp");
+ cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
+ "cc_src");
+ cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
+ "cc_src2");
#ifdef TARGET_X86_64
cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
@@ -7918,6 +8327,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
dc->tf = (flags >> TF_SHIFT) & 1;
dc->singlestep_enabled = env->singlestep_enabled;
dc->cc_op = CC_OP_DYNAMIC;
+ dc->cc_op_dirty = false;
dc->cs_base = cs_base;
dc->tb = tb;
dc->popl_esp_hack = 0;
@@ -7951,16 +8361,15 @@ static inline void gen_intermediate_code_internal(CPUX86State *env,
cpu_T[0] = tcg_temp_new();
cpu_T[1] = tcg_temp_new();
cpu_A0 = tcg_temp_new();
- cpu_T3 = tcg_temp_new();
cpu_tmp0 = tcg_temp_new();
cpu_tmp1_i64 = tcg_temp_new_i64();
cpu_tmp2_i32 = tcg_temp_new_i32();
cpu_tmp3_i32 = tcg_temp_new_i32();
cpu_tmp4 = tcg_temp_new();
- cpu_tmp5 = tcg_temp_new();
cpu_ptr0 = tcg_temp_new_ptr();
cpu_ptr1 = tcg_temp_new_ptr();
+ cpu_cc_srcT = tcg_temp_local_new();
gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index 6dc730d..b05572b 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -209,7 +209,7 @@ static inline long i2l(long v)
#define TEST_LEA16(STR)\
{\
asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
- : "=wq" (res)\
+ : "=r" (res)\
: "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
printf("lea %s = %08lx\n", STR, res);\
}
@@ -925,7 +925,7 @@ void test_fbcd(double a)
void test_fenv(void)
{
- struct QEMU_PACKED {
+ struct __attribute__((__packed__)) {
uint16_t fpuc;
uint16_t dummy1;
uint16_t fpus;
@@ -935,7 +935,7 @@ void test_fenv(void)
uint32_t ignored[4];
long double fpregs[8];
} float_env32;
- struct QEMU_PACKED {
+ struct __attribute__((__packed__)) {
uint16_t fpuc;
uint16_t fpus;
uint16_t fptag;
@@ -1280,7 +1280,7 @@ void test_segs(void)
struct {
uint32_t offset;
uint16_t seg;
- } QEMU_PACKED segoff;
+ } __attribute__((__packed__)) segoff;
ldt.entry_number = 1;
ldt.base_addr = (unsigned long)&seg_data1;
@@ -1828,7 +1828,7 @@ void test_exceptions(void)
printf("lock nop exception:\n");
if (setjmp(jmp_env) == 0) {
/* now execute an invalid instruction */
- asm volatile(".byte 0xf0, 0x90"); /* lock nop */
+ asm volatile(".byte 0xf0, 0x90");
}
printf("INT exception:\n");