target/i386: introduce flags writeback mechanism

ALU instructions can write to both memory and flags. If the CC_SRC* and CC_DST locations have been written already when a memory access causes a fault, the value in CC_SRC* and CC_DST might be interpreted with the wrong CC_OP (the one that is in effect before the instruction. Besides just using the wrong result for the flags, something like subtracting -1 can have disastrous effects if the current CC_OP is CC_OP_EFLAGS: this is because QEMU does not expect bits outside the ALU flags to be set in CC_SRC, and env->eflags can end up set to all-ones. In the case of the attached testcase, this sets IOPL to 3 and would cause an assertion failure if SUB is moved to the new decoder. This mechanism is not really needed for BMI instructions, which can only write to a register, but put it to use anyway for cleanliness. In the case of BZHI, the code has to be modified slightly to ensure that decode->cc_src is written, otherwise the new assertions trigger. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Paolo Bonzini <pbonzini@redhat.com> 2023-10-11 15:26:40 +0200
committer: Paolo Bonzini <pbonzini@redhat.com> 2023-12-29 22:04:30 +0100
commit: e7bbb7cb71b3001c54f23c66848d84e694c47243 (patch)
tree: 934fe1292455dc6a3a4cd3e26c3ed10db4d661af /target
parent: 4b2baf4a555620f29e75b8194ce2d4fed07a58d0 (diff)
download: qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.zip
qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.tar.gz
qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.tar.bz2
4 files changed, 63 insertions, 12 deletions
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ecdd451..7f0786e 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1285,6 +1285,7 @@ typedef enum {
 
     CC_OP_NB,
 } CCOp;
+QEMU_BUILD_BUG_ON(CC_OP_NB >= 128);
 
 typedef struct SegmentCache {
     uint32_t selector;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index f30889d..717d730 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1662,6 +1662,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     bool first = true;
     X86DecodedInsn decode;
     X86DecodeFunc decode_func = decode_root;
+    uint8_t cc_live;
 
     s->has_modrm = false;
 
@@ -1815,6 +1816,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     }
 
     memset(&decode, 0, sizeof(decode));
+    decode.cc_op = -1;
     decode.b = b;
     if (!decode_insn(s, env, decode_func, &decode)) {
         goto illegal_op;
@@ -1953,6 +1955,38 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         decode.e.gen(s, env, &decode);
         gen_writeback(s, &decode, 0, s->T0);
     }
+
+    /*
+     * Write back flags after last memory access.  Some newer ALU instructions, as
+     * well as SSE instructions, write flags in the gen_* function, but that can
+     * cause incorrect tracking of CC_OP for instructions that write to both memory
+     * and flags.
+     */
+    if (decode.cc_op != -1) {
+        if (decode.cc_dst) {
+            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
+        }
+        if (decode.cc_src) {
+            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
+        }
+        if (decode.cc_src2) {
+            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
+        }
+        if (decode.cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
+        }
+        set_cc_op(s, decode.cc_op);
+        cc_live = cc_op_live[decode.cc_op];
+    } else {
+        cc_live = 0;
+    }
+    if (decode.cc_op != CC_OP_DYNAMIC) {
+        assert(!decode.cc_op_dynamic);
+        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
+        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
+        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
+    }
+
     return;
  gp_fault:
     gen_exception_gpf(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 70b6717..25220fc 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -283,6 +283,10 @@ struct X86DecodedInsn {
     target_ulong immediate;
     AddressParts mem;
 
+    TCGv cc_dst, cc_src, cc_src2;
+    TCGv_i32 cc_op_dynamic;
+    int8_t cc_op;
+
     uint8_t b;
 };
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4c2006f..fd120e7 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -339,6 +339,19 @@ static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
     return s->vex_l ? 32 : 16;
 }
 
+static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
+static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    decode->cc_src = s->T1;
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
 static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
 {
     MemOp ot = decode->op[0].ot;
@@ -1027,6 +1040,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
 VSIB_AVX(VPGATHERD, vpgatherd)
 VSIB_AVX(VPGATHERQ, vpgatherq)
 
+/* ADCX/ADOX do not have memory operands and can use set_cc_op.  */
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
     int opposite_cc_op;
@@ -1089,8 +1103,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     MemOp ot = decode->op[0].ot;
 
     tcg_gen_andc_tl(s->T0, s->T1, s->T0);
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1118,10 +1131,10 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
     tcg_gen_andc_tl(s->T0, s->T0, s->T1);
 
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
+/* BLSI do not have memory operands and can use set_cc_op.  */
 static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1133,6 +1146,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSMSK do not have memory operands and can use set_cc_op.  */
 static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1144,6 +1158,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSR do not have memory operands and can use set_cc_op.  */
 static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1164,18 +1179,15 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
     tcg_gen_ext8u_tl(s->T1, s->T1);
 
+    tcg_gen_shl_tl(s->A0, mone, s->T1);
+    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
+    tcg_gen_andc_tl(s->T0, s->T0, s->A0);
     /*
      * Note that since we're using BMILG (in order to get O
      * cleared) we need to store the inverse into C.
      */
-    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound);
-
-    tcg_gen_shl_tl(s->A0, mone, s->T1);
-    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
-    tcg_gen_andc_tl(s->T0, s->T0, s->A0);
-
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
 static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
author	Paolo Bonzini <pbonzini@redhat.com>	2023-10-11 15:26:40 +0200
committer	Paolo Bonzini <pbonzini@redhat.com>	2023-12-29 22:04:30 +0100
commit	e7bbb7cb71b3001c54f23c66848d84e694c47243 (patch)
tree	934fe1292455dc6a3a4cd3e26c3ed10db4d661af /target
parent	4b2baf4a555620f29e75b8194ce2d4fed07a58d0 (diff)
download	qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.zip qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.tar.gz qemu-e7bbb7cb71b3001c54f23c66848d84e694c47243.tar.bz2