aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2025-04-03 18:20:06 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2025-04-17 18:23:26 +0200
commit5dcdbd071253e249a76c7771bcf78eca3763a131 (patch)
tree1311dedfd62b837e21254a977c2788b21e93fe7f
parent767149d3d078356073a32238b313cee9d02db5d8 (diff)
downloadqemu-5dcdbd071253e249a76c7771bcf78eca3763a131.zip
qemu-5dcdbd071253e249a76c7771bcf78eca3763a131.tar.gz
qemu-5dcdbd071253e249a76c7771bcf78eca3763a131.tar.bz2
target/i386: tcg: use cout to commonize add/adc/sub/sbb cases
Use the carry-out vector as the basis to compute AF, CF and OF. The cost is pretty much the same, because the carry-out is just four boolean operations, and the code is much smaller because add/adc/sub/sbb now share most of it. A similar algorithm to what is used in target/i386/emulate can also be used for APX, in order to build the result of CCMP/CTEST with a new CC_OP_*. CCMP needs to place into the flags from either a subtraction or a constant value; CTEST likewise place into the flags either an AND or a constant value. The new CC_OP for CCMP and CTEST would store for a successful predcate: - in DST and SRC2, the result of the operation; - in SRC, a carry-out vector for CCMP or zero for CTEST; If the default flag value is used, DST/SRC/SRC2 can be filled with constants: - in DST the negated ZF; - in SRC's top 2 bits, a value that results in the desired OF and CF; - in SRC2 a suitable value (any of 0/1/~0/~1) that can be used instead of DST to compute the desired SF and PF. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--target/i386/cpu.h25
-rw-r--r--target/i386/hvf/x86_flags.c25
-rw-r--r--target/i386/tcg/cc_helper_template.h.inc80
3 files changed, 52 insertions, 78 deletions
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 76f2444..7a8d695 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2843,4 +2843,29 @@ static inline bool ctl_has_irq(CPUX86State *env)
# define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20)
#endif
+/* majority(NOT a, b, c) = (a ^ b) ? b : c */
+#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
+
+/*
+ * ADD_COUT_VEC(x, y) = majority((x + y) ^ x ^ y, x, y)
+ *
+ * If two corresponding bits in x and y are the same, that's the carry
+ * independent of the value (x+y)^x^y. Hence x^y can be replaced with
+ * 1 in (x+y)^x^y, resulting in majority(NOT (x+y), x, y)
+ */
+#define ADD_COUT_VEC(op1, op2, result) \
+ MAJ_INV1(result, op1, op2)
+
+/*
+ * SUB_COUT_VEC(x, y) = NOT majority(x, NOT y, (x - y) ^ x ^ NOT y)
+ * = majority(NOT x, y, (x - y) ^ x ^ y)
+ *
+ * Note that the carry out is actually a borrow, i.e. it is inverted.
+ * If two corresponding bits in x and y are different, the value of the
+ * bit in (x-y)^x^y likewise does not matter. Hence, x^y can be replaced
+ * with 0 in (x-y)^x^y, resulting in majority(NOT x, y, x-y)
+ */
+#define SUB_COUT_VEC(op1, op2, result) \
+ MAJ_INV1(op1, op2, result)
+
#endif /* I386_CPU_H */
diff --git a/target/i386/hvf/x86_flags.c b/target/i386/hvf/x86_flags.c
index 60ab4f0..0c75e04 100644
--- a/target/i386/hvf/x86_flags.c
+++ b/target/i386/hvf/x86_flags.c
@@ -45,31 +45,6 @@
#define LF_MASK_CF (0x01 << LF_BIT_CF)
#define LF_MASK_PO (0x01 << LF_BIT_PO)
-/* majority(NOT a, b, c) = (a ^ b) ? b : c */
-#define MAJ_INV1(a, b, c) ((((a) ^ (b)) & ((b) ^ (c))) ^ (c))
-
-/*
- * ADD_COUT_VEC(x, y) = majority((x + y) ^ x ^ y, x, y)
- *
- * If two corresponding bits in x and y are the same, that's the carry
- * independent of the value (x+y)^x^y. Hence x^y can be replaced with
- * 1 in (x+y)^x^y, resulting in majority(NOT (x+y), x, y)
- */
-#define ADD_COUT_VEC(op1, op2, result) \
- MAJ_INV1(result, op1, op2)
-
-/*
- * SUB_COUT_VEC(x, y) = NOT majority(x, NOT y, (x - y) ^ x ^ NOT y)
- * = majority(NOT x, y, (x - y) ^ x ^ y)
- *
- * Note that the carry out is actually a borrow, i.e. it is inverted.
- * If two corresponding bits in x and y are different, the value of the
- * bit in (x-y)^x^y likewise does not matter. Hence, x^y can be replaced
- * with 0 in (x-y)^x^y, resulting in majority(NOT x, y, x-y)
- */
-#define SUB_COUT_VEC(op1, op2, result) \
- MAJ_INV1(op1, op2, result)
-
/* ******************* */
/* OSZAPC */
/* ******************* */
diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc
index b821e5b..d8fd976 100644
--- a/target/i386/tcg/cc_helper_template.h.inc
+++ b/target/i386/tcg/cc_helper_template.h.inc
@@ -44,18 +44,32 @@
/* dynamic flags computation */
-static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static uint32_t glue(compute_all_cout, SUFFIX)(DATA_TYPE dst, DATA_TYPE carries)
{
- uint32_t cf, pf, af, zf, sf, of;
- DATA_TYPE src2 = dst - src1;
+ uint32_t af_cf, pf, zf, sf, of;
- cf = dst < src1;
+ /* PF, ZF, SF computed from result. */
pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
zf = (dst == 0) * CC_Z;
sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+
+ /*
+ * AF, CF, OF computed from carry out vector. To compute AF and CF, rotate it
+ * left by one so cout(DATA_BITS - 1) is in bit 0 and cout(3) in bit 4.
+ *
+ * To compute OF, place the highest two carry bits into OF and the bit
+ * immediately to the right of it; then, adding CC_O / 2 XORs them.
+ */
+ af_cf = ((carries << 1) | (carries >> (DATA_BITS - 1))) & (CC_A | CC_C);
+ of = (lshift(carries, 12 - DATA_BITS) + CC_O / 2) & CC_O;
+ return pf + zf + sf + af_cf + of;
+}
+
+static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+ DATA_TYPE src2 = dst - src1;
+ DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
@@ -66,25 +80,9 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3;
- DATA_TYPE src2 = dst - src13;
-
- cf = dst < src13;
-#else
DATA_TYPE src2 = dst - src1 - src3;
-
- cf = (src3 ? dst <= src1 : dst < src1);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = ADD_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
@@ -101,16 +99,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
{
- uint32_t cf, pf, af, zf, sf, of;
DATA_TYPE src1 = dst + src2;
-
- cf = src1 < src2;
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & CC_A;
- zf = (dst == 0) * CC_Z;
- sf = lshift(dst, 8 - DATA_BITS) & CC_S;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -123,25 +114,9 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
DATA_TYPE src3)
{
- uint32_t cf, pf, af, zf, sf, of;
-
-#ifdef WIDER_TYPE
- WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3;
- DATA_TYPE src1 = dst + src23;
-
- cf = src1 < src23;
-#else
DATA_TYPE src1 = dst + src2 + src3;
-
- cf = (src3 ? src1 <= src2 : src1 < src2);
-#endif
-
- pf = compute_pf(dst);
- af = (dst ^ src1 ^ src2) & 0x10;
- zf = (dst == 0) << 6;
- sf = lshift(dst, 8 - DATA_BITS) & 0x80;
- of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O;
- return cf + pf + af + zf + sf + of;
+ DATA_TYPE carries = SUB_COUT_VEC(src1, src2, dst);
+ return glue(compute_all_cout, SUFFIX)(dst, carries);
}
static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
@@ -286,6 +261,5 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
#undef DATA_BITS
#undef SIGN_MASK
#undef DATA_TYPE
-#undef DATA_MASK
#undef SUFFIX
#undef WIDER_TYPE