aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2022-02-24 00:43:18 +0000
committerRichard Henderson <richard.henderson@linaro.org>2023-01-06 23:07:09 +0000
commit29a5ea738a20cbf8974d48a44e3a213451ded8dd (patch)
treefc106b7088fe01e519d006b7465f091b4221c3a5
parentbfff85184254e96abd53187f49c2624e40a34024 (diff)
downloadqemu-29a5ea738a20cbf8974d48a44e3a213451ded8dd.zip
qemu-29a5ea738a20cbf8974d48a44e3a213451ded8dd.tar.gz
qemu-29a5ea738a20cbf8974d48a44e3a213451ded8dd.tar.bz2
tcg/s390x: Implement ctpop operation
There is an older form that produces per-byte results, and a newer form that produces per-register results. Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--tcg/s390x/tcg-target.c.inc36
-rw-r--r--tcg/s390x/tcg-target.h4
2 files changed, 38 insertions, 2 deletions
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 8254f9f..c0434fa 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -206,6 +206,7 @@ typedef enum S390Opcode {
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
+ RRFc_POPCNT = 0xb9e1,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -1435,6 +1436,32 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ /* With MIE3, and bit 0 of m4 set, we get the complete result. */
+ if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+ if (type == TCG_TYPE_I32) {
+ tgen_ext32u(s, dest, src);
+ src = dest;
+ }
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
+ return;
+ }
+
+ /* Without MIE3, each byte gets the count of bits for the byte. */
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
+
+ /* Multiply to sum each byte at the top of the word. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
+ tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
+ tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2584,6 +2611,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctpop_i32:
+ tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3146,6 +3180,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extract_i32:
case INDEX_op_extract_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index dabdae1..68dcbc6 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -91,7 +91,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_extract_i32 1
#define TCG_TARGET_HAS_sextract_i32 0
@@ -128,7 +128,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 1
#define TCG_TARGET_HAS_ctz_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_extract_i64 1
#define TCG_TARGET_HAS_sextract_i64 0