diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2024-10-17 12:10:39 +0200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-01-10 23:34:44 +0100 |
commit | 88716ae79f89bd6510f0c9e182a73ad40d1ff531 (patch) | |
tree | a44489b8d37779b6211c1840af908e689388a7df | |
parent | be27b5149c86f81531f8fc609baf3480fc4d9ca0 (diff) | |
download | qemu-88716ae79f89bd6510f0c9e182a73ad40d1ff531.zip qemu-88716ae79f89bd6510f0c9e182a73ad40d1ff531.tar.gz qemu-88716ae79f89bd6510f0c9e182a73ad40d1ff531.tar.bz2 |
target/i386: improve code generation for BT
Because BT does not write back to its source operand, gen_BT is free to modify
its own copy of that operand (s->T0) so that one of the operands of TSTNE is a
constant (either directly out of gen_BT or after the optimizer's constant
propagation). This produces better and more
optimizable TCG ops. For example, the sequence
movl $0x60013f, %ebx
btl %ecx, %ebx
becomes just
and_i32 tmp1,ecx,$0x1f dead: 1 2 pref=0xffff
shr_i32 tmp0,$0x60013f,tmp1 dead: 1 2 pref=0xffff
and_i32 tmp16,tmp0,$0x1 dead: 1 pref=0xbf80
On s390x, it can use four instructions to isolate bit 0 of 0x60013f >> (ecx & 31):
nilf %r12, 0x1f
lgfi %r11, 0x60013f
srlk %r12, %r11, 0(%r12)
nilf %r12, 1
Previously, it used five instructions to build 1 << (ecx & 31) and compute
TSTEQ, and also needed two more to construct the result of setcond:
nilf %r12, 0x1f
lghi %r11, 1
sllk %r12, %r11, 0(%r12)
lgfi %r9, 0x60013f
nrk %r0, %r12, %r9
lghi %r12, 0
locghilh %r12, 1
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | target/i386/tcg/emit.c.inc | 36 |
1 file changed, 28 insertions, 8 deletions
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 785ff63..5c11542 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1443,8 +1443,9 @@ static TCGv gen_bt_mask(DisasContext *s, X86DecodedInsn *decode) return mask; } -/* Expects truncated bit index in s->T1, 1 << s->T1 in MASK. */ -static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv mask) +/* Expects truncated bit index in COUNT, 1 << COUNT in MASK. */ +static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, + TCGv count, TCGv mask) { TCGv cf; @@ -1467,15 +1468,34 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv decode->cc_src = tcg_temp_new(); decode->cc_dst = cpu_cc_dst; decode->cc_op = CC_OP_SARB + cc_op_size(s->cc_op); - tcg_gen_shr_tl(decode->cc_src, src, s->T1); + tcg_gen_shr_tl(decode->cc_src, src, count); } } static void gen_BT(DisasContext *s, X86DecodedInsn *decode) { - TCGv mask = gen_bt_mask(s, decode); + TCGv count = s->T1; + TCGv mask; + + /* + * Try to ensure that the rhs of the TSTNE condition is a constant (and a + * power of two), as that is more readily available on most TCG backends. + * + * For immediate bit number gen_bt_mask()'s output is already a constant; + * for register bit number, shift the source right and check bit 0. 
+ */ + if (decode->e.op2 == X86_TYPE_I) { + mask = gen_bt_mask(s, decode); + } else { + MemOp ot = decode->op[1].ot; - gen_bt_flags(s, decode, s->T0, mask); + tcg_gen_andi_tl(s->T1, s->T1, (8 << ot) - 1); + tcg_gen_shr_tl(s->T0, s->T0, s->T1); + + count = tcg_constant_tl(0); + mask = tcg_constant_tl(1); + } + gen_bt_flags(s, decode, s->T0, count, mask); } static void gen_BTC(DisasContext *s, X86DecodedInsn *decode) @@ -1491,7 +1511,7 @@ static void gen_BTC(DisasContext *s, X86DecodedInsn *decode) tcg_gen_xor_tl(s->T0, s->T0, mask); } - gen_bt_flags(s, decode, old, mask); + gen_bt_flags(s, decode, old, s->T1, mask); } static void gen_BTR(DisasContext *s, X86DecodedInsn *decode) @@ -1509,7 +1529,7 @@ static void gen_BTR(DisasContext *s, X86DecodedInsn *decode) tcg_gen_andc_tl(s->T0, s->T0, mask); } - gen_bt_flags(s, decode, old, mask); + gen_bt_flags(s, decode, old, s->T1, mask); } static void gen_BTS(DisasContext *s, X86DecodedInsn *decode) @@ -1525,7 +1545,7 @@ static void gen_BTS(DisasContext *s, X86DecodedInsn *decode) tcg_gen_or_tl(s->T0, s->T0, mask); } - gen_bt_flags(s, decode, old, mask); + gen_bt_flags(s, decode, old, s->T1, mask); } static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode) |