aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paolo Bonzini <pbonzini@redhat.com> 2024-12-15 10:06:09 +0100
committer Paolo Bonzini <pbonzini@redhat.com> 2025-01-23 11:35:33 +0100
commit 0360b781870a628379de20e03305c4e62dbdcca4 (patch)
tree 3dd5e8959d83a2e31d420507cf4c868694666e0f
parent 365811602572054b1c1173b19e8fd28689d827d9 (diff)
download qemu-0360b781870a628379de20e03305c4e62dbdcca4.zip
qemu-0360b781870a628379de20e03305c4e62dbdcca4.tar.gz
qemu-0360b781870a628379de20e03305c4e62dbdcca4.tar.bz2
target/i386: optimize CX handling in repeated string operations
In a repeated string operation, CX/ECX will be decremented until it is 0 but will never underflow. Use this observation to avoid a deposit or zero-extend operation if the address size of the operation is smaller than MO_TL.

As in the previous patch, the patch is structured to include some preparatory work for subsequent changes. In particular, introducing cx_next prepares for when ECX will be decremented *before* calling fn(s, ot), and therefore cannot yet be written back to cpu_regs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Link: https://lore.kernel.org/r/20241215090613.89588-11-pbonzini@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- target/i386/tcg/translate.c 15
1 files changed, 14 insertions, 1 deletions
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7a3caf8..0a8f3c8 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1339,6 +1339,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
{
TCGLabel *done = gen_new_label();
target_ulong cx_mask = MAKE_64BIT_MASK(0, 8 << s->aflag);
+ TCGv cx_next = tcg_temp_new();
bool had_rf = s->flags & HF_RF_MASK;
/*
@@ -1364,7 +1365,19 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
tcg_gen_brcondi_tl(TCG_COND_TSTEQ, cpu_regs[R_ECX], cx_mask, done);
fn(s, ot);
- gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+
+ tcg_gen_subi_tl(cx_next, cpu_regs[R_ECX], 1);
+
+ /*
+ * Write back cx_next to CX/ECX/RCX. There can be no carry, so zero
+ * extend if needed but do not do expensive deposit operations.
+ */
+#ifdef TARGET_X86_64
+ if (s->aflag == MO_32) {
+ tcg_gen_ext32u_tl(cx_next, cx_next);
+ }
+#endif
+ tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
/* Leave if REP condition fails. */