aboutsummaryrefslogtreecommitdiff
path: root/accel
diff options
context:
space:
mode:
author Leandro Lupori <leandro.lupori@eldorado.org.br> 2022-10-25 17:24:22 -0300
committer Richard Henderson <richard.henderson@linaro.org> 2022-10-26 11:11:28 +1000
commit 69993c4e627a4e4d4d084bef643b446c97fee0f5 (patch)
tree 75ef9def31d6c9e4d7071d36b66759957534beab /accel
parent 122167659c50958f98cb2a153de97541f03462ff (diff)
download qemu-69993c4e627a4e4d4d084bef643b446c97fee0f5.zip
qemu-69993c4e627a4e4d4d084bef643b446c97fee0f5.tar.gz
qemu-69993c4e627a4e4d4d084bef643b446c97fee0f5.tar.bz2
accel/tcg: Add a quicker check for breakpoints
Profiling QEMU during Fedora 35 for PPC64 boot revealed that a considerable amount of time was being spent in check_for_breakpoints() (0.61% of total time on PPC64 and 2.19% on amd64), even though it was just checking that its queue was empty and returning, when no breakpoints were set. It turns out this function is not inlined by the compiler and it's always called by helper_lookup_tb_ptr(), one of the most called functions.

By leaving only the check for empty queue in check_for_breakpoints() and moving the remaining code to check_for_breakpoints_slow(), called only when the queue is not empty, it's possible to avoid the call overhead. An improvement of about 3% in total time was measured on POWER9.

Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221025202424.195984-2-leandro.lupori@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'accel')
-rw-r--r-- accel/tcg/cpu-exec.c 15
1 files changed, 9 insertions, 6 deletions
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index f9e5cc9..bb4b9e9 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -304,16 +304,12 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
}
}
-static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
- uint32_t *cflags)
+static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
+ uint32_t *cflags)
{
CPUBreakpoint *bp;
bool match_page = false;
- if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) {
- return false;
- }
-
/*
* Singlestep overrides breakpoints.
* This requirement is visible in the record-replay tests, where
@@ -374,6 +370,13 @@ static bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
return false;
}
+static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
+ uint32_t *cflags)
+{
+ return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
+ check_for_breakpoints_slow(cpu, pc, cflags);
+}
+
/**
* helper_lookup_tb_ptr: quick check for next tb
* @env: current cpu state