From 2e2097b495990b198c45dd8ecde8cc58ae3fcd1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 26 May 2023 17:53:51 +0100 Subject: *-user: remove the guest_user_syscall tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is pure duplication now. Both bsd-user and linux-user have builtin strace support and we can also track syscalls via the plugins system. Reviewed-by: Warner Losh Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Alex Bennée Message-id: 20230526165401.574474-2-alex.bennee@linaro.org Message-Id: <20230524133952.3971948-2-alex.bennee@linaro.org> [Remove unused variable in do_freebsd_syscall() reported by Richard Henderson. --Stefan] Signed-off-by: Stefan Hajnoczi --- include/user/syscall-trace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h index 90bda76..557f881 100644 --- a/include/user/syscall-trace.h +++ b/include/user/syscall-trace.h @@ -26,9 +26,6 @@ static inline void record_syscall_start(void *cpu, int num, abi_long arg5, abi_long arg6, abi_long arg7, abi_long arg8) { - trace_guest_user_syscall(cpu, num, - arg1, arg2, arg3, arg4, - arg5, arg6, arg7, arg8); qemu_plugin_vcpu_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); @@ -36,7 +33,6 @@ static inline void record_syscall_start(void *cpu, int num, static inline void record_syscall_return(void *cpu, int num, abi_long ret) { - trace_guest_user_syscall_ret(cpu, num, ret); qemu_plugin_vcpu_syscall_ret(cpu, num, ret); } -- cgit v1.1 From d0aaf08bb9453c12ab33bd5defa1815c34460595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 26 May 2023 17:53:59 +0100 Subject: tcg: remove the final vestiges of dstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we no longer have dynamic state affecting things we can remove the additional fields in cpu.h and simplify the TB hash calculation. For the benchmark: hyperfine -w 2 -m 20 \ "./arm-softmmu/qemu-system-arm -cpu cortex-a15 \ -machine type=virt,highmem=off \ -display none -m 2048 \ -serial mon:stdio \ -netdev user,id=unet,hostfwd=tcp::2222-:22 \ -device virtio-net-pci,netdev=unet \ -device virtio-scsi-pci \ -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf \ -device scsi-hd,drive=hd -smp 4 \ -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage \ -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' \ -snapshot" It has a marginal effect on runtime, before: Time (mean ± σ): 26.279 s ± 2.438 s [User: 41.113 s, System: 1.843 s] Range (min … max): 24.420 s … 32.565 s 20 runs after: Time (mean ± σ): 24.440 s ± 2.885 s [User: 34.474 s, System: 2.028 s] Range (min … max): 21.663 s … 29.937 s 20 runs Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1358 Reviewed-by: Stefan Hajnoczi Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Alex Bennée Message-id: 20230526165401.574474-10-alex.bennee@linaro.org Message-Id: <20230524133952.3971948-9-alex.bennee@linaro.org> Signed-off-by: Stefan Hajnoczi --- include/exec/exec-all.h | 3 --- include/hw/core/cpu.h | 5 ----- 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4d2b151..3b1b57f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -545,9 +545,6 @@ struct TranslationBlock { #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 - /* Per-vCPU dynamic tracing state used to generate this TB */ - uint32_t trace_vcpu_dstate; - /* * Above fields used for comparing */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 39150cf..383456d 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); struct qemu_work_item; #define CPU_UNSET_NUMA_NODE_ID -1 -#define CPU_TRACE_DSTATE_MAX_EVENTS 32 /** * CPUState: @@ -407,10 +406,6 @@ struct CPUState { /* Use by accel-block: CPU is executing an ioctl() */ QemuLockCnt in_ioctl_lock; - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX); #ifdef CONFIG_PLUGIN -- cgit v1.1 From 367189efae8b53ec2ade37a1c079fd8f69244b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 26 May 2023 17:54:01 +0100 Subject: accel/tcg: include cs_base in our hash calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We weren't using cs_base in the hash calculations before. Since the arm front end moved a chunk of flags in a378206a20 (target/arm: Move mode specific TB flags to tb->cs_base) they comprise of an important part of the execution state. Widen the tb_hash_func to include cs_base and expand to qemu_xxhash8() to accommodate it. My initial benchmark shows very little difference in the runtime. Before: armhf ➜ hyperfine -w 2 -m 20 "./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot" Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot Time (mean ± σ): 24.627 s ± 2.708 s [User: 34.309 s, System: 1.797 s] Range (min … max): 22.345 s … 29.864 s 20 runs arm64 ➜ hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64 -device scsi-hd,drive=hd -smp 4 -kernel ~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot" Benchmark 1: 20 Time (mean ± σ): 62.559 s ± 2.917 s [User: 189.115 s, System: 4.089 s] Range (min … max): 59.997 s … 70.153 s 10 runs After: armhf Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot Time (mean ± σ): 24.223 s ± 2.151 s [User: 34.284 s, System: 1.906 s] Range (min … max): 22.000 s … 28.476 s 20 runs arm64 hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64 -device scsi-hd,drive=hd -smp 4 -kernel ~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot" Benchmark 1: 20 Time (mean ± σ): 62.769 s ± 1.978 s [User: 188.431 s, System: 5.269 s] Range (min … max): 60.285 s … 66.868 s 10 runs Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Message-id: 20230526165401.574474-12-alex.bennee@linaro.org Message-Id: <20230524133952.3971948-11-alex.bennee@linaro.org> Signed-off-by: Stefan Hajnoczi --- include/qemu/xxhash.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h index c2dccca..0259bbe 100644 --- a/include/qemu/xxhash.h +++ b/include/qemu/xxhash.h @@ -48,8 +48,8 @@ * xxhash32, customized for input variables that are not guaranteed to be * contiguous in memory. */ -static inline uint32_t -qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) +static inline uint32_t qemu_xxhash8(uint64_t ab, uint64_t cd, uint64_t ef, + uint32_t g, uint32_t h) { uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2; uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; @@ -59,6 +59,8 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) uint32_t b = ab >> 32; uint32_t c = cd; uint32_t d = cd >> 32; + uint32_t e = ef; + uint32_t f = ef >> 32; uint32_t h32; v1 += a * PRIME32_2; @@ -89,6 +91,9 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) h32 += g * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4; + h32 += h * PRIME32_3; + h32 = rol32(h32, 17) * PRIME32_4; + h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; @@ -100,23 +105,29 @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) static inline uint32_t qemu_xxhash2(uint64_t ab) { - return qemu_xxhash7(ab, 0, 0, 0, 0); + return qemu_xxhash8(ab, 0, 0, 0, 0); } static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd) { - return qemu_xxhash7(ab, cd, 0, 0, 0); + return qemu_xxhash8(ab, cd, 0, 0, 0); } static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e) { - return qemu_xxhash7(ab, cd, e, 0, 0); + return qemu_xxhash8(ab, cd, 0, e, 0); } static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f) { - return qemu_xxhash7(ab, cd, e, f, 0); + return qemu_xxhash8(ab, cd, 0, e, f); +} + +static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint64_t ef, + uint32_t g) +{ + return qemu_xxhash8(ab, cd, ef, g, 0); } /* -- cgit v1.1