From 8d7f2e767d8cd058c817dbe31430b89f2e11535d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Wed, 4 Oct 2023 11:06:28 +0200 Subject: system: Rename softmmu/ directory as system/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The softmmu/ directory contains files specific to system emulation. Rename it as system/. Update meson rules, the MAINTAINERS file and all the documentation and comments. Signed-off-by: Philippe Mathieu-Daudé Message-ID: <20231004090629.37473-14-philmd@linaro.org> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 42 +- accel/tcg/icount-common.c | 2 +- docs/devel/qtest.rst | 2 +- include/sysemu/cpu-timers-internal.h | 71 + include/sysemu/runstate-action.h | 2 +- meson.build | 8 +- scripts/checkpatch.pl | 2 +- scripts/coverity-scan/COMPONENTS.md | 2 +- scripts/get_maintainer.pl | 2 +- scripts/oss-fuzz/build.sh | 6 +- softmmu/arch_init.c | 50 - softmmu/async-teardown.c | 143 -- softmmu/balloon.c | 106 - softmmu/bootdevice.c | 430 ---- softmmu/cpu-throttle.c | 128 -- softmmu/cpu-timers.c | 277 --- softmmu/cpus.c | 822 -------- softmmu/datadir.c | 110 - softmmu/device_tree.c | 703 ------- softmmu/dirtylimit.c | 678 ------ softmmu/dma-helpers.c | 347 ---- softmmu/globals.c | 70 - softmmu/ioport.c | 346 ---- softmmu/main.c | 49 - softmmu/memory.c | 3683 --------------------------------- softmmu/memory_mapping.c | 377 ---- softmmu/meson.build | 36 - softmmu/physmem.c | 3794 --------------------------------- softmmu/qdev-monitor.c | 1148 ---------- softmmu/qemu-seccomp.c | 486 ----- softmmu/qtest.c | 1070 ---------- softmmu/rtc.c | 192 -- softmmu/runstate-action.c | 46 - softmmu/runstate-hmp-cmds.c | 95 - softmmu/runstate.c | 871 -------- softmmu/timers-state.h | 71 - softmmu/tpm-hmp-cmds.c | 65 - softmmu/tpm.c | 239 --- softmmu/trace-events | 40 - softmmu/trace.h | 1 - softmmu/vl.c | 3730 --------------------------------- softmmu/watchpoint.c | 226 -- system/arch_init.c | 50 + 
system/async-teardown.c | 143 ++ system/balloon.c | 106 + system/bootdevice.c | 430 ++++ system/cpu-throttle.c | 128 ++ system/cpu-timers.c | 277 +++ system/cpus.c | 822 ++++++++ system/datadir.c | 110 + system/device_tree.c | 703 +++++++ system/dirtylimit.c | 678 ++++++ system/dma-helpers.c | 347 ++++ system/globals.c | 70 + system/ioport.c | 346 ++++ system/main.c | 49 + system/memory.c | 3683 +++++++++++++++++++++++++++++++++ system/memory_mapping.c | 377 ++++ system/meson.build | 36 + system/physmem.c | 3796 ++++++++++++++++++++++++++++++++++ system/qdev-monitor.c | 1148 ++++++++++ system/qemu-seccomp.c | 486 +++++ system/qtest.c | 1070 ++++++++++ system/rtc.c | 192 ++ system/runstate-action.c | 46 + system/runstate-hmp-cmds.c | 95 + system/runstate.c | 871 ++++++++ system/tpm-hmp-cmds.c | 65 + system/tpm.c | 239 +++ system/trace-events | 40 + system/trace.h | 1 + system/vl.c | 3730 +++++++++++++++++++++++++++++++++ system/watchpoint.c | 226 ++ tests/unit/meson.build | 2 +- 74 files changed, 20466 insertions(+), 20464 deletions(-) create mode 100644 include/sysemu/cpu-timers-internal.h delete mode 100644 softmmu/arch_init.c delete mode 100644 softmmu/async-teardown.c delete mode 100644 softmmu/balloon.c delete mode 100644 softmmu/bootdevice.c delete mode 100644 softmmu/cpu-throttle.c delete mode 100644 softmmu/cpu-timers.c delete mode 100644 softmmu/cpus.c delete mode 100644 softmmu/datadir.c delete mode 100644 softmmu/device_tree.c delete mode 100644 softmmu/dirtylimit.c delete mode 100644 softmmu/dma-helpers.c delete mode 100644 softmmu/globals.c delete mode 100644 softmmu/ioport.c delete mode 100644 softmmu/main.c delete mode 100644 softmmu/memory.c delete mode 100644 softmmu/memory_mapping.c delete mode 100644 softmmu/meson.build delete mode 100644 softmmu/physmem.c delete mode 100644 softmmu/qdev-monitor.c delete mode 100644 softmmu/qemu-seccomp.c delete mode 100644 softmmu/qtest.c delete mode 100644 softmmu/rtc.c delete mode 100644 
softmmu/runstate-action.c delete mode 100644 softmmu/runstate-hmp-cmds.c delete mode 100644 softmmu/runstate.c delete mode 100644 softmmu/timers-state.h delete mode 100644 softmmu/tpm-hmp-cmds.c delete mode 100644 softmmu/tpm.c delete mode 100644 softmmu/trace-events delete mode 100644 softmmu/trace.h delete mode 100644 softmmu/vl.c delete mode 100644 softmmu/watchpoint.c create mode 100644 system/arch_init.c create mode 100644 system/async-teardown.c create mode 100644 system/balloon.c create mode 100644 system/bootdevice.c create mode 100644 system/cpu-throttle.c create mode 100644 system/cpu-timers.c create mode 100644 system/cpus.c create mode 100644 system/datadir.c create mode 100644 system/device_tree.c create mode 100644 system/dirtylimit.c create mode 100644 system/dma-helpers.c create mode 100644 system/globals.c create mode 100644 system/ioport.c create mode 100644 system/main.c create mode 100644 system/memory.c create mode 100644 system/memory_mapping.c create mode 100644 system/meson.build create mode 100644 system/physmem.c create mode 100644 system/qdev-monitor.c create mode 100644 system/qemu-seccomp.c create mode 100644 system/qtest.c create mode 100644 system/rtc.c create mode 100644 system/runstate-action.c create mode 100644 system/runstate-hmp-cmds.c create mode 100644 system/runstate.c create mode 100644 system/tpm-hmp-cmds.c create mode 100644 system/tpm.c create mode 100644 system/trace-events create mode 100644 system/trace.h create mode 100644 system/vl.c create mode 100644 system/watchpoint.c diff --git a/MAINTAINERS b/MAINTAINERS index ea91f9e..a5ce4c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -137,8 +137,8 @@ Overall TCG CPUs M: Richard Henderson R: Paolo Bonzini S: Maintained -F: softmmu/cpus.c -F: softmmu/watchpoint.c +F: system/cpus.c +F: system/watchpoint.c F: cpu-common.c F: cpu-target.c F: page-vary-target.c @@ -2108,7 +2108,7 @@ S: Maintained F: docs/interop/virtio-balloon-stats.rst F: hw/virtio/virtio-balloon*.c F: 
include/hw/virtio/virtio-balloon.h -F: softmmu/balloon.c +F: system/balloon.c F: include/sysemu/balloon.h virtio-9p @@ -2795,7 +2795,7 @@ Device Tree M: Alistair Francis R: David Gibson S: Maintained -F: softmmu/device_tree.c +F: system/device_tree.c F: include/sysemu/device_tree.h Dump @@ -2851,11 +2851,11 @@ F: include/exec/memory.h F: include/exec/ram_addr.h F: include/exec/ramblock.h F: include/sysemu/memory_mapping.h -F: softmmu/dma-helpers.c -F: softmmu/ioport.c -F: softmmu/memory.c -F: softmmu/memory_mapping.c -F: softmmu/physmem.c +F: system/dma-helpers.c +F: system/ioport.c +F: system/memory.c +F: system/memory_mapping.c +F: system/physmem.c F: include/exec/memory-internal.h F: scripts/coccinelle/memory-region-housekeeping.cocci @@ -2908,12 +2908,12 @@ F: include/sysemu/runstate.h F: include/sysemu/runstate-action.h F: util/main-loop.c F: util/qemu-timer.c -F: softmmu/vl.c -F: softmmu/main.c -F: softmmu/cpus.c -F: softmmu/cpu-throttle.c -F: softmmu/cpu-timers.c -F: softmmu/runstate* +F: system/vl.c +F: system/main.c +F: system/cpus.c +F: system/cpu-throttle.c +F: system/cpu-timers.c +F: system/runstate* F: qapi/run-state.json Read, Copy, Update (RCU) @@ -3087,7 +3087,7 @@ F: qapi/qom.json F: qapi/qdev.json F: scripts/coccinelle/qom-parent-type.cocci F: scripts/qom-cast-macro-clean-cocci-gen.py -F: softmmu/qdev-monitor.c +F: system/qdev-monitor.c F: stubs/qdev.c F: qom/ F: tests/unit/check-qom-interface.c @@ -3121,7 +3121,7 @@ M: Thomas Huth M: Laurent Vivier R: Paolo Bonzini S: Maintained -F: softmmu/qtest.c +F: system/qtest.c F: accel/qtest/ F: tests/qtest/ F: docs/devel/qgraph.rst @@ -3197,7 +3197,7 @@ F: scripts/simpletrace.py TPM M: Stefan Berger S: Maintained -F: softmmu/tpm* +F: system/tpm* F: hw/tpm/* F: include/hw/acpi/tpm.h F: include/sysemu/tpm* @@ -3242,7 +3242,7 @@ F: migration/rdma* Migration dirty limit and dirty page rate M: Hyman Huang S: Maintained -F: softmmu/dirtylimit.c +F: system/dirtylimit.c F: include/sysemu/dirtylimit.h F: 
migration/dirtyrate.c F: migration/dirtyrate.h @@ -3266,7 +3266,7 @@ F: scripts/xml-preprocess* Seccomp M: Daniel P. Berrange S: Odd Fixes -F: softmmu/qemu-seccomp.c +F: system/qemu-seccomp.c F: include/sysemu/seccomp.h F: tests/unit/test-seccomp.c @@ -3685,7 +3685,7 @@ T: git https://github.com/stefanha/qemu.git block Bootdevice M: Gonglei S: Maintained -F: softmmu/bootdevice.c +F: system/bootdevice.c Quorum M: Alberto Garcia diff --git a/accel/tcg/icount-common.c b/accel/tcg/icount-common.c index 0bf5bb5..ec57192 100644 --- a/accel/tcg/icount-common.c +++ b/accel/tcg/icount-common.c @@ -37,7 +37,7 @@ #include "hw/core/cpu.h" #include "sysemu/cpu-timers.h" #include "sysemu/cpu-throttle.h" -#include "softmmu/timers-state.h" +#include "sysemu/cpu-timers-internal.h" /* * ICOUNT: Instruction Counter diff --git a/docs/devel/qtest.rst b/docs/devel/qtest.rst index 0455aa0..c5b8546 100644 --- a/docs/devel/qtest.rst +++ b/docs/devel/qtest.rst @@ -81,7 +81,7 @@ which you can run manually. QTest Protocol -------------- -.. kernel-doc:: softmmu/qtest.c +.. kernel-doc:: system/qtest.c :doc: QTest Protocol diff --git a/include/sysemu/cpu-timers-internal.h b/include/sysemu/cpu-timers-internal.h new file mode 100644 index 0000000..94bb739 --- /dev/null +++ b/include/sysemu/cpu-timers-internal.h @@ -0,0 +1,71 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TIMERS_STATE_H +#define TIMERS_STATE_H + +/* timers state, for sharing between icount and cpu-timers */ + +typedef struct TimersState { + /* Protected by BQL. */ + int64_t cpu_ticks_prev; + int64_t cpu_ticks_offset; + + /* + * Protect fields that can be respectively read outside the + * BQL, and written from multiple threads. + */ + QemuSeqLock vm_clock_seqlock; + QemuSpin vm_clock_lock; + + int16_t cpu_ticks_enabled; + + /* Conversion factor from emulated instructions to virtual clock ticks. */ + int16_t icount_time_shift; + /* Icount delta used for shift auto adjust. */ + int64_t last_delta; + + /* Compensate for varying guest execution speed. */ + aligned_int64_t qemu_icount_bias; + + int64_t vm_clock_warp_start; + int64_t cpu_clock_offset; + + /* Only written by TCG thread */ + int64_t qemu_icount; + + /* for adjusting icount */ + QEMUTimer *icount_rt_timer; + QEMUTimer *icount_vm_timer; + QEMUTimer *icount_warp_timer; +} TimersState; + +extern TimersState timers_state; + +/* + * icount needs this internal from cpu-timers when adjusting the icount shift. 
+ */ +int64_t cpu_get_clock_locked(void); + +#endif /* TIMERS_STATE_H */ diff --git a/include/sysemu/runstate-action.h b/include/sysemu/runstate-action.h index cff45a0..db4e309 100644 --- a/include/sysemu/runstate-action.h +++ b/include/sysemu/runstate-action.h @@ -11,7 +11,7 @@ #include "qapi/qapi-commands-run-state.h" -/* in softmmu/runstate-action.c */ +/* in system/runstate-action.c */ extern RebootAction reboot_action; extern ShutdownAction shutdown_action; extern PanicAction panic_action; diff --git a/meson.build b/meson.build index 167cb70..79aef19 100644 --- a/meson.build +++ b/meson.build @@ -3291,7 +3291,7 @@ if have_system 'hw/gpio', 'migration', 'net', - 'softmmu', + 'system', 'ui', 'hw/remote', ] @@ -3418,7 +3418,7 @@ endif common_ss.add(files('cpu-common.c')) specific_ss.add(files('cpu-target.c')) -subdir('softmmu') +subdir('system') # Work around a gcc bug/misfeature wherein constant propagation looks # through an alias: @@ -3797,14 +3797,14 @@ foreach target : target_dirs execs = [{ 'name': 'qemu-system-' + target_name, 'win_subsystem': 'console', - 'sources': files('softmmu/main.c'), + 'sources': files('system/main.c'), 'dependencies': [] }] if targetos == 'windows' and (sdl.found() or gtk.found()) execs += [{ 'name': 'qemu-system-' + target_name + 'w', 'win_subsystem': 'windows', - 'sources': files('softmmu/main.c'), + 'sources': files('system/main.c'), 'dependencies': [] }] endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1ad9ccb..6e4100d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -466,7 +466,7 @@ sub top_of_kernel_tree { my @tree_check = ( "COPYING", "MAINTAINERS", "Makefile", "README.rst", "docs", "VERSION", - "linux-user", "softmmu" + "linux-user", "system" ); foreach my $check (@tree_check) { diff --git a/scripts/coverity-scan/COMPONENTS.md b/scripts/coverity-scan/COMPONENTS.md index 883da95..0e62f10 100644 --- a/scripts/coverity-scan/COMPONENTS.md +++ b/scripts/coverity-scan/COMPONENTS.md @@ 
-148,7 +148,7 @@ tcg ~ (/qemu)?(/accel/tcg|/replay|/tcg)/.* sysemu - ~ (/qemu)?(/softmmu/.*|/accel/.*) + ~ (/qemu)?(/system/.*|/accel/.*) (headers) ~ (/qemu)?(/include/.*) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index e5499b9..02fa828 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -796,7 +796,7 @@ sub top_of_tree { && (-d "${lk_path}docs") && (-f "${lk_path}VERSION") && (-d "${lk_path}linux-user/") - && (-d "${lk_path}softmmu/")) { + && (-d "${lk_path}system/")) { return 1; } return 0; diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh index 3bda0d7..5238f83 100755 --- a/scripts/oss-fuzz/build.sh +++ b/scripts/oss-fuzz/build.sh @@ -43,10 +43,10 @@ EXTRA_CFLAGS="$CFLAGS -U __OPTIMIZE__" if ! { [ -e "./COPYING" ] && [ -e "./MAINTAINERS" ] && [ -e "./Makefile" ] && - [ -e "./docs" ] && + [ -d "./docs" ] && [ -e "./VERSION" ] && - [ -e "./linux-user" ] && - [ -e "./softmmu" ];} ; then + [ -d "./linux-user" ] && + [ -d "./system" ];} ; then fatal "Please run the script from the top of the QEMU tree" fi diff --git a/softmmu/arch_init.c b/softmmu/arch_init.c deleted file mode 100644 index 79716f9..0000000 --- a/softmmu/arch_init.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu/osdep.h" -#include "qemu/module.h" -#include "sysemu/arch_init.h" - -#ifdef TARGET_SPARC -int graphic_width = 1024; -int graphic_height = 768; -int graphic_depth = 8; -#elif defined(TARGET_M68K) -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 8; -#else -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 32; -#endif - -const uint32_t arch_type = QEMU_ARCH; - -void qemu_init_arch_modules(void) -{ -#ifdef CONFIG_MODULES - module_init_info(qemu_modinfo); - module_allow_arch(TARGET_NAME); -#endif -} diff --git a/softmmu/async-teardown.c b/softmmu/async-teardown.c deleted file mode 100644 index 396963c..0000000 --- a/softmmu/async-teardown.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Asynchronous teardown - * - * Copyright IBM, Corp. 2022 - * - * Authors: - * Claudio Imbrenda - * - * This work is licensed under the terms of the GNU GPL, version 2 or (at your - * option) any later version. See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include <dirent.h> -#include <sys/prctl.h> -#include <sched.h> - -#include "qemu/async-teardown.h" - -#ifdef _SC_THREAD_STACK_MIN -#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN) -#else -#define CLONE_STACK_SIZE 16384 -#endif - -static pid_t the_ppid; - -/* - * Close all open file descriptors. 
- */ -static void close_all_open_fd(void) -{ - struct dirent *de; - int fd, dfd; - DIR *dir; - -#ifdef CONFIG_CLOSE_RANGE - int r = close_range(0, ~0U, 0); - if (!r) { - /* Success, no need to try other ways. */ - return; - } -#endif - - dir = opendir("/proc/self/fd"); - if (!dir) { - /* If /proc is not mounted, there is nothing that can be done. */ - return; - } - /* Avoid closing the directory. */ - dfd = dirfd(dir); - - for (de = readdir(dir); de; de = readdir(dir)) { - fd = atoi(de->d_name); - if (fd != dfd) { - close(fd); - } - } - closedir(dir); -} - -static void hup_handler(int signal) -{ - /* Check every second if this process has been reparented. */ - while (the_ppid == getppid()) { - /* sleep() is safe to use in a signal handler. */ - sleep(1); - } - - /* At this point the parent process has terminated completely. */ - _exit(0); -} - -static int async_teardown_fn(void *arg) -{ - struct sigaction sa = { .sa_handler = hup_handler }; - sigset_t hup_signal; - char name[16]; - - /* Set a meaningful name for this process. */ - snprintf(name, 16, "cleanup/%d", the_ppid); - prctl(PR_SET_NAME, (unsigned long)name); - - /* - * Close all file descriptors that might have been inherited from the - * main qemu process when doing clone, needed to make libvirt happy. - * Not using close_range for increased compatibility with older kernels. - */ - close_all_open_fd(); - - /* Set up a handler for SIGHUP and unblock SIGHUP. */ - sigaction(SIGHUP, &sa, NULL); - sigemptyset(&hup_signal); - sigaddset(&hup_signal, SIGHUP); - sigprocmask(SIG_UNBLOCK, &hup_signal, NULL); - - /* Ask to receive SIGHUP when the parent dies. */ - prctl(PR_SET_PDEATHSIG, SIGHUP); - - /* - * Sleep forever, unless the parent process has already terminated. The - * only interruption can come from the SIGHUP signal, which in normal - * operation is received when the parent process dies. - */ - if (the_ppid == getppid()) { - pause(); - } - - /* At this point the parent process has terminated completely. 
*/ - _exit(0); -} - -/* - * Allocate a new stack of a reasonable size, and return a pointer to its top. - */ -static void *new_stack_for_clone(void) -{ - size_t stack_size = CLONE_STACK_SIZE; - char *stack_ptr; - - /* Allocate a new stack and get a pointer to its top. */ - stack_ptr = qemu_alloc_stack(&stack_size); - stack_ptr += stack_size; - - return stack_ptr; -} - -/* - * Block all signals, start (clone) a new process sharing the address space - * with qemu (CLONE_VM), then restore signals. - */ -void init_async_teardown(void) -{ - sigset_t all_signals, old_signals; - - the_ppid = getpid(); - - sigfillset(&all_signals); - sigprocmask(SIG_BLOCK, &all_signals, &old_signals); - clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); - sigprocmask(SIG_SETMASK, &old_signals, NULL); -} diff --git a/softmmu/balloon.c b/softmmu/balloon.c deleted file mode 100644 index e0e8969..0000000 --- a/softmmu/balloon.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Generic Balloon handlers and management - * - * Copyright (c) 2003-2008 Fabrice Bellard - * Copyright (C) 2011 Red Hat, Inc. - * Copyright (C) 2011 Amit Shah - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/atomic.h" -#include "sysemu/kvm.h" -#include "sysemu/balloon.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-machine.h" -#include "qapi/qmp/qerror.h" -#include "trace.h" - -static QEMUBalloonEvent *balloon_event_fn; -static QEMUBalloonStatus *balloon_stat_fn; -static void *balloon_opaque; - -static bool have_balloon(Error **errp) -{ - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_set(errp, ERROR_CLASS_KVM_MISSING_CAP, - "Using KVM without synchronous MMU, balloon unavailable"); - return false; - } - if (!balloon_event_fn) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, - "No balloon device has been activated"); - return false; - } - return true; -} - -int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, - QEMUBalloonStatus *stat_func, void *opaque) -{ - if (balloon_event_fn || balloon_stat_fn || balloon_opaque) { - /* We're already registered one balloon handler. How many can - * a guest really have? 
- */ - return -1; - } - balloon_event_fn = event_func; - balloon_stat_fn = stat_func; - balloon_opaque = opaque; - return 0; -} - -void qemu_remove_balloon_handler(void *opaque) -{ - if (balloon_opaque != opaque) { - return; - } - balloon_event_fn = NULL; - balloon_stat_fn = NULL; - balloon_opaque = NULL; -} - -BalloonInfo *qmp_query_balloon(Error **errp) -{ - BalloonInfo *info; - - if (!have_balloon(errp)) { - return NULL; - } - - info = g_malloc0(sizeof(*info)); - balloon_stat_fn(balloon_opaque, info); - return info; -} - -void qmp_balloon(int64_t target, Error **errp) -{ - if (!have_balloon(errp)) { - return; - } - - if (target <= 0) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size"); - return; - } - - trace_balloon_event(balloon_opaque, target); - balloon_event_fn(balloon_opaque, target); -} diff --git a/softmmu/bootdevice.c b/softmmu/bootdevice.c deleted file mode 100644 index 2106f10..0000000 --- a/softmmu/bootdevice.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * QEMU Boot Device Implement - * - * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "sysemu/sysemu.h" -#include "qapi/visitor.h" -#include "qemu/error-report.h" -#include "sysemu/reset.h" -#include "hw/qdev-core.h" -#include "hw/boards.h" - -typedef struct FWBootEntry FWBootEntry; - -struct FWBootEntry { - QTAILQ_ENTRY(FWBootEntry) link; - int32_t bootindex; - DeviceState *dev; - char *suffix; -}; - -static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = - QTAILQ_HEAD_INITIALIZER(fw_boot_order); -static QEMUBootSetHandler *boot_set_handler; -static void *boot_set_opaque; - -void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque) -{ - boot_set_handler = func; - boot_set_opaque = opaque; -} - -void qemu_boot_set(const char *boot_order, Error **errp) -{ - Error *local_err = NULL; - - if (!boot_set_handler) { - error_setg(errp, "no function defined to set boot device list for" - " this architecture"); - return; - } - - validate_bootdevices(boot_order, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - boot_set_handler(boot_set_opaque, boot_order, errp); -} - -void validate_bootdevices(const char *devices, Error **errp) -{ - /* We just do some generic consistency checks */ - const char *p; - int bitmap = 0; - - for (p = devices; *p != '\0'; p++) { - /* Allowed boot devices are: - * a-b: floppy disk drives - * c-f: IDE disk drives - * g-m: machine implementation dependent drives - * n-p: network devices - * It's up to each machine implementation to check if the given boot - * devices match the actual hardware implementation and firmware - * features. 
- */ - if (*p < 'a' || *p > 'p') { - error_setg(errp, "Invalid boot device '%c'", *p); - return; - } - if (bitmap & (1 << (*p - 'a'))) { - error_setg(errp, "Boot device '%c' was given twice", *p); - return; - } - bitmap |= 1 << (*p - 'a'); - } -} - -void restore_boot_order(void *opaque) -{ - char *normal_boot_order = opaque; - static int first = 1; - - /* Restore boot order and remove ourselves after the first boot */ - if (first) { - first = 0; - return; - } - - if (boot_set_handler) { - qemu_boot_set(normal_boot_order, &error_abort); - } - - qemu_unregister_reset(restore_boot_order, normal_boot_order); - g_free(normal_boot_order); -} - -void check_boot_index(int32_t bootindex, Error **errp) -{ - FWBootEntry *i; - - if (bootindex >= 0) { - QTAILQ_FOREACH(i, &fw_boot_order, link) { - if (i->bootindex == bootindex) { - error_setg(errp, "The bootindex %d has already been used", - bootindex); - return; - } - } - } -} - -void del_boot_device_path(DeviceState *dev, const char *suffix) -{ - FWBootEntry *i; - - if (dev == NULL) { - return; - } - - QTAILQ_FOREACH(i, &fw_boot_order, link) { - if ((!suffix || !g_strcmp0(i->suffix, suffix)) && - i->dev == dev) { - QTAILQ_REMOVE(&fw_boot_order, i, link); - g_free(i->suffix); - g_free(i); - - break; - } - } -} - -void add_boot_device_path(int32_t bootindex, DeviceState *dev, - const char *suffix) -{ - FWBootEntry *node, *i; - - if (bootindex < 0) { - del_boot_device_path(dev, suffix); - return; - } - - assert(dev != NULL || suffix != NULL); - - del_boot_device_path(dev, suffix); - - node = g_new0(FWBootEntry, 1); - node->bootindex = bootindex; - node->suffix = g_strdup(suffix); - node->dev = dev; - - QTAILQ_FOREACH(i, &fw_boot_order, link) { - if (i->bootindex == bootindex) { - error_report("Two devices with same boot index %d", bootindex); - exit(1); - } else if (i->bootindex < bootindex) { - continue; - } - QTAILQ_INSERT_BEFORE(i, node, link); - return; - } - QTAILQ_INSERT_TAIL(&fw_boot_order, node, link); -} - -DeviceState 
*get_boot_device(uint32_t position) -{ - uint32_t counter = 0; - FWBootEntry *i = NULL; - DeviceState *res = NULL; - - if (!QTAILQ_EMPTY(&fw_boot_order)) { - QTAILQ_FOREACH(i, &fw_boot_order, link) { - if (counter == position) { - res = i->dev; - break; - } - counter++; - } - } - return res; -} - -static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes, - const char *suffix) -{ - char *devpath = NULL, *s = NULL, *d, *bootpath; - - if (dev) { - devpath = qdev_get_fw_dev_path(dev); - assert(devpath); - } - - if (!ignore_suffixes) { - if (dev) { - d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev); - if (d) { - assert(!suffix); - s = d; - } else { - s = g_strdup(suffix); - } - } else { - s = g_strdup(suffix); - } - } - - bootpath = g_strdup_printf("%s%s", - devpath ? devpath : "", - s ? s : ""); - g_free(devpath); - g_free(s); - - return bootpath; -} - -/* - * This function returns null terminated string that consist of new line - * separated device paths. - * - * memory pointed by "size" is assigned total length of the array in bytes - * - */ -char *get_boot_devices_list(size_t *size) -{ - FWBootEntry *i; - size_t total = 0; - char *list = NULL; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - bool ignore_suffixes = mc->ignore_boot_device_suffixes; - - QTAILQ_FOREACH(i, &fw_boot_order, link) { - char *bootpath; - size_t len; - - bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix); - - if (total) { - list[total-1] = '\n'; - } - len = strlen(bootpath) + 1; - list = g_realloc(list, total + len); - memcpy(&list[total], bootpath, len); - total += len; - g_free(bootpath); - } - - *size = total; - - if (current_machine->boot_config.has_strict && - current_machine->boot_config.strict && *size > 0) { - list[total-1] = '\n'; - list = g_realloc(list, total + 5); - memcpy(&list[total], "HALT", 5); - *size = total + 5; - } - return list; -} - -typedef struct { - int32_t *bootindex; - const char *suffix; - DeviceState 
*dev; -} BootIndexProperty; - -static void device_get_bootindex(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - BootIndexProperty *prop = opaque; - visit_type_int32(v, name, prop->bootindex, errp); -} - -static void device_set_bootindex(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - BootIndexProperty *prop = opaque; - int32_t boot_index; - Error *local_err = NULL; - - if (!visit_type_int32(v, name, &boot_index, errp)) { - return; - } - /* check whether bootindex is present in fw_boot_order list */ - check_boot_index(boot_index, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - /* change bootindex to a new one */ - *prop->bootindex = boot_index; - - add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix); -} - -static void property_release_bootindex(Object *obj, const char *name, - void *opaque) - -{ - BootIndexProperty *prop = opaque; - - del_boot_device_path(prop->dev, prop->suffix); - g_free(prop); -} - -void device_add_bootindex_property(Object *obj, int32_t *bootindex, - const char *name, const char *suffix, - DeviceState *dev) -{ - BootIndexProperty *prop = g_malloc0(sizeof(*prop)); - - prop->bootindex = bootindex; - prop->suffix = suffix; - prop->dev = dev; - - object_property_add(obj, name, "int32", - device_get_bootindex, - device_set_bootindex, - property_release_bootindex, - prop); - - /* initialize devices' bootindex property to -1 */ - object_property_set_int(obj, name, -1, NULL); -} - -typedef struct FWLCHSEntry FWLCHSEntry; - -struct FWLCHSEntry { - QTAILQ_ENTRY(FWLCHSEntry) link; - DeviceState *dev; - char *suffix; - uint32_t lcyls; - uint32_t lheads; - uint32_t lsecs; -}; - -static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs = - QTAILQ_HEAD_INITIALIZER(fw_lchs); - -void add_boot_device_lchs(DeviceState *dev, const char *suffix, - uint32_t lcyls, uint32_t lheads, uint32_t lsecs) -{ - FWLCHSEntry *node; - - if (!lcyls && !lheads && !lsecs) { - return; - } - - 
assert(dev != NULL || suffix != NULL); - - node = g_new0(FWLCHSEntry, 1); - node->suffix = g_strdup(suffix); - node->dev = dev; - node->lcyls = lcyls; - node->lheads = lheads; - node->lsecs = lsecs; - - QTAILQ_INSERT_TAIL(&fw_lchs, node, link); -} - -void del_boot_device_lchs(DeviceState *dev, const char *suffix) -{ - FWLCHSEntry *i; - - if (dev == NULL) { - return; - } - - QTAILQ_FOREACH(i, &fw_lchs, link) { - if ((!suffix || !g_strcmp0(i->suffix, suffix)) && - i->dev == dev) { - QTAILQ_REMOVE(&fw_lchs, i, link); - g_free(i->suffix); - g_free(i); - - break; - } - } -} - -char *get_boot_devices_lchs_list(size_t *size) -{ - FWLCHSEntry *i; - size_t total = 0; - char *list = NULL; - - QTAILQ_FOREACH(i, &fw_lchs, link) { - char *bootpath; - char *chs_string; - size_t len; - - bootpath = get_boot_device_path(i->dev, false, i->suffix); - chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32, - bootpath, i->lcyls, i->lheads, i->lsecs); - - if (total) { - list[total - 1] = '\n'; - } - len = strlen(chs_string) + 1; - list = g_realloc(list, total + len); - memcpy(&list[total], chs_string, len); - total += len; - g_free(chs_string); - g_free(bootpath); - } - - *size = total; - - return list; -} diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c deleted file mode 100644 index d9bb30a..0000000 --- a/softmmu/cpu-throttle.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be 
included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/thread.h" -#include "hw/core/cpu.h" -#include "qemu/main-loop.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-throttle.h" - -/* vcpu throttling controls */ -static QEMUTimer *throttle_timer; -static unsigned int throttle_percentage; - -#define CPU_THROTTLE_PCT_MIN 1 -#define CPU_THROTTLE_PCT_MAX 99 -#define CPU_THROTTLE_TIMESLICE_NS 10000000 - -static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) -{ - double pct; - double throttle_ratio; - int64_t sleeptime_ns, endtime_ns; - - if (!cpu_throttle_get_percentage()) { - return; - } - - pct = (double)cpu_throttle_get_percentage() / 100; - throttle_ratio = pct / (1 - pct); - /* Add 1ns to fix double's rounding error (like 0.9999999...) 
*/ - sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); - endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; - while (sleeptime_ns > 0 && !cpu->stop) { - if (sleeptime_ns > SCALE_MS) { - qemu_cond_timedwait_iothread(cpu->halt_cond, - sleeptime_ns / SCALE_MS); - } else { - qemu_mutex_unlock_iothread(); - g_usleep(sleeptime_ns / SCALE_US); - qemu_mutex_lock_iothread(); - } - sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - } - qatomic_set(&cpu->throttle_thread_scheduled, 0); -} - -static void cpu_throttle_timer_tick(void *opaque) -{ - CPUState *cpu; - double pct; - - /* Stop the timer if needed */ - if (!cpu_throttle_get_percentage()) { - return; - } - CPU_FOREACH(cpu) { - if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) { - async_run_on_cpu(cpu, cpu_throttle_thread, - RUN_ON_CPU_NULL); - } - } - - pct = (double)cpu_throttle_get_percentage() / 100; - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + - CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); -} - -void cpu_throttle_set(int new_throttle_pct) -{ - /* - * boolean to store whether throttle is already active or not, - * before modifying throttle_percentage - */ - bool throttle_active = cpu_throttle_active(); - - /* Ensure throttle percentage is within valid range */ - new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); - new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); - - qatomic_set(&throttle_percentage, new_throttle_pct); - - if (!throttle_active) { - cpu_throttle_timer_tick(NULL); - } -} - -void cpu_throttle_stop(void) -{ - qatomic_set(&throttle_percentage, 0); -} - -bool cpu_throttle_active(void) -{ - return (cpu_throttle_get_percentage() != 0); -} - -int cpu_throttle_get_percentage(void) -{ - return qatomic_read(&throttle_percentage); -} - -void cpu_throttle_init(void) -{ - throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - cpu_throttle_timer_tick, NULL); -} diff --git a/softmmu/cpu-timers.c 
b/softmmu/cpu-timers.c deleted file mode 100644 index 117408c..0000000 --- a/softmmu/cpu-timers.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "qemu/cutils.h" -#include "migration/vmstate.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "sysemu/cpus.h" -#include "qemu/main-loop.h" -#include "qemu/option.h" -#include "qemu/seqlock.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" -#include "hw/core/cpu.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/cpu-throttle.h" -#include "timers-state.h" - -/* clock and ticks */ - -static int64_t cpu_get_ticks_locked(void) -{ - int64_t ticks = timers_state.cpu_ticks_offset; - if (timers_state.cpu_ticks_enabled) { - ticks += cpu_get_host_ticks(); - } - - if (timers_state.cpu_ticks_prev > ticks) { - /* Non increasing ticks may happen if the host uses software suspend. */ - timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; - ticks = timers_state.cpu_ticks_prev; - } - - timers_state.cpu_ticks_prev = ticks; - return ticks; -} - -/* - * return the time elapsed in VM between vm_start and vm_stop. - * cpu_get_ticks() uses units of the host CPU cycle counter. - */ -int64_t cpu_get_ticks(void) -{ - int64_t ticks; - - qemu_spin_lock(&timers_state.vm_clock_lock); - ticks = cpu_get_ticks_locked(); - qemu_spin_unlock(&timers_state.vm_clock_lock); - return ticks; -} - -int64_t cpu_get_clock_locked(void) -{ - int64_t time; - - time = timers_state.cpu_clock_offset; - if (timers_state.cpu_ticks_enabled) { - time += get_clock(); - } - - return time; -} - -/* - * Return the monotonic time elapsed in VM, i.e., - * the time between vm_start and vm_stop - */ -int64_t cpu_get_clock(void) -{ - int64_t ti; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - ti = cpu_get_clock_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return ti; -} - -/* - * enable cpu_get_ticks() - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. 
- */ -void cpu_enable_ticks(void) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (!timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset -= cpu_get_host_ticks(); - timers_state.cpu_clock_offset -= get_clock(); - timers_state.cpu_ticks_enabled = 1; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -/* - * disable cpu_get_ticks() : the clock is stopped. You must not call - * cpu_get_ticks() after that. - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. - */ -void cpu_disable_ticks(void) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset += cpu_get_host_ticks(); - timers_state.cpu_clock_offset = cpu_get_clock_locked(); - timers_state.cpu_ticks_enabled = 0; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -static bool icount_state_needed(void *opaque) -{ - return icount_enabled(); -} - -static bool warp_timer_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_warp_timer != NULL; -} - -static bool adjust_timers_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_rt_timer != NULL; -} - -static bool icount_shift_state_needed(void *opaque) -{ - return icount_enabled() == 2; -} - -/* - * Subsection for warp timer migration is optional, because may not be created - */ -static const VMStateDescription icount_vmstate_warp_timer = { - .name = "timer/icount/warp_timer", - .version_id = 1, - .minimum_version_id = 1, - .needed = warp_timer_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(vm_clock_warp_start, TimersState), - VMSTATE_TIMER_PTR(icount_warp_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_adjust_timers = { - .name = "timer/icount/timers", - .version_id = 1, - .minimum_version_id = 
1, - .needed = adjust_timers_state_needed, - .fields = (VMStateField[]) { - VMSTATE_TIMER_PTR(icount_rt_timer, TimersState), - VMSTATE_TIMER_PTR(icount_vm_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_shift = { - .name = "timer/icount/shift", - .version_id = 2, - .minimum_version_id = 2, - .needed = icount_shift_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT16(icount_time_shift, TimersState), - VMSTATE_INT64(last_delta, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -/* - * This is a subsection for icount migration. - */ -static const VMStateDescription icount_vmstate_timers = { - .name = "timer/icount", - .version_id = 1, - .minimum_version_id = 1, - .needed = icount_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(qemu_icount_bias, TimersState), - VMSTATE_INT64(qemu_icount, TimersState), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription * []) { - &icount_vmstate_warp_timer, - &icount_vmstate_adjust_timers, - &icount_vmstate_shift, - NULL - } -}; - -static const VMStateDescription vmstate_timers = { - .name = "timer", - .version_id = 2, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT64(cpu_ticks_offset, TimersState), - VMSTATE_UNUSED(8), - VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription * []) { - &icount_vmstate_timers, - NULL - } -}; - -static void do_nothing(CPUState *cpu, run_on_cpu_data unused) -{ -} - -void qemu_timer_notify_cb(void *opaque, QEMUClockType type) -{ - if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) { - qemu_notify_event(); - return; - } - - if (qemu_in_vcpu_thread()) { - /* - * A CPU is currently running; kick it back out to the - * tcg_cpu_exec() loop so it will recalculate its - * icount deadline immediately. 
- */ - qemu_cpu_kick(current_cpu); - } else if (first_cpu) { - /* - * qemu_cpu_kick is not enough to kick a halted CPU out of - * qemu_tcg_wait_io_event. async_run_on_cpu, instead, - * causes cpu_thread_is_idle to return false. This way, - * handle_icount_deadline can run. - * If we have no CPUs at all for some reason, we don't - * need to do anything. - */ - async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); - } -} - -TimersState timers_state; - -/* initialize timers state and the cpu throttle for convenience */ -void cpu_timers_init(void) -{ - seqlock_init(&timers_state.vm_clock_seqlock); - qemu_spin_init(&timers_state.vm_clock_lock); - vmstate_register(NULL, 0, &vmstate_timers, &timers_state); - - cpu_throttle_init(); -} diff --git a/softmmu/cpus.c b/softmmu/cpus.c deleted file mode 100644 index 0848e0d..0000000 --- a/softmmu/cpus.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "monitor/monitor.h" -#include "qemu/coroutine-tls.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-machine.h" -#include "qapi/qapi-commands-misc.h" -#include "qapi/qapi-events-run-state.h" -#include "qapi/qmp/qerror.h" -#include "exec/gdbstub.h" -#include "sysemu/hw_accel.h" -#include "exec/cpu-common.h" -#include "qemu/thread.h" -#include "qemu/main-loop.h" -#include "qemu/plugin.h" -#include "sysemu/cpus.h" -#include "qemu/guest-random.h" -#include "hw/nmi.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/whpx.h" -#include "hw/boards.h" -#include "hw/hw.h" -#include "trace.h" - -#ifdef CONFIG_LINUX - -#include - -#ifndef PR_MCE_KILL -#define PR_MCE_KILL 33 -#endif - -#ifndef PR_MCE_KILL_SET -#define PR_MCE_KILL_SET 1 -#endif - -#ifndef PR_MCE_KILL_EARLY -#define PR_MCE_KILL_EARLY 1 -#endif - -#endif /* CONFIG_LINUX */ - -static QemuMutex qemu_global_mutex; - -/* - * The chosen accelerator is supposed to register this. 
- */ -static const AccelOpsClass *cpus_accel; - -bool cpu_is_stopped(CPUState *cpu) -{ - return cpu->stopped || !runstate_is_running(); -} - -bool cpu_work_list_empty(CPUState *cpu) -{ - return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list); -} - -bool cpu_thread_is_idle(CPUState *cpu) -{ - if (cpu->stop || !cpu_work_list_empty(cpu)) { - return false; - } - if (cpu_is_stopped(cpu)) { - return true; - } - if (!cpu->halted || cpu_has_work(cpu)) { - return false; - } - if (cpus_accel->cpu_thread_is_idle) { - return cpus_accel->cpu_thread_is_idle(cpu); - } - return true; -} - -bool all_cpu_threads_idle(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (!cpu_thread_is_idle(cpu)) { - return false; - } - } - return true; -} - -/***********************************************************/ -void hw_error(const char *fmt, ...) -{ - va_list ap; - CPUState *cpu; - - va_start(ap, fmt); - fprintf(stderr, "qemu: hardware error: "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - CPU_FOREACH(cpu) { - fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); - cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); - } - va_end(ap); - abort(); -} - -void cpu_synchronize_all_states(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_state(cpu); - } -} - -void cpu_synchronize_all_post_reset(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_reset(cpu); - } -} - -void cpu_synchronize_all_post_init(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_init(cpu); - } -} - -void cpu_synchronize_all_pre_loadvm(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_pre_loadvm(cpu); - } -} - -void cpu_synchronize_state(CPUState *cpu) -{ - if (cpus_accel->synchronize_state) { - cpus_accel->synchronize_state(cpu); - } -} - -void cpu_synchronize_post_reset(CPUState *cpu) -{ - if (cpus_accel->synchronize_post_reset) { - cpus_accel->synchronize_post_reset(cpu); - } -} - -void cpu_synchronize_post_init(CPUState *cpu) -{ - if 
(cpus_accel->synchronize_post_init) { - cpus_accel->synchronize_post_init(cpu); - } -} - -void cpu_synchronize_pre_loadvm(CPUState *cpu) -{ - if (cpus_accel->synchronize_pre_loadvm) { - cpus_accel->synchronize_pre_loadvm(cpu); - } -} - -bool cpus_are_resettable(void) -{ - if (cpus_accel->cpus_are_resettable) { - return cpus_accel->cpus_are_resettable(); - } - return true; -} - -int64_t cpus_get_virtual_clock(void) -{ - /* - * XXX - * - * need to check that cpus_accel is not NULL, because qcow2 calls - * qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and - * with ticks disabled in some io-tests: - * 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267 - * - * is this expected? - * - * XXX - */ - if (cpus_accel && cpus_accel->get_virtual_clock) { - return cpus_accel->get_virtual_clock(); - } - return cpu_get_clock(); -} - -/* - * return the time elapsed in VM between vm_start and vm_stop. Unless - * icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle - * counter. - */ -int64_t cpus_get_elapsed_ticks(void) -{ - if (cpus_accel->get_elapsed_ticks) { - return cpus_accel->get_elapsed_ticks(); - } - return cpu_get_ticks(); -} - -static void generic_handle_interrupt(CPUState *cpu, int mask) -{ - cpu->interrupt_request |= mask; - - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - } -} - -void cpu_interrupt(CPUState *cpu, int mask) -{ - if (cpus_accel->handle_interrupt) { - cpus_accel->handle_interrupt(cpu, mask); - } else { - generic_handle_interrupt(cpu, mask); - } -} - -static int do_vm_stop(RunState state, bool send_stop) -{ - int ret = 0; - - if (runstate_is_running()) { - runstate_set(state); - cpu_disable_ticks(); - pause_all_vcpus(); - vm_state_notify(0, state); - if (send_stop) { - qapi_event_send_stop(); - } - } - - bdrv_drain_all(); - ret = bdrv_flush_all(); - trace_vm_stop_flush_all(ret); - - return ret; -} - -/* Special vm_stop() variant for terminating the process. 
Historically clients - * did not expect a QMP STOP event and so we need to retain compatibility. - */ -int vm_shutdown(void) -{ - return do_vm_stop(RUN_STATE_SHUTDOWN, false); -} - -bool cpu_can_run(CPUState *cpu) -{ - if (cpu->stop) { - return false; - } - if (cpu_is_stopped(cpu)) { - return false; - } - return true; -} - -void cpu_handle_guest_debug(CPUState *cpu) -{ - if (replay_running_debug()) { - if (!cpu->singlestep_enabled) { - /* - * Report about the breakpoint and - * make a single step to skip it - */ - replay_breakpoint(); - cpu_single_step(cpu, SSTEP_ENABLE); - } else { - cpu_single_step(cpu, 0); - } - } else { - gdb_set_stop_cpu(cpu); - qemu_system_debug_request(); - cpu->stopped = true; - } -} - -#ifdef CONFIG_LINUX -static void sigbus_reraise(void) -{ - sigset_t set; - struct sigaction action; - - memset(&action, 0, sizeof(action)); - action.sa_handler = SIG_DFL; - if (!sigaction(SIGBUS, &action, NULL)) { - raise(SIGBUS); - sigemptyset(&set); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); - } - perror("Failed to re-raise SIGBUS!"); - abort(); -} - -static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) -{ - if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { - sigbus_reraise(); - } - - if (current_cpu) { - /* Called asynchronously in VCPU thread. */ - if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { - sigbus_reraise(); - } - } else { - /* Called synchronously (via signalfd) in main thread. */ - if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { - sigbus_reraise(); - } - } -} - -static void qemu_init_sigbus(void) -{ - struct sigaction action; - - /* - * ALERT: when modifying this, take care that SIGBUS forwarding in - * qemu_prealloc_mem() will continue working as expected. 
- */ - memset(&action, 0, sizeof(action)); - action.sa_flags = SA_SIGINFO; - action.sa_sigaction = sigbus_handler; - sigaction(SIGBUS, &action, NULL); - - prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); -} -#else /* !CONFIG_LINUX */ -static void qemu_init_sigbus(void) -{ -} -#endif /* !CONFIG_LINUX */ - -static QemuThread io_thread; - -/* cpu creation */ -static QemuCond qemu_cpu_cond; -/* system init */ -static QemuCond qemu_pause_cond; - -void qemu_init_cpu_loop(void) -{ - qemu_init_sigbus(); - qemu_cond_init(&qemu_cpu_cond); - qemu_cond_init(&qemu_pause_cond); - qemu_mutex_init(&qemu_global_mutex); - - qemu_thread_get_self(&io_thread); -} - -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) -{ - do_run_on_cpu(cpu, func, data, &qemu_global_mutex); -} - -static void qemu_cpu_stop(CPUState *cpu, bool exit) -{ - g_assert(qemu_cpu_is_self(cpu)); - cpu->stop = false; - cpu->stopped = true; - if (exit) { - cpu_exit(cpu); - } - qemu_cond_broadcast(&qemu_pause_cond); -} - -void qemu_wait_io_event_common(CPUState *cpu) -{ - qatomic_set_mb(&cpu->thread_kicked, false); - if (cpu->stop) { - qemu_cpu_stop(cpu, false); - } - process_queued_cpu_work(cpu); -} - -void qemu_wait_io_event(CPUState *cpu) -{ - bool slept = false; - - while (cpu_thread_is_idle(cpu)) { - if (!slept) { - slept = true; - qemu_plugin_vcpu_idle_cb(cpu); - } - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); - } - if (slept) { - qemu_plugin_vcpu_resume_cb(cpu); - } - - qemu_wait_io_event_common(cpu); -} - -void cpus_kick_thread(CPUState *cpu) -{ - if (cpu->thread_kicked) { - return; - } - cpu->thread_kicked = true; - -#ifndef _WIN32 - int err = pthread_kill(cpu->thread->thread, SIG_IPI); - if (err && err != ESRCH) { - fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); - exit(1); - } -#else - qemu_sem_post(&cpu->sem); -#endif -} - -void qemu_cpu_kick(CPUState *cpu) -{ - qemu_cond_broadcast(cpu->halt_cond); - if (cpus_accel->kick_vcpu_thread) { - 
cpus_accel->kick_vcpu_thread(cpu); - } else { /* default */ - cpus_kick_thread(cpu); - } -} - -void qemu_cpu_kick_self(void) -{ - assert(current_cpu); - cpus_kick_thread(current_cpu); -} - -bool qemu_cpu_is_self(CPUState *cpu) -{ - return qemu_thread_is_self(cpu->thread); -} - -bool qemu_in_vcpu_thread(void) -{ - return current_cpu && qemu_cpu_is_self(current_cpu); -} - -QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) - -bool qemu_mutex_iothread_locked(void) -{ - return get_iothread_locked(); -} - -bool qemu_in_main_thread(void) -{ - return qemu_mutex_iothread_locked(); -} - -/* - * The BQL is taken from so many places that it is worth profiling the - * callers directly, instead of funneling them all through a single function. - */ -void qemu_mutex_lock_iothread_impl(const char *file, int line) -{ - QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func); - - g_assert(!qemu_mutex_iothread_locked()); - bql_lock(&qemu_global_mutex, file, line); - set_iothread_locked(true); -} - -void qemu_mutex_unlock_iothread(void) -{ - g_assert(qemu_mutex_iothread_locked()); - set_iothread_locked(false); - qemu_mutex_unlock(&qemu_global_mutex); -} - -void qemu_cond_wait_iothread(QemuCond *cond) -{ - qemu_cond_wait(cond, &qemu_global_mutex); -} - -void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) -{ - qemu_cond_timedwait(cond, &qemu_global_mutex, ms); -} - -/* signal CPU creation */ -void cpu_thread_signal_created(CPUState *cpu) -{ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); -} - -/* signal CPU destruction */ -void cpu_thread_signal_destroyed(CPUState *cpu) -{ - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); -} - - -static bool all_vcpus_paused(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (!cpu->stopped) { - return false; - } - } - - return true; -} - -void pause_all_vcpus(void) -{ - CPUState *cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); - CPU_FOREACH(cpu) { - if (qemu_cpu_is_self(cpu)) { - qemu_cpu_stop(cpu, true); 
- } else { - cpu->stop = true; - qemu_cpu_kick(cpu); - } - } - - /* We need to drop the replay_lock so any vCPU threads woken up - * can finish their replay tasks - */ - replay_mutex_unlock(); - - while (!all_vcpus_paused()) { - qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); - CPU_FOREACH(cpu) { - qemu_cpu_kick(cpu); - } - } - - qemu_mutex_unlock_iothread(); - replay_mutex_lock(); - qemu_mutex_lock_iothread(); -} - -void cpu_resume(CPUState *cpu) -{ - cpu->stop = false; - cpu->stopped = false; - qemu_cpu_kick(cpu); -} - -void resume_all_vcpus(void) -{ - CPUState *cpu; - - if (!runstate_is_running()) { - return; - } - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); - CPU_FOREACH(cpu) { - cpu_resume(cpu); - } -} - -void cpu_remove_sync(CPUState *cpu) -{ - cpu->stop = true; - cpu->unplug = true; - qemu_cpu_kick(cpu); - qemu_mutex_unlock_iothread(); - qemu_thread_join(cpu->thread); - qemu_mutex_lock_iothread(); -} - -void cpus_register_accel(const AccelOpsClass *ops) -{ - assert(ops != NULL); - assert(ops->create_vcpu_thread != NULL); /* mandatory */ - cpus_accel = ops; -} - -const AccelOpsClass *cpus_get_accel(void) -{ - /* broken if we call this early */ - assert(cpus_accel); - return cpus_accel; -} - -void qemu_init_vcpu(CPUState *cpu) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - - cpu->nr_cores = ms->smp.cores; - cpu->nr_threads = ms->smp.threads; - cpu->stopped = true; - cpu->random_seed = qemu_guest_random_seed_thread_part1(); - - if (!cpu->as) { - /* If the target cpu hasn't set up any address spaces itself, - * give it the default one. 
- */ - cpu->num_ases = 1; - cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory); - } - - /* accelerators all implement the AccelOpsClass */ - g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL); - cpus_accel->create_vcpu_thread(cpu); - - while (!cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); - } -} - -void cpu_stop_current(void) -{ - if (current_cpu) { - current_cpu->stop = true; - cpu_exit(current_cpu); - } -} - -int vm_stop(RunState state) -{ - if (qemu_in_vcpu_thread()) { - qemu_system_vmstop_request_prepare(); - qemu_system_vmstop_request(state); - /* - * FIXME: should not return to device code in case - * vm_stop() has been requested. - */ - cpu_stop_current(); - return 0; - } - - return do_vm_stop(state, true); -} - -/** - * Prepare for (re)starting the VM. - * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already - * running or in case of an error condition), 0 otherwise. - */ -int vm_prepare_start(bool step_pending) -{ - RunState requested; - - qemu_vmstop_requested(&requested); - if (runstate_is_running() && requested == RUN_STATE__MAX) { - return -1; - } - - /* Ensure that a STOP/RESUME pair of events is emitted if a - * vmstop request was pending. The BLOCK_IO_ERROR event, for - * example, according to documentation is always followed by - * the STOP event. - */ - if (runstate_is_running()) { - qapi_event_send_stop(); - qapi_event_send_resume(); - return -1; - } - - /* - * WHPX accelerator needs to know whether we are going to step - * any CPUs, before starting the first one. 
- */ - if (cpus_accel->synchronize_pre_resume) { - cpus_accel->synchronize_pre_resume(step_pending); - } - - /* We are sending this now, but the CPUs will be resumed shortly later */ - qapi_event_send_resume(); - - cpu_enable_ticks(); - runstate_set(RUN_STATE_RUNNING); - vm_state_notify(1, RUN_STATE_RUNNING); - return 0; -} - -void vm_start(void) -{ - if (!vm_prepare_start(false)) { - resume_all_vcpus(); - } -} - -/* does a state transition even if the VM is already stopped, - current state is forgotten forever */ -int vm_stop_force_state(RunState state) -{ - if (runstate_is_running()) { - return vm_stop(state); - } else { - int ret; - runstate_set(state); - - bdrv_drain_all(); - /* Make sure to return an error if the flush in a previous vm_stop() - * failed. */ - ret = bdrv_flush_all(); - trace_vm_stop_flush_all(ret); - return ret; - } -} - -void qmp_memsave(int64_t addr, int64_t size, const char *filename, - bool has_cpu, int64_t cpu_index, Error **errp) -{ - FILE *f; - uint32_t l; - CPUState *cpu; - uint8_t buf[1024]; - int64_t orig_addr = addr, orig_size = size; - - if (!has_cpu) { - cpu_index = 0; - } - - cpu = qemu_get_cpu(cpu_index); - if (cpu == NULL) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index", - "a CPU number"); - return; - } - - f = fopen(filename, "wb"); - if (!f) { - error_setg_file_open(errp, errno, filename); - return; - } - - while (size != 0) { - l = sizeof(buf); - if (l > size) - l = size; - if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { - error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 - " specified", orig_addr, orig_size); - goto exit; - } - if (fwrite(buf, 1, l, f) != l) { - error_setg(errp, QERR_IO_ERROR); - goto exit; - } - addr += l; - size -= l; - } - -exit: - fclose(f); -} - -void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, - Error **errp) -{ - FILE *f; - uint32_t l; - uint8_t buf[1024]; - - f = fopen(filename, "wb"); - if (!f) { - error_setg_file_open(errp, errno, filename); - 
return; - } - - while (size != 0) { - l = sizeof(buf); - if (l > size) - l = size; - cpu_physical_memory_read(addr, buf, l); - if (fwrite(buf, 1, l, f) != l) { - error_setg(errp, QERR_IO_ERROR); - goto exit; - } - addr += l; - size -= l; - } - -exit: - fclose(f); -} - -void qmp_inject_nmi(Error **errp) -{ - nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp); -} - diff --git a/softmmu/datadir.c b/softmmu/datadir.c deleted file mode 100644 index c9237cb..0000000 --- a/softmmu/datadir.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * QEMU firmware and keymap file search - * - * Copyright (c) 2003-2020 QEMU contributors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "qemu/datadir.h" -#include "qemu/cutils.h" -#include "trace.h" - -static const char *data_dir[16]; -static int data_dir_idx; - -char *qemu_find_file(int type, const char *name) -{ - int i; - const char *subdir; - char *buf; - - /* Try the name as a straight path first */ - if (access(name, R_OK) == 0) { - trace_load_file(name, name); - return g_strdup(name); - } - - switch (type) { - case QEMU_FILE_TYPE_BIOS: - subdir = ""; - break; - case QEMU_FILE_TYPE_KEYMAP: - subdir = "keymaps/"; - break; - default: - abort(); - } - - for (i = 0; i < data_dir_idx; i++) { - buf = g_strdup_printf("%s/%s%s", data_dir[i], subdir, name); - if (access(buf, R_OK) == 0) { - trace_load_file(name, buf); - return buf; - } - g_free(buf); - } - return NULL; -} - -void qemu_add_data_dir(char *path) -{ - int i; - - if (path == NULL) { - return; - } - if (data_dir_idx == ARRAY_SIZE(data_dir)) { - return; - } - for (i = 0; i < data_dir_idx; i++) { - if (strcmp(data_dir[i], path) == 0) { - g_free(path); /* duplicate */ - return; - } - } - data_dir[data_dir_idx++] = path; -} - -void qemu_add_default_firmwarepath(void) -{ - static const char * const dirs[] = { - CONFIG_QEMU_FIRMWAREPATH - NULL - }; - - size_t i; - - /* add configured firmware directories */ - for (i = 0; dirs[i] != NULL; i++) { - qemu_add_data_dir(get_relocated_path(dirs[i])); - } - - /* try to find datadir relative to the executable path */ - qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR)); -} - -void qemu_list_data_dirs(void) -{ - int i; - for (i = 0; i < data_dir_idx; i++) { - printf("%s\n", data_dir[i]); - } -} diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c deleted file mode 100644 index eb5166c..0000000 --- a/softmmu/device_tree.c +++ /dev/null @@ -1,703 +0,0 @@ -/* - * Functions to help device tree manipulation using libfdt. - * It also provides functions to read entries from device tree proc - * interface. - * - * Copyright 2008 IBM Corporation. 
- * Authors: Jerone Young - * Hollis Blanchard - * - * This work is licensed under the GNU GPL license version 2 or later. - * - */ - -#include "qemu/osdep.h" - -#ifdef CONFIG_LINUX -#include -#endif - -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/bswap.h" -#include "qemu/cutils.h" -#include "qemu/guest-random.h" -#include "sysemu/device_tree.h" -#include "hw/loader.h" -#include "hw/boards.h" -#include "qemu/config-file.h" -#include "qapi/qapi-commands-machine.h" -#include "qapi/qmp/qdict.h" -#include "monitor/hmp.h" - -#include - -#define FDT_MAX_SIZE 0x100000 - -void *create_device_tree(int *sizep) -{ - void *fdt; - int ret; - - *sizep = FDT_MAX_SIZE; - fdt = g_malloc0(FDT_MAX_SIZE); - ret = fdt_create(fdt, FDT_MAX_SIZE); - if (ret < 0) { - goto fail; - } - ret = fdt_finish_reservemap(fdt); - if (ret < 0) { - goto fail; - } - ret = fdt_begin_node(fdt, ""); - if (ret < 0) { - goto fail; - } - ret = fdt_end_node(fdt); - if (ret < 0) { - goto fail; - } - ret = fdt_finish(fdt); - if (ret < 0) { - goto fail; - } - ret = fdt_open_into(fdt, fdt, *sizep); - if (ret) { - error_report("%s: Unable to copy device tree into memory: %s", - __func__, fdt_strerror(ret)); - exit(1); - } - - return fdt; -fail: - error_report("%s Couldn't create dt: %s", __func__, fdt_strerror(ret)); - exit(1); -} - -void *load_device_tree(const char *filename_path, int *sizep) -{ - int dt_size; - int dt_file_load_size; - int ret; - void *fdt = NULL; - - *sizep = 0; - dt_size = get_image_size(filename_path); - if (dt_size < 0) { - error_report("Unable to get size of device tree file '%s'", - filename_path); - goto fail; - } - if (dt_size > INT_MAX / 2 - 10000) { - error_report("Device tree file '%s' is too large", filename_path); - goto fail; - } - - /* Expand to 2x size to give enough room for manipulation. 
*/ - dt_size += 10000; - dt_size *= 2; - /* First allocate space in qemu for device tree */ - fdt = g_malloc0(dt_size); - - dt_file_load_size = load_image_size(filename_path, fdt, dt_size); - if (dt_file_load_size < 0) { - error_report("Unable to open device tree file '%s'", - filename_path); - goto fail; - } - - ret = fdt_open_into(fdt, fdt, dt_size); - if (ret) { - error_report("%s: Unable to copy device tree into memory: %s", - __func__, fdt_strerror(ret)); - goto fail; - } - - /* Check sanity of device tree */ - if (fdt_check_header(fdt)) { - error_report("Device tree file loaded into memory is invalid: %s", - filename_path); - goto fail; - } - *sizep = dt_size; - return fdt; - -fail: - g_free(fdt); - return NULL; -} - -#ifdef CONFIG_LINUX - -#define SYSFS_DT_BASEDIR "/proc/device-tree" - -/** - * read_fstree: this function is inspired from dtc read_fstree - * @fdt: preallocated fdt blob buffer, to be populated - * @dirname: directory to scan under SYSFS_DT_BASEDIR - * the search is recursive and the tree is searched down to the - * leaves (property files). 
- * - * the function asserts in case of error - */ -static void read_fstree(void *fdt, const char *dirname) -{ - DIR *d; - struct dirent *de; - struct stat st; - const char *root_dir = SYSFS_DT_BASEDIR; - const char *parent_node; - - if (strstr(dirname, root_dir) != dirname) { - error_report("%s: %s must be searched within %s", - __func__, dirname, root_dir); - exit(1); - } - parent_node = &dirname[strlen(SYSFS_DT_BASEDIR)]; - - d = opendir(dirname); - if (!d) { - error_report("%s cannot open %s", __func__, dirname); - exit(1); - } - - while ((de = readdir(d)) != NULL) { - char *tmpnam; - - if (!g_strcmp0(de->d_name, ".") - || !g_strcmp0(de->d_name, "..")) { - continue; - } - - tmpnam = g_strdup_printf("%s/%s", dirname, de->d_name); - - if (lstat(tmpnam, &st) < 0) { - error_report("%s cannot lstat %s", __func__, tmpnam); - exit(1); - } - - if (S_ISREG(st.st_mode)) { - gchar *val; - gsize len; - - if (!g_file_get_contents(tmpnam, &val, &len, NULL)) { - error_report("%s not able to extract info from %s", - __func__, tmpnam); - exit(1); - } - - if (strlen(parent_node) > 0) { - qemu_fdt_setprop(fdt, parent_node, - de->d_name, val, len); - } else { - qemu_fdt_setprop(fdt, "/", de->d_name, val, len); - } - g_free(val); - } else if (S_ISDIR(st.st_mode)) { - char *node_name; - - node_name = g_strdup_printf("%s/%s", - parent_node, de->d_name); - qemu_fdt_add_subnode(fdt, node_name); - g_free(node_name); - read_fstree(fdt, tmpnam); - } - - g_free(tmpnam); - } - - closedir(d); -} - -/* load_device_tree_from_sysfs: extract the dt blob from host sysfs */ -void *load_device_tree_from_sysfs(void) -{ - void *host_fdt; - int host_fdt_size; - - host_fdt = create_device_tree(&host_fdt_size); - read_fstree(host_fdt, SYSFS_DT_BASEDIR); - if (fdt_check_header(host_fdt)) { - error_report("%s host device tree extracted into memory is invalid", - __func__); - exit(1); - } - return host_fdt; -} - -#endif /* CONFIG_LINUX */ - -static int findnode_nofail(void *fdt, const char *node_path) -{ - 
int offset; - - offset = fdt_path_offset(fdt, node_path); - if (offset < 0) { - error_report("%s Couldn't find node %s: %s", __func__, node_path, - fdt_strerror(offset)); - exit(1); - } - - return offset; -} - -char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp) -{ - char *prefix = g_strdup_printf("%s@", name); - unsigned int path_len = 16, n = 0; - GSList *path_list = NULL, *iter; - const char *iter_name; - int offset, len, ret; - char **path_array; - - offset = fdt_next_node(fdt, -1, NULL); - - while (offset >= 0) { - iter_name = fdt_get_name(fdt, offset, &len); - if (!iter_name) { - offset = len; - break; - } - if (!strcmp(iter_name, name) || g_str_has_prefix(iter_name, prefix)) { - char *path; - - path = g_malloc(path_len); - while ((ret = fdt_get_path(fdt, offset, path, path_len)) - == -FDT_ERR_NOSPACE) { - path_len += 16; - path = g_realloc(path, path_len); - } - path_list = g_slist_prepend(path_list, path); - n++; - } - offset = fdt_next_node(fdt, offset, NULL); - } - g_free(prefix); - - if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { - error_setg(errp, "%s: abort parsing dt for %s node units: %s", - __func__, name, fdt_strerror(offset)); - for (iter = path_list; iter; iter = iter->next) { - g_free(iter->data); - } - g_slist_free(path_list); - return NULL; - } - - path_array = g_new(char *, n + 1); - path_array[n--] = NULL; - - for (iter = path_list; iter; iter = iter->next) { - path_array[n--] = iter->data; - } - - g_slist_free(path_list); - - return path_array; -} - -char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat, - Error **errp) -{ - int offset, len, ret; - const char *iter_name; - unsigned int path_len = 16, n = 0; - GSList *path_list = NULL, *iter; - char **path_array; - - offset = fdt_node_offset_by_compatible(fdt, -1, compat); - - while (offset >= 0) { - iter_name = fdt_get_name(fdt, offset, &len); - if (!iter_name) { - offset = len; - break; - } - if (!name || !strcmp(iter_name, name)) { - char *path; 
- - path = g_malloc(path_len); - while ((ret = fdt_get_path(fdt, offset, path, path_len)) - == -FDT_ERR_NOSPACE) { - path_len += 16; - path = g_realloc(path, path_len); - } - path_list = g_slist_prepend(path_list, path); - n++; - } - offset = fdt_node_offset_by_compatible(fdt, offset, compat); - } - - if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { - error_setg(errp, "%s: abort parsing dt for %s/%s: %s", - __func__, name, compat, fdt_strerror(offset)); - for (iter = path_list; iter; iter = iter->next) { - g_free(iter->data); - } - g_slist_free(path_list); - return NULL; - } - - path_array = g_new(char *, n + 1); - path_array[n--] = NULL; - - for (iter = path_list; iter; iter = iter->next) { - path_array[n--] = iter->data; - } - - g_slist_free(path_list); - - return path_array; -} - -int qemu_fdt_setprop(void *fdt, const char *node_path, - const char *property, const void *val, int size) -{ - int r; - - r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size); - if (r < 0) { - error_report("%s: Couldn't set %s/%s: %s", __func__, node_path, - property, fdt_strerror(r)); - exit(1); - } - - return r; -} - -int qemu_fdt_setprop_cell(void *fdt, const char *node_path, - const char *property, uint32_t val) -{ - int r; - - r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val); - if (r < 0) { - error_report("%s: Couldn't set %s/%s = %#08x: %s", __func__, - node_path, property, val, fdt_strerror(r)); - exit(1); - } - - return r; -} - -int qemu_fdt_setprop_u64(void *fdt, const char *node_path, - const char *property, uint64_t val) -{ - val = cpu_to_be64(val); - return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val)); -} - -int qemu_fdt_setprop_string(void *fdt, const char *node_path, - const char *property, const char *string) -{ - int r; - - r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string); - if (r < 0) { - error_report("%s: Couldn't set %s/%s = %s: %s", __func__, - node_path, property, string, 
fdt_strerror(r)); - exit(1); - } - - return r; -} - -/* - * libfdt doesn't allow us to add string arrays directly but they are - * test a series of null terminated strings with a length. We build - * the string up here so we can calculate the final length. - */ -int qemu_fdt_setprop_string_array(void *fdt, const char *node_path, - const char *prop, char **array, int len) -{ - int ret, i, total_len = 0; - char *str, *p; - for (i = 0; i < len; i++) { - total_len += strlen(array[i]) + 1; - } - p = str = g_malloc0(total_len); - for (i = 0; i < len; i++) { - int offset = strlen(array[i]) + 1; - pstrcpy(p, offset, array[i]); - p += offset; - } - - ret = qemu_fdt_setprop(fdt, node_path, prop, str, total_len); - g_free(str); - return ret; -} - -const void *qemu_fdt_getprop(void *fdt, const char *node_path, - const char *property, int *lenp, Error **errp) -{ - int len; - const void *r; - - if (!lenp) { - lenp = &len; - } - r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp); - if (!r) { - error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__, - node_path, property, fdt_strerror(*lenp)); - } - return r; -} - -uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, - const char *property, int *lenp, Error **errp) -{ - int len; - const uint32_t *p; - - if (!lenp) { - lenp = &len; - } - p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp); - if (!p) { - return 0; - } else if (*lenp != 4) { - error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)", - __func__, node_path, property); - *lenp = -EINVAL; - return 0; - } - return be32_to_cpu(*p); -} - -uint32_t qemu_fdt_get_phandle(void *fdt, const char *path) -{ - uint32_t r; - - r = fdt_get_phandle(fdt, findnode_nofail(fdt, path)); - if (r == 0) { - error_report("%s: Couldn't get phandle for %s: %s", __func__, - path, fdt_strerror(r)); - exit(1); - } - - return r; -} - -int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, - const char *property, - const char *target_node_path) -{ - 
uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path); - return qemu_fdt_setprop_cell(fdt, node_path, property, phandle); -} - -uint32_t qemu_fdt_alloc_phandle(void *fdt) -{ - static int phandle = 0x0; - - /* - * We need to find out if the user gave us special instruction at - * which phandle id to start allocating phandles. - */ - if (!phandle) { - phandle = machine_phandle_start(current_machine); - } - - if (!phandle) { - /* - * None or invalid phandle given on the command line, so fall back to - * default starting point. - */ - phandle = 0x8000; - } - - return phandle++; -} - -int qemu_fdt_nop_node(void *fdt, const char *node_path) -{ - int r; - - r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path)); - if (r < 0) { - error_report("%s: Couldn't nop node %s: %s", __func__, node_path, - fdt_strerror(r)); - exit(1); - } - - return r; -} - -int qemu_fdt_add_subnode(void *fdt, const char *name) -{ - char *dupname = g_strdup(name); - char *basename = strrchr(dupname, '/'); - int retval; - int parent = 0; - - if (!basename) { - g_free(dupname); - return -1; - } - - basename[0] = '\0'; - basename++; - - if (dupname[0]) { - parent = findnode_nofail(fdt, dupname); - } - - retval = fdt_add_subnode(fdt, parent, basename); - if (retval < 0) { - error_report("%s: Failed to create subnode %s: %s", - __func__, name, fdt_strerror(retval)); - exit(1); - } - - g_free(dupname); - return retval; -} - -/* - * qemu_fdt_add_path: Like qemu_fdt_add_subnode(), but will add - * all missing subnodes from the given path. - */ -int qemu_fdt_add_path(void *fdt, const char *path) -{ - const char *name; - int namelen, retval; - int parent = 0; - - if (path[0] != '/') { - return -1; - } - - do { - name = path + 1; - path = strchr(name, '/'); - namelen = path != NULL ? 
path - name : strlen(name); - - retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen); - if (retval < 0 && retval != -FDT_ERR_NOTFOUND) { - error_report("%s: Unexpected error in finding subnode %.*s: %s", - __func__, namelen, name, fdt_strerror(retval)); - exit(1); - } else if (retval == -FDT_ERR_NOTFOUND) { - retval = fdt_add_subnode_namelen(fdt, parent, name, namelen); - if (retval < 0) { - error_report("%s: Failed to create subnode %.*s: %s", - __func__, namelen, name, fdt_strerror(retval)); - exit(1); - } - } - - parent = retval; - } while (path); - - return retval; -} - -void qemu_fdt_dumpdtb(void *fdt, int size) -{ - const char *dumpdtb = current_machine->dumpdtb; - - if (dumpdtb) { - /* Dump the dtb to a file and quit */ - if (g_file_set_contents(dumpdtb, fdt, size, NULL)) { - info_report("dtb dumped to %s. Exiting.", dumpdtb); - exit(0); - } - error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb); - exit(1); - } -} - -int qemu_fdt_setprop_sized_cells_from_array(void *fdt, - const char *node_path, - const char *property, - int numvalues, - uint64_t *values) -{ - uint32_t *propcells; - uint64_t value; - int cellnum, vnum, ncells; - uint32_t hival; - int ret; - - propcells = g_new0(uint32_t, numvalues * 2); - - cellnum = 0; - for (vnum = 0; vnum < numvalues; vnum++) { - ncells = values[vnum * 2]; - if (ncells != 1 && ncells != 2) { - ret = -1; - goto out; - } - value = values[vnum * 2 + 1]; - hival = cpu_to_be32(value >> 32); - if (ncells > 1) { - propcells[cellnum++] = hival; - } else if (hival != 0) { - ret = -1; - goto out; - } - propcells[cellnum++] = cpu_to_be32(value); - } - - ret = qemu_fdt_setprop(fdt, node_path, property, propcells, - cellnum * sizeof(uint32_t)); -out: - g_free(propcells); - return ret; -} - -void qmp_dumpdtb(const char *filename, Error **errp) -{ - g_autoptr(GError) err = NULL; - uint32_t size; - - if (!current_machine->fdt) { - error_setg(errp, "This machine doesn't have a FDT"); - return; - } - - size = 
fdt_totalsize(current_machine->fdt); - - g_assert(size > 0); - - if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) { - error_setg(errp, "Error saving FDT to file %s: %s", - filename, err->message); - } -} - -void hmp_dumpdtb(Monitor *mon, const QDict *qdict) -{ - const char *filename = qdict_get_str(qdict, "filename"); - Error *local_err = NULL; - - qmp_dumpdtb(filename, &local_err); - - if (hmp_handle_error(mon, local_err)) { - return; - } - - info_report("dtb dumped to %s", filename); -} - -void qemu_fdt_randomize_seeds(void *fdt) -{ - int noffset, poffset, len; - const char *name; - uint8_t *data; - - for (noffset = fdt_next_node(fdt, 0, NULL); - noffset >= 0; - noffset = fdt_next_node(fdt, noffset, NULL)) { - for (poffset = fdt_first_property_offset(fdt, noffset); - poffset >= 0; - poffset = fdt_next_property_offset(fdt, poffset)) { - data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len); - if (!data || strcmp(name, "rng-seed")) - continue; - qemu_guest_getrandom_nofail(data, len); - } - } -} diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c deleted file mode 100644 index fa959d7..0000000 --- a/softmmu/dirtylimit.c +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Dirty page rate limit implementation code - * - * Copyright (c) 2022 CHINA TELECOM CO.,LTD. - * - * Authors: - * Hyman Huang(黄勇) - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "qemu/main-loop.h" -#include "qapi/qapi-commands-migration.h" -#include "qapi/qmp/qdict.h" -#include "qapi/error.h" -#include "sysemu/dirtyrate.h" -#include "sysemu/dirtylimit.h" -#include "monitor/hmp.h" -#include "monitor/monitor.h" -#include "exec/memory.h" -#include "exec/target_page.h" -#include "hw/boards.h" -#include "sysemu/kvm.h" -#include "trace.h" -#include "migration/misc.h" -#include "migration/migration.h" -#include "migration/options.h" - -/* - * Dirtylimit stop working if dirty page rate error - * value less than DIRTYLIMIT_TOLERANCE_RANGE - */ -#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ -/* - * Plus or minus vcpu sleep time linearly if dirty - * page rate error value percentage over - * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. - * Otherwise, plus or minus a fixed vcpu sleep time. - */ -#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 -/* - * Max vcpu sleep time percentage during a cycle - * composed of dirty ring full and sleep time. - */ -#define DIRTYLIMIT_THROTTLE_PCT_MAX 99 - -struct { - VcpuStat stat; - bool running; - QemuThread thread; -} *vcpu_dirty_rate_stat; - -typedef struct VcpuDirtyLimitState { - int cpu_index; - bool enabled; - /* - * Quota dirty page rate, unit is MB/s - * zero if not enabled. 
- */ - uint64_t quota; -} VcpuDirtyLimitState; - -struct { - VcpuDirtyLimitState *states; - /* Max cpus number configured by user */ - int max_cpus; - /* Number of vcpu under dirtylimit */ - int limited_nvcpu; -} *dirtylimit_state; - -/* protect dirtylimit_state */ -static QemuMutex dirtylimit_mutex; - -/* dirtylimit thread quit if dirtylimit_quit is true */ -static bool dirtylimit_quit; - -static void vcpu_dirty_rate_stat_collect(void) -{ - MigrationState *s = migrate_get_current(); - VcpuStat stat; - int i = 0; - int64_t period = DIRTYLIMIT_CALC_TIME_MS; - - if (migrate_dirty_limit() && - migration_is_active(s)) { - period = s->parameters.x_vcpu_dirty_limit_period; - } - - /* calculate vcpu dirtyrate */ - vcpu_calculate_dirtyrate(period, - &stat, - GLOBAL_DIRTY_LIMIT, - false); - - for (i = 0; i < stat.nvcpu; i++) { - vcpu_dirty_rate_stat->stat.rates[i].id = i; - vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = - stat.rates[i].dirty_rate; - } - - g_free(stat.rates); -} - -static void *vcpu_dirty_rate_stat_thread(void *opaque) -{ - rcu_register_thread(); - - /* start log sync */ - global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); - - while (qatomic_read(&vcpu_dirty_rate_stat->running)) { - vcpu_dirty_rate_stat_collect(); - if (dirtylimit_in_service()) { - dirtylimit_process(); - } - } - - /* stop log sync */ - global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); - - rcu_unregister_thread(); - return NULL; -} - -int64_t vcpu_dirty_rate_get(int cpu_index) -{ - DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; - return qatomic_read_i64(&rates[cpu_index].dirty_rate); -} - -void vcpu_dirty_rate_stat_start(void) -{ - if (qatomic_read(&vcpu_dirty_rate_stat->running)) { - return; - } - - qatomic_set(&vcpu_dirty_rate_stat->running, 1); - qemu_thread_create(&vcpu_dirty_rate_stat->thread, - "dirtyrate-stat", - vcpu_dirty_rate_stat_thread, - NULL, - QEMU_THREAD_JOINABLE); -} - -void vcpu_dirty_rate_stat_stop(void) -{ - qatomic_set(&vcpu_dirty_rate_stat->running, 0); - 
dirtylimit_state_unlock(); - qemu_mutex_unlock_iothread(); - qemu_thread_join(&vcpu_dirty_rate_stat->thread); - qemu_mutex_lock_iothread(); - dirtylimit_state_lock(); -} - -void vcpu_dirty_rate_stat_initialize(void) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - int max_cpus = ms->smp.max_cpus; - - vcpu_dirty_rate_stat = - g_malloc0(sizeof(*vcpu_dirty_rate_stat)); - - vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; - vcpu_dirty_rate_stat->stat.rates = - g_new0(DirtyRateVcpu, max_cpus); - - vcpu_dirty_rate_stat->running = false; -} - -void vcpu_dirty_rate_stat_finalize(void) -{ - g_free(vcpu_dirty_rate_stat->stat.rates); - vcpu_dirty_rate_stat->stat.rates = NULL; - - g_free(vcpu_dirty_rate_stat); - vcpu_dirty_rate_stat = NULL; -} - -void dirtylimit_state_lock(void) -{ - qemu_mutex_lock(&dirtylimit_mutex); -} - -void dirtylimit_state_unlock(void) -{ - qemu_mutex_unlock(&dirtylimit_mutex); -} - -static void -__attribute__((__constructor__)) dirtylimit_mutex_init(void) -{ - qemu_mutex_init(&dirtylimit_mutex); -} - -static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) -{ - return &dirtylimit_state->states[cpu_index]; -} - -void dirtylimit_state_initialize(void) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - int max_cpus = ms->smp.max_cpus; - int i; - - dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); - - dirtylimit_state->states = - g_new0(VcpuDirtyLimitState, max_cpus); - - for (i = 0; i < max_cpus; i++) { - dirtylimit_state->states[i].cpu_index = i; - } - - dirtylimit_state->max_cpus = max_cpus; - trace_dirtylimit_state_initialize(max_cpus); -} - -void dirtylimit_state_finalize(void) -{ - g_free(dirtylimit_state->states); - dirtylimit_state->states = NULL; - - g_free(dirtylimit_state); - dirtylimit_state = NULL; - - trace_dirtylimit_state_finalize(); -} - -bool dirtylimit_in_service(void) -{ - return !!dirtylimit_state; -} - -bool dirtylimit_vcpu_index_valid(int cpu_index) -{ - MachineState *ms = MACHINE(qdev_get_machine()); 
- - return !(cpu_index < 0 || - cpu_index >= ms->smp.max_cpus); -} - -static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) -{ - static uint64_t max_dirtyrate; - uint64_t dirty_ring_size_MiB; - - dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size()); - - if (max_dirtyrate < dirtyrate) { - max_dirtyrate = dirtyrate; - } - - return dirty_ring_size_MiB * 1000000 / max_dirtyrate; -} - -static inline bool dirtylimit_done(uint64_t quota, - uint64_t current) -{ - uint64_t min, max; - - min = MIN(quota, current); - max = MAX(quota, current); - - return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; -} - -static inline bool -dirtylimit_need_linear_adjustment(uint64_t quota, - uint64_t current) -{ - uint64_t min, max; - - min = MIN(quota, current); - max = MAX(quota, current); - - return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; -} - -static void dirtylimit_set_throttle(CPUState *cpu, - uint64_t quota, - uint64_t current) -{ - int64_t ring_full_time_us = 0; - uint64_t sleep_pct = 0; - uint64_t throttle_us = 0; - - if (current == 0) { - cpu->throttle_us_per_full = 0; - return; - } - - ring_full_time_us = dirtylimit_dirty_ring_full_time(current); - - if (dirtylimit_need_linear_adjustment(quota, current)) { - if (quota < current) { - sleep_pct = (current - quota) * 100 / current; - throttle_us = - ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); - cpu->throttle_us_per_full += throttle_us; - } else { - sleep_pct = (quota - current) * 100 / quota; - throttle_us = - ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); - cpu->throttle_us_per_full -= throttle_us; - } - - trace_dirtylimit_throttle_pct(cpu->cpu_index, - sleep_pct, - throttle_us); - } else { - if (quota < current) { - cpu->throttle_us_per_full += ring_full_time_us / 10; - } else { - cpu->throttle_us_per_full -= ring_full_time_us / 10; - } - } - - /* - * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), - * current 
dirty page rate may never reach the quota, we should stop - * increasing sleep time? - */ - cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, - ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); - - cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); -} - -static void dirtylimit_adjust_throttle(CPUState *cpu) -{ - uint64_t quota = 0; - uint64_t current = 0; - int cpu_index = cpu->cpu_index; - - quota = dirtylimit_vcpu_get_state(cpu_index)->quota; - current = vcpu_dirty_rate_get(cpu_index); - - if (!dirtylimit_done(quota, current)) { - dirtylimit_set_throttle(cpu, quota, current); - } - - return; -} - -void dirtylimit_process(void) -{ - CPUState *cpu; - - if (!qatomic_read(&dirtylimit_quit)) { - dirtylimit_state_lock(); - - if (!dirtylimit_in_service()) { - dirtylimit_state_unlock(); - return; - } - - CPU_FOREACH(cpu) { - if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { - continue; - } - dirtylimit_adjust_throttle(cpu); - } - dirtylimit_state_unlock(); - } -} - -void dirtylimit_change(bool start) -{ - if (start) { - qatomic_set(&dirtylimit_quit, 0); - } else { - qatomic_set(&dirtylimit_quit, 1); - } -} - -void dirtylimit_set_vcpu(int cpu_index, - uint64_t quota, - bool enable) -{ - trace_dirtylimit_set_vcpu(cpu_index, quota); - - if (enable) { - dirtylimit_state->states[cpu_index].quota = quota; - if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { - dirtylimit_state->limited_nvcpu++; - } - } else { - dirtylimit_state->states[cpu_index].quota = 0; - if (dirtylimit_state->states[cpu_index].enabled) { - dirtylimit_state->limited_nvcpu--; - } - } - - dirtylimit_state->states[cpu_index].enabled = enable; -} - -void dirtylimit_set_all(uint64_t quota, - bool enable) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - int max_cpus = ms->smp.max_cpus; - int i; - - for (i = 0; i < max_cpus; i++) { - dirtylimit_set_vcpu(i, quota, enable); - } -} - -void dirtylimit_vcpu_execute(CPUState *cpu) -{ - if (dirtylimit_in_service() && - 
dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && - cpu->throttle_us_per_full) { - trace_dirtylimit_vcpu_execute(cpu->cpu_index, - cpu->throttle_us_per_full); - usleep(cpu->throttle_us_per_full); - } -} - -static void dirtylimit_init(void) -{ - dirtylimit_state_initialize(); - dirtylimit_change(true); - vcpu_dirty_rate_stat_initialize(); - vcpu_dirty_rate_stat_start(); -} - -static void dirtylimit_cleanup(void) -{ - vcpu_dirty_rate_stat_stop(); - vcpu_dirty_rate_stat_finalize(); - dirtylimit_change(false); - dirtylimit_state_finalize(); -} - -/* - * dirty page rate limit is not allowed to set if migration - * is running with dirty-limit capability enabled. - */ -static bool dirtylimit_is_allowed(void) -{ - MigrationState *ms = migrate_get_current(); - - if (migration_is_running(ms->state) && - (!qemu_thread_is_self(&ms->thread)) && - migrate_dirty_limit() && - dirtylimit_in_service()) { - return false; - } - return true; -} - -void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, - int64_t cpu_index, - Error **errp) -{ - if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { - return; - } - - if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { - error_setg(errp, "incorrect cpu index specified"); - return; - } - - if (!dirtylimit_is_allowed()) { - error_setg(errp, "can't cancel dirty page rate limit while" - " migration is running"); - return; - } - - if (!dirtylimit_in_service()) { - return; - } - - dirtylimit_state_lock(); - - if (has_cpu_index) { - dirtylimit_set_vcpu(cpu_index, 0, false); - } else { - dirtylimit_set_all(0, false); - } - - if (!dirtylimit_state->limited_nvcpu) { - dirtylimit_cleanup(); - } - - dirtylimit_state_unlock(); -} - -void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) -{ - int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); - Error *err = NULL; - - qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); - if (err) { - hmp_handle_error(mon, err); - return; - } - - monitor_printf(mon, 
"[Please use 'info vcpu_dirty_limit' to query " - "dirty limit for virtual CPU]\n"); -} - -void qmp_set_vcpu_dirty_limit(bool has_cpu_index, - int64_t cpu_index, - uint64_t dirty_rate, - Error **errp) -{ - if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { - error_setg(errp, "dirty page limit feature requires KVM with" - " accelerator property 'dirty-ring-size' set'"); - return; - } - - if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { - error_setg(errp, "incorrect cpu index specified"); - return; - } - - if (!dirtylimit_is_allowed()) { - error_setg(errp, "can't set dirty page rate limit while" - " migration is running"); - return; - } - - if (!dirty_rate) { - qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); - return; - } - - dirtylimit_state_lock(); - - if (!dirtylimit_in_service()) { - dirtylimit_init(); - } - - if (has_cpu_index) { - dirtylimit_set_vcpu(cpu_index, dirty_rate, true); - } else { - dirtylimit_set_all(dirty_rate, true); - } - - dirtylimit_state_unlock(); -} - -void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) -{ - int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); - int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); - Error *err = NULL; - - if (dirty_rate < 0) { - error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate); - goto out; - } - - qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); - -out: - hmp_handle_error(mon, err); -} - -/* Return the max throttle time of each virtual CPU */ -uint64_t dirtylimit_throttle_time_per_round(void) -{ - CPUState *cpu; - int64_t max = 0; - - CPU_FOREACH(cpu) { - if (cpu->throttle_us_per_full > max) { - max = cpu->throttle_us_per_full; - } - } - - return max; -} - -/* - * Estimate average dirty ring full time of each virtaul CPU. - * Return 0 if guest doesn't dirty memory. 
- */ -uint64_t dirtylimit_ring_full_time(void) -{ - CPUState *cpu; - uint64_t curr_rate = 0; - int nvcpus = 0; - - CPU_FOREACH(cpu) { - if (cpu->running) { - nvcpus++; - curr_rate += vcpu_dirty_rate_get(cpu->cpu_index); - } - } - - if (!curr_rate || !nvcpus) { - return 0; - } - - return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus); -} - -static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) -{ - DirtyLimitInfo *info = NULL; - - info = g_malloc0(sizeof(*info)); - info->cpu_index = cpu_index; - info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; - info->current_rate = vcpu_dirty_rate_get(cpu_index); - - return info; -} - -static struct DirtyLimitInfoList *dirtylimit_query_all(void) -{ - int i, index; - DirtyLimitInfo *info = NULL; - DirtyLimitInfoList *head = NULL, **tail = &head; - - dirtylimit_state_lock(); - - if (!dirtylimit_in_service()) { - dirtylimit_state_unlock(); - return NULL; - } - - for (i = 0; i < dirtylimit_state->max_cpus; i++) { - index = dirtylimit_state->states[i].cpu_index; - if (dirtylimit_vcpu_get_state(index)->enabled) { - info = dirtylimit_query_vcpu(index); - QAPI_LIST_APPEND(tail, info); - } - } - - dirtylimit_state_unlock(); - - return head; -} - -struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) -{ - if (!dirtylimit_in_service()) { - return NULL; - } - - return dirtylimit_query_all(); -} - -void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) -{ - DirtyLimitInfoList *info; - g_autoptr(DirtyLimitInfoList) head = NULL; - Error *err = NULL; - - if (!dirtylimit_in_service()) { - monitor_printf(mon, "Dirty page limit not enabled!\n"); - return; - } - - head = qmp_query_vcpu_dirty_limit(&err); - if (err) { - hmp_handle_error(mon, err); - return; - } - - for (info = head; info != NULL; info = info->next) { - monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," - " current rate %"PRIi64 " (MB/s)\n", - info->value->cpu_index, - info->value->limit_rate, - 
info->value->current_rate); - } -} diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c deleted file mode 100644 index 36211ac..0000000 --- a/softmmu/dma-helpers.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * DMA helper functions - * - * Copyright (c) 2009,2020 Red Hat - * - * This work is licensed under the terms of the GNU General Public License - * (GNU GPL), version 2 or later. - */ - -#include "qemu/osdep.h" -#include "sysemu/block-backend.h" -#include "sysemu/dma.h" -#include "trace/trace-root.h" -#include "qemu/thread.h" -#include "qemu/main-loop.h" -#include "sysemu/cpu-timers.h" -#include "qemu/range.h" - -/* #define DEBUG_IOMMU */ - -MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, - uint8_t c, dma_addr_t len, MemTxAttrs attrs) -{ - dma_barrier(as, DMA_DIRECTION_FROM_DEVICE); - - return address_space_set(as, addr, c, len, attrs); -} - -void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, - AddressSpace *as) -{ - qsg->sg = g_new(ScatterGatherEntry, alloc_hint); - qsg->nsg = 0; - qsg->nalloc = alloc_hint; - qsg->size = 0; - qsg->as = as; - qsg->dev = dev; - object_ref(OBJECT(dev)); -} - -void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) -{ - if (qsg->nsg == qsg->nalloc) { - qsg->nalloc = 2 * qsg->nalloc + 1; - qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc); - } - qsg->sg[qsg->nsg].base = base; - qsg->sg[qsg->nsg].len = len; - qsg->size += len; - ++qsg->nsg; -} - -void qemu_sglist_destroy(QEMUSGList *qsg) -{ - object_unref(OBJECT(qsg->dev)); - g_free(qsg->sg); - memset(qsg, 0, sizeof(*qsg)); -} - -typedef struct { - BlockAIOCB common; - AioContext *ctx; - BlockAIOCB *acb; - QEMUSGList *sg; - uint32_t align; - uint64_t offset; - DMADirection dir; - int sg_cur_index; - dma_addr_t sg_cur_byte; - QEMUIOVector iov; - QEMUBH *bh; - DMAIOFunc *io_func; - void *io_func_opaque; -} DMAAIOCB; - -static void dma_blk_cb(void *opaque, int ret); - -static void reschedule_dma(void *opaque) -{ - DMAAIOCB 
*dbs = (DMAAIOCB *)opaque; - - assert(!dbs->acb && dbs->bh); - qemu_bh_delete(dbs->bh); - dbs->bh = NULL; - dma_blk_cb(dbs, 0); -} - -static void dma_blk_unmap(DMAAIOCB *dbs) -{ - int i; - - for (i = 0; i < dbs->iov.niov; ++i) { - dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base, - dbs->iov.iov[i].iov_len, dbs->dir, - dbs->iov.iov[i].iov_len); - } - qemu_iovec_reset(&dbs->iov); -} - -static void dma_complete(DMAAIOCB *dbs, int ret) -{ - trace_dma_complete(dbs, ret, dbs->common.cb); - - assert(!dbs->acb && !dbs->bh); - dma_blk_unmap(dbs); - if (dbs->common.cb) { - dbs->common.cb(dbs->common.opaque, ret); - } - qemu_iovec_destroy(&dbs->iov); - qemu_aio_unref(dbs); -} - -static void dma_blk_cb(void *opaque, int ret) -{ - DMAAIOCB *dbs = (DMAAIOCB *)opaque; - AioContext *ctx = dbs->ctx; - dma_addr_t cur_addr, cur_len; - void *mem; - - trace_dma_blk_cb(dbs, ret); - - aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); - goto out; - } - dma_blk_unmap(dbs); - - while (dbs->sg_cur_index < dbs->sg->nsg) { - cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; - cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; - mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir, - MEMTXATTRS_UNSPECIFIED); - /* - * Make reads deterministic in icount mode. Windows sometimes issues - * disk read requests with overlapping SGs. It leads - * to non-determinism, because resulting buffer contents may be mixed - * from several sectors. This code splits all SGs into several - * groups. SGs in every group do not overlap. 
- */ - if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { - int i; - for (i = 0 ; i < dbs->iov.niov ; ++i) { - if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, - dbs->iov.iov[i].iov_len, (intptr_t)mem, - cur_len)) { - dma_memory_unmap(dbs->sg->as, mem, cur_len, - dbs->dir, cur_len); - mem = NULL; - break; - } - } - } - if (!mem) - break; - qemu_iovec_add(&dbs->iov, mem, cur_len); - dbs->sg_cur_byte += cur_len; - if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) { - dbs->sg_cur_byte = 0; - ++dbs->sg_cur_index; - } - } - - if (dbs->iov.size == 0) { - trace_dma_map_wait(dbs); - dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); - goto out; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { - qemu_iovec_discard_back(&dbs->iov, - QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); - } - - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); - assert(dbs->acb); -out: - aio_context_release(ctx); -} - -static void dma_aio_cancel(BlockAIOCB *acb) -{ - DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); - - trace_dma_aio_cancel(dbs); - - assert(!(dbs->acb && dbs->bh)); - if (dbs->acb) { - /* This will invoke dma_blk_cb. 
*/ - blk_aio_cancel_async(dbs->acb); - return; - } - - if (dbs->bh) { - cpu_unregister_map_client(dbs->bh); - qemu_bh_delete(dbs->bh); - dbs->bh = NULL; - } - if (dbs->common.cb) { - dbs->common.cb(dbs->common.opaque, -ECANCELED); - } -} - -static const AIOCBInfo dma_aiocb_info = { - .aiocb_size = sizeof(DMAAIOCB), - .cancel_async = dma_aio_cancel, -}; - -BlockAIOCB *dma_blk_io(AioContext *ctx, - QEMUSGList *sg, uint64_t offset, uint32_t align, - DMAIOFunc *io_func, void *io_func_opaque, - BlockCompletionFunc *cb, - void *opaque, DMADirection dir) -{ - DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); - - trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); - - dbs->acb = NULL; - dbs->sg = sg; - dbs->ctx = ctx; - dbs->offset = offset; - dbs->align = align; - dbs->sg_cur_index = 0; - dbs->sg_cur_byte = 0; - dbs->dir = dir; - dbs->io_func = io_func; - dbs->io_func_opaque = io_func_opaque; - dbs->bh = NULL; - qemu_iovec_init(&dbs->iov, sg->nsg); - dma_blk_cb(dbs, 0); - return &dbs->common; -} - - -static -BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, - BlockCompletionFunc *cb, void *cb_opaque, - void *opaque) -{ - BlockBackend *blk = opaque; - return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); -} - -BlockAIOCB *dma_blk_read(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, uint32_t align, - void (*cb)(void *opaque, int ret), void *opaque) -{ - return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, - dma_blk_read_io_func, blk, cb, opaque, - DMA_DIRECTION_FROM_DEVICE); -} - -static -BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, - BlockCompletionFunc *cb, void *cb_opaque, - void *opaque) -{ - BlockBackend *blk = opaque; - return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); -} - -BlockAIOCB *dma_blk_write(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, uint32_t align, - void (*cb)(void *opaque, int ret), void *opaque) -{ - return 
dma_blk_io(blk_get_aio_context(blk), sg, offset, align, - dma_blk_write_io_func, blk, cb, opaque, - DMA_DIRECTION_TO_DEVICE); -} - - -static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, - QEMUSGList *sg, DMADirection dir, - MemTxAttrs attrs) -{ - uint8_t *ptr = buf; - dma_addr_t xresidual; - int sg_cur_index; - MemTxResult res = MEMTX_OK; - - xresidual = sg->size; - sg_cur_index = 0; - len = MIN(len, xresidual); - while (len > 0) { - ScatterGatherEntry entry = sg->sg[sg_cur_index++]; - dma_addr_t xfer = MIN(len, entry.len); - res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs); - ptr += xfer; - len -= xfer; - xresidual -= xfer; - } - - if (residual) { - *residual = xresidual; - } - return res; -} - -MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, - QEMUSGList *sg, MemTxAttrs attrs) -{ - return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs); -} - -MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, - QEMUSGList *sg, MemTxAttrs attrs) -{ - return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs); -} - -void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, - QEMUSGList *sg, enum BlockAcctType type) -{ - block_acct_start(blk_get_stats(blk), cookie, sg->size, type); -} - -uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits) -{ - uint64_t max_mask = UINT64_MAX, addr_mask = end - start; - uint64_t alignment_mask, size_mask; - - if (max_addr_bits != 64) { - max_mask = (1ULL << max_addr_bits) - 1; - } - - alignment_mask = start ? 
(start & -start) - 1 : max_mask; - alignment_mask = MIN(alignment_mask, max_mask); - size_mask = MIN(addr_mask, max_mask); - - if (alignment_mask <= size_mask) { - /* Increase the alignment of start */ - return alignment_mask; - } else { - /* Find the largest page mask from size */ - if (addr_mask == UINT64_MAX) { - return UINT64_MAX; - } - return (1ULL << (63 - clz64(addr_mask + 1))) - 1; - } -} - diff --git a/softmmu/globals.c b/softmmu/globals.c deleted file mode 100644 index e83b542..0000000 --- a/softmmu/globals.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Global variables that (mostly) should not exist - * - * Copyright (c) 2003-2020 QEMU contributors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "exec/cpu-common.h" -#include "hw/display/vga.h" -#include "hw/loader.h" -#include "hw/xen/xen.h" -#include "net/net.h" -#include "sysemu/cpus.h" -#include "sysemu/sysemu.h" - -enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB; -int display_opengl; -const char* keyboard_layout; -bool enable_mlock; -bool enable_cpu_pm; -int nb_nics; -NICInfo nd_table[MAX_NICS]; -int autostart = 1; -int vga_interface_type = VGA_NONE; -bool vga_interface_created; -Chardev *parallel_hds[MAX_PARALLEL_PORTS]; -int win2k_install_hack; -int fd_bootchk = 1; -int graphic_rotate; -QEMUOptionRom option_rom[MAX_OPTION_ROMS]; -int nb_option_roms; -int old_param; -const char *qemu_name; -unsigned int nb_prom_envs; -const char *prom_envs[MAX_PROM_ENVS]; -uint8_t *boot_splash_filedata; -int only_migratable; /* turn it off unless user states otherwise */ -int icount_align_option; - -/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the - * little-endian "wire format" described in the SMBIOS 2.6 specification. 
- */ -QemuUUID qemu_uuid; -bool qemu_uuid_set; - -uint32_t xen_domid; -enum xen_mode xen_mode = XEN_DISABLED; -bool xen_domid_restrict; -struct evtchn_backend_ops *xen_evtchn_ops; -struct gnttab_backend_ops *xen_gnttab_ops; -struct foreignmem_backend_ops *xen_foreignmem_ops; -struct xenstore_backend_ops *xen_xenstore_ops; diff --git a/softmmu/ioport.c b/softmmu/ioport.c deleted file mode 100644 index 1824aa8..0000000 --- a/softmmu/ioport.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -/* - * split out ioport related stuffs from vl.c. 
- */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/ioport.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" -#include "trace.h" - -struct MemoryRegionPortioList { - Object obj; - - MemoryRegion mr; - void *portio_opaque; - MemoryRegionPortio *ports; -}; - -#define TYPE_MEMORY_REGION_PORTIO_LIST "memory-region-portio-list" -OBJECT_DECLARE_SIMPLE_TYPE(MemoryRegionPortioList, MEMORY_REGION_PORTIO_LIST) - -static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size) -{ - return -1ULL; -} - -static void unassigned_io_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) -{ -} - -const MemoryRegionOps unassigned_io_ops = { - .read = unassigned_io_read, - .write = unassigned_io_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -void cpu_outb(uint32_t addr, uint8_t val) -{ - trace_cpu_out(addr, 'b', val); - address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, - &val, 1); -} - -void cpu_outw(uint32_t addr, uint16_t val) -{ - uint8_t buf[2]; - - trace_cpu_out(addr, 'w', val); - stw_p(buf, val); - address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, - buf, 2); -} - -void cpu_outl(uint32_t addr, uint32_t val) -{ - uint8_t buf[4]; - - trace_cpu_out(addr, 'l', val); - stl_p(buf, val); - address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, - buf, 4); -} - -uint8_t cpu_inb(uint32_t addr) -{ - uint8_t val; - - address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, - &val, 1); - trace_cpu_in(addr, 'b', val); - return val; -} - -uint16_t cpu_inw(uint32_t addr) -{ - uint8_t buf[2]; - uint16_t val; - - address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2); - val = lduw_p(buf); - trace_cpu_in(addr, 'w', val); - return val; -} - -uint32_t cpu_inl(uint32_t addr) -{ - uint8_t buf[4]; - uint32_t val; - - address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4); - val = ldl_p(buf); - trace_cpu_in(addr, 'l', val); - return val; -} - -void 
portio_list_init(PortioList *piolist, - Object *owner, - const MemoryRegionPortio *callbacks, - void *opaque, const char *name) -{ - unsigned n = 0; - - while (callbacks[n].size) { - ++n; - } - - piolist->ports = callbacks; - piolist->nr = 0; - piolist->regions = g_new0(MemoryRegion *, n); - piolist->address_space = NULL; - piolist->opaque = opaque; - piolist->owner = owner; - piolist->name = name; - piolist->flush_coalesced_mmio = false; -} - -void portio_list_set_flush_coalesced(PortioList *piolist) -{ - piolist->flush_coalesced_mmio = true; -} - -void portio_list_destroy(PortioList *piolist) -{ - MemoryRegionPortioList *mrpio; - unsigned i; - - for (i = 0; i < piolist->nr; ++i) { - mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr); - object_unparent(OBJECT(&mrpio->mr)); - object_unref(mrpio); - } - g_free(piolist->regions); -} - -static const MemoryRegionPortio *find_portio(MemoryRegionPortioList *mrpio, - uint64_t offset, unsigned size, - bool write) -{ - const MemoryRegionPortio *mrp; - - for (mrp = mrpio->ports; mrp->size; ++mrp) { - if (offset >= mrp->offset && offset < mrp->offset + mrp->len && - size == mrp->size && - (write ? 
(bool)mrp->write : (bool)mrp->read)) { - return mrp; - } - } - return NULL; -} - -static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size) -{ - MemoryRegionPortioList *mrpio = opaque; - const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, false); - uint64_t data; - - data = ((uint64_t)1 << (size * 8)) - 1; - if (mrp) { - data = mrp->read(mrpio->portio_opaque, mrp->base + addr); - } else if (size == 2) { - mrp = find_portio(mrpio, addr, 1, false); - if (mrp) { - data = mrp->read(mrpio->portio_opaque, mrp->base + addr); - if (addr + 1 < mrp->offset + mrp->len) { - data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8; - } else { - data |= 0xff00; - } - } - } - return data; -} - -static void portio_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - MemoryRegionPortioList *mrpio = opaque; - const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true); - - if (mrp) { - mrp->write(mrpio->portio_opaque, mrp->base + addr, data); - } else if (size == 2) { - mrp = find_portio(mrpio, addr, 1, true); - if (mrp) { - mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff); - if (addr + 1 < mrp->offset + mrp->len) { - mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8); - } - } - } -} - -static const MemoryRegionOps portio_ops = { - .read = portio_read, - .write = portio_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid.unaligned = true, - .impl.unaligned = true, -}; - -static void portio_list_add_1(PortioList *piolist, - const MemoryRegionPortio *pio_init, - unsigned count, unsigned start, - unsigned off_low, unsigned off_high) -{ - MemoryRegionPortioList *mrpio; - Object *owner; - char *name; - unsigned i; - - /* Copy the sub-list and null-terminate it. 
*/ - mrpio = MEMORY_REGION_PORTIO_LIST( - object_new(TYPE_MEMORY_REGION_PORTIO_LIST)); - mrpio->portio_opaque = piolist->opaque; - mrpio->ports = g_malloc0(sizeof(MemoryRegionPortio) * (count + 1)); - memcpy(mrpio->ports, pio_init, sizeof(MemoryRegionPortio) * count); - memset(mrpio->ports + count, 0, sizeof(MemoryRegionPortio)); - - /* Adjust the offsets to all be zero-based for the region. */ - for (i = 0; i < count; ++i) { - mrpio->ports[i].offset -= off_low; - mrpio->ports[i].base = start + off_low; - } - - /* - * The MemoryRegion owner is the MemoryRegionPortioList since that manages - * the lifecycle via the refcount - */ - memory_region_init_io(&mrpio->mr, OBJECT(mrpio), &portio_ops, mrpio, - piolist->name, off_high - off_low); - - /* Reparent the MemoryRegion to the piolist owner */ - object_ref(&mrpio->mr); - object_unparent(OBJECT(&mrpio->mr)); - if (!piolist->owner) { - owner = container_get(qdev_get_machine(), "/unattached"); - } else { - owner = piolist->owner; - } - name = g_strdup_printf("%s[*]", piolist->name); - object_property_add_child(owner, name, OBJECT(&mrpio->mr)); - g_free(name); - - if (piolist->flush_coalesced_mmio) { - memory_region_set_flush_coalesced(&mrpio->mr); - } - memory_region_add_subregion(piolist->address_space, - start + off_low, &mrpio->mr); - piolist->regions[piolist->nr] = &mrpio->mr; - ++piolist->nr; -} - -void portio_list_add(PortioList *piolist, - MemoryRegion *address_space, - uint32_t start) -{ - const MemoryRegionPortio *pio, *pio_start = piolist->ports; - unsigned int off_low, off_high, off_last, count; - - piolist->address_space = address_space; - - /* Handle the first entry specially. */ - off_last = off_low = pio_start->offset; - off_high = off_low + pio_start->len + pio_start->size - 1; - count = 1; - - for (pio = pio_start + 1; pio->size != 0; pio++, count++) { - /* All entries must be sorted by offset. */ - assert(pio->offset >= off_last); - off_last = pio->offset; - - /* If we see a hole, break the region. 
*/ - if (off_last > off_high) { - portio_list_add_1(piolist, pio_start, count, start, off_low, - off_high); - /* ... and start collecting anew. */ - pio_start = pio; - off_low = off_last; - off_high = off_low + pio->len + pio_start->size - 1; - count = 0; - } else if (off_last + pio->len > off_high) { - off_high = off_last + pio->len + pio_start->size - 1; - } - } - - /* There will always be an open sub-list. */ - portio_list_add_1(piolist, pio_start, count, start, off_low, off_high); -} - -void portio_list_del(PortioList *piolist) -{ - MemoryRegionPortioList *mrpio; - unsigned i; - - for (i = 0; i < piolist->nr; ++i) { - mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr); - memory_region_del_subregion(piolist->address_space, &mrpio->mr); - } -} - -static void memory_region_portio_list_finalize(Object *obj) -{ - MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj); - - object_unref(&mrpio->mr); - g_free(mrpio->ports); -} - -static const TypeInfo memory_region_portio_list_info = { - .parent = TYPE_OBJECT, - .name = TYPE_MEMORY_REGION_PORTIO_LIST, - .instance_size = sizeof(MemoryRegionPortioList), - .instance_finalize = memory_region_portio_list_finalize, -}; - -static void ioport_register_types(void) -{ - type_register_static(&memory_region_portio_list_info); -} - -type_init(ioport_register_types) diff --git a/softmmu/main.c b/softmmu/main.c deleted file mode 100644 index 694388b..0000000 --- a/softmmu/main.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2020 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, 
subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-main.h" -#include "sysemu/sysemu.h" - -#ifdef CONFIG_SDL -#include -#endif - -int qemu_default_main(void) -{ - int status; - - status = qemu_main_loop(); - qemu_cleanup(); - - return status; -} - -int (*qemu_main)(void) = qemu_default_main; - -int main(int argc, char **argv) -{ - qemu_init(argc, argv); - return qemu_main(); -} diff --git a/softmmu/memory.c b/softmmu/memory.c deleted file mode 100644 index fa1c99f..0000000 --- a/softmmu/memory.c +++ /dev/null @@ -1,3683 +0,0 @@ -/* - * Physical memory management - * - * Copyright 2011 Red Hat, Inc. and/or its affiliates - * - * Authors: - * Avi Kivity - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. 
- */ - -#include "qemu/osdep.h" -#include "qemu/log.h" -#include "qapi/error.h" -#include "exec/memory.h" -#include "qapi/visitor.h" -#include "qemu/bitops.h" -#include "qemu/error-report.h" -#include "qemu/main-loop.h" -#include "qemu/qemu-print.h" -#include "qom/object.h" -#include "trace.h" - -#include "exec/memory-internal.h" -#include "exec/ram_addr.h" -#include "sysemu/kvm.h" -#include "sysemu/runstate.h" -#include "sysemu/tcg.h" -#include "qemu/accel.h" -#include "hw/boards.h" -#include "migration/vmstate.h" -#include "exec/address-spaces.h" - -//#define DEBUG_UNASSIGNED - -static unsigned memory_region_transaction_depth; -static bool memory_region_update_pending; -static bool ioeventfd_update_pending; -unsigned int global_dirty_tracking; - -static QTAILQ_HEAD(, MemoryListener) memory_listeners - = QTAILQ_HEAD_INITIALIZER(memory_listeners); - -static QTAILQ_HEAD(, AddressSpace) address_spaces - = QTAILQ_HEAD_INITIALIZER(address_spaces); - -static GHashTable *flat_views; - -typedef struct AddrRange AddrRange; - -/* - * Note that signed integers are needed for negative offsetting in aliases - * (large MemoryRegion::alias_offset). 
- */ -struct AddrRange { - Int128 start; - Int128 size; -}; - -static AddrRange addrrange_make(Int128 start, Int128 size) -{ - return (AddrRange) { start, size }; -} - -static bool addrrange_equal(AddrRange r1, AddrRange r2) -{ - return int128_eq(r1.start, r2.start) && int128_eq(r1.size, r2.size); -} - -static Int128 addrrange_end(AddrRange r) -{ - return int128_add(r.start, r.size); -} - -static AddrRange addrrange_shift(AddrRange range, Int128 delta) -{ - int128_addto(&range.start, delta); - return range; -} - -static bool addrrange_contains(AddrRange range, Int128 addr) -{ - return int128_ge(addr, range.start) - && int128_lt(addr, addrrange_end(range)); -} - -static bool addrrange_intersects(AddrRange r1, AddrRange r2) -{ - return addrrange_contains(r1, r2.start) - || addrrange_contains(r2, r1.start); -} - -static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2) -{ - Int128 start = int128_max(r1.start, r2.start); - Int128 end = int128_min(addrrange_end(r1), addrrange_end(r2)); - return addrrange_make(start, int128_sub(end, start)); -} - -enum ListenerDirection { Forward, Reverse }; - -#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \ - do { \ - MemoryListener *_listener; \ - \ - switch (_direction) { \ - case Forward: \ - QTAILQ_FOREACH(_listener, &memory_listeners, link) { \ - if (_listener->_callback) { \ - _listener->_callback(_listener, ##_args); \ - } \ - } \ - break; \ - case Reverse: \ - QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, link) { \ - if (_listener->_callback) { \ - _listener->_callback(_listener, ##_args); \ - } \ - } \ - break; \ - default: \ - abort(); \ - } \ - } while (0) - -#define MEMORY_LISTENER_CALL(_as, _callback, _direction, _section, _args...) 
\ - do { \ - MemoryListener *_listener; \ - \ - switch (_direction) { \ - case Forward: \ - QTAILQ_FOREACH(_listener, &(_as)->listeners, link_as) { \ - if (_listener->_callback) { \ - _listener->_callback(_listener, _section, ##_args); \ - } \ - } \ - break; \ - case Reverse: \ - QTAILQ_FOREACH_REVERSE(_listener, &(_as)->listeners, link_as) { \ - if (_listener->_callback) { \ - _listener->_callback(_listener, _section, ##_args); \ - } \ - } \ - break; \ - default: \ - abort(); \ - } \ - } while (0) - -/* No need to ref/unref .mr, the FlatRange keeps it alive. */ -#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \ - do { \ - MemoryRegionSection mrs = section_from_flat_range(fr, \ - address_space_to_flatview(as)); \ - MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \ - } while(0) - -struct CoalescedMemoryRange { - AddrRange addr; - QTAILQ_ENTRY(CoalescedMemoryRange) link; -}; - -struct MemoryRegionIoeventfd { - AddrRange addr; - bool match_data; - uint64_t data; - EventNotifier *e; -}; - -static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a, - MemoryRegionIoeventfd *b) -{ - if (int128_lt(a->addr.start, b->addr.start)) { - return true; - } else if (int128_gt(a->addr.start, b->addr.start)) { - return false; - } else if (int128_lt(a->addr.size, b->addr.size)) { - return true; - } else if (int128_gt(a->addr.size, b->addr.size)) { - return false; - } else if (a->match_data < b->match_data) { - return true; - } else if (a->match_data > b->match_data) { - return false; - } else if (a->match_data) { - if (a->data < b->data) { - return true; - } else if (a->data > b->data) { - return false; - } - } - if (a->e < b->e) { - return true; - } else if (a->e > b->e) { - return false; - } - return false; -} - -static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a, - MemoryRegionIoeventfd *b) -{ - if (int128_eq(a->addr.start, b->addr.start) && - (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) || - (int128_eq(a->addr.size, 
b->addr.size) && - (a->match_data == b->match_data) && - ((a->match_data && (a->data == b->data)) || !a->match_data) && - (a->e == b->e)))) - return true; - - return false; -} - -/* Range of memory in the global map. Addresses are absolute. */ -struct FlatRange { - MemoryRegion *mr; - hwaddr offset_in_region; - AddrRange addr; - uint8_t dirty_log_mask; - bool romd_mode; - bool readonly; - bool nonvolatile; -}; - -#define FOR_EACH_FLAT_RANGE(var, view) \ - for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var) - -static inline MemoryRegionSection -section_from_flat_range(FlatRange *fr, FlatView *fv) -{ - return (MemoryRegionSection) { - .mr = fr->mr, - .fv = fv, - .offset_within_region = fr->offset_in_region, - .size = fr->addr.size, - .offset_within_address_space = int128_get64(fr->addr.start), - .readonly = fr->readonly, - .nonvolatile = fr->nonvolatile, - }; -} - -static bool flatrange_equal(FlatRange *a, FlatRange *b) -{ - return a->mr == b->mr - && addrrange_equal(a->addr, b->addr) - && a->offset_in_region == b->offset_in_region - && a->romd_mode == b->romd_mode - && a->readonly == b->readonly - && a->nonvolatile == b->nonvolatile; -} - -static FlatView *flatview_new(MemoryRegion *mr_root) -{ - FlatView *view; - - view = g_new0(FlatView, 1); - view->ref = 1; - view->root = mr_root; - memory_region_ref(mr_root); - trace_flatview_new(view, mr_root); - - return view; -} - -/* Insert a range into a given position. Caller is responsible for maintaining - * sorting order. 
- */ -static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range) -{ - if (view->nr == view->nr_allocated) { - view->nr_allocated = MAX(2 * view->nr, 10); - view->ranges = g_realloc(view->ranges, - view->nr_allocated * sizeof(*view->ranges)); - } - memmove(view->ranges + pos + 1, view->ranges + pos, - (view->nr - pos) * sizeof(FlatRange)); - view->ranges[pos] = *range; - memory_region_ref(range->mr); - ++view->nr; -} - -static void flatview_destroy(FlatView *view) -{ - int i; - - trace_flatview_destroy(view, view->root); - if (view->dispatch) { - address_space_dispatch_free(view->dispatch); - } - for (i = 0; i < view->nr; i++) { - memory_region_unref(view->ranges[i].mr); - } - g_free(view->ranges); - memory_region_unref(view->root); - g_free(view); -} - -static bool flatview_ref(FlatView *view) -{ - return qatomic_fetch_inc_nonzero(&view->ref) > 0; -} - -void flatview_unref(FlatView *view) -{ - if (qatomic_fetch_dec(&view->ref) == 1) { - trace_flatview_destroy_rcu(view, view->root); - assert(view->root); - call_rcu(view, flatview_destroy, rcu); - } -} - -static bool can_merge(FlatRange *r1, FlatRange *r2) -{ - return int128_eq(addrrange_end(r1->addr), r2->addr.start) - && r1->mr == r2->mr - && int128_eq(int128_add(int128_make64(r1->offset_in_region), - r1->addr.size), - int128_make64(r2->offset_in_region)) - && r1->dirty_log_mask == r2->dirty_log_mask - && r1->romd_mode == r2->romd_mode - && r1->readonly == r2->readonly - && r1->nonvolatile == r2->nonvolatile; -} - -/* Attempt to simplify a view by merging adjacent ranges */ -static void flatview_simplify(FlatView *view) -{ - unsigned i, j, k; - - i = 0; - while (i < view->nr) { - j = i + 1; - while (j < view->nr - && can_merge(&view->ranges[j-1], &view->ranges[j])) { - int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size); - ++j; - } - ++i; - for (k = i; k < j; k++) { - memory_region_unref(view->ranges[k].mr); - } - memmove(&view->ranges[i], &view->ranges[j], - (view->nr - j) * 
sizeof(view->ranges[j])); - view->nr -= j - i; - } -} - -static bool memory_region_big_endian(MemoryRegion *mr) -{ -#if TARGET_BIG_ENDIAN - return mr->ops->endianness != DEVICE_LITTLE_ENDIAN; -#else - return mr->ops->endianness == DEVICE_BIG_ENDIAN; -#endif -} - -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op) -{ - if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) { - switch (op & MO_SIZE) { - case MO_8: - break; - case MO_16: - *data = bswap16(*data); - break; - case MO_32: - *data = bswap32(*data); - break; - case MO_64: - *data = bswap64(*data); - break; - default: - g_assert_not_reached(); - } - } -} - -static inline void memory_region_shift_read_access(uint64_t *value, - signed shift, - uint64_t mask, - uint64_t tmp) -{ - if (shift >= 0) { - *value |= (tmp & mask) << shift; - } else { - *value |= (tmp & mask) >> -shift; - } -} - -static inline uint64_t memory_region_shift_write_access(uint64_t *value, - signed shift, - uint64_t mask) -{ - uint64_t tmp; - - if (shift >= 0) { - tmp = (*value >> shift) & mask; - } else { - tmp = (*value << -shift) & mask; - } - - return tmp; -} - -static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset) -{ - MemoryRegion *root; - hwaddr abs_addr = offset; - - abs_addr += mr->addr; - for (root = mr; root->container; ) { - root = root->container; - abs_addr += root->addr; - } - - return abs_addr; -} - -static int get_cpu_index(void) -{ - if (current_cpu) { - return current_cpu->cpu_index; - } - return -1; -} - -static MemTxResult memory_region_read_accessor(MemoryRegion *mr, - hwaddr addr, - uint64_t *value, - unsigned size, - signed shift, - uint64_t mask, - MemTxAttrs attrs) -{ - uint64_t tmp; - - tmp = mr->ops->read(mr->opaque, addr, size); - if (mr->subpage) { - trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) { - hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - 
trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size, - memory_region_name(mr)); - } - memory_region_shift_read_access(value, shift, mask, tmp); - return MEMTX_OK; -} - -static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, - hwaddr addr, - uint64_t *value, - unsigned size, - signed shift, - uint64_t mask, - MemTxAttrs attrs) -{ - uint64_t tmp = 0; - MemTxResult r; - - r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); - if (mr->subpage) { - trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); - } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) { - hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size, - memory_region_name(mr)); - } - memory_region_shift_read_access(value, shift, mask, tmp); - return r; -} - -static MemTxResult memory_region_write_accessor(MemoryRegion *mr, - hwaddr addr, - uint64_t *value, - unsigned size, - signed shift, - uint64_t mask, - MemTxAttrs attrs) -{ - uint64_t tmp = memory_region_shift_write_access(value, shift, mask); - - if (mr->subpage) { - trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) { - hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size, - memory_region_name(mr)); - } - mr->ops->write(mr->opaque, addr, tmp, size); - return MEMTX_OK; -} - -static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, - hwaddr addr, - uint64_t *value, - unsigned size, - signed shift, - uint64_t mask, - MemTxAttrs attrs) -{ - uint64_t tmp = memory_region_shift_write_access(value, shift, mask); - - if (mr->subpage) { - trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); - } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) { - hwaddr 
abs_addr = memory_region_to_absolute_addr(mr, addr); - trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size, - memory_region_name(mr)); - } - return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs); -} - -static MemTxResult access_with_adjusted_size(hwaddr addr, - uint64_t *value, - unsigned size, - unsigned access_size_min, - unsigned access_size_max, - MemTxResult (*access_fn) - (MemoryRegion *mr, - hwaddr addr, - uint64_t *value, - unsigned size, - signed shift, - uint64_t mask, - MemTxAttrs attrs), - MemoryRegion *mr, - MemTxAttrs attrs) -{ - uint64_t access_mask; - unsigned access_size; - unsigned i; - MemTxResult r = MEMTX_OK; - bool reentrancy_guard_applied = false; - - if (!access_size_min) { - access_size_min = 1; - } - if (!access_size_max) { - access_size_max = 4; - } - - /* Do not allow more than one simultaneous access to a device's IO Regions */ - if (mr->dev && !mr->disable_reentrancy_guard && - !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { - if (mr->dev->mem_reentrancy_guard.engaged_in_io) { - warn_report_once("Blocked re-entrant IO on MemoryRegion: " - "%s at addr: 0x%" HWADDR_PRIX, - memory_region_name(mr), addr); - return MEMTX_ACCESS_ERROR; - } - mr->dev->mem_reentrancy_guard.engaged_in_io = true; - reentrancy_guard_applied = true; - } - - /* FIXME: support unaligned access? 
*/ - access_size = MAX(MIN(size, access_size_max), access_size_min); - access_mask = MAKE_64BIT_MASK(0, access_size * 8); - if (memory_region_big_endian(mr)) { - for (i = 0; i < size; i += access_size) { - r |= access_fn(mr, addr + i, value, access_size, - (size - access_size - i) * 8, access_mask, attrs); - } - } else { - for (i = 0; i < size; i += access_size) { - r |= access_fn(mr, addr + i, value, access_size, i * 8, - access_mask, attrs); - } - } - if (mr->dev && reentrancy_guard_applied) { - mr->dev->mem_reentrancy_guard.engaged_in_io = false; - } - return r; -} - -static AddressSpace *memory_region_to_address_space(MemoryRegion *mr) -{ - AddressSpace *as; - - while (mr->container) { - mr = mr->container; - } - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - if (mr == as->root) { - return as; - } - } - return NULL; -} - -/* Render a memory region into the global view. Ranges in @view obscure - * ranges in @mr. - */ -static void render_memory_region(FlatView *view, - MemoryRegion *mr, - Int128 base, - AddrRange clip, - bool readonly, - bool nonvolatile) -{ - MemoryRegion *subregion; - unsigned i; - hwaddr offset_in_region; - Int128 remain; - Int128 now; - FlatRange fr; - AddrRange tmp; - - if (!mr->enabled) { - return; - } - - int128_addto(&base, int128_make64(mr->addr)); - readonly |= mr->readonly; - nonvolatile |= mr->nonvolatile; - - tmp = addrrange_make(base, mr->size); - - if (!addrrange_intersects(tmp, clip)) { - return; - } - - clip = addrrange_intersection(tmp, clip); - - if (mr->alias) { - int128_subfrom(&base, int128_make64(mr->alias->addr)); - int128_subfrom(&base, int128_make64(mr->alias_offset)); - render_memory_region(view, mr->alias, base, clip, - readonly, nonvolatile); - return; - } - - /* Render subregions in priority order. 
*/ - QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) { - render_memory_region(view, subregion, base, clip, - readonly, nonvolatile); - } - - if (!mr->terminates) { - return; - } - - offset_in_region = int128_get64(int128_sub(clip.start, base)); - base = clip.start; - remain = clip.size; - - fr.mr = mr; - fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr); - fr.romd_mode = mr->romd_mode; - fr.readonly = readonly; - fr.nonvolatile = nonvolatile; - - /* Render the region itself into any gaps left by the current view. */ - for (i = 0; i < view->nr && int128_nz(remain); ++i) { - if (int128_ge(base, addrrange_end(view->ranges[i].addr))) { - continue; - } - if (int128_lt(base, view->ranges[i].addr.start)) { - now = int128_min(remain, - int128_sub(view->ranges[i].addr.start, base)); - fr.offset_in_region = offset_in_region; - fr.addr = addrrange_make(base, now); - flatview_insert(view, i, &fr); - ++i; - int128_addto(&base, now); - offset_in_region += int128_get64(now); - int128_subfrom(&remain, now); - } - now = int128_sub(int128_min(int128_add(base, remain), - addrrange_end(view->ranges[i].addr)), - base); - int128_addto(&base, now); - offset_in_region += int128_get64(now); - int128_subfrom(&remain, now); - } - if (int128_nz(remain)) { - fr.offset_in_region = offset_in_region; - fr.addr = addrrange_make(base, remain); - flatview_insert(view, i, &fr); - } -} - -void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque) -{ - FlatRange *fr; - - assert(fv); - assert(cb); - - FOR_EACH_FLAT_RANGE(fr, fv) { - if (cb(fr->addr.start, fr->addr.size, fr->mr, - fr->offset_in_region, opaque)) { - break; - } - } -} - -static MemoryRegion *memory_region_get_flatview_root(MemoryRegion *mr) -{ - while (mr->enabled) { - if (mr->alias) { - if (!mr->alias_offset && int128_ge(mr->size, mr->alias->size)) { - /* The alias is included in its entirety. Use it as - * the "real" root, so that we can share more FlatViews. 
- */ - mr = mr->alias; - continue; - } - } else if (!mr->terminates) { - unsigned int found = 0; - MemoryRegion *child, *next = NULL; - QTAILQ_FOREACH(child, &mr->subregions, subregions_link) { - if (child->enabled) { - if (++found > 1) { - next = NULL; - break; - } - if (!child->addr && int128_ge(mr->size, child->size)) { - /* A child is included in its entirety. If it's the only - * enabled one, use it in the hope of finding an alias down the - * way. This will also let us share FlatViews. - */ - next = child; - } - } - } - if (found == 0) { - return NULL; - } - if (next) { - mr = next; - continue; - } - } - - return mr; - } - - return NULL; -} - -/* Render a memory topology into a list of disjoint absolute ranges. */ -static FlatView *generate_memory_topology(MemoryRegion *mr) -{ - int i; - FlatView *view; - - view = flatview_new(mr); - - if (mr) { - render_memory_region(view, mr, int128_zero(), - addrrange_make(int128_zero(), int128_2_64()), - false, false); - } - flatview_simplify(view); - - view->dispatch = address_space_dispatch_new(view); - for (i = 0; i < view->nr; i++) { - MemoryRegionSection mrs = - section_from_flat_range(&view->ranges[i], view); - flatview_add_to_dispatch(view, &mrs); - } - address_space_dispatch_compact(view->dispatch); - g_hash_table_replace(flat_views, mr, view); - - return view; -} - -static void address_space_add_del_ioeventfds(AddressSpace *as, - MemoryRegionIoeventfd *fds_new, - unsigned fds_new_nb, - MemoryRegionIoeventfd *fds_old, - unsigned fds_old_nb) -{ - unsigned iold, inew; - MemoryRegionIoeventfd *fd; - MemoryRegionSection section; - - /* Generate a symmetric difference of the old and new fd sets, adding - * and deleting as necessary. 
- */ - - iold = inew = 0; - while (iold < fds_old_nb || inew < fds_new_nb) { - if (iold < fds_old_nb - && (inew == fds_new_nb - || memory_region_ioeventfd_before(&fds_old[iold], - &fds_new[inew]))) { - fd = &fds_old[iold]; - section = (MemoryRegionSection) { - .fv = address_space_to_flatview(as), - .offset_within_address_space = int128_get64(fd->addr.start), - .size = fd->addr.size, - }; - MEMORY_LISTENER_CALL(as, eventfd_del, Forward, &section, - fd->match_data, fd->data, fd->e); - ++iold; - } else if (inew < fds_new_nb - && (iold == fds_old_nb - || memory_region_ioeventfd_before(&fds_new[inew], - &fds_old[iold]))) { - fd = &fds_new[inew]; - section = (MemoryRegionSection) { - .fv = address_space_to_flatview(as), - .offset_within_address_space = int128_get64(fd->addr.start), - .size = fd->addr.size, - }; - MEMORY_LISTENER_CALL(as, eventfd_add, Reverse, &section, - fd->match_data, fd->data, fd->e); - ++inew; - } else { - ++iold; - ++inew; - } - } -} - -FlatView *address_space_get_flatview(AddressSpace *as) -{ - FlatView *view; - - RCU_READ_LOCK_GUARD(); - do { - view = address_space_to_flatview(as); - /* If somebody has replaced as->current_map concurrently, - * flatview_ref returns false. - */ - } while (!flatview_ref(view)); - return view; -} - -static void address_space_update_ioeventfds(AddressSpace *as) -{ - FlatView *view; - FlatRange *fr; - unsigned ioeventfd_nb = 0; - unsigned ioeventfd_max; - MemoryRegionIoeventfd *ioeventfds; - AddrRange tmp; - unsigned i; - - if (!as->ioeventfd_notifiers) { - return; - } - - /* - * It is likely that the number of ioeventfds hasn't changed much, so use - * the previous size as the starting value, with some headroom to avoid - * gratuitous reallocations. 
- */ - ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4); - ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max); - - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - for (i = 0; i < fr->mr->ioeventfd_nb; ++i) { - tmp = addrrange_shift(fr->mr->ioeventfds[i].addr, - int128_sub(fr->addr.start, - int128_make64(fr->offset_in_region))); - if (addrrange_intersects(fr->addr, tmp)) { - ++ioeventfd_nb; - if (ioeventfd_nb > ioeventfd_max) { - ioeventfd_max = MAX(ioeventfd_max * 2, 4); - ioeventfds = g_realloc(ioeventfds, - ioeventfd_max * sizeof(*ioeventfds)); - } - ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i]; - ioeventfds[ioeventfd_nb-1].addr = tmp; - } - } - } - - address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb, - as->ioeventfds, as->ioeventfd_nb); - - g_free(as->ioeventfds); - as->ioeventfds = ioeventfds; - as->ioeventfd_nb = ioeventfd_nb; - flatview_unref(view); -} - -/* - * Notify the memory listeners about the coalesced IO change events of - * range `cmr'. Only the part that has intersection of the specified - * FlatRange will be sent. 
- */ -static void flat_range_coalesced_io_notify(FlatRange *fr, AddressSpace *as, - CoalescedMemoryRange *cmr, bool add) -{ - AddrRange tmp; - - tmp = addrrange_shift(cmr->addr, - int128_sub(fr->addr.start, - int128_make64(fr->offset_in_region))); - if (!addrrange_intersects(tmp, fr->addr)) { - return; - } - tmp = addrrange_intersection(tmp, fr->addr); - - if (add) { - MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, - int128_get64(tmp.start), - int128_get64(tmp.size)); - } else { - MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, - int128_get64(tmp.start), - int128_get64(tmp.size)); - } -} - -static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) -{ - CoalescedMemoryRange *cmr; - - QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { - flat_range_coalesced_io_notify(fr, as, cmr, false); - } -} - -static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) -{ - MemoryRegion *mr = fr->mr; - CoalescedMemoryRange *cmr; - - if (QTAILQ_EMPTY(&mr->coalesced)) { - return; - } - - QTAILQ_FOREACH(cmr, &mr->coalesced, link) { - flat_range_coalesced_io_notify(fr, as, cmr, true); - } -} - -static void address_space_update_topology_pass(AddressSpace *as, - const FlatView *old_view, - const FlatView *new_view, - bool adding) -{ - unsigned iold, inew; - FlatRange *frold, *frnew; - - /* Generate a symmetric difference of the old and new memory maps. - * Kill ranges in the old map, and instantiate ranges in the new map. - */ - iold = inew = 0; - while (iold < old_view->nr || inew < new_view->nr) { - if (iold < old_view->nr) { - frold = &old_view->ranges[iold]; - } else { - frold = NULL; - } - if (inew < new_view->nr) { - frnew = &new_view->ranges[inew]; - } else { - frnew = NULL; - } - - if (frold - && (!frnew - || int128_lt(frold->addr.start, frnew->addr.start) - || (int128_eq(frold->addr.start, frnew->addr.start) - && !flatrange_equal(frold, frnew)))) { - /* In old but not in new, or in both but attributes changed. 
*/ - - if (!adding) { - flat_range_coalesced_io_del(frold, as); - MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del); - } - - ++iold; - } else if (frold && frnew && flatrange_equal(frold, frnew)) { - /* In both and unchanged (except logging may have changed) */ - - if (adding) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop); - if (frnew->dirty_log_mask & ~frold->dirty_log_mask) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start, - frold->dirty_log_mask, - frnew->dirty_log_mask); - } - if (frold->dirty_log_mask & ~frnew->dirty_log_mask) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop, - frold->dirty_log_mask, - frnew->dirty_log_mask); - } - } - - ++iold; - ++inew; - } else { - /* In new */ - - if (adding) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add); - flat_range_coalesced_io_add(frnew, as); - } - - ++inew; - } - } -} - -static void flatviews_init(void) -{ - static FlatView *empty_view; - - if (flat_views) { - return; - } - - flat_views = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, - (GDestroyNotify) flatview_unref); - if (!empty_view) { - empty_view = generate_memory_topology(NULL); - /* We keep it alive forever in the global variable. 
*/ - flatview_ref(empty_view); - } else { - g_hash_table_replace(flat_views, NULL, empty_view); - flatview_ref(empty_view); - } -} - -static void flatviews_reset(void) -{ - AddressSpace *as; - - if (flat_views) { - g_hash_table_unref(flat_views); - flat_views = NULL; - } - flatviews_init(); - - /* Render unique FVs */ - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - MemoryRegion *physmr = memory_region_get_flatview_root(as->root); - - if (g_hash_table_lookup(flat_views, physmr)) { - continue; - } - - generate_memory_topology(physmr); - } -} - -static void address_space_set_flatview(AddressSpace *as) -{ - FlatView *old_view = address_space_to_flatview(as); - MemoryRegion *physmr = memory_region_get_flatview_root(as->root); - FlatView *new_view = g_hash_table_lookup(flat_views, physmr); - - assert(new_view); - - if (old_view == new_view) { - return; - } - - if (old_view) { - flatview_ref(old_view); - } - - flatview_ref(new_view); - - if (!QTAILQ_EMPTY(&as->listeners)) { - FlatView tmpview = { .nr = 0 }, *old_view2 = old_view; - - if (!old_view2) { - old_view2 = &tmpview; - } - address_space_update_topology_pass(as, old_view2, new_view, false); - address_space_update_topology_pass(as, old_view2, new_view, true); - } - - /* Writes are protected by the BQL. */ - qatomic_rcu_set(&as->current_map, new_view); - if (old_view) { - flatview_unref(old_view); - } - - /* Note that all the old MemoryRegions are still alive up to this - * point. This relieves most MemoryListeners from the need to - * ref/unref the MemoryRegions they get---unless they use them - * outside the iothread mutex, in which case precise reference - * counting is necessary. 
- */ - if (old_view) { - flatview_unref(old_view); - } -} - -static void address_space_update_topology(AddressSpace *as) -{ - MemoryRegion *physmr = memory_region_get_flatview_root(as->root); - - flatviews_init(); - if (!g_hash_table_lookup(flat_views, physmr)) { - generate_memory_topology(physmr); - } - address_space_set_flatview(as); -} - -void memory_region_transaction_begin(void) -{ - qemu_flush_coalesced_mmio_buffer(); - ++memory_region_transaction_depth; -} - -void memory_region_transaction_commit(void) -{ - AddressSpace *as; - - assert(memory_region_transaction_depth); - assert(qemu_mutex_iothread_locked()); - - --memory_region_transaction_depth; - if (!memory_region_transaction_depth) { - if (memory_region_update_pending) { - flatviews_reset(); - - MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); - - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - address_space_set_flatview(as); - address_space_update_ioeventfds(as); - } - memory_region_update_pending = false; - ioeventfd_update_pending = false; - MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); - } else if (ioeventfd_update_pending) { - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - address_space_update_ioeventfds(as); - } - ioeventfd_update_pending = false; - } - } -} - -static void memory_region_destructor_none(MemoryRegion *mr) -{ -} - -static void memory_region_destructor_ram(MemoryRegion *mr) -{ - qemu_ram_free(mr->ram_block); -} - -static bool memory_region_need_escape(char c) -{ - return c == '/' || c == '[' || c == '\\' || c == ']'; -} - -static char *memory_region_escape_name(const char *name) -{ - const char *p; - char *escaped, *q; - uint8_t c; - size_t bytes = 0; - - for (p = name; *p; p++) { - bytes += memory_region_need_escape(*p) ? 
4 : 1; - } - if (bytes == p - name) { - return g_memdup(name, bytes + 1); - } - - escaped = g_malloc(bytes + 1); - for (p = name, q = escaped; *p; p++) { - c = *p; - if (unlikely(memory_region_need_escape(c))) { - *q++ = '\\'; - *q++ = 'x'; - *q++ = "0123456789abcdef"[c >> 4]; - c = "0123456789abcdef"[c & 15]; - } - *q++ = c; - } - *q = 0; - return escaped; -} - -static void memory_region_do_init(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size) -{ - mr->size = int128_make64(size); - if (size == UINT64_MAX) { - mr->size = int128_2_64(); - } - mr->name = g_strdup(name); - mr->owner = owner; - mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); - mr->ram_block = NULL; - - if (name) { - char *escaped_name = memory_region_escape_name(name); - char *name_array = g_strdup_printf("%s[*]", escaped_name); - - if (!owner) { - owner = container_get(qdev_get_machine(), "/unattached"); - } - - object_property_add_child(owner, name_array, OBJECT(mr)); - object_unref(OBJECT(mr)); - g_free(name_array); - g_free(escaped_name); - } -} - -void memory_region_init(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size) -{ - object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION); - memory_region_do_init(mr, owner, name, size); -} - -static void memory_region_get_container(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - char *path = (char *)""; - - if (mr->container) { - path = object_get_canonical_path(OBJECT(mr->container)); - } - visit_type_str(v, name, &path, errp); - if (mr->container) { - g_free(path); - } -} - -static Object *memory_region_resolve_container(Object *obj, void *opaque, - const char *part) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - - return OBJECT(mr->container); -} - -static void memory_region_get_priority(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - int32_t 
value = mr->priority; - - visit_type_int32(v, name, &value, errp); -} - -static void memory_region_get_size(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - uint64_t value = memory_region_size(mr); - - visit_type_uint64(v, name, &value, errp); -} - -static void memory_region_initfn(Object *obj) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - ObjectProperty *op; - - mr->ops = &unassigned_mem_ops; - mr->enabled = true; - mr->romd_mode = true; - mr->destructor = memory_region_destructor_none; - QTAILQ_INIT(&mr->subregions); - QTAILQ_INIT(&mr->coalesced); - - op = object_property_add(OBJECT(mr), "container", - "link<" TYPE_MEMORY_REGION ">", - memory_region_get_container, - NULL, /* memory_region_set_container */ - NULL, NULL); - op->resolve = memory_region_resolve_container; - - object_property_add_uint64_ptr(OBJECT(mr), "addr", - &mr->addr, OBJ_PROP_FLAG_READ); - object_property_add(OBJECT(mr), "priority", "uint32", - memory_region_get_priority, - NULL, /* memory_region_set_priority */ - NULL, NULL); - object_property_add(OBJECT(mr), "size", "uint64", - memory_region_get_size, - NULL, /* memory_region_set_size, */ - NULL, NULL); -} - -static void iommu_memory_region_initfn(Object *obj) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - - mr->is_iommu = true; -} - -static uint64_t unassigned_mem_read(void *opaque, hwaddr addr, - unsigned size) -{ -#ifdef DEBUG_UNASSIGNED - printf("Unassigned mem read " HWADDR_FMT_plx "\n", addr); -#endif - return 0; -} - -static void unassigned_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ -#ifdef DEBUG_UNASSIGNED - printf("Unassigned mem write " HWADDR_FMT_plx " = 0x%"PRIx64"\n", addr, val); -#endif -} - -static bool unassigned_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return false; -} - -const MemoryRegionOps unassigned_mem_ops = { - .valid.accepts = unassigned_mem_accepts, - .endianness = 
DEVICE_NATIVE_ENDIAN, -}; - -static uint64_t memory_region_ram_device_read(void *opaque, - hwaddr addr, unsigned size) -{ - MemoryRegion *mr = opaque; - uint64_t data = (uint64_t)~0; - - switch (size) { - case 1: - data = *(uint8_t *)(mr->ram_block->host + addr); - break; - case 2: - data = *(uint16_t *)(mr->ram_block->host + addr); - break; - case 4: - data = *(uint32_t *)(mr->ram_block->host + addr); - break; - case 8: - data = *(uint64_t *)(mr->ram_block->host + addr); - break; - } - - trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size); - - return data; -} - -static void memory_region_ram_device_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - MemoryRegion *mr = opaque; - - trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size); - - switch (size) { - case 1: - *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; - break; - case 2: - *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; - break; - case 4: - *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; - break; - case 8: - *(uint64_t *)(mr->ram_block->host + addr) = data; - break; - } -} - -static const MemoryRegionOps ram_device_mem_ops = { - .read = memory_region_ram_device_read, - .write = memory_region_ram_device_write, - .endianness = DEVICE_HOST_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = true, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = true, - }, -}; - -bool memory_region_access_valid(MemoryRegion *mr, - hwaddr addr, - unsigned size, - bool is_write, - MemTxAttrs attrs) -{ - if (mr->ops->valid.accepts - && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX - ", size %u, region '%s', reason: rejected\n", - is_write ? 
"write" : "read", - addr, size, memory_region_name(mr)); - return false; - } - - if (!mr->ops->valid.unaligned && (addr & (size - 1))) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX - ", size %u, region '%s', reason: unaligned\n", - is_write ? "write" : "read", - addr, size, memory_region_name(mr)); - return false; - } - - /* Treat zero as compatibility all valid */ - if (!mr->ops->valid.max_access_size) { - return true; - } - - if (size > mr->ops->valid.max_access_size - || size < mr->ops->valid.min_access_size) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX - ", size %u, region '%s', reason: invalid size " - "(min:%u max:%u)\n", - is_write ? "write" : "read", - addr, size, memory_region_name(mr), - mr->ops->valid.min_access_size, - mr->ops->valid.max_access_size); - return false; - } - return true; -} - -static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr, - hwaddr addr, - uint64_t *pval, - unsigned size, - MemTxAttrs attrs) -{ - *pval = 0; - - if (mr->ops->read) { - return access_with_adjusted_size(addr, pval, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, - memory_region_read_accessor, - mr, attrs); - } else { - return access_with_adjusted_size(addr, pval, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, - memory_region_read_with_attrs_accessor, - mr, attrs); - } -} - -MemTxResult memory_region_dispatch_read(MemoryRegion *mr, - hwaddr addr, - uint64_t *pval, - MemOp op, - MemTxAttrs attrs) -{ - unsigned size = memop_size(op); - MemTxResult r; - - if (mr->alias) { - return memory_region_dispatch_read(mr->alias, - mr->alias_offset + addr, - pval, op, attrs); - } - if (!memory_region_access_valid(mr, addr, size, false, attrs)) { - *pval = unassigned_mem_read(mr, addr, size); - return MEMTX_DECODE_ERROR; - } - - r = memory_region_dispatch_read1(mr, addr, pval, size, attrs); - adjust_endianness(mr, pval, op); - return r; -} - -/* Return true if an eventfd 
was signalled */ -static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr, - hwaddr addr, - uint64_t data, - unsigned size, - MemTxAttrs attrs) -{ - MemoryRegionIoeventfd ioeventfd = { - .addr = addrrange_make(int128_make64(addr), int128_make64(size)), - .data = data, - }; - unsigned i; - - for (i = 0; i < mr->ioeventfd_nb; i++) { - ioeventfd.match_data = mr->ioeventfds[i].match_data; - ioeventfd.e = mr->ioeventfds[i].e; - - if (memory_region_ioeventfd_equal(&ioeventfd, &mr->ioeventfds[i])) { - event_notifier_set(ioeventfd.e); - return true; - } - } - - return false; -} - -MemTxResult memory_region_dispatch_write(MemoryRegion *mr, - hwaddr addr, - uint64_t data, - MemOp op, - MemTxAttrs attrs) -{ - unsigned size = memop_size(op); - - if (mr->alias) { - return memory_region_dispatch_write(mr->alias, - mr->alias_offset + addr, - data, op, attrs); - } - if (!memory_region_access_valid(mr, addr, size, true, attrs)) { - unassigned_mem_write(mr, addr, data, size); - return MEMTX_DECODE_ERROR; - } - - adjust_endianness(mr, &data, op); - - if ((!kvm_eventfds_enabled()) && - memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) { - return MEMTX_OK; - } - - if (mr->ops->write) { - return access_with_adjusted_size(addr, &data, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, - memory_region_write_accessor, mr, - attrs); - } else { - return - access_with_adjusted_size(addr, &data, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, - memory_region_write_with_attrs_accessor, - mr, attrs); - } -} - -void memory_region_init_io(MemoryRegion *mr, - Object *owner, - const MemoryRegionOps *ops, - void *opaque, - const char *name, - uint64_t size) -{ - memory_region_init(mr, owner, name, size); - mr->ops = ops ? 
ops : &unassigned_mem_ops; - mr->opaque = opaque; - mr->terminates = true; -} - -void memory_region_init_ram_nomigrate(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - Error **errp) -{ - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); -} - -void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - uint32_t ram_flags, - Error **errp) -{ - Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc(size, ram_flags, mr, &err); - if (err) { - mr->size = int128_zero(); - object_unparent(OBJECT(mr)); - error_propagate(errp, err); - } -} - -void memory_region_init_resizeable_ram(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - uint64_t max_size, - void (*resized)(const char*, - uint64_t length, - void *host), - Error **errp) -{ - Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized, - mr, &err); - if (err) { - mr->size = int128_zero(); - object_unparent(OBJECT(mr)); - error_propagate(errp, err); - } -} - -#ifdef CONFIG_POSIX -void memory_region_init_ram_from_file(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - uint64_t align, - uint32_t ram_flags, - const char *path, - ram_addr_t offset, - Error **errp) -{ - Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->readonly = !!(ram_flags & RAM_READONLY); - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->align = align; - mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, - offset, &err); - if (err) { - mr->size = int128_zero(); - object_unparent(OBJECT(mr)); - error_propagate(errp, err); - } -} 
- -void memory_region_init_ram_from_fd(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - uint32_t ram_flags, - int fd, - ram_addr_t offset, - Error **errp) -{ - Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->readonly = !!(ram_flags & RAM_READONLY); - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, - &err); - if (err) { - mr->size = int128_zero(); - object_unparent(OBJECT(mr)); - error_propagate(errp, err); - } -} -#endif - -void memory_region_init_ram_ptr(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - void *ptr) -{ - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - - /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */ - assert(ptr != NULL); - mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal); -} - -void memory_region_init_ram_device_ptr(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - void *ptr) -{ - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->ram_device = true; - mr->ops = &ram_device_mem_ops; - mr->opaque = mr; - mr->destructor = memory_region_destructor_ram; - - /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. 
*/ - assert(ptr != NULL); - mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal); -} - -void memory_region_init_alias(MemoryRegion *mr, - Object *owner, - const char *name, - MemoryRegion *orig, - hwaddr offset, - uint64_t size) -{ - memory_region_init(mr, owner, name, size); - mr->alias = orig; - mr->alias_offset = offset; -} - -void memory_region_init_rom_nomigrate(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - Error **errp) -{ - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); - mr->readonly = true; -} - -void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, - Object *owner, - const MemoryRegionOps *ops, - void *opaque, - const char *name, - uint64_t size, - Error **errp) -{ - Error *err = NULL; - assert(ops); - memory_region_init(mr, owner, name, size); - mr->ops = ops; - mr->opaque = opaque; - mr->terminates = true; - mr->rom_device = true; - mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc(size, 0, mr, &err); - if (err) { - mr->size = int128_zero(); - object_unparent(OBJECT(mr)); - error_propagate(errp, err); - } -} - -void memory_region_init_iommu(void *_iommu_mr, - size_t instance_size, - const char *mrtypename, - Object *owner, - const char *name, - uint64_t size) -{ - struct IOMMUMemoryRegion *iommu_mr; - struct MemoryRegion *mr; - - object_initialize(_iommu_mr, instance_size, mrtypename); - mr = MEMORY_REGION(_iommu_mr); - memory_region_do_init(mr, owner, name, size); - iommu_mr = IOMMU_MEMORY_REGION(mr); - mr->terminates = true; /* then re-forwards */ - QLIST_INIT(&iommu_mr->iommu_notify); - iommu_mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE; -} - -static void memory_region_finalize(Object *obj) -{ - MemoryRegion *mr = MEMORY_REGION(obj); - - assert(!mr->container); - - /* We know the region is not visible in any address space (it - * does not have a container and cannot be a root either because - * it has no references, so we can blindly clear 
mr->enabled. - * memory_region_set_enabled instead could trigger a transaction - * and cause an infinite loop. - */ - mr->enabled = false; - memory_region_transaction_begin(); - while (!QTAILQ_EMPTY(&mr->subregions)) { - MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions); - memory_region_del_subregion(mr, subregion); - } - memory_region_transaction_commit(); - - mr->destructor(mr); - memory_region_clear_coalescing(mr); - g_free((char *)mr->name); - g_free(mr->ioeventfds); -} - -Object *memory_region_owner(MemoryRegion *mr) -{ - Object *obj = OBJECT(mr); - return obj->parent; -} - -void memory_region_ref(MemoryRegion *mr) -{ - /* MMIO callbacks most likely will access data that belongs - * to the owner, hence the need to ref/unref the owner whenever - * the memory region is in use. - * - * The memory region is a child of its owner. As long as the - * owner doesn't call unparent itself on the memory region, - * ref-ing the owner will also keep the memory region alive. - * Memory regions without an owner are supposed to never go away; - * we do not ref/unref them because it slows down DMA sensibly. 
- */ - if (mr && mr->owner) { - object_ref(mr->owner); - } -} - -void memory_region_unref(MemoryRegion *mr) -{ - if (mr && mr->owner) { - object_unref(mr->owner); - } -} - -uint64_t memory_region_size(MemoryRegion *mr) -{ - if (int128_eq(mr->size, int128_2_64())) { - return UINT64_MAX; - } - return int128_get64(mr->size); -} - -const char *memory_region_name(const MemoryRegion *mr) -{ - if (!mr->name) { - ((MemoryRegion *)mr)->name = - g_strdup(object_get_canonical_path_component(OBJECT(mr))); - } - return mr->name; -} - -bool memory_region_is_ram_device(MemoryRegion *mr) -{ - return mr->ram_device; -} - -bool memory_region_is_protected(MemoryRegion *mr) -{ - return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); -} - -uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) -{ - uint8_t mask = mr->dirty_log_mask; - RAMBlock *rb = mr->ram_block; - - if (global_dirty_tracking && ((rb && qemu_ram_is_migratable(rb)) || - memory_region_is_iommu(mr))) { - mask |= (1 << DIRTY_MEMORY_MIGRATION); - } - - if (tcg_enabled() && rb) { - /* TCG only cares about dirty memory logging for RAM, not IOMMU. 
*/ - mask |= (1 << DIRTY_MEMORY_CODE); - } - return mask; -} - -bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) -{ - return memory_region_get_dirty_log_mask(mr) & (1 << client); -} - -static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr, - Error **errp) -{ - IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; - IOMMUNotifier *iommu_notifier; - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - flags |= iommu_notifier->notifier_flags; - } - - if (flags != iommu_mr->iommu_notify_flags && imrc->notify_flag_changed) { - ret = imrc->notify_flag_changed(iommu_mr, - iommu_mr->iommu_notify_flags, - flags, errp); - } - - if (!ret) { - iommu_mr->iommu_notify_flags = flags; - } - return ret; -} - -int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, - uint64_t page_size_mask, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_page_size_mask) { - ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp); - } - return ret; -} - -int memory_region_register_iommu_notifier(MemoryRegion *mr, - IOMMUNotifier *n, Error **errp) -{ - IOMMUMemoryRegion *iommu_mr; - int ret; - - if (mr->alias) { - return memory_region_register_iommu_notifier(mr->alias, n, errp); - } - - /* We need to register for at least one bitfield */ - iommu_mr = IOMMU_MEMORY_REGION(mr); - assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); - assert(n->start <= n->end); - assert(n->iommu_idx >= 0 && - n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr)); - - QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node); - ret = memory_region_update_iommu_notify_flags(iommu_mr, errp); - if (ret) { - QLIST_REMOVE(n, node); - } - return ret; -} - -uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr) -{ - IOMMUMemoryRegionClass *imrc = 
IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - - if (imrc->get_min_page_size) { - return imrc->get_min_page_size(iommu_mr); - } - return TARGET_PAGE_SIZE; -} - -void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) -{ - MemoryRegion *mr = MEMORY_REGION(iommu_mr); - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - hwaddr addr, granularity; - IOMMUTLBEntry iotlb; - - /* If the IOMMU has its own replay callback, override */ - if (imrc->replay) { - imrc->replay(iommu_mr, n); - return; - } - - granularity = memory_region_iommu_get_min_page_size(iommu_mr); - - for (addr = 0; addr < memory_region_size(mr); addr += granularity) { - iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx); - if (iotlb.perm != IOMMU_NONE) { - n->notify(n, &iotlb); - } - - /* if (2^64 - MR size) < granularity, it's possible to get an - * infinite loop here. This should catch such a wraparound */ - if ((addr + granularity) < addr) { - break; - } - } -} - -void memory_region_unregister_iommu_notifier(MemoryRegion *mr, - IOMMUNotifier *n) -{ - IOMMUMemoryRegion *iommu_mr; - - if (mr->alias) { - memory_region_unregister_iommu_notifier(mr->alias, n); - return; - } - QLIST_REMOVE(n, node); - iommu_mr = IOMMU_MEMORY_REGION(mr); - memory_region_update_iommu_notify_flags(iommu_mr, NULL); -} - -void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - IOMMUTLBEvent *event) -{ - IOMMUTLBEntry *entry = &event->entry; - hwaddr entry_end = entry->iova + entry->addr_mask; - IOMMUTLBEntry tmp = *entry; - - if (event->type == IOMMU_NOTIFIER_UNMAP) { - assert(entry->perm == IOMMU_NONE); - } - - /* - * Skip the notification if the notification does not overlap - * with registered range. 
- */ - if (notifier->start > entry_end || notifier->end < entry->iova) { - return; - } - - if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { - /* Crop (iova, addr_mask) to range */ - tmp.iova = MAX(tmp.iova, notifier->start); - tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; - } else { - assert(entry->iova >= notifier->start && entry_end <= notifier->end); - } - - if (event->type & notifier->notifier_flags) { - notifier->notify(notifier, &tmp); - } -} - -void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *notifier) -{ - IOMMUTLBEvent event; - - event.type = IOMMU_NOTIFIER_UNMAP; - event.entry.target_as = &address_space_memory; - event.entry.iova = notifier->start; - event.entry.perm = IOMMU_NONE; - event.entry.addr_mask = notifier->end - notifier->start; - - memory_region_notify_iommu_one(notifier, &event); -} - -void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - int iommu_idx, - IOMMUTLBEvent event) -{ - IOMMUNotifier *iommu_notifier; - - assert(memory_region_is_iommu(MEMORY_REGION(iommu_mr))); - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - if (iommu_notifier->iommu_idx == iommu_idx) { - memory_region_notify_iommu_one(iommu_notifier, &event); - } - } -} - -int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, - enum IOMMUMemoryRegionAttr attr, - void *data) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - - if (!imrc->get_attr) { - return -EINVAL; - } - - return imrc->get_attr(iommu_mr, attr, data); -} - -int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr, - MemTxAttrs attrs) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - - if (!imrc->attrs_to_index) { - return 0; - } - - return imrc->attrs_to_index(iommu_mr, attrs); -} - -int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - - if (!imrc->num_indexes) { - return 1; - } - - return 
imrc->num_indexes(iommu_mr); -} - -RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) -{ - if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) { - return NULL; - } - return mr->rdm; -} - -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm) -{ - g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr)); - g_assert(!rdm || !mr->rdm); - mr->rdm = rdm; -} - -uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, - const MemoryRegion *mr) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->get_min_granularity); - return rdmc->get_min_granularity(rdm, mr); -} - -bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, - const MemoryRegionSection *section) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->is_populated); - return rdmc->is_populated(rdm, section); -} - -int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamPopulate replay_fn, - void *opaque) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->replay_populated); - return rdmc->replay_populated(rdm, section, replay_fn, opaque); -} - -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->replay_discarded); - rdmc->replay_discarded(rdm, section, replay_fn, opaque); -} - -void ram_discard_manager_register_listener(RamDiscardManager *rdm, - RamDiscardListener *rdl, - MemoryRegionSection *section) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->register_listener); - rdmc->register_listener(rdm, rdl, section); -} - -void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, - 
RamDiscardListener *rdl) -{ - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); - - g_assert(rdmc->unregister_listener); - rdmc->unregister_listener(rdm, rdl); -} - -/* Called with rcu_read_lock held. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager) -{ - MemoryRegion *mr; - hwaddr xlat; - hwaddr len = iotlb->addr_mask + 1; - bool writable = iotlb->perm & IOMMU_WO; - - if (mr_has_discard_manager) { - *mr_has_discard_manager = false; - } - /* - * The IOMMU TLB entry we have just covers translation through - * this IOMMU to its immediate target. We need to translate - * it the rest of the way through to memory. - */ - mr = address_space_translate(&address_space_memory, iotlb->translated_addr, - &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED); - if (!memory_region_is_ram(mr)) { - error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat); - return false; - } else if (memory_region_has_ram_discard_manager(mr)) { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); - MemoryRegionSection tmp = { - .mr = mr, - .offset_within_region = xlat, - .size = int128_make64(len), - }; - if (mr_has_discard_manager) { - *mr_has_discard_manager = true; - } - /* - * Malicious VMs can map memory into the IOMMU, which is expected - * to remain discarded. vfio will pin all pages, populating memory. - * Disallow that. vmstate priorities make sure any RamDiscardManager - * were already restored before IOMMUs are restored. - */ - if (!ram_discard_manager_is_populated(rdm, &tmp)) { - error_report("iommu map to discarded memory (e.g., unplugged via" - " virtio-mem): %" HWADDR_PRIx "", - iotlb->translated_addr); - return false; - } - } - - /* - * Translation truncates length to the IOMMU page size, - * check that it did not truncate too much. 
- */ - if (len & iotlb->addr_mask) { - error_report("iommu has granularity incompatible with target AS"); - return false; - } - - if (vaddr) { - *vaddr = memory_region_get_ram_ptr(mr) + xlat; - } - - if (ram_addr) { - *ram_addr = memory_region_get_ram_addr(mr) + xlat; - } - - if (read_only) { - *read_only = !writable || mr->readonly; - } - - return true; -} - -void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) -{ - uint8_t mask = 1 << client; - uint8_t old_logging; - - assert(client == DIRTY_MEMORY_VGA); - old_logging = mr->vga_logging_count; - mr->vga_logging_count += log ? 1 : -1; - if (!!old_logging == !!mr->vga_logging_count) { - return; - } - - memory_region_transaction_begin(); - mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask); - memory_region_update_pending |= mr->enabled; - memory_region_transaction_commit(); -} - -void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, - hwaddr size) -{ - assert(mr->ram_block); - cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr, - size, - memory_region_get_dirty_log_mask(mr)); -} - -/* - * If memory region `mr' is NULL, do global sync. Otherwise, sync - * dirty bitmap for the specified memory region. - */ -static void memory_region_sync_dirty_bitmap(MemoryRegion *mr, bool last_stage) -{ - MemoryListener *listener; - AddressSpace *as; - FlatView *view; - FlatRange *fr; - - /* If the same address space has multiple log_sync listeners, we - * visit that address space's FlatView multiple times. But because - * log_sync listeners are rare, it's still cheaper than walking each - * address space once. 
- */ - QTAILQ_FOREACH(listener, &memory_listeners, link) { - if (listener->log_sync) { - as = listener->address_space; - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - if (fr->dirty_log_mask && (!mr || fr->mr == mr)) { - MemoryRegionSection mrs = section_from_flat_range(fr, view); - listener->log_sync(listener, &mrs); - } - } - flatview_unref(view); - trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 0); - } else if (listener->log_sync_global) { - /* - * No matter whether MR is specified, what we can do here - * is to do a global sync, because we are not capable to - * sync in a finer granularity. - */ - listener->log_sync_global(listener, last_stage); - trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 1); - } - } -} - -void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start, - hwaddr len) -{ - MemoryRegionSection mrs; - MemoryListener *listener; - AddressSpace *as; - FlatView *view; - FlatRange *fr; - hwaddr sec_start, sec_end, sec_size; - - QTAILQ_FOREACH(listener, &memory_listeners, link) { - if (!listener->log_clear) { - continue; - } - as = listener->address_space; - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - if (!fr->dirty_log_mask || fr->mr != mr) { - /* - * Clear dirty bitmap operation only applies to those - * regions whose dirty logging is at least enabled - */ - continue; - } - - mrs = section_from_flat_range(fr, view); - - sec_start = MAX(mrs.offset_within_region, start); - sec_end = mrs.offset_within_region + int128_get64(mrs.size); - sec_end = MIN(sec_end, start + len); - - if (sec_start >= sec_end) { - /* - * If this memory region section has no intersection - * with the requested range, skip. 
- */ - continue; - } - - /* Valid case; shrink the section if needed */ - mrs.offset_within_address_space += - sec_start - mrs.offset_within_region; - mrs.offset_within_region = sec_start; - sec_size = sec_end - sec_start; - mrs.size = int128_make64(sec_size); - listener->log_clear(listener, &mrs); - } - flatview_unref(view); - } -} - -DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, - hwaddr addr, - hwaddr size, - unsigned client) -{ - DirtyBitmapSnapshot *snapshot; - assert(mr->ram_block); - memory_region_sync_dirty_bitmap(mr, false); - snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client); - memory_global_after_dirty_log_sync(); - return snapshot; -} - -bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap, - hwaddr addr, hwaddr size) -{ - assert(mr->ram_block); - return cpu_physical_memory_snapshot_get_dirty(snap, - memory_region_get_ram_addr(mr) + addr, size); -} - -void memory_region_set_readonly(MemoryRegion *mr, bool readonly) -{ - if (mr->readonly != readonly) { - memory_region_transaction_begin(); - mr->readonly = readonly; - memory_region_update_pending |= mr->enabled; - memory_region_transaction_commit(); - } -} - -void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile) -{ - if (mr->nonvolatile != nonvolatile) { - memory_region_transaction_begin(); - mr->nonvolatile = nonvolatile; - memory_region_update_pending |= mr->enabled; - memory_region_transaction_commit(); - } -} - -void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode) -{ - if (mr->romd_mode != romd_mode) { - memory_region_transaction_begin(); - mr->romd_mode = romd_mode; - memory_region_update_pending |= mr->enabled; - memory_region_transaction_commit(); - } -} - -void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, - hwaddr size, unsigned client) -{ - assert(mr->ram_block); - cpu_physical_memory_test_and_clear_dirty( - memory_region_get_ram_addr(mr) + addr, size, 
client); -} - -int memory_region_get_fd(MemoryRegion *mr) -{ - RCU_READ_LOCK_GUARD(); - while (mr->alias) { - mr = mr->alias; - } - return mr->ram_block->fd; -} - -void *memory_region_get_ram_ptr(MemoryRegion *mr) -{ - uint64_t offset = 0; - - RCU_READ_LOCK_GUARD(); - while (mr->alias) { - offset += mr->alias_offset; - mr = mr->alias; - } - assert(mr->ram_block); - return qemu_map_ram_ptr(mr->ram_block, offset); -} - -MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset) -{ - RAMBlock *block; - - block = qemu_ram_block_from_host(ptr, false, offset); - if (!block) { - return NULL; - } - - return block->mr; -} - -ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr) -{ - return mr->ram_block ? mr->ram_block->offset : RAM_ADDR_INVALID; -} - -void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp) -{ - assert(mr->ram_block); - - qemu_ram_resize(mr->ram_block, newsize, errp); -} - -void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size) -{ - if (mr->ram_block) { - qemu_ram_msync(mr->ram_block, addr, size); - } -} - -void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) -{ - /* - * Might be extended case needed to cover - * different types of memory regions - */ - if (mr->dirty_log_mask) { - memory_region_msync(mr, addr, size); - } -} - -/* - * Call proper memory listeners about the change on the newly - * added/removed CoalescedMemoryRange. 
- */ -static void memory_region_update_coalesced_range(MemoryRegion *mr, - CoalescedMemoryRange *cmr, - bool add) -{ - AddressSpace *as; - FlatView *view; - FlatRange *fr; - - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - if (fr->mr == mr) { - flat_range_coalesced_io_notify(fr, as, cmr, add); - } - } - flatview_unref(view); - } -} - -void memory_region_set_coalescing(MemoryRegion *mr) -{ - memory_region_clear_coalescing(mr); - memory_region_add_coalescing(mr, 0, int128_get64(mr->size)); -} - -void memory_region_add_coalescing(MemoryRegion *mr, - hwaddr offset, - uint64_t size) -{ - CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr)); - - cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size)); - QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link); - memory_region_update_coalesced_range(mr, cmr, true); - memory_region_set_flush_coalesced(mr); -} - -void memory_region_clear_coalescing(MemoryRegion *mr) -{ - CoalescedMemoryRange *cmr; - - if (QTAILQ_EMPTY(&mr->coalesced)) { - return; - } - - qemu_flush_coalesced_mmio_buffer(); - mr->flush_coalesced_mmio = false; - - while (!QTAILQ_EMPTY(&mr->coalesced)) { - cmr = QTAILQ_FIRST(&mr->coalesced); - QTAILQ_REMOVE(&mr->coalesced, cmr, link); - memory_region_update_coalesced_range(mr, cmr, false); - g_free(cmr); - } -} - -void memory_region_set_flush_coalesced(MemoryRegion *mr) -{ - mr->flush_coalesced_mmio = true; -} - -void memory_region_clear_flush_coalesced(MemoryRegion *mr) -{ - qemu_flush_coalesced_mmio_buffer(); - if (QTAILQ_EMPTY(&mr->coalesced)) { - mr->flush_coalesced_mmio = false; - } -} - -static bool userspace_eventfd_warning; - -void memory_region_add_eventfd(MemoryRegion *mr, - hwaddr addr, - unsigned size, - bool match_data, - uint64_t data, - EventNotifier *e) -{ - MemoryRegionIoeventfd mrfd = { - .addr.start = int128_make64(addr), - .addr.size = int128_make64(size), - .match_data = match_data, - .data = data, - 
.e = e, - }; - unsigned i; - - if (kvm_enabled() && (!(kvm_eventfds_enabled() || - userspace_eventfd_warning))) { - userspace_eventfd_warning = true; - error_report("Using eventfd without MMIO binding in KVM. " - "Suboptimal performance expected"); - } - - if (size) { - adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); - } - memory_region_transaction_begin(); - for (i = 0; i < mr->ioeventfd_nb; ++i) { - if (memory_region_ioeventfd_before(&mrfd, &mr->ioeventfds[i])) { - break; - } - } - ++mr->ioeventfd_nb; - mr->ioeventfds = g_realloc(mr->ioeventfds, - sizeof(*mr->ioeventfds) * mr->ioeventfd_nb); - memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i], - sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i)); - mr->ioeventfds[i] = mrfd; - ioeventfd_update_pending |= mr->enabled; - memory_region_transaction_commit(); -} - -void memory_region_del_eventfd(MemoryRegion *mr, - hwaddr addr, - unsigned size, - bool match_data, - uint64_t data, - EventNotifier *e) -{ - MemoryRegionIoeventfd mrfd = { - .addr.start = int128_make64(addr), - .addr.size = int128_make64(size), - .match_data = match_data, - .data = data, - .e = e, - }; - unsigned i; - - if (size) { - adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); - } - memory_region_transaction_begin(); - for (i = 0; i < mr->ioeventfd_nb; ++i) { - if (memory_region_ioeventfd_equal(&mrfd, &mr->ioeventfds[i])) { - break; - } - } - assert(i != mr->ioeventfd_nb); - memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1], - sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1))); - --mr->ioeventfd_nb; - mr->ioeventfds = g_realloc(mr->ioeventfds, - sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1); - ioeventfd_update_pending |= mr->enabled; - memory_region_transaction_commit(); -} - -static void memory_region_update_container_subregions(MemoryRegion *subregion) -{ - MemoryRegion *mr = subregion->container; - MemoryRegion *other; - - memory_region_transaction_begin(); - - memory_region_ref(subregion); - QTAILQ_FOREACH(other, 
&mr->subregions, subregions_link) { - if (subregion->priority >= other->priority) { - QTAILQ_INSERT_BEFORE(other, subregion, subregions_link); - goto done; - } - } - QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link); -done: - memory_region_update_pending |= mr->enabled && subregion->enabled; - memory_region_transaction_commit(); -} - -static void memory_region_add_subregion_common(MemoryRegion *mr, - hwaddr offset, - MemoryRegion *subregion) -{ - MemoryRegion *alias; - - assert(!subregion->container); - subregion->container = mr; - for (alias = subregion->alias; alias; alias = alias->alias) { - alias->mapped_via_alias++; - } - subregion->addr = offset; - memory_region_update_container_subregions(subregion); -} - -void memory_region_add_subregion(MemoryRegion *mr, - hwaddr offset, - MemoryRegion *subregion) -{ - subregion->priority = 0; - memory_region_add_subregion_common(mr, offset, subregion); -} - -void memory_region_add_subregion_overlap(MemoryRegion *mr, - hwaddr offset, - MemoryRegion *subregion, - int priority) -{ - subregion->priority = priority; - memory_region_add_subregion_common(mr, offset, subregion); -} - -void memory_region_del_subregion(MemoryRegion *mr, - MemoryRegion *subregion) -{ - MemoryRegion *alias; - - memory_region_transaction_begin(); - assert(subregion->container == mr); - subregion->container = NULL; - for (alias = subregion->alias; alias; alias = alias->alias) { - alias->mapped_via_alias--; - assert(alias->mapped_via_alias >= 0); - } - QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link); - memory_region_unref(subregion); - memory_region_update_pending |= mr->enabled && subregion->enabled; - memory_region_transaction_commit(); -} - -void memory_region_set_enabled(MemoryRegion *mr, bool enabled) -{ - if (enabled == mr->enabled) { - return; - } - memory_region_transaction_begin(); - mr->enabled = enabled; - memory_region_update_pending = true; - memory_region_transaction_commit(); -} - -void 
memory_region_set_size(MemoryRegion *mr, uint64_t size) -{ - Int128 s = int128_make64(size); - - if (size == UINT64_MAX) { - s = int128_2_64(); - } - if (int128_eq(s, mr->size)) { - return; - } - memory_region_transaction_begin(); - mr->size = s; - memory_region_update_pending = true; - memory_region_transaction_commit(); -} - -static void memory_region_readd_subregion(MemoryRegion *mr) -{ - MemoryRegion *container = mr->container; - - if (container) { - memory_region_transaction_begin(); - memory_region_ref(mr); - memory_region_del_subregion(container, mr); - memory_region_add_subregion_common(container, mr->addr, mr); - memory_region_unref(mr); - memory_region_transaction_commit(); - } -} - -void memory_region_set_address(MemoryRegion *mr, hwaddr addr) -{ - if (addr != mr->addr) { - mr->addr = addr; - memory_region_readd_subregion(mr); - } -} - -void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset) -{ - assert(mr->alias); - - if (offset == mr->alias_offset) { - return; - } - - memory_region_transaction_begin(); - mr->alias_offset = offset; - memory_region_update_pending |= mr->enabled; - memory_region_transaction_commit(); -} - -uint64_t memory_region_get_alignment(const MemoryRegion *mr) -{ - return mr->align; -} - -static int cmp_flatrange_addr(const void *addr_, const void *fr_) -{ - const AddrRange *addr = addr_; - const FlatRange *fr = fr_; - - if (int128_le(addrrange_end(*addr), fr->addr.start)) { - return -1; - } else if (int128_ge(addr->start, addrrange_end(fr->addr))) { - return 1; - } - return 0; -} - -static FlatRange *flatview_lookup(FlatView *view, AddrRange addr) -{ - return bsearch(&addr, view->ranges, view->nr, - sizeof(FlatRange), cmp_flatrange_addr); -} - -bool memory_region_is_mapped(MemoryRegion *mr) -{ - return !!mr->container || mr->mapped_via_alias; -} - -/* Same as memory_region_find, but it does not add a reference to the - * returned region. It must be called from an RCU critical section. 
- */ -static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr, - hwaddr addr, uint64_t size) -{ - MemoryRegionSection ret = { .mr = NULL }; - MemoryRegion *root; - AddressSpace *as; - AddrRange range; - FlatView *view; - FlatRange *fr; - - addr += mr->addr; - for (root = mr; root->container; ) { - root = root->container; - addr += root->addr; - } - - as = memory_region_to_address_space(root); - if (!as) { - return ret; - } - range = addrrange_make(int128_make64(addr), int128_make64(size)); - - view = address_space_to_flatview(as); - fr = flatview_lookup(view, range); - if (!fr) { - return ret; - } - - while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) { - --fr; - } - - ret.mr = fr->mr; - ret.fv = view; - range = addrrange_intersection(range, fr->addr); - ret.offset_within_region = fr->offset_in_region; - ret.offset_within_region += int128_get64(int128_sub(range.start, - fr->addr.start)); - ret.size = range.size; - ret.offset_within_address_space = int128_get64(range.start); - ret.readonly = fr->readonly; - ret.nonvolatile = fr->nonvolatile; - return ret; -} - -MemoryRegionSection memory_region_find(MemoryRegion *mr, - hwaddr addr, uint64_t size) -{ - MemoryRegionSection ret; - RCU_READ_LOCK_GUARD(); - ret = memory_region_find_rcu(mr, addr, size); - if (ret.mr) { - memory_region_ref(ret.mr); - } - return ret; -} - -MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s) -{ - MemoryRegionSection *tmp = g_new(MemoryRegionSection, 1); - - *tmp = *s; - if (tmp->mr) { - memory_region_ref(tmp->mr); - } - if (tmp->fv) { - bool ret = flatview_ref(tmp->fv); - - g_assert(ret); - } - return tmp; -} - -void memory_region_section_free_copy(MemoryRegionSection *s) -{ - if (s->fv) { - flatview_unref(s->fv); - } - if (s->mr) { - memory_region_unref(s->mr); - } - g_free(s); -} - -bool memory_region_present(MemoryRegion *container, hwaddr addr) -{ - MemoryRegion *mr; - - RCU_READ_LOCK_GUARD(); - mr = memory_region_find_rcu(container, 
addr, 1).mr; - return mr && mr != container; -} - -void memory_global_dirty_log_sync(bool last_stage) -{ - memory_region_sync_dirty_bitmap(NULL, last_stage); -} - -void memory_global_after_dirty_log_sync(void) -{ - MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward); -} - -/* - * Dirty track stop flags that are postponed due to VM being stopped. Should - * only be used within vmstate_change hook. - */ -static unsigned int postponed_stop_flags; -static VMChangeStateEntry *vmstate_change; -static void memory_global_dirty_log_stop_postponed_run(void); - -void memory_global_dirty_log_start(unsigned int flags) -{ - unsigned int old_flags; - - assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); - - if (vmstate_change) { - /* If there is postponed stop(), operate on it first */ - postponed_stop_flags &= ~flags; - memory_global_dirty_log_stop_postponed_run(); - } - - flags &= ~global_dirty_tracking; - if (!flags) { - return; - } - - old_flags = global_dirty_tracking; - global_dirty_tracking |= flags; - trace_global_dirty_changed(global_dirty_tracking); - - if (!old_flags) { - MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); - memory_region_transaction_begin(); - memory_region_update_pending = true; - memory_region_transaction_commit(); - } -} - -static void memory_global_dirty_log_do_stop(unsigned int flags) -{ - assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); - assert((global_dirty_tracking & flags) == flags); - global_dirty_tracking &= ~flags; - - trace_global_dirty_changed(global_dirty_tracking); - - if (!global_dirty_tracking) { - memory_region_transaction_begin(); - memory_region_update_pending = true; - memory_region_transaction_commit(); - MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); - } -} - -/* - * Execute the postponed dirty log stop operations if there is, then reset - * everything (including the flags and the vmstate change hook). 
- */ -static void memory_global_dirty_log_stop_postponed_run(void) -{ - /* This must be called with the vmstate handler registered */ - assert(vmstate_change); - - /* Note: postponed_stop_flags can be cleared in log start routine */ - if (postponed_stop_flags) { - memory_global_dirty_log_do_stop(postponed_stop_flags); - postponed_stop_flags = 0; - } - - qemu_del_vm_change_state_handler(vmstate_change); - vmstate_change = NULL; -} - -static void memory_vm_change_state_handler(void *opaque, bool running, - RunState state) -{ - if (running) { - memory_global_dirty_log_stop_postponed_run(); - } -} - -void memory_global_dirty_log_stop(unsigned int flags) -{ - if (!runstate_is_running()) { - /* Postpone the dirty log stop, e.g., to when VM starts again */ - if (vmstate_change) { - /* Batch with previous postponed flags */ - postponed_stop_flags |= flags; - } else { - postponed_stop_flags = flags; - vmstate_change = qemu_add_vm_change_state_handler( - memory_vm_change_state_handler, NULL); - } - return; - } - - memory_global_dirty_log_do_stop(flags); -} - -static void listener_add_address_space(MemoryListener *listener, - AddressSpace *as) -{ - FlatView *view; - FlatRange *fr; - - if (listener->begin) { - listener->begin(listener); - } - if (global_dirty_tracking) { - if (listener->log_global_start) { - listener->log_global_start(listener); - } - } - - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - MemoryRegionSection section = section_from_flat_range(fr, view); - - if (listener->region_add) { - listener->region_add(listener, §ion); - } - if (fr->dirty_log_mask && listener->log_start) { - listener->log_start(listener, §ion, 0, fr->dirty_log_mask); - } - } - if (listener->commit) { - listener->commit(listener); - } - flatview_unref(view); -} - -static void listener_del_address_space(MemoryListener *listener, - AddressSpace *as) -{ - FlatView *view; - FlatRange *fr; - - if (listener->begin) { - listener->begin(listener); - } - view = 
address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - MemoryRegionSection section = section_from_flat_range(fr, view); - - if (fr->dirty_log_mask && listener->log_stop) { - listener->log_stop(listener, §ion, fr->dirty_log_mask, 0); - } - if (listener->region_del) { - listener->region_del(listener, §ion); - } - } - if (listener->commit) { - listener->commit(listener); - } - flatview_unref(view); -} - -void memory_listener_register(MemoryListener *listener, AddressSpace *as) -{ - MemoryListener *other = NULL; - - /* Only one of them can be defined for a listener */ - assert(!(listener->log_sync && listener->log_sync_global)); - - listener->address_space = as; - if (QTAILQ_EMPTY(&memory_listeners) - || listener->priority >= QTAILQ_LAST(&memory_listeners)->priority) { - QTAILQ_INSERT_TAIL(&memory_listeners, listener, link); - } else { - QTAILQ_FOREACH(other, &memory_listeners, link) { - if (listener->priority < other->priority) { - break; - } - } - QTAILQ_INSERT_BEFORE(other, listener, link); - } - - if (QTAILQ_EMPTY(&as->listeners) - || listener->priority >= QTAILQ_LAST(&as->listeners)->priority) { - QTAILQ_INSERT_TAIL(&as->listeners, listener, link_as); - } else { - QTAILQ_FOREACH(other, &as->listeners, link_as) { - if (listener->priority < other->priority) { - break; - } - } - QTAILQ_INSERT_BEFORE(other, listener, link_as); - } - - listener_add_address_space(listener, as); - - if (listener->eventfd_add || listener->eventfd_del) { - as->ioeventfd_notifiers++; - } -} - -void memory_listener_unregister(MemoryListener *listener) -{ - if (!listener->address_space) { - return; - } - - if (listener->eventfd_add || listener->eventfd_del) { - listener->address_space->ioeventfd_notifiers--; - } - - listener_del_address_space(listener, listener->address_space); - QTAILQ_REMOVE(&memory_listeners, listener, link); - QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as); - listener->address_space = NULL; -} - -void 
address_space_remove_listeners(AddressSpace *as) -{ - while (!QTAILQ_EMPTY(&as->listeners)) { - memory_listener_unregister(QTAILQ_FIRST(&as->listeners)); - } -} - -void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) -{ - memory_region_ref(root); - as->root = root; - as->current_map = NULL; - as->ioeventfd_nb = 0; - as->ioeventfds = NULL; - QTAILQ_INIT(&as->listeners); - QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); - as->name = g_strdup(name ? name : "anonymous"); - address_space_update_topology(as); - address_space_update_ioeventfds(as); -} - -static void do_address_space_destroy(AddressSpace *as) -{ - assert(QTAILQ_EMPTY(&as->listeners)); - - flatview_unref(as->current_map); - g_free(as->name); - g_free(as->ioeventfds); - memory_region_unref(as->root); -} - -void address_space_destroy(AddressSpace *as) -{ - MemoryRegion *root = as->root; - - /* Flush out anything from MemoryListeners listening in on this */ - memory_region_transaction_begin(); - as->root = NULL; - memory_region_transaction_commit(); - QTAILQ_REMOVE(&address_spaces, as, address_spaces_link); - - /* At this point, as->dispatch and as->current_map are dummy - * entries that the guest should never use. Wait for the old - * values to expire before freeing the data. 
- */ - as->root = root; - call_rcu(as, do_address_space_destroy, rcu); -} - -static const char *memory_region_type(MemoryRegion *mr) -{ - if (mr->alias) { - return memory_region_type(mr->alias); - } - if (memory_region_is_ram_device(mr)) { - return "ramd"; - } else if (memory_region_is_romd(mr)) { - return "romd"; - } else if (memory_region_is_rom(mr)) { - return "rom"; - } else if (memory_region_is_ram(mr)) { - return "ram"; - } else { - return "i/o"; - } -} - -typedef struct MemoryRegionList MemoryRegionList; - -struct MemoryRegionList { - const MemoryRegion *mr; - QTAILQ_ENTRY(MemoryRegionList) mrqueue; -}; - -typedef QTAILQ_HEAD(, MemoryRegionList) MemoryRegionListHead; - -#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ - int128_sub((size), int128_one())) : 0) -#define MTREE_INDENT " " - -static void mtree_expand_owner(const char *label, Object *obj) -{ - DeviceState *dev = (DeviceState *) object_dynamic_cast(obj, TYPE_DEVICE); - - qemu_printf(" %s:{%s", label, dev ? 
"dev" : "obj"); - if (dev && dev->id) { - qemu_printf(" id=%s", dev->id); - } else { - char *canonical_path = object_get_canonical_path(obj); - if (canonical_path) { - qemu_printf(" path=%s", canonical_path); - g_free(canonical_path); - } else { - qemu_printf(" type=%s", object_get_typename(obj)); - } - } - qemu_printf("}"); -} - -static void mtree_print_mr_owner(const MemoryRegion *mr) -{ - Object *owner = mr->owner; - Object *parent = memory_region_owner((MemoryRegion *)mr); - - if (!owner && !parent) { - qemu_printf(" orphan"); - return; - } - if (owner) { - mtree_expand_owner("owner", owner); - } - if (parent && parent != owner) { - mtree_expand_owner("parent", parent); - } -} - -static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, - hwaddr base, - MemoryRegionListHead *alias_print_queue, - bool owner, bool display_disabled) -{ - MemoryRegionList *new_ml, *ml, *next_ml; - MemoryRegionListHead submr_print_queue; - const MemoryRegion *submr; - unsigned int i; - hwaddr cur_start, cur_end; - - if (!mr) { - return; - } - - cur_start = base + mr->addr; - cur_end = cur_start + MR_SIZE(mr->size); - - /* - * Try to detect overflow of memory region. This should never - * happen normally. When it happens, we dump something to warn the - * user who is observing this. - */ - if (cur_start < base || cur_end < cur_start) { - qemu_printf("[DETECTED OVERFLOW!] 
"); - } - - if (mr->alias) { - bool found = false; - - /* check if the alias is already in the queue */ - QTAILQ_FOREACH(ml, alias_print_queue, mrqueue) { - if (ml->mr == mr->alias) { - found = true; - } - } - - if (!found) { - ml = g_new(MemoryRegionList, 1); - ml->mr = mr->alias; - QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue); - } - if (mr->enabled || display_disabled) { - for (i = 0; i < level; i++) { - qemu_printf(MTREE_INDENT); - } - qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx - " (prio %d, %s%s): alias %s @%s " HWADDR_FMT_plx - "-" HWADDR_FMT_plx "%s", - cur_start, cur_end, - mr->priority, - mr->nonvolatile ? "nv-" : "", - memory_region_type((MemoryRegion *)mr), - memory_region_name(mr), - memory_region_name(mr->alias), - mr->alias_offset, - mr->alias_offset + MR_SIZE(mr->size), - mr->enabled ? "" : " [disabled]"); - if (owner) { - mtree_print_mr_owner(mr); - } - qemu_printf("\n"); - } - } else { - if (mr->enabled || display_disabled) { - for (i = 0; i < level; i++) { - qemu_printf(MTREE_INDENT); - } - qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx - " (prio %d, %s%s): %s%s", - cur_start, cur_end, - mr->priority, - mr->nonvolatile ? "nv-" : "", - memory_region_type((MemoryRegion *)mr), - memory_region_name(mr), - mr->enabled ? 
"" : " [disabled]"); - if (owner) { - mtree_print_mr_owner(mr); - } - qemu_printf("\n"); - } - } - - QTAILQ_INIT(&submr_print_queue); - - QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) { - new_ml = g_new(MemoryRegionList, 1); - new_ml->mr = submr; - QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) { - if (new_ml->mr->addr < ml->mr->addr || - (new_ml->mr->addr == ml->mr->addr && - new_ml->mr->priority > ml->mr->priority)) { - QTAILQ_INSERT_BEFORE(ml, new_ml, mrqueue); - new_ml = NULL; - break; - } - } - if (new_ml) { - QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, mrqueue); - } - } - - QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) { - mtree_print_mr(ml->mr, level + 1, cur_start, - alias_print_queue, owner, display_disabled); - } - - QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, mrqueue, next_ml) { - g_free(ml); - } -} - -struct FlatViewInfo { - int counter; - bool dispatch_tree; - bool owner; - AccelClass *ac; -}; - -static void mtree_print_flatview(gpointer key, gpointer value, - gpointer user_data) -{ - FlatView *view = key; - GArray *fv_address_spaces = value; - struct FlatViewInfo *fvi = user_data; - FlatRange *range = &view->ranges[0]; - MemoryRegion *mr; - int n = view->nr; - int i; - AddressSpace *as; - - qemu_printf("FlatView #%d\n", fvi->counter); - ++fvi->counter; - - for (i = 0; i < fv_address_spaces->len; ++i) { - as = g_array_index(fv_address_spaces, AddressSpace*, i); - qemu_printf(" AS \"%s\", root: %s", - as->name, memory_region_name(as->root)); - if (as->root->alias) { - qemu_printf(", alias %s", memory_region_name(as->root->alias)); - } - qemu_printf("\n"); - } - - qemu_printf(" Root memory region: %s\n", - view->root ? 
memory_region_name(view->root) : "(none)"); - - if (n <= 0) { - qemu_printf(MTREE_INDENT "No rendered FlatView\n\n"); - return; - } - - while (n--) { - mr = range->mr; - if (range->offset_in_region) { - qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx - " (prio %d, %s%s): %s @" HWADDR_FMT_plx, - int128_get64(range->addr.start), - int128_get64(range->addr.start) - + MR_SIZE(range->addr.size), - mr->priority, - range->nonvolatile ? "nv-" : "", - range->readonly ? "rom" : memory_region_type(mr), - memory_region_name(mr), - range->offset_in_region); - } else { - qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx - " (prio %d, %s%s): %s", - int128_get64(range->addr.start), - int128_get64(range->addr.start) - + MR_SIZE(range->addr.size), - mr->priority, - range->nonvolatile ? "nv-" : "", - range->readonly ? "rom" : memory_region_type(mr), - memory_region_name(mr)); - } - if (fvi->owner) { - mtree_print_mr_owner(mr); - } - - if (fvi->ac) { - for (i = 0; i < fv_address_spaces->len; ++i) { - as = g_array_index(fv_address_spaces, AddressSpace*, i); - if (fvi->ac->has_memory(current_machine, as, - int128_get64(range->addr.start), - MR_SIZE(range->addr.size) + 1)) { - qemu_printf(" %s", fvi->ac->name); - } - } - } - qemu_printf("\n"); - range++; - } - -#if !defined(CONFIG_USER_ONLY) - if (fvi->dispatch_tree && view->root) { - mtree_print_dispatch(view->dispatch, view->root); - } -#endif - - qemu_printf("\n"); -} - -static gboolean mtree_info_flatview_free(gpointer key, gpointer value, - gpointer user_data) -{ - FlatView *view = key; - GArray *fv_address_spaces = value; - - g_array_unref(fv_address_spaces); - flatview_unref(view); - - return true; -} - -static void mtree_info_flatview(bool dispatch_tree, bool owner) -{ - struct FlatViewInfo fvi = { - .counter = 0, - .dispatch_tree = dispatch_tree, - .owner = owner, - }; - AddressSpace *as; - FlatView *view; - GArray *fv_address_spaces; - GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal); - 
AccelClass *ac = ACCEL_GET_CLASS(current_accel()); - - if (ac->has_memory) { - fvi.ac = ac; - } - - /* Gather all FVs in one table */ - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - view = address_space_get_flatview(as); - - fv_address_spaces = g_hash_table_lookup(views, view); - if (!fv_address_spaces) { - fv_address_spaces = g_array_new(false, false, sizeof(as)); - g_hash_table_insert(views, view, fv_address_spaces); - } - - g_array_append_val(fv_address_spaces, as); - } - - /* Print */ - g_hash_table_foreach(views, mtree_print_flatview, &fvi); - - /* Free */ - g_hash_table_foreach_remove(views, mtree_info_flatview_free, 0); - g_hash_table_unref(views); -} - -struct AddressSpaceInfo { - MemoryRegionListHead *ml_head; - bool owner; - bool disabled; -}; - -/* Returns negative value if a < b; zero if a = b; positive value if a > b. */ -static gint address_space_compare_name(gconstpointer a, gconstpointer b) -{ - const AddressSpace *as_a = a; - const AddressSpace *as_b = b; - - return g_strcmp0(as_a->name, as_b->name); -} - -static void mtree_print_as_name(gpointer data, gpointer user_data) -{ - AddressSpace *as = data; - - qemu_printf("address-space: %s\n", as->name); -} - -static void mtree_print_as(gpointer key, gpointer value, gpointer user_data) -{ - MemoryRegion *mr = key; - GSList *as_same_root_mr_list = value; - struct AddressSpaceInfo *asi = user_data; - - g_slist_foreach(as_same_root_mr_list, mtree_print_as_name, NULL); - mtree_print_mr(mr, 1, 0, asi->ml_head, asi->owner, asi->disabled); - qemu_printf("\n"); -} - -static gboolean mtree_info_as_free(gpointer key, gpointer value, - gpointer user_data) -{ - GSList *as_same_root_mr_list = value; - - g_slist_free(as_same_root_mr_list); - - return true; -} - -static void mtree_info_as(bool dispatch_tree, bool owner, bool disabled) -{ - MemoryRegionListHead ml_head; - MemoryRegionList *ml, *ml2; - AddressSpace *as; - GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal); - GSList 
*as_same_root_mr_list; - struct AddressSpaceInfo asi = { - .ml_head = &ml_head, - .owner = owner, - .disabled = disabled, - }; - - QTAILQ_INIT(&ml_head); - - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - /* Create hashtable, key=AS root MR, value = list of AS */ - as_same_root_mr_list = g_hash_table_lookup(views, as->root); - as_same_root_mr_list = g_slist_insert_sorted(as_same_root_mr_list, as, - address_space_compare_name); - g_hash_table_insert(views, as->root, as_same_root_mr_list); - } - - /* print address spaces */ - g_hash_table_foreach(views, mtree_print_as, &asi); - g_hash_table_foreach_remove(views, mtree_info_as_free, 0); - g_hash_table_unref(views); - - /* print aliased regions */ - QTAILQ_FOREACH(ml, &ml_head, mrqueue) { - qemu_printf("memory-region: %s\n", memory_region_name(ml->mr)); - mtree_print_mr(ml->mr, 1, 0, &ml_head, owner, disabled); - qemu_printf("\n"); - } - - QTAILQ_FOREACH_SAFE(ml, &ml_head, mrqueue, ml2) { - g_free(ml); - } -} - -void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled) -{ - if (flatview) { - mtree_info_flatview(dispatch_tree, owner); - } else { - mtree_info_as(dispatch_tree, owner, disabled); - } -} - -void memory_region_init_ram(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - Error **errp) -{ - DeviceState *owner_dev; - Error *err = NULL; - - memory_region_init_ram_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); - return; - } - /* This will assert if owner is neither NULL nor a DeviceState. - * We only want the owner here for the purposes of defining a - * unique name for migration. TODO: Ideally we should implement - * a naming scheme for Objects which are not DeviceStates, in - * which case we can relax this restriction. 
- */ - owner_dev = DEVICE(owner); - vmstate_register_ram(mr, owner_dev); -} - -void memory_region_init_rom(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - Error **errp) -{ - DeviceState *owner_dev; - Error *err = NULL; - - memory_region_init_rom_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); - return; - } - /* This will assert if owner is neither NULL nor a DeviceState. - * We only want the owner here for the purposes of defining a - * unique name for migration. TODO: Ideally we should implement - * a naming scheme for Objects which are not DeviceStates, in - * which case we can relax this restriction. - */ - owner_dev = DEVICE(owner); - vmstate_register_ram(mr, owner_dev); -} - -void memory_region_init_rom_device(MemoryRegion *mr, - Object *owner, - const MemoryRegionOps *ops, - void *opaque, - const char *name, - uint64_t size, - Error **errp) -{ - DeviceState *owner_dev; - Error *err = NULL; - - memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, - name, size, &err); - if (err) { - error_propagate(errp, err); - return; - } - /* This will assert if owner is neither NULL nor a DeviceState. - * We only want the owner here for the purposes of defining a - * unique name for migration. TODO: Ideally we should implement - * a naming scheme for Objects which are not DeviceStates, in - * which case we can relax this restriction. 
- */ - owner_dev = DEVICE(owner); - vmstate_register_ram(mr, owner_dev); -} - -/* - * Support system builds with CONFIG_FUZZ using a weak symbol and a stub for - * the fuzz_dma_read_cb callback - */ -#ifdef CONFIG_FUZZ -void __attribute__((weak)) fuzz_dma_read_cb(size_t addr, - size_t len, - MemoryRegion *mr) -{ -} -#endif - -static const TypeInfo memory_region_info = { - .parent = TYPE_OBJECT, - .name = TYPE_MEMORY_REGION, - .class_size = sizeof(MemoryRegionClass), - .instance_size = sizeof(MemoryRegion), - .instance_init = memory_region_initfn, - .instance_finalize = memory_region_finalize, -}; - -static const TypeInfo iommu_memory_region_info = { - .parent = TYPE_MEMORY_REGION, - .name = TYPE_IOMMU_MEMORY_REGION, - .class_size = sizeof(IOMMUMemoryRegionClass), - .instance_size = sizeof(IOMMUMemoryRegion), - .instance_init = iommu_memory_region_initfn, - .abstract = true, -}; - -static const TypeInfo ram_discard_manager_info = { - .parent = TYPE_INTERFACE, - .name = TYPE_RAM_DISCARD_MANAGER, - .class_size = sizeof(RamDiscardManagerClass), -}; - -static void memory_register_types(void) -{ - type_register_static(&memory_region_info); - type_register_static(&iommu_memory_region_info); - type_register_static(&ram_discard_manager_info); -} - -type_init(memory_register_types) diff --git a/softmmu/memory_mapping.c b/softmmu/memory_mapping.c deleted file mode 100644 index d7f1d09..0000000 --- a/softmmu/memory_mapping.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * QEMU memory mapping - * - * Copyright Fujitsu, Corp. 2011, 2012 - * - * Authors: - * Wen Congyang - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" - -#include "sysemu/memory_mapping.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" -#include "hw/core/cpu.h" - -//#define DEBUG_GUEST_PHYS_REGION_ADD - -static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list, - MemoryMapping *mapping) -{ - MemoryMapping *p; - - QTAILQ_FOREACH(p, &list->head, next) { - if (p->phys_addr >= mapping->phys_addr) { - QTAILQ_INSERT_BEFORE(p, mapping, next); - return; - } - } - QTAILQ_INSERT_TAIL(&list->head, mapping, next); -} - -static void create_new_memory_mapping(MemoryMappingList *list, - hwaddr phys_addr, - hwaddr virt_addr, - ram_addr_t length) -{ - MemoryMapping *memory_mapping; - - memory_mapping = g_new(MemoryMapping, 1); - memory_mapping->phys_addr = phys_addr; - memory_mapping->virt_addr = virt_addr; - memory_mapping->length = length; - list->last_mapping = memory_mapping; - list->num++; - memory_mapping_list_add_mapping_sorted(list, memory_mapping); -} - -static inline bool mapping_contiguous(MemoryMapping *map, - hwaddr phys_addr, - hwaddr virt_addr) -{ - return phys_addr == map->phys_addr + map->length && - virt_addr == map->virt_addr + map->length; -} - -/* - * [map->phys_addr, map->phys_addr + map->length) and - * [phys_addr, phys_addr + length) have intersection? - */ -static inline bool mapping_have_same_region(MemoryMapping *map, - hwaddr phys_addr, - ram_addr_t length) -{ - return !(phys_addr + length < map->phys_addr || - phys_addr >= map->phys_addr + map->length); -} - -/* - * [map->phys_addr, map->phys_addr + map->length) and - * [phys_addr, phys_addr + length) have intersection. The virtual address in the - * intersection are the same? 
- */ -static inline bool mapping_conflict(MemoryMapping *map, - hwaddr phys_addr, - hwaddr virt_addr) -{ - return virt_addr - map->virt_addr != phys_addr - map->phys_addr; -} - -/* - * [map->virt_addr, map->virt_addr + map->length) and - * [virt_addr, virt_addr + length) have intersection. And the physical address - * in the intersection are the same. - */ -static inline void mapping_merge(MemoryMapping *map, - hwaddr virt_addr, - ram_addr_t length) -{ - if (virt_addr < map->virt_addr) { - map->length += map->virt_addr - virt_addr; - map->virt_addr = virt_addr; - } - - if ((virt_addr + length) > - (map->virt_addr + map->length)) { - map->length = virt_addr + length - map->virt_addr; - } -} - -void memory_mapping_list_add_merge_sorted(MemoryMappingList *list, - hwaddr phys_addr, - hwaddr virt_addr, - ram_addr_t length) -{ - MemoryMapping *memory_mapping, *last_mapping; - - if (QTAILQ_EMPTY(&list->head)) { - create_new_memory_mapping(list, phys_addr, virt_addr, length); - return; - } - - last_mapping = list->last_mapping; - if (last_mapping) { - if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) { - last_mapping->length += length; - return; - } - } - - QTAILQ_FOREACH(memory_mapping, &list->head, next) { - if (mapping_contiguous(memory_mapping, phys_addr, virt_addr)) { - memory_mapping->length += length; - list->last_mapping = memory_mapping; - return; - } - - if (phys_addr + length < memory_mapping->phys_addr) { - /* create a new region before memory_mapping */ - break; - } - - if (mapping_have_same_region(memory_mapping, phys_addr, length)) { - if (mapping_conflict(memory_mapping, phys_addr, virt_addr)) { - continue; - } - - /* merge this region into memory_mapping */ - mapping_merge(memory_mapping, virt_addr, length); - list->last_mapping = memory_mapping; - return; - } - } - - /* this region can not be merged into any existed memory mapping. 
*/ - create_new_memory_mapping(list, phys_addr, virt_addr, length); -} - -void memory_mapping_list_free(MemoryMappingList *list) -{ - MemoryMapping *p, *q; - - QTAILQ_FOREACH_SAFE(p, &list->head, next, q) { - QTAILQ_REMOVE(&list->head, p, next); - g_free(p); - } - - list->num = 0; - list->last_mapping = NULL; -} - -void memory_mapping_list_init(MemoryMappingList *list) -{ - list->num = 0; - list->last_mapping = NULL; - QTAILQ_INIT(&list->head); -} - -void guest_phys_blocks_free(GuestPhysBlockList *list) -{ - GuestPhysBlock *p, *q; - - QTAILQ_FOREACH_SAFE(p, &list->head, next, q) { - QTAILQ_REMOVE(&list->head, p, next); - memory_region_unref(p->mr); - g_free(p); - } - list->num = 0; -} - -void guest_phys_blocks_init(GuestPhysBlockList *list) -{ - list->num = 0; - QTAILQ_INIT(&list->head); -} - -typedef struct GuestPhysListener { - GuestPhysBlockList *list; - MemoryListener listener; -} GuestPhysListener; - -static void guest_phys_block_add_section(GuestPhysListener *g, - MemoryRegionSection *section) -{ - const hwaddr target_start = section->offset_within_address_space; - const hwaddr target_end = target_start + int128_get64(section->size); - uint8_t *host_addr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region; - GuestPhysBlock *predecessor = NULL; - - /* find continuity in guest physical address space */ - if (!QTAILQ_EMPTY(&g->list->head)) { - hwaddr predecessor_size; - - predecessor = QTAILQ_LAST(&g->list->head); - predecessor_size = predecessor->target_end - predecessor->target_start; - - /* the memory API guarantees monotonically increasing traversal */ - g_assert(predecessor->target_end <= target_start); - - /* we want continuity in both guest-physical and host-virtual memory */ - if (predecessor->target_end < target_start || - predecessor->host_addr + predecessor_size != host_addr || - predecessor->mr != section->mr) { - predecessor = NULL; - } - } - - if (predecessor == NULL) { - /* isolated mapping, allocate it and add it to the 
list */ - GuestPhysBlock *block = g_malloc0(sizeof *block); - - block->target_start = target_start; - block->target_end = target_end; - block->host_addr = host_addr; - block->mr = section->mr; - memory_region_ref(section->mr); - - QTAILQ_INSERT_TAIL(&g->list->head, block, next); - ++g->list->num; - } else { - /* expand predecessor until @target_end; predecessor's start doesn't - * change - */ - predecessor->target_end = target_end; - } - -#ifdef DEBUG_GUEST_PHYS_REGION_ADD - fprintf(stderr, "%s: target_start=" HWADDR_FMT_plx " target_end=" - HWADDR_FMT_plx ": %s (count: %u)\n", __func__, target_start, - target_end, predecessor ? "joined" : "added", g->list->num); -#endif -} - -static int guest_phys_ram_populate_cb(MemoryRegionSection *section, - void *opaque) -{ - GuestPhysListener *g = opaque; - - guest_phys_block_add_section(g, section); - return 0; -} - -static void guest_phys_blocks_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - GuestPhysListener *g = container_of(listener, GuestPhysListener, listener); - - /* we only care about RAM */ - if (!memory_region_is_ram(section->mr) || - memory_region_is_ram_device(section->mr) || - memory_region_is_nonvolatile(section->mr)) { - return; - } - - /* for special sparse regions, only add populated parts */ - if (memory_region_has_ram_discard_manager(section->mr)) { - RamDiscardManager *rdm; - - rdm = memory_region_get_ram_discard_manager(section->mr); - ram_discard_manager_replay_populated(rdm, section, - guest_phys_ram_populate_cb, g); - return; - } - - guest_phys_block_add_section(g, section); -} - -void guest_phys_blocks_append(GuestPhysBlockList *list) -{ - GuestPhysListener g = { 0 }; - - g.list = list; - g.listener.region_add = &guest_phys_blocks_region_add; - memory_listener_register(&g.listener, &address_space_memory); - memory_listener_unregister(&g.listener); -} - -static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if 
(cpu_paging_enabled(cpu)) { - return cpu; - } - } - - return NULL; -} - -void qemu_get_guest_memory_mapping(MemoryMappingList *list, - const GuestPhysBlockList *guest_phys_blocks, - Error **errp) -{ - CPUState *cpu, *first_paging_enabled_cpu; - GuestPhysBlock *block; - ram_addr_t offset, length; - - first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); - if (first_paging_enabled_cpu) { - for (cpu = first_paging_enabled_cpu; cpu != NULL; - cpu = CPU_NEXT(cpu)) { - Error *err = NULL; - cpu_get_memory_mapping(cpu, list, &err); - if (err) { - error_propagate(errp, err); - return; - } - } - return; - } - - /* - * If the guest doesn't use paging, the virtual address is equal to physical - * address. - */ - QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) { - offset = block->target_start; - length = block->target_end - block->target_start; - create_new_memory_mapping(list, offset, offset, length); - } -} - -void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, - const GuestPhysBlockList *guest_phys_blocks) -{ - GuestPhysBlock *block; - - QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) { - create_new_memory_mapping(list, block->target_start, 0, - block->target_end - block->target_start); - } -} - -void memory_mapping_filter(MemoryMappingList *list, int64_t begin, - int64_t length) -{ - MemoryMapping *cur, *next; - - QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) { - if (cur->phys_addr >= begin + length || - cur->phys_addr + cur->length <= begin) { - QTAILQ_REMOVE(&list->head, cur, next); - g_free(cur); - list->num--; - continue; - } - - if (cur->phys_addr < begin) { - cur->length -= begin - cur->phys_addr; - if (cur->virt_addr) { - cur->virt_addr += begin - cur->phys_addr; - } - cur->phys_addr = begin; - } - - if (cur->phys_addr + cur->length > begin + length) { - cur->length -= cur->phys_addr + cur->length - begin - length; - } - } -} diff --git a/softmmu/meson.build b/softmmu/meson.build deleted file mode 100644 index 3a64dd8..0000000 
--- a/softmmu/meson.build +++ /dev/null @@ -1,36 +0,0 @@ -specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files( - 'arch_init.c', - 'ioport.c', - 'memory.c', - 'physmem.c', - 'watchpoint.c', -)]) - -system_ss.add(files( - 'balloon.c', - 'bootdevice.c', - 'cpus.c', - 'cpu-throttle.c', - 'cpu-timers.c', - 'datadir.c', - 'dirtylimit.c', - 'dma-helpers.c', - 'globals.c', - 'memory_mapping.c', - 'qdev-monitor.c', - 'qtest.c', - 'rtc.c', - 'runstate-action.c', - 'runstate-hmp-cmds.c', - 'runstate.c', - 'tpm-hmp-cmds.c', - 'vl.c', -), sdl, libpmem, libdaxctl) - -if have_tpm - system_ss.add(files('tpm.c')) -endif - -system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c')) -system_ss.add(when: fdt, if_true: files('device_tree.c')) -system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c')) diff --git a/softmmu/physmem.c b/softmmu/physmem.c deleted file mode 100644 index 309653c..0000000 --- a/softmmu/physmem.c +++ /dev/null @@ -1,3794 +0,0 @@ -/* - * RAM allocation and memory access - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "exec/page-vary.h" -#include "qapi/error.h" - -#include "qemu/cutils.h" -#include "qemu/cacheflush.h" -#include "qemu/hbitmap.h" -#include "qemu/madvise.h" - -#ifdef CONFIG_TCG -#include "hw/core/tcg-cpu-ops.h" -#endif /* CONFIG_TCG */ - -#include "exec/exec-all.h" -#include "exec/target_page.h" -#include "hw/qdev-core.h" -#include "hw/qdev-properties.h" -#include "hw/boards.h" -#include "hw/xen/xen.h" -#include "sysemu/kvm.h" -#include "sysemu/tcg.h" -#include "sysemu/qtest.h" -#include "qemu/timer.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" -#include "qemu/log.h" -#include "qemu/memalign.h" -#include "exec/memory.h" -#include "exec/ioport.h" -#include "sysemu/dma.h" -#include "sysemu/hostmem.h" -#include "sysemu/hw_accel.h" -#include "sysemu/xen-mapcache.h" -#include "trace/trace-root.h" - -#ifdef CONFIG_FALLOCATE_PUNCH_HOLE -#include -#endif - -#include "qemu/rcu_queue.h" -#include "qemu/main-loop.h" -#include "exec/translate-all.h" -#include "sysemu/replay.h" - -#include "exec/memory-internal.h" -#include "exec/ram_addr.h" - -#include "qemu/pmem.h" - -#include "migration/vmstate.h" - -#include "qemu/range.h" -#ifndef _WIN32 -#include "qemu/mmap-alloc.h" -#endif - -#include "monitor/monitor.h" - -#ifdef CONFIG_LIBDAXCTL -#include -#endif - -//#define DEBUG_SUBPAGE - -/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes - * are protected by the ramlist lock. - */ -RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; - -static MemoryRegion *system_memory; -static MemoryRegion *system_io; - -AddressSpace address_space_io; -AddressSpace address_space_memory; - -static MemoryRegion io_mem_unassigned; - -typedef struct PhysPageEntry PhysPageEntry; - -struct PhysPageEntry { - /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. 
*/ - uint32_t skip : 6; - /* index into phys_sections (!skip) or phys_map_nodes (skip) */ - uint32_t ptr : 26; -}; - -#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6) - -/* Size of the L2 (and L3, etc) page tables. */ -#define ADDR_SPACE_BITS 64 - -#define P_L2_BITS 9 -#define P_L2_SIZE (1 << P_L2_BITS) - -#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1) - -typedef PhysPageEntry Node[P_L2_SIZE]; - -typedef struct PhysPageMap { - struct rcu_head rcu; - - unsigned sections_nb; - unsigned sections_nb_alloc; - unsigned nodes_nb; - unsigned nodes_nb_alloc; - Node *nodes; - MemoryRegionSection *sections; -} PhysPageMap; - -struct AddressSpaceDispatch { - MemoryRegionSection *mru_section; - /* This is a multi-level map on the physical address space. - * The bottom level has pointers to MemoryRegionSections. - */ - PhysPageEntry phys_map; - PhysPageMap map; -}; - -#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK) -typedef struct subpage_t { - MemoryRegion iomem; - FlatView *fv; - hwaddr base; - uint16_t sub_section[]; -} subpage_t; - -#define PHYS_SECTION_UNASSIGNED 0 - -static void io_mem_init(void); -static void memory_map_init(void); -static void tcg_log_global_after_sync(MemoryListener *listener); -static void tcg_commit(MemoryListener *listener); - -/** - * CPUAddressSpace: all the information a CPU needs about an AddressSpace - * @cpu: the CPU whose AddressSpace this is - * @as: the AddressSpace itself - * @memory_dispatch: its dispatch pointer (cached, RCU protected) - * @tcg_as_listener: listener for tracking changes to the AddressSpace - */ -struct CPUAddressSpace { - CPUState *cpu; - AddressSpace *as; - struct AddressSpaceDispatch *memory_dispatch; - MemoryListener tcg_as_listener; -}; - -struct DirtyBitmapSnapshot { - ram_addr_t start; - ram_addr_t end; - unsigned long dirty[]; -}; - -static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) -{ - static unsigned alloc_hint = 16; - if (map->nodes_nb + nodes > 
map->nodes_nb_alloc) { - map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes); - map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); - alloc_hint = map->nodes_nb_alloc; - } -} - -static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) -{ - unsigned i; - uint32_t ret; - PhysPageEntry e; - PhysPageEntry *p; - - ret = map->nodes_nb++; - p = map->nodes[ret]; - assert(ret != PHYS_MAP_NODE_NIL); - assert(ret != map->nodes_nb_alloc); - - e.skip = leaf ? 0 : 1; - e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL; - for (i = 0; i < P_L2_SIZE; ++i) { - memcpy(&p[i], &e, sizeof(e)); - } - return ret; -} - -static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, - hwaddr *index, uint64_t *nb, uint16_t leaf, - int level) -{ - PhysPageEntry *p; - hwaddr step = (hwaddr)1 << (level * P_L2_BITS); - - if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) { - lp->ptr = phys_map_node_alloc(map, level == 0); - } - p = map->nodes[lp->ptr]; - lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)]; - - while (*nb && lp < &p[P_L2_SIZE]) { - if ((*index & (step - 1)) == 0 && *nb >= step) { - lp->skip = 0; - lp->ptr = leaf; - *index += step; - *nb -= step; - } else { - phys_page_set_level(map, lp, index, nb, leaf, level - 1); - } - ++lp; - } -} - -static void phys_page_set(AddressSpaceDispatch *d, - hwaddr index, uint64_t nb, - uint16_t leaf) -{ - /* Wildly overreserve - it doesn't matter much. */ - phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS); - - phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); -} - -/* Compact a non leaf page entry. Simply detect that the entry has a single child, - * and update our entry so we can skip it and go directly to the destination. 
- */ -static void phys_page_compact(PhysPageEntry *lp, Node *nodes) -{ - unsigned valid_ptr = P_L2_SIZE; - int valid = 0; - PhysPageEntry *p; - int i; - - if (lp->ptr == PHYS_MAP_NODE_NIL) { - return; - } - - p = nodes[lp->ptr]; - for (i = 0; i < P_L2_SIZE; i++) { - if (p[i].ptr == PHYS_MAP_NODE_NIL) { - continue; - } - - valid_ptr = i; - valid++; - if (p[i].skip) { - phys_page_compact(&p[i], nodes); - } - } - - /* We can only compress if there's only one child. */ - if (valid != 1) { - return; - } - - assert(valid_ptr < P_L2_SIZE); - - /* Don't compress if it won't fit in the # of bits we have. */ - if (P_L2_LEVELS >= (1 << 6) && - lp->skip + p[valid_ptr].skip >= (1 << 6)) { - return; - } - - lp->ptr = p[valid_ptr].ptr; - if (!p[valid_ptr].skip) { - /* If our only child is a leaf, make this a leaf. */ - /* By design, we should have made this node a leaf to begin with so we - * should never reach here. - * But since it's so simple to handle this, let's do it just in case we - * change this rule. - */ - lp->skip = 0; - } else { - lp->skip += p[valid_ptr].skip; - } -} - -void address_space_dispatch_compact(AddressSpaceDispatch *d) -{ - if (d->phys_map.skip) { - phys_page_compact(&d->phys_map, d->map.nodes); - } -} - -static inline bool section_covers_addr(const MemoryRegionSection *section, - hwaddr addr) -{ - /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means - * the section must cover the entire address space. 
- */ - return int128_gethi(section->size) || - range_covers_byte(section->offset_within_address_space, - int128_getlo(section->size), addr); -} - -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) -{ - PhysPageEntry lp = d->phys_map, *p; - Node *nodes = d->map.nodes; - MemoryRegionSection *sections = d->map.sections; - hwaddr index = addr >> TARGET_PAGE_BITS; - int i; - - for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) { - if (lp.ptr == PHYS_MAP_NODE_NIL) { - return &sections[PHYS_SECTION_UNASSIGNED]; - } - p = nodes[lp.ptr]; - lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)]; - } - - if (section_covers_addr(&sections[lp.ptr], addr)) { - return &sections[lp.ptr]; - } else { - return &sections[PHYS_SECTION_UNASSIGNED]; - } -} - -/* Called from RCU critical section */ -static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, - hwaddr addr, - bool resolve_subpage) -{ - MemoryRegionSection *section = qatomic_read(&d->mru_section); - subpage_t *subpage; - - if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] || - !section_covers_addr(section, addr)) { - section = phys_page_find(d, addr); - qatomic_set(&d->mru_section, section); - } - if (resolve_subpage && section->mr->subpage) { - subpage = container_of(section->mr, subpage_t, iomem); - section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]]; - } - return section; -} - -/* Called from RCU critical section */ -static MemoryRegionSection * -address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, - hwaddr *plen, bool resolve_subpage) -{ - MemoryRegionSection *section; - MemoryRegion *mr; - Int128 diff; - - section = address_space_lookup_region(d, addr, resolve_subpage); - /* Compute offset within MemoryRegionSection */ - addr -= section->offset_within_address_space; - - /* Compute offset within MemoryRegion */ - *xlat = addr + section->offset_within_region; - - mr = section->mr; - - /* MMIO registers can be expected
to perform full-width accesses based only - * on their address, without considering adjacent registers that could - * decode to completely different MemoryRegions. When such registers - * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO - * regions overlap wildly. For this reason we cannot clamp the accesses - * here. - * - * If the length is small (as is the case for address_space_ldl/stl), - * everything works fine. If the incoming length is large, however, - * the caller really has to do the clamping through memory_access_size. - */ - if (memory_region_is_ram(mr)) { - diff = int128_sub(section->size, int128_make64(addr)); - *plen = int128_get64(int128_min(diff, int128_make64(*plen))); - } - return section; -} - -/** - * address_space_translate_iommu - translate an address through an IOMMU - * memory region and then through the target address space. - * - * @iommu_mr: the IOMMU memory region that we start the translation from - * @addr: the address to be translated through the MMU - * @xlat: the translated address offset within the destination memory region. - * It cannot be %NULL. - * @plen_out: valid read/write length of the translated address. It - * cannot be %NULL. - * @page_mask_out: page mask for the translated address. This - * should only be meaningful for IOMMU translated - * addresses, since there may be huge pages that this bit - * would tell. It can be %NULL if we don't care about it. - * @is_write: whether the translation operation is for write - * @is_mmio: whether this can be MMIO, set true if it can - * @target_as: the address space targeted by the IOMMU - * @attrs: transaction attributes - * - * This function is called from RCU critical section. It is the common - * part of flatview_do_translate and address_space_translate_cached. 
- */ -static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr, - hwaddr *xlat, - hwaddr *plen_out, - hwaddr *page_mask_out, - bool is_write, - bool is_mmio, - AddressSpace **target_as, - MemTxAttrs attrs) -{ - MemoryRegionSection *section; - hwaddr page_mask = (hwaddr)-1; - - do { - hwaddr addr = *xlat; - IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr); - int iommu_idx = 0; - IOMMUTLBEntry iotlb; - - if (imrc->attrs_to_index) { - iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); - } - - iotlb = imrc->translate(iommu_mr, addr, is_write ? - IOMMU_WO : IOMMU_RO, iommu_idx); - - if (!(iotlb.perm & (1 << is_write))) { - goto unassigned; - } - - addr = ((iotlb.translated_addr & ~iotlb.addr_mask) - | (addr & iotlb.addr_mask)); - page_mask &= iotlb.addr_mask; - *plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1); - *target_as = iotlb.target_as; - - section = address_space_translate_internal( - address_space_to_dispatch(iotlb.target_as), addr, xlat, - plen_out, is_mmio); - - iommu_mr = memory_region_get_iommu(section->mr); - } while (unlikely(iommu_mr)); - - if (page_mask_out) { - *page_mask_out = page_mask; - } - return *section; - -unassigned: - return (MemoryRegionSection) { .mr = &io_mem_unassigned }; -} - -/** - * flatview_do_translate - translate an address in FlatView - * - * @fv: the flat view that we want to translate on - * @addr: the address to be translated in above address space - * @xlat: the translated address offset within memory region. It - * cannot be @NULL. - * @plen_out: valid read/write length of the translated address. It - * can be @NULL when we don't care about it. - * @page_mask_out: page mask for the translated address. This - * should only be meaningful for IOMMU translated - * addresses, since there may be huge pages that this bit - * would tell. It can be @NULL if we don't care about it. 
- * @is_write: whether the translation operation is for write - * @is_mmio: whether this can be MMIO, set true if it can - * @target_as: the address space targeted by the IOMMU - * @attrs: memory transaction attributes - * - * This function is called from RCU critical section - */ -static MemoryRegionSection flatview_do_translate(FlatView *fv, - hwaddr addr, - hwaddr *xlat, - hwaddr *plen_out, - hwaddr *page_mask_out, - bool is_write, - bool is_mmio, - AddressSpace **target_as, - MemTxAttrs attrs) -{ - MemoryRegionSection *section; - IOMMUMemoryRegion *iommu_mr; - hwaddr plen = (hwaddr)(-1); - - if (!plen_out) { - plen_out = &plen; - } - - section = address_space_translate_internal( - flatview_to_dispatch(fv), addr, xlat, - plen_out, is_mmio); - - iommu_mr = memory_region_get_iommu(section->mr); - if (unlikely(iommu_mr)) { - return address_space_translate_iommu(iommu_mr, xlat, - plen_out, page_mask_out, - is_write, is_mmio, - target_as, attrs); - } - if (page_mask_out) { - /* Not behind an IOMMU, use default page size. */ - *page_mask_out = ~TARGET_PAGE_MASK; - } - - return *section; -} - -/* Called from RCU critical section */ -IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, - bool is_write, MemTxAttrs attrs) -{ - MemoryRegionSection section; - hwaddr xlat, page_mask; - - /* - * This can never be MMIO, and we don't really care about plen, - * but page mask. - */ - section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat, - NULL, &page_mask, is_write, false, &as, - attrs); - - /* Illegal translation */ - if (section.mr == &io_mem_unassigned) { - goto iotlb_fail; - } - - /* Convert memory region offset into address space offset */ - xlat += section.offset_within_address_space - - section.offset_within_region; - - return (IOMMUTLBEntry) { - .target_as = as, - .iova = addr & ~page_mask, - .translated_addr = xlat & ~page_mask, - .addr_mask = page_mask, - /* IOTLBs are for DMAs, and DMA only allows on RAMs. 
*/ - .perm = IOMMU_RW, - }; - -iotlb_fail: - return (IOMMUTLBEntry) {0}; -} - -/* Called from RCU critical section */ -MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, - hwaddr *plen, bool is_write, - MemTxAttrs attrs) -{ - MemoryRegion *mr; - MemoryRegionSection section; - AddressSpace *as = NULL; - - /* This can be MMIO, so setup MMIO bit. */ - section = flatview_do_translate(fv, addr, xlat, plen, NULL, - is_write, true, &as, attrs); - mr = section.mr; - - if (xen_enabled() && memory_access_is_direct(mr, is_write)) { - hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr; - *plen = MIN(page, *plen); - } - - return mr; -} - -typedef struct TCGIOMMUNotifier { - IOMMUNotifier n; - MemoryRegion *mr; - CPUState *cpu; - int iommu_idx; - bool active; -} TCGIOMMUNotifier; - -static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) -{ - TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n); - - if (!notifier->active) { - return; - } - tlb_flush(notifier->cpu); - notifier->active = false; - /* We leave the notifier struct on the list to avoid reallocating it later. - * Generally the number of IOMMUs a CPU deals with will be small. - * In any case we can't unregister the iommu notifier from a notify - * callback. - */ -} - -static void tcg_register_iommu_notifier(CPUState *cpu, - IOMMUMemoryRegion *iommu_mr, - int iommu_idx) -{ - /* Make sure this CPU has an IOMMU notifier registered for this - * IOMMU/IOMMU index combination, so that we can flush its TLB - * when the IOMMU tells us the mappings we've cached have changed. 
- */ - MemoryRegion *mr = MEMORY_REGION(iommu_mr); - TCGIOMMUNotifier *notifier = NULL; - int i; - - for (i = 0; i < cpu->iommu_notifiers->len; i++) { - notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); - if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) { - break; - } - } - if (i == cpu->iommu_notifiers->len) { - /* Not found, add a new entry at the end of the array */ - cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1); - notifier = g_new0(TCGIOMMUNotifier, 1); - g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier; - - notifier->mr = mr; - notifier->iommu_idx = iommu_idx; - notifier->cpu = cpu; - /* Rather than trying to register interest in the specific part - * of the iommu's address space that we've accessed and then - * expand it later as subsequent accesses touch more of it, we - * just register interest in the whole thing, on the assumption - * that iommu reconfiguration will be rare. - */ - iommu_notifier_init(&notifier->n, - tcg_iommu_unmap_notify, - IOMMU_NOTIFIER_UNMAP, - 0, - HWADDR_MAX, - iommu_idx); - memory_region_register_iommu_notifier(notifier->mr, &notifier->n, - &error_fatal); - } - - if (!notifier->active) { - notifier->active = true; - } -} - -void tcg_iommu_free_notifier_list(CPUState *cpu) -{ - /* Destroy the CPU's notifier list */ - int i; - TCGIOMMUNotifier *notifier; - - for (i = 0; i < cpu->iommu_notifiers->len; i++) { - notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); - memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n); - g_free(notifier); - } - g_array_free(cpu->iommu_notifiers, true); -} - -void tcg_iommu_init_notifier_list(CPUState *cpu) -{ - cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *)); -} - -/* Called from RCU critical section */ -MemoryRegionSection * -address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr, - hwaddr *xlat, hwaddr *plen, - MemTxAttrs attrs, int *prot) -{ -
MemoryRegionSection *section; - IOMMUMemoryRegion *iommu_mr; - IOMMUMemoryRegionClass *imrc; - IOMMUTLBEntry iotlb; - int iommu_idx; - hwaddr addr = orig_addr; - AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; - - for (;;) { - section = address_space_translate_internal(d, addr, &addr, plen, false); - - iommu_mr = memory_region_get_iommu(section->mr); - if (!iommu_mr) { - break; - } - - imrc = memory_region_get_iommu_class_nocheck(iommu_mr); - - iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); - tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx); - /* We need all the permissions, so pass IOMMU_NONE so the IOMMU - * doesn't short-cut its translation table walk. - */ - iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx); - addr = ((iotlb.translated_addr & ~iotlb.addr_mask) - | (addr & iotlb.addr_mask)); - /* Update the caller's prot bits to remove permissions the IOMMU - * is giving us a failure response for. If we get down to no - * permissions left at all we can give up now. - */ - if (!(iotlb.perm & IOMMU_RO)) { - *prot &= ~(PAGE_READ | PAGE_EXEC); - } - if (!(iotlb.perm & IOMMU_WO)) { - *prot &= ~PAGE_WRITE; - } - - if (!*prot) { - goto translate_fail; - } - - d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as)); - } - - assert(!memory_region_is_iommu(section->mr)); - *xlat = addr; - return section; - -translate_fail: - /* - * We should be given a page-aligned address -- certainly - * tlb_set_page_with_attrs() does so. The page offset of xlat - * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0. - * The page portion of xlat will be logged by memory_region_access_valid() - * when this memory access is rejected, so use the original untranslated - * physical address. 
- */ - assert((orig_addr & ~TARGET_PAGE_MASK) == 0); - *xlat = orig_addr; - return &d->map.sections[PHYS_SECTION_UNASSIGNED]; -} - -void cpu_address_space_init(CPUState *cpu, int asidx, - const char *prefix, MemoryRegion *mr) -{ - CPUAddressSpace *newas; - AddressSpace *as = g_new0(AddressSpace, 1); - char *as_name; - - assert(mr); - as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index); - address_space_init(as, mr, as_name); - g_free(as_name); - - /* Target code should have set num_ases before calling us */ - assert(asidx < cpu->num_ases); - - if (asidx == 0) { - /* address space 0 gets the convenience alias */ - cpu->as = as; - } - - /* KVM cannot currently support multiple address spaces. */ - assert(asidx == 0 || !kvm_enabled()); - - if (!cpu->cpu_ases) { - cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); - } - - newas = &cpu->cpu_ases[asidx]; - newas->cpu = cpu; - newas->as = as; - if (tcg_enabled()) { - newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync; - newas->tcg_as_listener.commit = tcg_commit; - newas->tcg_as_listener.name = "tcg"; - memory_listener_register(&newas->tcg_as_listener, as); - } -} - -AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) -{ - /* Return the AddressSpace corresponding to the specified index */ - return cpu->cpu_ases[asidx].as; -} - -/* Called from RCU critical section */ -static RAMBlock *qemu_get_ram_block(ram_addr_t addr) -{ - RAMBlock *block; - - block = qatomic_rcu_read(&ram_list.mru_block); - if (block && addr - block->offset < block->max_length) { - return block; - } - RAMBLOCK_FOREACH(block) { - if (addr - block->offset < block->max_length) { - goto found; - } - } - - fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); - abort(); - -found: - /* It is safe to write mru_block outside the iothread lock. 
This - * is what happens: - * - * mru_block = xxx - * rcu_read_unlock() - * xxx removed from list - * rcu_read_lock() - * read mru_block - * mru_block = NULL; - * call_rcu(reclaim_ramblock, xxx); - * rcu_read_unlock() - * - * qatomic_rcu_set is not needed here. The block was already published - * when it was placed into the list. Here we're just making an extra - * copy of the pointer. - */ - ram_list.mru_block = block; - return block; -} - -static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) -{ - CPUState *cpu; - ram_addr_t start1; - RAMBlock *block; - ram_addr_t end; - - assert(tcg_enabled()); - end = TARGET_PAGE_ALIGN(start + length); - start &= TARGET_PAGE_MASK; - - RCU_READ_LOCK_GUARD(); - block = qemu_get_ram_block(start); - assert(block == qemu_get_ram_block(end - 1)); - start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); - CPU_FOREACH(cpu) { - tlb_reset_dirty(cpu, start1, length); - } -} - -/* Note: start and end must be within the same ram block. 
*/ -bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, - ram_addr_t length, - unsigned client) -{ - DirtyMemoryBlocks *blocks; - unsigned long end, page, start_page; - bool dirty = false; - RAMBlock *ramblock; - uint64_t mr_offset, mr_size; - - if (length == 0) { - return false; - } - - end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; - start_page = start >> TARGET_PAGE_BITS; - page = start_page; - - WITH_RCU_READ_LOCK_GUARD() { - blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); - ramblock = qemu_get_ram_block(start); - /* Range sanity check on the ramblock */ - assert(start >= ramblock->offset && - start + length <= ramblock->offset + ramblock->used_length); - - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, - DIRTY_MEMORY_BLOCK_SIZE - offset); - - dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], - offset, num); - page += num; - } - - mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset; - mr_size = (end - start_page) << TARGET_PAGE_BITS; - memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); - } - - if (dirty && tcg_enabled()) { - tlb_reset_dirty_range_all(start, length); - } - - return dirty; -} - -DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty - (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client) -{ - DirtyMemoryBlocks *blocks; - ram_addr_t start = memory_region_get_ram_addr(mr) + offset; - unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); - ram_addr_t first = QEMU_ALIGN_DOWN(start, align); - ram_addr_t last = QEMU_ALIGN_UP(start + length, align); - DirtyBitmapSnapshot *snap; - unsigned long page, end, dest; - - snap = g_malloc0(sizeof(*snap) + - ((last - first) >> (TARGET_PAGE_BITS + 3))); - snap->start = first; - snap->end = last; - - page = first >> TARGET_PAGE_BITS; - end = last >> TARGET_PAGE_BITS; - dest 
= 0; - - WITH_RCU_READ_LOCK_GUARD() { - blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); - - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long ofs = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, - DIRTY_MEMORY_BLOCK_SIZE - ofs); - - assert(QEMU_IS_ALIGNED(ofs, (1 << BITS_PER_LEVEL))); - assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); - ofs >>= BITS_PER_LEVEL; - - bitmap_copy_and_clear_atomic(snap->dirty + dest, - blocks->blocks[idx] + ofs, - num); - page += num; - dest += num >> BITS_PER_LEVEL; - } - } - - if (tcg_enabled()) { - tlb_reset_dirty_range_all(start, length); - } - - memory_region_clear_dirty_bitmap(mr, offset, length); - - return snap; -} - -bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, - ram_addr_t start, - ram_addr_t length) -{ - unsigned long page, end; - - assert(start >= snap->start); - assert(start + length <= snap->end); - - end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS; - page = (start - snap->start) >> TARGET_PAGE_BITS; - - while (page < end) { - if (test_bit(page, snap->dirty)) { - return true; - } - page++; - } - return false; -} - -/* Called from RCU critical section */ -hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section) -{ - AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); - return section - d->map.sections; -} - -static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section); -static subpage_t *subpage_init(FlatView *fv, hwaddr base); - -static uint16_t phys_section_add(PhysPageMap *map, - MemoryRegionSection *section) -{ - /* The physical section number is ORed with a page-aligned - * pointer to produce the iotlb entries. Thus it should - * never overflow into the page-aligned value. 
- */ - assert(map->sections_nb < TARGET_PAGE_SIZE); - - if (map->sections_nb == map->sections_nb_alloc) { - map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16); - map->sections = g_renew(MemoryRegionSection, map->sections, - map->sections_nb_alloc); - } - map->sections[map->sections_nb] = *section; - memory_region_ref(section->mr); - return map->sections_nb++; -} - -static void phys_section_destroy(MemoryRegion *mr) -{ - bool have_sub_page = mr->subpage; - - memory_region_unref(mr); - - if (have_sub_page) { - subpage_t *subpage = container_of(mr, subpage_t, iomem); - object_unref(OBJECT(&subpage->iomem)); - g_free(subpage); - } -} - -static void phys_sections_free(PhysPageMap *map) -{ - while (map->sections_nb > 0) { - MemoryRegionSection *section = &map->sections[--map->sections_nb]; - phys_section_destroy(section->mr); - } - g_free(map->sections); - g_free(map->nodes); -} - -static void register_subpage(FlatView *fv, MemoryRegionSection *section) -{ - AddressSpaceDispatch *d = flatview_to_dispatch(fv); - subpage_t *subpage; - hwaddr base = section->offset_within_address_space - & TARGET_PAGE_MASK; - MemoryRegionSection *existing = phys_page_find(d, base); - MemoryRegionSection subsection = { - .offset_within_address_space = base, - .size = int128_make64(TARGET_PAGE_SIZE), - }; - hwaddr start, end; - - assert(existing->mr->subpage || existing->mr == &io_mem_unassigned); - - if (!(existing->mr->subpage)) { - subpage = subpage_init(fv, base); - subsection.fv = fv; - subsection.mr = &subpage->iomem; - phys_page_set(d, base >> TARGET_PAGE_BITS, 1, - phys_section_add(&d->map, &subsection)); - } else { - subpage = container_of(existing->mr, subpage_t, iomem); - } - start = section->offset_within_address_space & ~TARGET_PAGE_MASK; - end = start + int128_get64(section->size) - 1; - subpage_register(subpage, start, end, - phys_section_add(&d->map, section)); -} - - -static void register_multipage(FlatView *fv, - MemoryRegionSection *section) -{ - 
AddressSpaceDispatch *d = flatview_to_dispatch(fv); - hwaddr start_addr = section->offset_within_address_space; - uint16_t section_index = phys_section_add(&d->map, section); - uint64_t num_pages = int128_get64(int128_rshift(section->size, - TARGET_PAGE_BITS)); - - assert(num_pages); - phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index); -} - -/* - * The range in *section* may look like this: - * - * |s|PPPPPPP|s| - * - * where s stands for subpage and P for page. - */ -void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section) -{ - MemoryRegionSection remain = *section; - Int128 page_size = int128_make64(TARGET_PAGE_SIZE); - - /* register first subpage */ - if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) { - uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space) - - remain.offset_within_address_space; - - MemoryRegionSection now = remain; - now.size = int128_min(int128_make64(left), now.size); - register_subpage(fv, &now); - if (int128_eq(remain.size, now.size)) { - return; - } - remain.size = int128_sub(remain.size, now.size); - remain.offset_within_address_space += int128_get64(now.size); - remain.offset_within_region += int128_get64(now.size); - } - - /* register whole pages */ - if (int128_ge(remain.size, page_size)) { - MemoryRegionSection now = remain; - now.size = int128_and(now.size, int128_neg(page_size)); - register_multipage(fv, &now); - if (int128_eq(remain.size, now.size)) { - return; - } - remain.size = int128_sub(remain.size, now.size); - remain.offset_within_address_space += int128_get64(now.size); - remain.offset_within_region += int128_get64(now.size); - } - - /* register last subpage */ - register_subpage(fv, &remain); -} - -void qemu_flush_coalesced_mmio_buffer(void) -{ - if (kvm_enabled()) - kvm_flush_coalesced_mmio_buffer(); -} - -void qemu_mutex_lock_ramlist(void) -{ - qemu_mutex_lock(&ram_list.mutex); -} - -void qemu_mutex_unlock_ramlist(void) -{ - 
qemu_mutex_unlock(&ram_list.mutex); -} - -GString *ram_block_format(void) -{ - RAMBlock *block; - char *psize; - GString *buf = g_string_new(""); - - RCU_READ_LOCK_GUARD(); - g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n", - "Block Name", "PSize", "Offset", "Used", "Total", - "HVA", "RO"); - - RAMBLOCK_FOREACH(block) { - psize = size_to_str(block->page_size); - g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64 - " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n", - block->idstr, psize, - (uint64_t)block->offset, - (uint64_t)block->used_length, - (uint64_t)block->max_length, - (uint64_t)(uintptr_t)block->host, - block->mr->readonly ? "ro" : "rw"); - - g_free(psize); - } - - return buf; -} - -static int find_min_backend_pagesize(Object *obj, void *opaque) -{ - long *hpsize_min = opaque; - - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { - HostMemoryBackend *backend = MEMORY_BACKEND(obj); - long hpsize = host_memory_backend_pagesize(backend); - - if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) { - *hpsize_min = hpsize; - } - } - - return 0; -} - -static int find_max_backend_pagesize(Object *obj, void *opaque) -{ - long *hpsize_max = opaque; - - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { - HostMemoryBackend *backend = MEMORY_BACKEND(obj); - long hpsize = host_memory_backend_pagesize(backend); - - if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) { - *hpsize_max = hpsize; - } - } - - return 0; -} - -/* - * TODO: We assume right now that all mapped host memory backends are - * used as RAM, however some might be used for different purposes. 
- */ -long qemu_minrampagesize(void) -{ - long hpsize = LONG_MAX; - Object *memdev_root = object_resolve_path("/objects", NULL); - - object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize); - return hpsize; -} - -long qemu_maxrampagesize(void) -{ - long pagesize = 0; - Object *memdev_root = object_resolve_path("/objects", NULL); - - object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize); - return pagesize; -} - -#ifdef CONFIG_POSIX -static int64_t get_file_size(int fd) -{ - int64_t size; -#if defined(__linux__) - struct stat st; - - if (fstat(fd, &st) < 0) { - return -errno; - } - - /* Special handling for devdax character devices */ - if (S_ISCHR(st.st_mode)) { - g_autofree char *subsystem_path = NULL; - g_autofree char *subsystem = NULL; - - subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", - major(st.st_rdev), minor(st.st_rdev)); - subsystem = g_file_read_link(subsystem_path, NULL); - - if (subsystem && g_str_has_suffix(subsystem, "/dax")) { - g_autofree char *size_path = NULL; - g_autofree char *size_str = NULL; - - size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", - major(st.st_rdev), minor(st.st_rdev)); - - if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { - return g_ascii_strtoll(size_str, NULL, 0); - } - } - } -#endif /* defined(__linux__) */ - - /* st.st_size may be zero for special files yet lseek(2) works */ - size = lseek(fd, 0, SEEK_END); - if (size < 0) { - return -errno; - } - return size; -} - -static int64_t get_file_align(int fd) -{ - int64_t align = -1; -#if defined(__linux__) && defined(CONFIG_LIBDAXCTL) - struct stat st; - - if (fstat(fd, &st) < 0) { - return -errno; - } - - /* Special handling for devdax character devices */ - if (S_ISCHR(st.st_mode)) { - g_autofree char *path = NULL; - g_autofree char *rpath = NULL; - struct daxctl_ctx *ctx; - struct daxctl_region *region; - int rc = 0; - - path = g_strdup_printf("/sys/dev/char/%d:%d", - major(st.st_rdev), minor(st.st_rdev)); - 
rpath = realpath(path, NULL); - if (!rpath) { - return -errno; - } - - rc = daxctl_new(&ctx); - if (rc) { - return -1; - } - - daxctl_region_foreach(ctx, region) { - if (strstr(rpath, daxctl_region_get_path(region))) { - align = daxctl_region_get_align(region); - break; - } - } - daxctl_unref(ctx); - } -#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */ - - return align; -} - -static int file_ram_open(const char *path, - const char *region_name, - bool readonly, - bool *created) -{ - char *filename; - char *sanitized_name; - char *c; - int fd = -1; - - *created = false; - for (;;) { - fd = open(path, readonly ? O_RDONLY : O_RDWR); - if (fd >= 0) { - /* - * open(O_RDONLY) won't fail with EISDIR. Check manually if we - * opened a directory and fail similarly to how we fail ENOENT - * in readonly mode. Note that mkstemp() would imply O_RDWR. - */ - if (readonly) { - struct stat file_stat; - - if (fstat(fd, &file_stat)) { - close(fd); - if (errno == EINTR) { - continue; - } - return -errno; - } else if (S_ISDIR(file_stat.st_mode)) { - close(fd); - return -EISDIR; - } - } - /* @path names an existing file, use it */ - break; - } - if (errno == ENOENT) { - if (readonly) { - /* Refuse to create new, readonly files. */ - return -ENOENT; - } - /* @path names a file that doesn't exist, create it */ - fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); - if (fd >= 0) { - *created = true; - break; - } - } else if (errno == EISDIR) { - /* @path names a directory, create a file there */ - /* Make name safe to use with mkstemp by replacing '/' with '_'. 
*/ - sanitized_name = g_strdup(region_name); - for (c = sanitized_name; *c != '\0'; c++) { - if (*c == '/') { - *c = '_'; - } - } - - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, - sanitized_name); - g_free(sanitized_name); - - fd = mkstemp(filename); - if (fd >= 0) { - unlink(filename); - g_free(filename); - break; - } - g_free(filename); - } - if (errno != EEXIST && errno != EINTR) { - return -errno; - } - /* - * Try again on EINTR and EEXIST. The latter happens when - * something else creates the file between our two open(). - */ - } - - return fd; -} - -static void *file_ram_alloc(RAMBlock *block, - ram_addr_t memory, - int fd, - bool truncate, - off_t offset, - Error **errp) -{ - uint32_t qemu_map_flags; - void *area; - - block->page_size = qemu_fd_getpagesize(fd); - if (block->mr->align % block->page_size) { - error_setg(errp, "alignment 0x%" PRIx64 - " must be multiples of page size 0x%zx", - block->mr->align, block->page_size); - return NULL; - } else if (block->mr->align && !is_power_of_2(block->mr->align)) { - error_setg(errp, "alignment 0x%" PRIx64 - " must be a power of two", block->mr->align); - return NULL; - } else if (offset % block->page_size) { - error_setg(errp, "offset 0x%" PRIx64 - " must be multiples of page size 0x%zx", - offset, block->page_size); - return NULL; - } - block->mr->align = MAX(block->page_size, block->mr->align); -#if defined(__s390x__) - if (kvm_enabled()) { - block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN); - } -#endif - - if (memory < block->page_size) { - error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " - "or larger than page size 0x%zx", - memory, block->page_size); - return NULL; - } - - memory = ROUND_UP(memory, block->page_size); - - /* - * ftruncate is not supported by hugetlbfs in older - * hosts, so don't bother bailing out on errors. - * If anything goes wrong with it under other filesystems, - * mmap will fail. 
- * - * Do not truncate the non-empty backend file to avoid corrupting - * the existing data in the file. Disabling shrinking is not - * enough. For example, the current vNVDIMM implementation stores - * the guest NVDIMM labels at the end of the backend file. If the - * backend file is later extended, QEMU will not be able to find - * those labels. Therefore, extending the non-empty backend file - * is disabled as well. - */ - if (truncate && ftruncate(fd, offset + memory)) { - perror("ftruncate"); - } - - qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0; - qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0; - qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0; - qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0; - area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset); - if (area == MAP_FAILED) { - error_setg_errno(errp, errno, - "unable to map backing store for guest RAM"); - return NULL; - } - - block->fd = fd; - block->fd_offset = offset; - return area; -} -#endif - -/* Allocate space within the ram_addr_t space that governs the - * dirty bitmaps. - * Called with the ramlist lock held. - */ -static ram_addr_t find_ram_offset(ram_addr_t size) -{ - RAMBlock *block, *next_block; - ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX; - - assert(size != 0); /* it would hand out same offset multiple times */ - - if (QLIST_EMPTY_RCU(&ram_list.blocks)) { - return 0; - } - - RAMBLOCK_FOREACH(block) { - ram_addr_t candidate, next = RAM_ADDR_MAX; - - /* Align blocks to start on a 'long' in the bitmap - * which makes the bitmap sync'ing take the fast path. - */ - candidate = block->offset + block->max_length; - candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS); - - /* Search for the closest following block - * and find the gap. 
- */ - RAMBLOCK_FOREACH(next_block) { - if (next_block->offset >= candidate) { - next = MIN(next, next_block->offset); - } - } - - /* If it fits remember our place and remember the size - * of gap, but keep going so that we might find a smaller - * gap to fill so avoiding fragmentation. - */ - if (next - candidate >= size && next - candidate < mingap) { - offset = candidate; - mingap = next - candidate; - } - - trace_find_ram_offset_loop(size, candidate, offset, next, mingap); - } - - if (offset == RAM_ADDR_MAX) { - fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n", - (uint64_t)size); - abort(); - } - - trace_find_ram_offset(size, offset); - - return offset; -} - -static unsigned long last_ram_page(void) -{ - RAMBlock *block; - ram_addr_t last = 0; - - RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH(block) { - last = MAX(last, block->offset + block->max_length); - } - return last >> TARGET_PAGE_BITS; -} - -static void qemu_ram_setup_dump(void *addr, ram_addr_t size) -{ - int ret; - - /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */ - if (!machine_dump_guest_core(current_machine)) { - ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP); - if (ret) { - perror("qemu_madvise"); - fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, " - "but dump_guest_core=off specified\n"); - } - } -} - -const char *qemu_ram_get_idstr(RAMBlock *rb) -{ - return rb->idstr; -} - -void *qemu_ram_get_host_addr(RAMBlock *rb) -{ - return rb->host; -} - -ram_addr_t qemu_ram_get_offset(RAMBlock *rb) -{ - return rb->offset; -} - -ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) -{ - return rb->used_length; -} - -ram_addr_t qemu_ram_get_max_length(RAMBlock *rb) -{ - return rb->max_length; -} - -bool qemu_ram_is_shared(RAMBlock *rb) -{ - return rb->flags & RAM_SHARED; -} - -bool qemu_ram_is_noreserve(RAMBlock *rb) -{ - return rb->flags & RAM_NORESERVE; -} - -/* Note: Only set at the start of postcopy */ -bool qemu_ram_is_uf_zeroable(RAMBlock *rb) -{ - 
return rb->flags & RAM_UF_ZEROPAGE; -} - -void qemu_ram_set_uf_zeroable(RAMBlock *rb) -{ - rb->flags |= RAM_UF_ZEROPAGE; -} - -bool qemu_ram_is_migratable(RAMBlock *rb) -{ - return rb->flags & RAM_MIGRATABLE; -} - -void qemu_ram_set_migratable(RAMBlock *rb) -{ - rb->flags |= RAM_MIGRATABLE; -} - -void qemu_ram_unset_migratable(RAMBlock *rb) -{ - rb->flags &= ~RAM_MIGRATABLE; -} - -bool qemu_ram_is_named_file(RAMBlock *rb) -{ - return rb->flags & RAM_NAMED_FILE; -} - -int qemu_ram_get_fd(RAMBlock *rb) -{ - return rb->fd; -} - -/* Called with iothread lock held. */ -void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) -{ - RAMBlock *block; - - assert(new_block); - assert(!new_block->idstr[0]); - - if (dev) { - char *id = qdev_get_dev_path(dev); - if (id) { - snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id); - g_free(id); - } - } - pstrcat(new_block->idstr, sizeof(new_block->idstr), name); - - RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH(block) { - if (block != new_block && - !strcmp(block->idstr, new_block->idstr)) { - fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", - new_block->idstr); - abort(); - } - } -} - -/* Called with iothread lock held. */ -void qemu_ram_unset_idstr(RAMBlock *block) -{ - /* FIXME: arch_init.c assumes that this is not called throughout - * migration. Ignore the problem since hot-unplug during migration - * does not work anyway. 
- */ - if (block) { - memset(block->idstr, 0, sizeof(block->idstr)); - } -} - -size_t qemu_ram_pagesize(RAMBlock *rb) -{ - return rb->page_size; -} - -/* Returns the largest size of page in use */ -size_t qemu_ram_pagesize_largest(void) -{ - RAMBlock *block; - size_t largest = 0; - - RAMBLOCK_FOREACH(block) { - largest = MAX(largest, qemu_ram_pagesize(block)); - } - - return largest; -} - -static int memory_try_enable_merging(void *addr, size_t len) -{ - if (!machine_mem_merge(current_machine)) { - /* disabled by the user */ - return 0; - } - - return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); -} - -/* - * Resizing RAM while migrating can result in the migration being canceled. - * Care has to be taken if the guest might have already detected the memory. - * - * As memory core doesn't know how is memory accessed, it is up to - * resize callback to update device state and/or add assertions to detect - * misuse, if necessary. - */ -int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) -{ - const ram_addr_t oldsize = block->used_length; - const ram_addr_t unaligned_size = newsize; - - assert(block); - - newsize = HOST_PAGE_ALIGN(newsize); - - if (block->used_length == newsize) { - /* - * We don't have to resize the ram block (which only knows aligned - * sizes), however, we have to notify if the unaligned size changed. 
- */ - if (unaligned_size != memory_region_size(block->mr)) { - memory_region_set_size(block->mr, unaligned_size); - if (block->resized) { - block->resized(block->idstr, unaligned_size, block->host); - } - } - return 0; - } - - if (!(block->flags & RAM_RESIZEABLE)) { - error_setg_errno(errp, EINVAL, - "Size mismatch: %s: 0x" RAM_ADDR_FMT - " != 0x" RAM_ADDR_FMT, block->idstr, - newsize, block->used_length); - return -EINVAL; - } - - if (block->max_length < newsize) { - error_setg_errno(errp, EINVAL, - "Size too large: %s: 0x" RAM_ADDR_FMT - " > 0x" RAM_ADDR_FMT, block->idstr, - newsize, block->max_length); - return -EINVAL; - } - - /* Notify before modifying the ram block and touching the bitmaps. */ - if (block->host) { - ram_block_notify_resize(block->host, oldsize, newsize); - } - - cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); - block->used_length = newsize; - cpu_physical_memory_set_dirty_range(block->offset, block->used_length, - DIRTY_CLIENTS_ALL); - memory_region_set_size(block->mr, unaligned_size); - if (block->resized) { - block->resized(block->idstr, unaligned_size, block->host); - } - return 0; -} - -/* - * Trigger sync on the given ram block for range [start, start + length] - * with the backing store if one is available. - * Otherwise no-op. - * @Note: this is supposed to be a synchronous op. - */ -void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) -{ - /* The requested range should fit in within the block range */ - g_assert((start + length) <= block->used_length); - -#ifdef CONFIG_LIBPMEM - /* The lack of support for pmem should not block the sync */ - if (ramblock_is_pmem(block)) { - void *addr = ramblock_ptr(block, start); - pmem_persist(addr, length); - return; - } -#endif - if (block->fd >= 0) { - /** - * Case there is no support for PMEM or the memory has not been - * specified as persistent (or is not one) - use the msync. 
- * Less optimal but still achieves the same goal - */ - void *addr = ramblock_ptr(block, start); - if (qemu_msync(addr, length, block->fd)) { - warn_report("%s: failed to sync memory range: start: " - RAM_ADDR_FMT " length: " RAM_ADDR_FMT, - __func__, start, length); - } - } -} - -/* Called with ram_list.mutex held */ -static void dirty_memory_extend(ram_addr_t old_ram_size, - ram_addr_t new_ram_size) -{ - ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, - DIRTY_MEMORY_BLOCK_SIZE); - ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size, - DIRTY_MEMORY_BLOCK_SIZE); - int i; - - /* Only need to extend if block count increased */ - if (new_num_blocks <= old_num_blocks) { - return; - } - - for (i = 0; i < DIRTY_MEMORY_NUM; i++) { - DirtyMemoryBlocks *old_blocks; - DirtyMemoryBlocks *new_blocks; - int j; - - old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]); - new_blocks = g_malloc(sizeof(*new_blocks) + - sizeof(new_blocks->blocks[0]) * new_num_blocks); - - if (old_num_blocks) { - memcpy(new_blocks->blocks, old_blocks->blocks, - old_num_blocks * sizeof(old_blocks->blocks[0])); - } - - for (j = old_num_blocks; j < new_num_blocks; j++) { - new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE); - } - - qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks); - - if (old_blocks) { - g_free_rcu(old_blocks, rcu); - } - } -} - -static void ram_block_add(RAMBlock *new_block, Error **errp) -{ - const bool noreserve = qemu_ram_is_noreserve(new_block); - const bool shared = qemu_ram_is_shared(new_block); - RAMBlock *block; - RAMBlock *last_block = NULL; - ram_addr_t old_ram_size, new_ram_size; - Error *err = NULL; - - old_ram_size = last_ram_page(); - - qemu_mutex_lock_ramlist(); - new_block->offset = find_ram_offset(new_block->max_length); - - if (!new_block->host) { - if (xen_enabled()) { - xen_ram_alloc(new_block->offset, new_block->max_length, - new_block->mr, &err); - if (err) { - error_propagate(errp, err); - qemu_mutex_unlock_ramlist(); - return; - } - } 
else { - new_block->host = qemu_anon_ram_alloc(new_block->max_length, - &new_block->mr->align, - shared, noreserve); - if (!new_block->host) { - error_setg_errno(errp, errno, - "cannot set up guest memory '%s'", - memory_region_name(new_block->mr)); - qemu_mutex_unlock_ramlist(); - return; - } - memory_try_enable_merging(new_block->host, new_block->max_length); - } - } - - new_ram_size = MAX(old_ram_size, - (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); - if (new_ram_size > old_ram_size) { - dirty_memory_extend(old_ram_size, new_ram_size); - } - /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, - * QLIST (which has an RCU-friendly variant) does not have insertion at - * tail, so save the last element in last_block. - */ - RAMBLOCK_FOREACH(block) { - last_block = block; - if (block->max_length < new_block->max_length) { - break; - } - } - if (block) { - QLIST_INSERT_BEFORE_RCU(block, new_block, next); - } else if (last_block) { - QLIST_INSERT_AFTER_RCU(last_block, new_block, next); - } else { /* list is empty */ - QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next); - } - ram_list.mru_block = NULL; - - /* Write list before version */ - smp_wmb(); - ram_list.version++; - qemu_mutex_unlock_ramlist(); - - cpu_physical_memory_set_dirty_range(new_block->offset, - new_block->used_length, - DIRTY_CLIENTS_ALL); - - if (new_block->host) { - qemu_ram_setup_dump(new_block->host, new_block->max_length); - qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE); - /* - * MADV_DONTFORK is also needed by KVM in absence of synchronous MMU - * Configure it unless the machine is a qtest server, in which case - * KVM is not used and it may be forked (eg for fuzzing purposes). 
- */ - if (!qtest_enabled()) { - qemu_madvise(new_block->host, new_block->max_length, - QEMU_MADV_DONTFORK); - } - ram_block_notify_add(new_block->host, new_block->used_length, - new_block->max_length); - } -} - -#ifdef CONFIG_POSIX -RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - uint32_t ram_flags, int fd, off_t offset, - Error **errp) -{ - RAMBlock *new_block; - Error *local_err = NULL; - int64_t file_size, file_align; - - /* Just support these ram flags by now. */ - assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | - RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | - RAM_READONLY_FD)) == 0); - - if (xen_enabled()) { - error_setg(errp, "-mem-path not supported with Xen"); - return NULL; - } - - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_setg(errp, - "host lacks kvm mmu notifiers, -mem-path unsupported"); - return NULL; - } - - size = HOST_PAGE_ALIGN(size); - file_size = get_file_size(fd); - if (file_size > offset && file_size < (offset + size)) { - error_setg(errp, "backing store size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - file_size, size); - return NULL; - } - - file_align = get_file_align(fd); - if (file_align > 0 && file_align > mr->align) { - error_setg(errp, "backing store align 0x%" PRIx64 - " is larger than 'align' option 0x%" PRIx64, - file_align, mr->align); - return NULL; - } - - new_block = g_malloc0(sizeof(*new_block)); - new_block->mr = mr; - new_block->used_length = size; - new_block->max_length = size; - new_block->flags = ram_flags; - new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, - errp); - if (!new_block->host) { - g_free(new_block); - return NULL; - } - - ram_block_add(new_block, &local_err); - if (local_err) { - g_free(new_block); - error_propagate(errp, local_err); - return NULL; - } - return new_block; - -} - - -RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - uint32_t ram_flags, const char *mem_path, - off_t offset, Error 
**errp) -{ - int fd; - bool created; - RAMBlock *block; - - fd = file_ram_open(mem_path, memory_region_name(mr), - !!(ram_flags & RAM_READONLY_FD), &created); - if (fd < 0) { - error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM", - mem_path); - if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) && - fd == -EACCES) { - /* - * If we can open the file R/O (note: will never create a new file) - * and we are dealing with a private mapping, there are still ways - * to consume such files and get RAM instead of ROM. - */ - fd = file_ram_open(mem_path, memory_region_name(mr), true, - &created); - if (fd < 0) { - return NULL; - } - assert(!created); - close(fd); - error_append_hint(errp, "Consider opening the backing store" - " read-only but still creating writable RAM using" - " '-object memory-backend-file,readonly=on,rom=off...'" - " (see \"VM templating\" documentation)\n"); - } - return NULL; - } - - block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp); - if (!block) { - if (created) { - unlink(mem_path); - } - close(fd); - return NULL; - } - - return block; -} -#endif - -static -RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, - void (*resized)(const char*, - uint64_t length, - void *host), - void *host, uint32_t ram_flags, - MemoryRegion *mr, Error **errp) -{ - RAMBlock *new_block; - Error *local_err = NULL; - - assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | - RAM_NORESERVE)) == 0); - assert(!host ^ (ram_flags & RAM_PREALLOC)); - - size = HOST_PAGE_ALIGN(size); - max_size = HOST_PAGE_ALIGN(max_size); - new_block = g_malloc0(sizeof(*new_block)); - new_block->mr = mr; - new_block->resized = resized; - new_block->used_length = size; - new_block->max_length = max_size; - assert(max_size >= size); - new_block->fd = -1; - new_block->page_size = qemu_real_host_page_size(); - new_block->host = host; - new_block->flags = ram_flags; - ram_block_add(new_block, &local_err); - if 
(local_err) { - g_free(new_block); - error_propagate(errp, local_err); - return NULL; - } - return new_block; -} - -RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, - MemoryRegion *mr, Error **errp) -{ - return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr, - errp); -} - -RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, - MemoryRegion *mr, Error **errp) -{ - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); - return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); -} - -RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz, - void (*resized)(const char*, - uint64_t length, - void *host), - MemoryRegion *mr, Error **errp) -{ - return qemu_ram_alloc_internal(size, maxsz, resized, NULL, - RAM_RESIZEABLE, mr, errp); -} - -static void reclaim_ramblock(RAMBlock *block) -{ - if (block->flags & RAM_PREALLOC) { - ; - } else if (xen_enabled()) { - xen_invalidate_map_cache_entry(block->host); -#ifndef _WIN32 - } else if (block->fd >= 0) { - qemu_ram_munmap(block->fd, block->host, block->max_length); - close(block->fd); -#endif - } else { - qemu_anon_ram_free(block->host, block->max_length); - } - g_free(block); -} - -void qemu_ram_free(RAMBlock *block) -{ - if (!block) { - return; - } - - if (block->host) { - ram_block_notify_remove(block->host, block->used_length, - block->max_length); - } - - qemu_mutex_lock_ramlist(); - QLIST_REMOVE_RCU(block, next); - ram_list.mru_block = NULL; - /* Write list before version */ - smp_wmb(); - ram_list.version++; - call_rcu(block, reclaim_ramblock, rcu); - qemu_mutex_unlock_ramlist(); -} - -#ifndef _WIN32 -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) -{ - RAMBlock *block; - ram_addr_t offset; - int flags; - void *area, *vaddr; - int prot; - - RAMBLOCK_FOREACH(block) { - offset = addr - block->offset; - if (offset < block->max_length) { - vaddr = ramblock_ptr(block, offset); - if (block->flags & RAM_PREALLOC) { - ; - } else if 
(xen_enabled()) { - abort(); - } else { - flags = MAP_FIXED; - flags |= block->flags & RAM_SHARED ? - MAP_SHARED : MAP_PRIVATE; - flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; - prot = PROT_READ; - prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; - if (block->fd >= 0) { - area = mmap(vaddr, length, prot, flags, block->fd, - offset + block->fd_offset); - } else { - flags |= MAP_ANONYMOUS; - area = mmap(vaddr, length, prot, flags, -1, 0); - } - if (area != vaddr) { - error_report("Could not remap addr: " - RAM_ADDR_FMT "@" RAM_ADDR_FMT "", - length, addr); - exit(1); - } - memory_try_enable_merging(vaddr, length); - qemu_ram_setup_dump(vaddr, length); - } - } - } -} -#endif /* !_WIN32 */ - -/* Return a host pointer to ram allocated with qemu_ram_alloc. - * This should not be used for general purpose DMA. Use address_space_map - * or address_space_rw instead. For local memory (e.g. video ram) that the - * device owns, use memory_region_get_ram_ptr. - * - * Called within RCU critical section. - */ -void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr) -{ - RAMBlock *block = ram_block; - - if (block == NULL) { - block = qemu_get_ram_block(addr); - addr -= block->offset; - } - - if (xen_enabled() && block->host == NULL) { - /* We need to check if the requested address is in the RAM - * because we don't want to map the entire memory in QEMU. - * In that case just map until the end of the page. - */ - if (block->offset == 0) { - return xen_map_cache(addr, 0, 0, false); - } - - block->host = xen_map_cache(block->offset, block->max_length, 1, false); - } - return ramblock_ptr(block, addr); -} - -/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr - * but takes a size argument. - * - * Called within RCU critical section. 
- */ -static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr, - hwaddr *size, bool lock) -{ - RAMBlock *block = ram_block; - if (*size == 0) { - return NULL; - } - - if (block == NULL) { - block = qemu_get_ram_block(addr); - addr -= block->offset; - } - *size = MIN(*size, block->max_length - addr); - - if (xen_enabled() && block->host == NULL) { - /* We need to check if the requested address is in the RAM - * because we don't want to map the entire memory in QEMU. - * In that case just map the requested area. - */ - if (block->offset == 0) { - return xen_map_cache(addr, *size, lock, lock); - } - - block->host = xen_map_cache(block->offset, block->max_length, 1, lock); - } - - return ramblock_ptr(block, addr); -} - -/* Return the offset of a hostpointer within a ramblock */ -ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host) -{ - ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host; - assert((uintptr_t)host >= (uintptr_t)rb->host); - assert(res < rb->max_length); - - return res; -} - -/* - * Translates a host ptr back to a RAMBlock, a ram_addr and an offset - * in that RAMBlock. - * - * ptr: Host pointer to look up - * round_offset: If true round the result offset down to a page boundary - * *ram_addr: set to result ram_addr - * *offset: set to result offset within the RAMBlock - * - * Returns: RAMBlock (or NULL if not found) - * - * By the time this function returns, the returned pointer is not protected - * by RCU anymore. If the caller is not within an RCU critical section and - * does not hold the iothread lock, it must have other means of protecting the - * pointer, such as a reference to the region that includes the incoming - * ram_addr_t. 
- */ -RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, - ram_addr_t *offset) -{ - RAMBlock *block; - uint8_t *host = ptr; - - if (xen_enabled()) { - ram_addr_t ram_addr; - RCU_READ_LOCK_GUARD(); - ram_addr = xen_ram_addr_from_mapcache(ptr); - block = qemu_get_ram_block(ram_addr); - if (block) { - *offset = ram_addr - block->offset; - } - return block; - } - - RCU_READ_LOCK_GUARD(); - block = qatomic_rcu_read(&ram_list.mru_block); - if (block && block->host && host - block->host < block->max_length) { - goto found; - } - - RAMBLOCK_FOREACH(block) { - /* This case append when the block is not mapped. */ - if (block->host == NULL) { - continue; - } - if (host - block->host < block->max_length) { - goto found; - } - } - - return NULL; - -found: - *offset = (host - block->host); - if (round_offset) { - *offset &= TARGET_PAGE_MASK; - } - return block; -} - -/* - * Finds the named RAMBlock - * - * name: The name of RAMBlock to find - * - * Returns: RAMBlock (or NULL if not found) - */ -RAMBlock *qemu_ram_block_by_name(const char *name) -{ - RAMBlock *block; - - RAMBLOCK_FOREACH(block) { - if (!strcmp(name, block->idstr)) { - return block; - } - } - - return NULL; -} - -/* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. 
*/ -ram_addr_t qemu_ram_addr_from_host(void *ptr) -{ - RAMBlock *block; - ram_addr_t offset; - - block = qemu_ram_block_from_host(ptr, false, &offset); - if (!block) { - return RAM_ADDR_INVALID; - } - - return block->offset + offset; -} - -ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) -{ - ram_addr_t ram_addr; - - ram_addr = qemu_ram_addr_from_host(ptr); - if (ram_addr == RAM_ADDR_INVALID) { - error_report("Bad ram pointer %p", ptr); - abort(); - } - return ram_addr; -} - -static MemTxResult flatview_read(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, void *buf, hwaddr len); -static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, - const void *buf, hwaddr len); -static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, - bool is_write, MemTxAttrs attrs); - -static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data, - unsigned len, MemTxAttrs attrs) -{ - subpage_t *subpage = opaque; - uint8_t buf[8]; - MemTxResult res; - -#if defined(DEBUG_SUBPAGE) - printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__, - subpage, len, addr); -#endif - res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len); - if (res) { - return res; - } - *data = ldn_p(buf, len); - return MEMTX_OK; -} - -static MemTxResult subpage_write(void *opaque, hwaddr addr, - uint64_t value, unsigned len, MemTxAttrs attrs) -{ - subpage_t *subpage = opaque; - uint8_t buf[8]; - -#if defined(DEBUG_SUBPAGE) - printf("%s: subpage %p len %u addr " HWADDR_FMT_plx - " value %"PRIx64"\n", - __func__, subpage, len, addr, value); -#endif - stn_p(buf, len, value); - return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len); -} - -static bool subpage_accepts(void *opaque, hwaddr addr, - unsigned len, bool is_write, - MemTxAttrs attrs) -{ - subpage_t *subpage = opaque; -#if defined(DEBUG_SUBPAGE) - printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n", - __func__, subpage, is_write ? 
'w' : 'r', len, addr); -#endif - - return flatview_access_valid(subpage->fv, addr + subpage->base, - len, is_write, attrs); -} - -static const MemoryRegionOps subpage_ops = { - .read_with_attrs = subpage_read, - .write_with_attrs = subpage_write, - .impl.min_access_size = 1, - .impl.max_access_size = 8, - .valid.min_access_size = 1, - .valid.max_access_size = 8, - .valid.accepts = subpage_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section) -{ - int idx, eidx; - - if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) - return -1; - idx = SUBPAGE_IDX(start); - eidx = SUBPAGE_IDX(end); -#if defined(DEBUG_SUBPAGE) - printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", - __func__, mmio, start, end, idx, eidx, section); -#endif - for (; idx <= eidx; idx++) { - mmio->sub_section[idx] = section; - } - - return 0; -} - -static subpage_t *subpage_init(FlatView *fv, hwaddr base) -{ - subpage_t *mmio; - - /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */ - mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); - mmio->fv = fv; - mmio->base = base; - memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio, - NULL, TARGET_PAGE_SIZE); - mmio->iomem.subpage = true; -#if defined(DEBUG_SUBPAGE) - printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__, - mmio, base, TARGET_PAGE_SIZE); -#endif - - return mmio; -} - -static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) -{ - assert(fv); - MemoryRegionSection section = { - .fv = fv, - .mr = mr, - .offset_within_address_space = 0, - .offset_within_region = 0, - .size = int128_2_64(), - }; - - return phys_section_add(map, §ion); -} - -MemoryRegionSection *iotlb_to_section(CPUState *cpu, - hwaddr index, MemTxAttrs attrs) -{ - int asidx = cpu_asidx_from_attrs(cpu, attrs); - CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; - AddressSpaceDispatch *d 
= cpuas->memory_dispatch; - int section_index = index & ~TARGET_PAGE_MASK; - MemoryRegionSection *ret; - - assert(section_index < d->map.sections_nb); - ret = d->map.sections + section_index; - assert(ret->mr); - assert(ret->mr->ops); - - return ret; -} - -static void io_mem_init(void) -{ - memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, - NULL, UINT64_MAX); -} - -AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) -{ - AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); - uint16_t n; - - n = dummy_section(&d->map, fv, &io_mem_unassigned); - assert(n == PHYS_SECTION_UNASSIGNED); - - d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; - - return d; -} - -void address_space_dispatch_free(AddressSpaceDispatch *d) -{ - phys_sections_free(&d->map); - g_free(d); -} - -static void do_nothing(CPUState *cpu, run_on_cpu_data d) -{ -} - -static void tcg_log_global_after_sync(MemoryListener *listener) -{ - CPUAddressSpace *cpuas; - - /* Wait for the CPU to end the current TB. This avoids the following - * incorrect race: - * - * vCPU migration - * ---------------------- ------------------------- - * TLB check -> slow path - * notdirty_mem_write - * write to RAM - * mark dirty - * clear dirty flag - * TLB check -> fast path - * read memory - * write to RAM - * - * by pushing the migration thread's memory read after the vCPU thread has - * written the memory. - */ - if (replay_mode == REPLAY_MODE_NONE) { - /* - * VGA can make calls to this function while updating the screen. - * In record/replay mode this causes a deadlock, because - * run_on_cpu waits for rr mutex. Therefore no races are possible - * in this case and no need for making run_on_cpu when - * record/replay is enabled. 
- */ - cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); - run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); - } -} - -static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data) -{ - CPUAddressSpace *cpuas = data.host_ptr; - - cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as); - tlb_flush(cpu); -} - -static void tcg_commit(MemoryListener *listener) -{ - CPUAddressSpace *cpuas; - CPUState *cpu; - - assert(tcg_enabled()); - /* since each CPU stores ram addresses in its TLB cache, we must - reset the modified entries */ - cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); - cpu = cpuas->cpu; - - /* - * Defer changes to as->memory_dispatch until the cpu is quiescent. - * Otherwise we race between (1) other cpu threads and (2) ongoing - * i/o for the current cpu thread, with data cached by mmu_lookup(). - * - * In addition, queueing the work function will kick the cpu back to - * the main loop, which will end the RCU critical section and reclaim - * the memory data structures. - * - * That said, the listener is also called during realize, before - * all of the tcg machinery for run-on is initialized: thus halt_cond. 
- */ - if (cpu->halt_cond) { - async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas)); - } else { - tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas)); - } -} - -static void memory_map_init(void) -{ - system_memory = g_malloc(sizeof(*system_memory)); - - memory_region_init(system_memory, NULL, "system", UINT64_MAX); - address_space_init(&address_space_memory, system_memory, "memory"); - - system_io = g_malloc(sizeof(*system_io)); - memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io", - 65536); - address_space_init(&address_space_io, system_io, "I/O"); -} - -MemoryRegion *get_system_memory(void) -{ - return system_memory; -} - -MemoryRegion *get_system_io(void) -{ - return system_io; -} - -static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, - hwaddr length) -{ - uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); - addr += memory_region_get_ram_addr(mr); - - /* No early return if dirty_log_mask is or becomes 0, because - * cpu_physical_memory_set_dirty_range will still call - * xen_modified_memory. - */ - if (dirty_log_mask) { - dirty_log_mask = - cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask); - } - if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { - assert(tcg_enabled()); - tb_invalidate_phys_range(addr, addr + length - 1); - dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); - } - cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); -} - -void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) -{ - /* - * In principle this function would work on other memory region types too, - * but the ROM device use case is the only one where this operation is - * necessary. Other memory regions should use the - * address_space_read/write() APIs. 
- */ - assert(memory_region_is_romd(mr)); - - invalidate_and_set_dirty(mr, addr, size); -} - -int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) -{ - unsigned access_size_max = mr->ops->valid.max_access_size; - - /* Regions are assumed to support 1-4 byte accesses unless - otherwise specified. */ - if (access_size_max == 0) { - access_size_max = 4; - } - - /* Bound the maximum access by the alignment of the address. */ - if (!mr->ops->impl.unaligned) { - unsigned align_size_max = addr & -addr; - if (align_size_max != 0 && align_size_max < access_size_max) { - access_size_max = align_size_max; - } - } - - /* Don't attempt accesses larger than the maximum. */ - if (l > access_size_max) { - l = access_size_max; - } - l = pow2floor(l); - - return l; -} - -bool prepare_mmio_access(MemoryRegion *mr) -{ - bool release_lock = false; - - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); - release_lock = true; - } - if (mr->flush_coalesced_mmio) { - qemu_flush_coalesced_mmio_buffer(); - } - - return release_lock; -} - -/** - * flatview_access_allowed - * @mr: #MemoryRegion to be accessed - * @attrs: memory transaction attributes - * @addr: address within that memory region - * @len: the number of bytes to access - * - * Check if a memory transaction is allowed. - * - * Returns: true if transaction is allowed, false if denied. - */ -static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, - hwaddr addr, hwaddr len) -{ - if (likely(!attrs.memory)) { - return true; - } - if (memory_region_is_ram(mr)) { - return true; - } - qemu_log_mask(LOG_GUEST_ERROR, - "Invalid access to non-RAM device at " - "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " - "region '%s'\n", addr, len, memory_region_name(mr)); - return false; -} - -/* Called within RCU critical section. 
*/ -static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, - const void *ptr, - hwaddr len, hwaddr addr1, - hwaddr l, MemoryRegion *mr) -{ - uint8_t *ram_ptr; - uint64_t val; - MemTxResult result = MEMTX_OK; - bool release_lock = false; - const uint8_t *buf = ptr; - - for (;;) { - if (!flatview_access_allowed(mr, attrs, addr1, l)) { - result |= MEMTX_ACCESS_ERROR; - /* Keep going. */ - } else if (!memory_access_is_direct(mr, true)) { - release_lock |= prepare_mmio_access(mr); - l = memory_access_size(mr, l, addr1); - /* XXX: could force current_cpu to NULL to avoid - potential bugs */ - val = ldn_he_p(buf, l); - result |= memory_region_dispatch_write(mr, addr1, val, - size_memop(l), attrs); - } else { - /* RAM case */ - ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); - memmove(ram_ptr, buf, l); - invalidate_and_set_dirty(mr, addr1, l); - } - - if (release_lock) { - qemu_mutex_unlock_iothread(); - release_lock = false; - } - - len -= l; - buf += l; - addr += l; - - if (!len) { - break; - } - - l = len; - mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); - } - - return result; -} - -/* Called from RCU critical section. */ -static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, - const void *buf, hwaddr len) -{ - hwaddr l; - hwaddr addr1; - MemoryRegion *mr; - - l = len; - mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); - if (!flatview_access_allowed(mr, attrs, addr, len)) { - return MEMTX_ACCESS_ERROR; - } - return flatview_write_continue(fv, addr, attrs, buf, len, - addr1, l, mr); -} - -/* Called within RCU critical section. 
*/ -MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, void *ptr, - hwaddr len, hwaddr addr1, hwaddr l, - MemoryRegion *mr) -{ - uint8_t *ram_ptr; - uint64_t val; - MemTxResult result = MEMTX_OK; - bool release_lock = false; - uint8_t *buf = ptr; - - fuzz_dma_read_cb(addr, len, mr); - for (;;) { - if (!flatview_access_allowed(mr, attrs, addr1, l)) { - result |= MEMTX_ACCESS_ERROR; - /* Keep going. */ - } else if (!memory_access_is_direct(mr, false)) { - /* I/O case */ - release_lock |= prepare_mmio_access(mr); - l = memory_access_size(mr, l, addr1); - result |= memory_region_dispatch_read(mr, addr1, &val, - size_memop(l), attrs); - stn_he_p(buf, l, val); - } else { - /* RAM case */ - ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); - memcpy(buf, ram_ptr, l); - } - - if (release_lock) { - qemu_mutex_unlock_iothread(); - release_lock = false; - } - - len -= l; - buf += l; - addr += l; - - if (!len) { - break; - } - - l = len; - mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); - } - - return result; -} - -/* Called from RCU critical section. 
*/ -static MemTxResult flatview_read(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, void *buf, hwaddr len) -{ - hwaddr l; - hwaddr addr1; - MemoryRegion *mr; - - l = len; - mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); - if (!flatview_access_allowed(mr, attrs, addr, len)) { - return MEMTX_ACCESS_ERROR; - } - return flatview_read_continue(fv, addr, attrs, buf, len, - addr1, l, mr); -} - -MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, void *buf, hwaddr len) -{ - MemTxResult result = MEMTX_OK; - FlatView *fv; - - if (len > 0) { - RCU_READ_LOCK_GUARD(); - fv = address_space_to_flatview(as); - result = flatview_read(fv, addr, attrs, buf, len); - } - - return result; -} - -MemTxResult address_space_write(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, - const void *buf, hwaddr len) -{ - MemTxResult result = MEMTX_OK; - FlatView *fv; - - if (len > 0) { - RCU_READ_LOCK_GUARD(); - fv = address_space_to_flatview(as); - result = flatview_write(fv, addr, attrs, buf, len); - } - - return result; -} - -MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, - void *buf, hwaddr len, bool is_write) -{ - if (is_write) { - return address_space_write(as, addr, attrs, buf, len); - } else { - return address_space_read_full(as, addr, attrs, buf, len); - } -} - -MemTxResult address_space_set(AddressSpace *as, hwaddr addr, - uint8_t c, hwaddr len, MemTxAttrs attrs) -{ -#define FILLBUF_SIZE 512 - uint8_t fillbuf[FILLBUF_SIZE]; - int l; - MemTxResult error = MEMTX_OK; - - memset(fillbuf, c, FILLBUF_SIZE); - while (len > 0) { - l = len < FILLBUF_SIZE ? 
len : FILLBUF_SIZE; - error |= address_space_write(as, addr, attrs, fillbuf, l); - len -= l; - addr += l; - } - - return error; -} - -void cpu_physical_memory_rw(hwaddr addr, void *buf, - hwaddr len, bool is_write) -{ - address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED, - buf, len, is_write); -} - -enum write_rom_type { - WRITE_DATA, - FLUSH_CACHE, -}; - -static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, - hwaddr addr, - MemTxAttrs attrs, - const void *ptr, - hwaddr len, - enum write_rom_type type) -{ - hwaddr l; - uint8_t *ram_ptr; - hwaddr addr1; - MemoryRegion *mr; - const uint8_t *buf = ptr; - - RCU_READ_LOCK_GUARD(); - while (len > 0) { - l = len; - mr = address_space_translate(as, addr, &addr1, &l, true, attrs); - - if (!(memory_region_is_ram(mr) || - memory_region_is_romd(mr))) { - l = memory_access_size(mr, l, addr1); - } else { - /* ROM/RAM case */ - ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1); - switch (type) { - case WRITE_DATA: - memcpy(ram_ptr, buf, l); - invalidate_and_set_dirty(mr, addr1, l); - break; - case FLUSH_CACHE: - flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l); - break; - } - } - len -= l; - buf += l; - addr += l; - } - return MEMTX_OK; -} - -/* used for ROM loading : can write in RAM and ROM */ -MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, - const void *buf, hwaddr len) -{ - return address_space_write_rom_internal(as, addr, attrs, - buf, len, WRITE_DATA); -} - -void cpu_flush_icache_range(hwaddr start, hwaddr len) -{ - /* - * This function should do the same thing as an icache flush that was - * triggered from within the guest. For TCG we are always cache coherent, - * so there is no need to flush anything. For KVM / Xen we need to flush - * the host's instruction cache at least. 
- */ - if (tcg_enabled()) { - return; - } - - address_space_write_rom_internal(&address_space_memory, - start, MEMTXATTRS_UNSPECIFIED, - NULL, len, FLUSH_CACHE); -} - -typedef struct { - MemoryRegion *mr; - void *buffer; - hwaddr addr; - hwaddr len; - bool in_use; -} BounceBuffer; - -static BounceBuffer bounce; - -typedef struct MapClient { - QEMUBH *bh; - QLIST_ENTRY(MapClient) link; -} MapClient; - -QemuMutex map_client_list_lock; -static QLIST_HEAD(, MapClient) map_client_list - = QLIST_HEAD_INITIALIZER(map_client_list); - -static void cpu_unregister_map_client_do(MapClient *client) -{ - QLIST_REMOVE(client, link); - g_free(client); -} - -static void cpu_notify_map_clients_locked(void) -{ - MapClient *client; - - while (!QLIST_EMPTY(&map_client_list)) { - client = QLIST_FIRST(&map_client_list); - qemu_bh_schedule(client->bh); - cpu_unregister_map_client_do(client); - } -} - -void cpu_register_map_client(QEMUBH *bh) -{ - MapClient *client = g_malloc(sizeof(*client)); - - qemu_mutex_lock(&map_client_list_lock); - client->bh = bh; - QLIST_INSERT_HEAD(&map_client_list, client, link); - /* Write map_client_list before reading in_use. */ - smp_mb(); - if (!qatomic_read(&bounce.in_use)) { - cpu_notify_map_clients_locked(); - } - qemu_mutex_unlock(&map_client_list_lock); -} - -void cpu_exec_init_all(void) -{ - qemu_mutex_init(&ram_list.mutex); - /* The data structures we set up here depend on knowing the page size, - * so no more changes can be made after this point. - * In an ideal world, nothing we did before we had finished the - * machine setup would care about the target page size, and we could - * do this much later, rather than requiring board models to state - * up front what their requirements are. 
- */ - finalize_target_page_bits(); - io_mem_init(); - memory_map_init(); - qemu_mutex_init(&map_client_list_lock); -} - -void cpu_unregister_map_client(QEMUBH *bh) -{ - MapClient *client; - - qemu_mutex_lock(&map_client_list_lock); - QLIST_FOREACH(client, &map_client_list, link) { - if (client->bh == bh) { - cpu_unregister_map_client_do(client); - break; - } - } - qemu_mutex_unlock(&map_client_list_lock); -} - -static void cpu_notify_map_clients(void) -{ - qemu_mutex_lock(&map_client_list_lock); - cpu_notify_map_clients_locked(); - qemu_mutex_unlock(&map_client_list_lock); -} - -static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, - bool is_write, MemTxAttrs attrs) -{ - MemoryRegion *mr; - hwaddr l, xlat; - - while (len > 0) { - l = len; - mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); - if (!memory_access_is_direct(mr, is_write)) { - l = memory_access_size(mr, l, addr); - if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) { - return false; - } - } - - len -= l; - addr += l; - } - return true; -} - -bool address_space_access_valid(AddressSpace *as, hwaddr addr, - hwaddr len, bool is_write, - MemTxAttrs attrs) -{ - FlatView *fv; - - RCU_READ_LOCK_GUARD(); - fv = address_space_to_flatview(as); - return flatview_access_valid(fv, addr, len, is_write, attrs); -} - -static hwaddr -flatview_extend_translation(FlatView *fv, hwaddr addr, - hwaddr target_len, - MemoryRegion *mr, hwaddr base, hwaddr len, - bool is_write, MemTxAttrs attrs) -{ - hwaddr done = 0; - hwaddr xlat; - MemoryRegion *this_mr; - - for (;;) { - target_len -= len; - addr += len; - done += len; - if (target_len == 0) { - return done; - } - - len = target_len; - this_mr = flatview_translate(fv, addr, &xlat, - &len, is_write, attrs); - if (this_mr != mr || xlat != base + done) { - return done; - } - } -} - -/* Map a physical memory region into a host virtual address. - * May map a subset of the requested range, given by and returned in *plen. 
- * May return NULL if resources needed to perform the mapping are exhausted. - * Use only for reads OR writes - not for read-modify-write operations. - * Use cpu_register_map_client() to know when retrying the map operation is - * likely to succeed. - */ -void *address_space_map(AddressSpace *as, - hwaddr addr, - hwaddr *plen, - bool is_write, - MemTxAttrs attrs) -{ - hwaddr len = *plen; - hwaddr l, xlat; - MemoryRegion *mr; - FlatView *fv; - - if (len == 0) { - return NULL; - } - - l = len; - RCU_READ_LOCK_GUARD(); - fv = address_space_to_flatview(as); - mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); - - if (!memory_access_is_direct(mr, is_write)) { - if (qatomic_xchg(&bounce.in_use, true)) { - *plen = 0; - return NULL; - } - /* Avoid unbounded allocations */ - l = MIN(l, TARGET_PAGE_SIZE); - bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); - bounce.addr = addr; - bounce.len = l; - - memory_region_ref(mr); - bounce.mr = mr; - if (!is_write) { - flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, - bounce.buffer, l); - } - - *plen = l; - return bounce.buffer; - } - - - memory_region_ref(mr); - *plen = flatview_extend_translation(fv, addr, len, mr, xlat, - l, is_write, attrs); - fuzz_dma_read_cb(addr, *plen, mr); - return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); -} - -/* Unmaps a memory region previously mapped by address_space_map(). - * Will also mark the memory as dirty if is_write is true. access_len gives - * the amount of memory that was actually read or written by the caller. 
- */ -void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, - bool is_write, hwaddr access_len) -{ - if (buffer != bounce.buffer) { - MemoryRegion *mr; - ram_addr_t addr1; - - mr = memory_region_from_host(buffer, &addr1); - assert(mr != NULL); - if (is_write) { - invalidate_and_set_dirty(mr, addr1, access_len); - } - if (xen_enabled()) { - xen_invalidate_map_cache_entry(buffer); - } - memory_region_unref(mr); - return; - } - if (is_write) { - address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED, - bounce.buffer, access_len); - } - qemu_vfree(bounce.buffer); - bounce.buffer = NULL; - memory_region_unref(bounce.mr); - /* Clear in_use before reading map_client_list. */ - qatomic_set_mb(&bounce.in_use, false); - cpu_notify_map_clients(); -} - -void *cpu_physical_memory_map(hwaddr addr, - hwaddr *plen, - bool is_write) -{ - return address_space_map(&address_space_memory, addr, plen, is_write, - MEMTXATTRS_UNSPECIFIED); -} - -void cpu_physical_memory_unmap(void *buffer, hwaddr len, - bool is_write, hwaddr access_len) -{ - return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len); -} - -#define ARG1_DECL AddressSpace *as -#define ARG1 as -#define SUFFIX -#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__) -#define RCU_READ_LOCK(...) rcu_read_lock() -#define RCU_READ_UNLOCK(...) rcu_read_unlock() -#include "memory_ldst.c.inc" - -int64_t address_space_cache_init(MemoryRegionCache *cache, - AddressSpace *as, - hwaddr addr, - hwaddr len, - bool is_write) -{ - AddressSpaceDispatch *d; - hwaddr l; - MemoryRegion *mr; - Int128 diff; - - assert(len > 0); - - l = len; - cache->fv = address_space_get_flatview(as); - d = flatview_to_dispatch(cache->fv); - cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); - - /* - * cache->xlat is now relative to cache->mrs.mr, not to the section itself. 
- * Take that into account to compute how many bytes are there between - * cache->xlat and the end of the section. - */ - diff = int128_sub(cache->mrs.size, - int128_make64(cache->xlat - cache->mrs.offset_within_region)); - l = int128_get64(int128_min(diff, int128_make64(l))); - - mr = cache->mrs.mr; - memory_region_ref(mr); - if (memory_access_is_direct(mr, is_write)) { - /* We don't care about the memory attributes here as we're only - * doing this if we found actual RAM, which behaves the same - * regardless of attributes; so UNSPECIFIED is fine. - */ - l = flatview_extend_translation(cache->fv, addr, len, mr, - cache->xlat, l, is_write, - MEMTXATTRS_UNSPECIFIED); - cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true); - } else { - cache->ptr = NULL; - } - - cache->len = l; - cache->is_write = is_write; - return l; -} - -void address_space_cache_invalidate(MemoryRegionCache *cache, - hwaddr addr, - hwaddr access_len) -{ - assert(cache->is_write); - if (likely(cache->ptr)) { - invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len); - } -} - -void address_space_cache_destroy(MemoryRegionCache *cache) -{ - if (!cache->mrs.mr) { - return; - } - - if (xen_enabled()) { - xen_invalidate_map_cache_entry(cache->ptr); - } - memory_region_unref(cache->mrs.mr); - flatview_unref(cache->fv); - cache->mrs.mr = NULL; - cache->fv = NULL; -} - -/* Called from RCU critical section. This function has the same - * semantics as address_space_translate, but it only works on a - * predefined range of a MemoryRegion that was mapped with - * address_space_cache_init. 
- */ -static inline MemoryRegion *address_space_translate_cached( - MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat, - hwaddr *plen, bool is_write, MemTxAttrs attrs) -{ - MemoryRegionSection section; - MemoryRegion *mr; - IOMMUMemoryRegion *iommu_mr; - AddressSpace *target_as; - - assert(!cache->ptr); - *xlat = addr + cache->xlat; - - mr = cache->mrs.mr; - iommu_mr = memory_region_get_iommu(mr); - if (!iommu_mr) { - /* MMIO region. */ - return mr; - } - - section = address_space_translate_iommu(iommu_mr, xlat, plen, - NULL, is_write, true, - &target_as, attrs); - return section.mr; -} - -/* Called from RCU critical section. address_space_read_cached uses this - * out of line function when the target is an MMIO or IOMMU region. - */ -MemTxResult -address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, - void *buf, hwaddr len) -{ - hwaddr addr1, l; - MemoryRegion *mr; - - l = len; - mr = address_space_translate_cached(cache, addr, &addr1, &l, false, - MEMTXATTRS_UNSPECIFIED); - return flatview_read_continue(cache->fv, - addr, MEMTXATTRS_UNSPECIFIED, buf, len, - addr1, l, mr); -} - -/* Called from RCU critical section. address_space_write_cached uses this - * out of line function when the target is an MMIO or IOMMU region. - */ -MemTxResult -address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, - const void *buf, hwaddr len) -{ - hwaddr addr1, l; - MemoryRegion *mr; - - l = len; - mr = address_space_translate_cached(cache, addr, &addr1, &l, true, - MEMTXATTRS_UNSPECIFIED); - return flatview_write_continue(cache->fv, - addr, MEMTXATTRS_UNSPECIFIED, buf, len, - addr1, l, mr); -} - -#define ARG1_DECL MemoryRegionCache *cache -#define ARG1 cache -#define SUFFIX _cached_slow -#define TRANSLATE(...) 
address_space_translate_cached(cache, __VA_ARGS__) -#define RCU_READ_LOCK() ((void)0) -#define RCU_READ_UNLOCK() ((void)0) -#include "memory_ldst.c.inc" - -/* virtual memory access for debug (includes writing to ROM) */ -int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, - void *ptr, size_t len, bool is_write) -{ - hwaddr phys_addr; - vaddr l, page; - uint8_t *buf = ptr; - - cpu_synchronize_state(cpu); - while (len > 0) { - int asidx; - MemTxAttrs attrs; - MemTxResult res; - - page = addr & TARGET_PAGE_MASK; - phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); - asidx = cpu_asidx_from_attrs(cpu, attrs); - /* if no physical page mapped, return an error */ - if (phys_addr == -1) - return -1; - l = (page + TARGET_PAGE_SIZE) - addr; - if (l > len) - l = len; - phys_addr += (addr & ~TARGET_PAGE_MASK); - if (is_write) { - res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, - attrs, buf, l); - } else { - res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, - attrs, buf, l); - } - if (res != MEMTX_OK) { - return -1; - } - len -= l; - buf += l; - addr += l; - } - return 0; -} - -/* - * Allows code that needs to deal with migration bitmaps etc to still be built - * target independent. - */ -size_t qemu_target_page_size(void) -{ - return TARGET_PAGE_SIZE; -} - -int qemu_target_page_mask(void) -{ - return TARGET_PAGE_MASK; -} - -int qemu_target_page_bits(void) -{ - return TARGET_PAGE_BITS; -} - -int qemu_target_page_bits_min(void) -{ - return TARGET_PAGE_BITS_MIN; -} - -/* Convert target pages to MiB (2**20). */ -size_t qemu_target_pages_to_MiB(size_t pages) -{ - int page_bits = TARGET_PAGE_BITS; - - /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. 
*/ - g_assert(page_bits < 20); - - return pages >> (20 - page_bits); -} - -bool cpu_physical_memory_is_io(hwaddr phys_addr) -{ - MemoryRegion*mr; - hwaddr l = 1; - - RCU_READ_LOCK_GUARD(); - mr = address_space_translate(&address_space_memory, - phys_addr, &phys_addr, &l, false, - MEMTXATTRS_UNSPECIFIED); - - return !(memory_region_is_ram(mr) || memory_region_is_romd(mr)); -} - -int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) -{ - RAMBlock *block; - int ret = 0; - - RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH(block) { - ret = func(block, opaque); - if (ret) { - break; - } - } - return ret; -} - -/* - * Unmap pages of memory from start to start+length such that - * they a) read as 0, b) Trigger whatever fault mechanism - * the OS provides for postcopy. - * The pages must be unmapped by the end of the function. - * Returns: 0 on success, none-0 on failure - * - */ -int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) -{ - int ret = -1; - - uint8_t *host_startaddr = rb->host + start; - - if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) { - error_report("ram_block_discard_range: Unaligned start address: %p", - host_startaddr); - goto err; - } - - if ((start + length) <= rb->max_length) { - bool need_madvise, need_fallocate; - if (!QEMU_IS_ALIGNED(length, rb->page_size)) { - error_report("ram_block_discard_range: Unaligned length: %zx", - length); - goto err; - } - - errno = ENOTSUP; /* If we are missing MADVISE etc */ - - /* The logic here is messy; - * madvise DONTNEED fails for hugepages - * fallocate works on hugepages and shmem - * shared anonymous memory requires madvise REMOVE - */ - need_madvise = (rb->page_size == qemu_host_page_size); - need_fallocate = rb->fd != -1; - if (need_fallocate) { - /* For a file, this causes the area of the file to be zero'd - * if read, and for hugetlbfs also causes it to be unmapped - * so a userfault will trigger. 
- */ -#ifdef CONFIG_FALLOCATE_PUNCH_HOLE - /* - * fallocate() will fail with readonly files. Let's print a - * proper error message. - */ - if (rb->flags & RAM_READONLY_FD) { - error_report("ram_block_discard_range: Discarding RAM" - " with readonly files is not supported"); - goto err; - - } - /* - * We'll discard data from the actual file, even though we only - * have a MAP_PRIVATE mapping, possibly messing with other - * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to - * change that behavior whithout violating the promised - * semantics of ram_block_discard_range(). - * - * Only warn, because it works as long as nobody else uses that - * file. - */ - if (!qemu_ram_is_shared(rb)) { - warn_report_once("ram_block_discard_range: Discarding RAM" - " in private file mappings is possibly" - " dangerous, because it will modify the" - " underlying file and will affect other" - " users of the file"); - } - - ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, length); - if (ret) { - ret = -errno; - error_report("ram_block_discard_range: Failed to fallocate " - "%s:%" PRIx64 " +%zx (%d)", - rb->idstr, start, length, ret); - goto err; - } -#else - ret = -ENOSYS; - error_report("ram_block_discard_range: fallocate not available/file" - "%s:%" PRIx64 " +%zx (%d)", - rb->idstr, start, length, ret); - goto err; -#endif - } - if (need_madvise) { - /* For normal RAM this causes it to be unmapped, - * for shared memory it causes the local mapping to disappear - * and to fall back on the file contents (which we just - * fallocate'd away). 
- */ -#if defined(CONFIG_MADVISE) - if (qemu_ram_is_shared(rb) && rb->fd < 0) { - ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE); - } else { - ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED); - } - if (ret) { - ret = -errno; - error_report("ram_block_discard_range: Failed to discard range " - "%s:%" PRIx64 " +%zx (%d)", - rb->idstr, start, length, ret); - goto err; - } -#else - ret = -ENOSYS; - error_report("ram_block_discard_range: MADVISE not available" - "%s:%" PRIx64 " +%zx (%d)", - rb->idstr, start, length, ret); - goto err; -#endif - } - trace_ram_block_discard_range(rb->idstr, host_startaddr, length, - need_madvise, need_fallocate, ret); - } else { - error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 - "/%zx/" RAM_ADDR_FMT")", - rb->idstr, start, length, rb->max_length); - } - -err: - return ret; -} - -bool ramblock_is_pmem(RAMBlock *rb) -{ - return rb->flags & RAM_PMEM; -} - -static void mtree_print_phys_entries(int start, int end, int skip, int ptr) -{ - if (start == end - 1) { - qemu_printf("\t%3d ", start); - } else { - qemu_printf("\t%3d..%-3d ", start, end - 1); - } - qemu_printf(" skip=%d ", skip); - if (ptr == PHYS_MAP_NODE_NIL) { - qemu_printf(" ptr=NIL"); - } else if (!skip) { - qemu_printf(" ptr=#%d", ptr); - } else { - qemu_printf(" ptr=[%d]", ptr); - } - qemu_printf("\n"); -} - -#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ - int128_sub((size), int128_one())) : 0) - -void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root) -{ - int i; - - qemu_printf(" Dispatch\n"); - qemu_printf(" Physical sections\n"); - - for (i = 0; i < d->map.sections_nb; ++i) { - MemoryRegionSection *s = d->map.sections + i; - const char *names[] = { " [unassigned]", " [not dirty]", - " [ROM]", " [watch]" }; - - qemu_printf(" #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx - " %s%s%s%s%s", - i, - s->offset_within_address_space, - s->offset_within_address_space + MR_SIZE(s->size), - s->mr->name ? 
s->mr->name : "(noname)", - i < ARRAY_SIZE(names) ? names[i] : "", - s->mr == root ? " [ROOT]" : "", - s == d->mru_section ? " [MRU]" : "", - s->mr->is_iommu ? " [iommu]" : ""); - - if (s->mr->alias) { - qemu_printf(" alias=%s", s->mr->alias->name ? - s->mr->alias->name : "noname"); - } - qemu_printf("\n"); - } - - qemu_printf(" Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n", - P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip); - for (i = 0; i < d->map.nodes_nb; ++i) { - int j, jprev; - PhysPageEntry prev; - Node *n = d->map.nodes + i; - - qemu_printf(" [%d]\n", i); - - for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) { - PhysPageEntry *pe = *n + j; - - if (pe->ptr == prev.ptr && pe->skip == prev.skip) { - continue; - } - - mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); - - jprev = j; - prev = *pe; - } - - if (jprev != ARRAY_SIZE(*n)) { - mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); - } - } -} - -/* Require any discards to work. */ -static unsigned int ram_block_discard_required_cnt; -/* Require only coordinated discards to work. */ -static unsigned int ram_block_coordinated_discard_required_cnt; -/* Disable any discards. */ -static unsigned int ram_block_discard_disabled_cnt; -/* Disable only uncoordinated discards. 
*/ -static unsigned int ram_block_uncoordinated_discard_disabled_cnt; -static QemuMutex ram_block_discard_disable_mutex; - -static void ram_block_discard_disable_mutex_lock(void) -{ - static gsize initialized; - - if (g_once_init_enter(&initialized)) { - qemu_mutex_init(&ram_block_discard_disable_mutex); - g_once_init_leave(&initialized, 1); - } - qemu_mutex_lock(&ram_block_discard_disable_mutex); -} - -static void ram_block_discard_disable_mutex_unlock(void) -{ - qemu_mutex_unlock(&ram_block_discard_disable_mutex); -} - -int ram_block_discard_disable(bool state) -{ - int ret = 0; - - ram_block_discard_disable_mutex_lock(); - if (!state) { - ram_block_discard_disabled_cnt--; - } else if (ram_block_discard_required_cnt || - ram_block_coordinated_discard_required_cnt) { - ret = -EBUSY; - } else { - ram_block_discard_disabled_cnt++; - } - ram_block_discard_disable_mutex_unlock(); - return ret; -} - -int ram_block_uncoordinated_discard_disable(bool state) -{ - int ret = 0; - - ram_block_discard_disable_mutex_lock(); - if (!state) { - ram_block_uncoordinated_discard_disabled_cnt--; - } else if (ram_block_discard_required_cnt) { - ret = -EBUSY; - } else { - ram_block_uncoordinated_discard_disabled_cnt++; - } - ram_block_discard_disable_mutex_unlock(); - return ret; -} - -int ram_block_discard_require(bool state) -{ - int ret = 0; - - ram_block_discard_disable_mutex_lock(); - if (!state) { - ram_block_discard_required_cnt--; - } else if (ram_block_discard_disabled_cnt || - ram_block_uncoordinated_discard_disabled_cnt) { - ret = -EBUSY; - } else { - ram_block_discard_required_cnt++; - } - ram_block_discard_disable_mutex_unlock(); - return ret; -} - -int ram_block_coordinated_discard_require(bool state) -{ - int ret = 0; - - ram_block_discard_disable_mutex_lock(); - if (!state) { - ram_block_coordinated_discard_required_cnt--; - } else if (ram_block_discard_disabled_cnt) { - ret = -EBUSY; - } else { - ram_block_coordinated_discard_required_cnt++; - } - 
ram_block_discard_disable_mutex_unlock(); - return ret; -} - -bool ram_block_discard_is_disabled(void) -{ - return qatomic_read(&ram_block_discard_disabled_cnt) || - qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt); -} - -bool ram_block_discard_is_required(void) -{ - return qatomic_read(&ram_block_discard_required_cnt) || - qatomic_read(&ram_block_coordinated_discard_required_cnt); -} diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c deleted file mode 100644 index 74f4e41..0000000 --- a/softmmu/qdev-monitor.c +++ /dev/null @@ -1,1148 +0,0 @@ -/* - * Dynamic device configuration and creation. - * - * Copyright (c) 2009 CodeSourcery - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "monitor/hmp.h" -#include "monitor/monitor.h" -#include "monitor/qdev.h" -#include "sysemu/arch_init.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-qdev.h" -#include "qapi/qmp/dispatch.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qerror.h" -#include "qapi/qmp/qstring.h" -#include "qapi/qobject-input-visitor.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" -#include "qemu/help_option.h" -#include "qemu/option.h" -#include "qemu/qemu-print.h" -#include "qemu/option_int.h" -#include "sysemu/block-backend.h" -#include "migration/misc.h" -#include "migration/migration.h" -#include "qemu/cutils.h" -#include "hw/qdev-properties.h" -#include "hw/clock.h" -#include "hw/boards.h" - -/* - * Aliases were a bad idea from the start. Let's keep them - * from spreading further. - */ -typedef struct QDevAlias -{ - const char *typename; - const char *alias; - uint32_t arch_mask; -} QDevAlias; - -/* default virtio transport per architecture */ -#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \ - QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \ - QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \ - QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \ - QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA | \ - QEMU_ARCH_LOONGARCH) -#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X) -#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K) - -/* Please keep this table sorted by typename. 
*/ -static const QDevAlias qdev_alias_table[] = { - { "AC97", "ac97" }, /* -soundhw name */ - { "e1000", "e1000-82540em" }, - { "ES1370", "es1370" }, /* -soundhw name */ - { "ich9-ahci", "ahci" }, - { "lsi53c895a", "lsi" }, - { "virtio-9p-device", "virtio-9p", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-9p-pci", "virtio-9p", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-balloon-device", "virtio-balloon", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-balloon-ccw", "virtio-balloon", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-balloon-pci", "virtio-balloon", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-blk-device", "virtio-blk", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-blk-ccw", "virtio-blk", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-blk-pci", "virtio-blk", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-gpu-device", "virtio-gpu", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-gpu-ccw", "virtio-gpu", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-keyboard-device", "virtio-keyboard", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-keyboard-pci", "virtio-keyboard", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-mouse-device", "virtio-mouse", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-mouse-ccw", "virtio-mouse", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-mouse-pci", "virtio-mouse", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-net-device", "virtio-net", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-net-ccw", "virtio-net", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-net-pci", "virtio-net", QEMU_ARCH_VIRTIO_PCI }, - 
{ "virtio-rng-device", "virtio-rng", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-rng-ccw", "virtio-rng", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-rng-pci", "virtio-rng", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-scsi-device", "virtio-scsi", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-scsi-ccw", "virtio-scsi", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-scsi-pci", "virtio-scsi", QEMU_ARCH_VIRTIO_PCI }, - { "virtio-serial-device", "virtio-serial", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-serial-ccw", "virtio-serial", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-serial-pci", "virtio-serial", QEMU_ARCH_VIRTIO_PCI}, - { "virtio-tablet-device", "virtio-tablet", QEMU_ARCH_VIRTIO_MMIO }, - { "virtio-tablet-ccw", "virtio-tablet", QEMU_ARCH_VIRTIO_CCW }, - { "virtio-tablet-pci", "virtio-tablet", QEMU_ARCH_VIRTIO_PCI }, - { } -}; - -static const char *qdev_class_get_alias(DeviceClass *dc) -{ - const char *typename = object_class_get_name(OBJECT_CLASS(dc)); - int i; - - for (i = 0; qdev_alias_table[i].typename; i++) { - if (qdev_alias_table[i].arch_mask && - !(qdev_alias_table[i].arch_mask & arch_type)) { - continue; - } - - if (strcmp(qdev_alias_table[i].typename, typename) == 0) { - return qdev_alias_table[i].alias; - } - } - - return NULL; -} - -static bool qdev_class_has_alias(DeviceClass *dc) -{ - return (qdev_class_get_alias(dc) != NULL); -} - -static void qdev_print_devinfo(DeviceClass *dc) -{ - qemu_printf("name \"%s\"", object_class_get_name(OBJECT_CLASS(dc))); - if (dc->bus_type) { - qemu_printf(", bus %s", dc->bus_type); - } - if (qdev_class_has_alias(dc)) { - qemu_printf(", alias \"%s\"", qdev_class_get_alias(dc)); - } - if (dc->desc) { - qemu_printf(", desc \"%s\"", dc->desc); - } - if (!dc->user_creatable) { - qemu_printf(", no-user"); - } - qemu_printf("\n"); -} - -static void qdev_print_devinfos(bool show_no_user) -{ - static const char *cat_name[DEVICE_CATEGORY_MAX + 1] = { - [DEVICE_CATEGORY_BRIDGE] = "Controller/Bridge/Hub", - [DEVICE_CATEGORY_USB] = "USB", - [DEVICE_CATEGORY_STORAGE] = "Storage", - 
[DEVICE_CATEGORY_NETWORK] = "Network", - [DEVICE_CATEGORY_INPUT] = "Input", - [DEVICE_CATEGORY_DISPLAY] = "Display", - [DEVICE_CATEGORY_SOUND] = "Sound", - [DEVICE_CATEGORY_MISC] = "Misc", - [DEVICE_CATEGORY_CPU] = "CPU", - [DEVICE_CATEGORY_WATCHDOG]= "Watchdog", - [DEVICE_CATEGORY_MAX] = "Uncategorized", - }; - GSList *list, *elt; - int i; - bool cat_printed; - - module_load_qom_all(); - list = object_class_get_list_sorted(TYPE_DEVICE, false); - - for (i = 0; i <= DEVICE_CATEGORY_MAX; i++) { - cat_printed = false; - for (elt = list; elt; elt = elt->next) { - DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data, - TYPE_DEVICE); - if ((i < DEVICE_CATEGORY_MAX - ? !test_bit(i, dc->categories) - : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX)) - || (!show_no_user - && !dc->user_creatable)) { - continue; - } - if (!cat_printed) { - qemu_printf("%s%s devices:\n", i ? "\n" : "", cat_name[i]); - cat_printed = true; - } - qdev_print_devinfo(dc); - } - } - - g_slist_free(list); -} - -static const char *find_typename_by_alias(const char *alias) -{ - int i; - - for (i = 0; qdev_alias_table[i].alias; i++) { - if (qdev_alias_table[i].arch_mask && - !(qdev_alias_table[i].arch_mask & arch_type)) { - continue; - } - - if (strcmp(qdev_alias_table[i].alias, alias) == 0) { - return qdev_alias_table[i].typename; - } - } - - return NULL; -} - -static DeviceClass *qdev_get_device_class(const char **driver, Error **errp) -{ - ObjectClass *oc; - DeviceClass *dc; - const char *original_name = *driver; - - oc = module_object_class_by_name(*driver); - if (!oc) { - const char *typename = find_typename_by_alias(*driver); - - if (typename) { - *driver = typename; - oc = module_object_class_by_name(*driver); - } - } - - if (!object_class_dynamic_cast(oc, TYPE_DEVICE)) { - if (*driver != original_name) { - error_setg(errp, "'%s' (alias '%s') is not a valid device model" - " name", original_name, *driver); - } else { - error_setg(errp, "'%s' is not a valid device model name", *driver); 
- } - return NULL; - } - - if (object_class_is_abstract(oc)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", - "a non-abstract device type"); - return NULL; - } - - dc = DEVICE_CLASS(oc); - if (!dc->user_creatable || - (phase_check(PHASE_MACHINE_READY) && !dc->hotpluggable)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", - "a pluggable device type"); - return NULL; - } - - if (object_class_dynamic_cast(oc, TYPE_SYS_BUS_DEVICE)) { - /* sysbus devices need to be allowed by the machine */ - MachineClass *mc = MACHINE_CLASS(object_get_class(qdev_get_machine())); - if (!device_type_is_dynamic_sysbus(mc, *driver)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", - "a dynamic sysbus device type for the machine"); - return NULL; - } - } - - return dc; -} - - -int qdev_device_help(QemuOpts *opts) -{ - Error *local_err = NULL; - const char *driver; - ObjectPropertyInfoList *prop_list; - ObjectPropertyInfoList *prop; - GPtrArray *array; - int i; - - driver = qemu_opt_get(opts, "driver"); - if (driver && is_help_option(driver)) { - qdev_print_devinfos(false); - return 1; - } - - if (!driver || !qemu_opt_has_help_opt(opts)) { - return 0; - } - - if (!object_class_by_name(driver)) { - const char *typename = find_typename_by_alias(driver); - - if (typename) { - driver = typename; - } - } - - prop_list = qmp_device_list_properties(driver, &local_err); - if (local_err) { - goto error; - } - - if (prop_list) { - qemu_printf("%s options:\n", driver); - } else { - qemu_printf("There are no options for %s.\n", driver); - } - array = g_ptr_array_new(); - for (prop = prop_list; prop; prop = prop->next) { - g_ptr_array_add(array, - object_property_help(prop->value->name, - prop->value->type, - prop->value->default_value, - prop->value->description)); - } - g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0); - for (i = 0; i < array->len; i++) { - qemu_printf("%s\n", (char *)array->pdata[i]); - } - g_ptr_array_set_free_func(array, g_free); - 
g_ptr_array_free(array, true); - qapi_free_ObjectPropertyInfoList(prop_list); - return 1; - -error: - error_report_err(local_err); - return 1; -} - -static Object *qdev_get_peripheral(void) -{ - static Object *dev; - - if (dev == NULL) { - dev = container_get(qdev_get_machine(), "/peripheral"); - } - - return dev; -} - -static Object *qdev_get_peripheral_anon(void) -{ - static Object *dev; - - if (dev == NULL) { - dev = container_get(qdev_get_machine(), "/peripheral-anon"); - } - - return dev; -} - -static void qbus_error_append_bus_list_hint(DeviceState *dev, - Error *const *errp) -{ - BusState *child; - const char *sep = " "; - - error_append_hint(errp, "child buses at \"%s\":", - dev->id ? dev->id : object_get_typename(OBJECT(dev))); - QLIST_FOREACH(child, &dev->child_bus, sibling) { - error_append_hint(errp, "%s\"%s\"", sep, child->name); - sep = ", "; - } - error_append_hint(errp, "\n"); -} - -static void qbus_error_append_dev_list_hint(BusState *bus, - Error *const *errp) -{ - BusChild *kid; - const char *sep = " "; - - error_append_hint(errp, "devices at \"%s\":", bus->name); - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - error_append_hint(errp, "%s\"%s\"", sep, - object_get_typename(OBJECT(dev))); - if (dev->id) { - error_append_hint(errp, "/\"%s\"", dev->id); - } - sep = ", "; - } - error_append_hint(errp, "\n"); -} - -static BusState *qbus_find_bus(DeviceState *dev, char *elem) -{ - BusState *child; - - QLIST_FOREACH(child, &dev->child_bus, sibling) { - if (strcmp(child->name, elem) == 0) { - return child; - } - } - return NULL; -} - -static DeviceState *qbus_find_dev(BusState *bus, char *elem) -{ - BusChild *kid; - - /* - * try to match in order: - * (1) instance id, if present - * (2) driver name - * (3) driver alias, if present - */ - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - if (dev->id && strcmp(dev->id, elem) == 0) { - return dev; - } - } - QTAILQ_FOREACH(kid, 
&bus->children, sibling) { - DeviceState *dev = kid->child; - if (strcmp(object_get_typename(OBJECT(dev)), elem) == 0) { - return dev; - } - } - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - DeviceClass *dc = DEVICE_GET_CLASS(dev); - - if (qdev_class_has_alias(dc) && - strcmp(qdev_class_get_alias(dc), elem) == 0) { - return dev; - } - } - return NULL; -} - -static inline bool qbus_is_full(BusState *bus) -{ - BusClass *bus_class; - - if (bus->full) { - return true; - } - bus_class = BUS_GET_CLASS(bus); - return bus_class->max_dev && bus->num_children >= bus_class->max_dev; -} - -/* - * Search the tree rooted at @bus for a bus. - * If @name, search for a bus with that name. Note that bus names - * need not be unique. Yes, that's screwed up. - * Else search for a bus that is a subtype of @bus_typename. - * If more than one exists, prefer one that can take another device. - * Return the bus if found, else %NULL. - */ -static BusState *qbus_find_recursive(BusState *bus, const char *name, - const char *bus_typename) -{ - BusChild *kid; - BusState *pick, *child, *ret; - bool match; - - assert(name || bus_typename); - if (name) { - match = !strcmp(bus->name, name); - } else { - match = !!object_dynamic_cast(OBJECT(bus), bus_typename); - } - - if (match && !qbus_is_full(bus)) { - return bus; /* root matches and isn't full */ - } - - pick = match ? 
bus : NULL; - - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - QLIST_FOREACH(child, &dev->child_bus, sibling) { - ret = qbus_find_recursive(child, name, bus_typename); - if (ret && !qbus_is_full(ret)) { - return ret; /* a descendant matches and isn't full */ - } - if (ret && !pick) { - pick = ret; - } - } - } - - /* root or a descendant matches, but is full */ - return pick; -} - -static BusState *qbus_find(const char *path, Error **errp) -{ - DeviceState *dev; - BusState *bus; - char elem[128]; - int pos, len; - - /* find start element */ - if (path[0] == '/') { - bus = sysbus_get_default(); - pos = 0; - } else { - if (sscanf(path, "%127[^/]%n", elem, &len) != 1) { - assert(!path[0]); - elem[0] = len = 0; - } - bus = qbus_find_recursive(sysbus_get_default(), elem, NULL); - if (!bus) { - error_setg(errp, "Bus '%s' not found", elem); - return NULL; - } - pos = len; - } - - for (;;) { - assert(path[pos] == '/' || !path[pos]); - while (path[pos] == '/') { - pos++; - } - if (path[pos] == '\0') { - break; - } - - /* find device */ - if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) { - g_assert_not_reached(); - elem[0] = len = 0; - } - pos += len; - dev = qbus_find_dev(bus, elem); - if (!dev) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", elem); - qbus_error_append_dev_list_hint(bus, errp); - return NULL; - } - - assert(path[pos] == '/' || !path[pos]); - while (path[pos] == '/') { - pos++; - } - if (path[pos] == '\0') { - /* last specified element is a device. 
If it has exactly - * one child bus accept it nevertheless */ - if (dev->num_child_bus == 1) { - bus = QLIST_FIRST(&dev->child_bus); - break; - } - if (dev->num_child_bus) { - error_setg(errp, "Device '%s' has multiple child buses", - elem); - qbus_error_append_bus_list_hint(dev, errp); - } else { - error_setg(errp, "Device '%s' has no child bus", elem); - } - return NULL; - } - - /* find bus */ - if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) { - g_assert_not_reached(); - elem[0] = len = 0; - } - pos += len; - bus = qbus_find_bus(dev, elem); - if (!bus) { - error_setg(errp, "Bus '%s' not found", elem); - qbus_error_append_bus_list_hint(dev, errp); - return NULL; - } - } - - if (qbus_is_full(bus)) { - error_setg(errp, "Bus '%s' is full", path); - return NULL; - } - return bus; -} - -/* Takes ownership of @id, will be freed when deleting the device */ -const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) -{ - ObjectProperty *prop; - - assert(!dev->id && !dev->realized); - - /* - * object_property_[try_]add_child() below will assert the device - * has no parent - */ - if (id) { - prop = object_property_try_add_child(qdev_get_peripheral(), id, - OBJECT(dev), NULL); - if (prop) { - dev->id = id; - } else { - error_setg(errp, "Duplicate device ID '%s'", id); - g_free(id); - return NULL; - } - } else { - static int anon_count; - gchar *name = g_strdup_printf("device[%d]", anon_count++); - prop = object_property_add_child(qdev_get_peripheral_anon(), name, - OBJECT(dev)); - g_free(name); - } - - return prop->name; -} - -DeviceState *qdev_device_add_from_qdict(const QDict *opts, - bool from_json, Error **errp) -{ - ERRP_GUARD(); - DeviceClass *dc; - const char *driver, *path; - char *id; - DeviceState *dev = NULL; - BusState *bus = NULL; - - driver = qdict_get_try_str(opts, "driver"); - if (!driver) { - error_setg(errp, QERR_MISSING_PARAMETER, "driver"); - return NULL; - } - - /* find driver */ - dc = qdev_get_device_class(&driver, errp); - if (!dc) { - 
return NULL; - } - - /* find bus */ - path = qdict_get_try_str(opts, "bus"); - if (path != NULL) { - bus = qbus_find(path, errp); - if (!bus) { - return NULL; - } - if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { - error_setg(errp, "Device '%s' can't go on %s bus", - driver, object_get_typename(OBJECT(bus))); - return NULL; - } - } else if (dc->bus_type != NULL) { - bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type); - if (!bus || qbus_is_full(bus)) { - error_setg(errp, "No '%s' bus found for device '%s'", - dc->bus_type, driver); - return NULL; - } - } - - if (qdev_should_hide_device(opts, from_json, errp)) { - if (bus && !qbus_is_hotpluggable(bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); - } - return NULL; - } else if (*errp) { - return NULL; - } - - if (phase_check(PHASE_MACHINE_READY) && bus && !qbus_is_hotpluggable(bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); - return NULL; - } - - if (!migration_is_idle()) { - error_setg(errp, "device_add not allowed while migrating"); - return NULL; - } - - /* create device */ - dev = qdev_new(driver); - - /* Check whether the hotplug is allowed by the machine */ - if (phase_check(PHASE_MACHINE_READY)) { - if (!qdev_hotplug_allowed(dev, errp)) { - goto err_del_dev; - } - - if (!bus && !qdev_get_machine_hotplug_handler(dev)) { - /* No bus, no machine hotplug handler --> device is not hotpluggable */ - error_setg(errp, "Device '%s' can not be hotplugged on this machine", - driver); - goto err_del_dev; - } - } - - /* - * set dev's parent and register its id. - * If it fails it means the id is already taken. 
- */ - id = g_strdup(qdict_get_try_str(opts, "id")); - if (!qdev_set_id(dev, id, errp)) { - goto err_del_dev; - } - - /* set properties */ - dev->opts = qdict_clone_shallow(opts); - qdict_del(dev->opts, "driver"); - qdict_del(dev->opts, "bus"); - qdict_del(dev->opts, "id"); - - object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, - errp); - if (*errp) { - goto err_del_dev; - } - - if (!qdev_realize(dev, bus, errp)) { - goto err_del_dev; - } - return dev; - -err_del_dev: - if (dev) { - object_unparent(OBJECT(dev)); - object_unref(OBJECT(dev)); - } - return NULL; -} - -/* Takes ownership of @opts on success */ -DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) -{ - QDict *qdict = qemu_opts_to_qdict(opts, NULL); - DeviceState *ret; - - ret = qdev_device_add_from_qdict(qdict, false, errp); - if (ret) { - qemu_opts_del(opts); - } - qobject_unref(qdict); - return ret; -} - -#define qdev_printf(fmt, ...) monitor_printf(mon, "%*s" fmt, indent, "", ## __VA_ARGS__) -static void qbus_print(Monitor *mon, BusState *bus, int indent); - -static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props, - int indent) -{ - if (!props) - return; - for (; props->name; props++) { - char *value; - char *legacy_name = g_strdup_printf("legacy-%s", props->name); - - if (object_property_get_type(OBJECT(dev), legacy_name, NULL)) { - value = object_property_get_str(OBJECT(dev), legacy_name, NULL); - } else { - value = object_property_print(OBJECT(dev), props->name, true, - NULL); - } - g_free(legacy_name); - - if (!value) { - continue; - } - qdev_printf("%s = %s\n", props->name, - *value ? 
value : ""); - g_free(value); - } -} - -static void bus_print_dev(BusState *bus, Monitor *mon, DeviceState *dev, int indent) -{ - BusClass *bc = BUS_GET_CLASS(bus); - - if (bc->print_dev) { - bc->print_dev(mon, dev, indent); - } -} - -static void qdev_print(Monitor *mon, DeviceState *dev, int indent) -{ - ObjectClass *class; - BusState *child; - NamedGPIOList *ngl; - NamedClockList *ncl; - - qdev_printf("dev: %s, id \"%s\"\n", object_get_typename(OBJECT(dev)), - dev->id ? dev->id : ""); - indent += 2; - QLIST_FOREACH(ngl, &dev->gpios, node) { - if (ngl->num_in) { - qdev_printf("gpio-in \"%s\" %d\n", ngl->name ? ngl->name : "", - ngl->num_in); - } - if (ngl->num_out) { - qdev_printf("gpio-out \"%s\" %d\n", ngl->name ? ngl->name : "", - ngl->num_out); - } - } - QLIST_FOREACH(ncl, &dev->clocks, node) { - g_autofree char *freq_str = clock_display_freq(ncl->clock); - qdev_printf("clock-%s%s \"%s\" freq_hz=%s\n", - ncl->output ? "out" : "in", - ncl->alias ? " (alias)" : "", - ncl->name, freq_str); - } - class = object_get_class(OBJECT(dev)); - do { - qdev_print_props(mon, dev, DEVICE_CLASS(class)->props_, indent); - class = object_class_get_parent(class); - } while (class != object_class_by_name(TYPE_DEVICE)); - bus_print_dev(dev->parent_bus, mon, dev, indent); - QLIST_FOREACH(child, &dev->child_bus, sibling) { - qbus_print(mon, child, indent); - } -} - -static void qbus_print(Monitor *mon, BusState *bus, int indent) -{ - BusChild *kid; - - qdev_printf("bus: %s\n", bus->name); - indent += 2; - qdev_printf("type %s\n", object_get_typename(OBJECT(bus))); - QTAILQ_FOREACH(kid, &bus->children, sibling) { - DeviceState *dev = kid->child; - qdev_print(mon, dev, indent); - } -} -#undef qdev_printf - -void hmp_info_qtree(Monitor *mon, const QDict *qdict) -{ - if (sysbus_get_default()) - qbus_print(mon, sysbus_get_default(), 0); -} - -void hmp_info_qdm(Monitor *mon, const QDict *qdict) -{ - qdev_print_devinfos(true); -} - -void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp) -{ - QemuOpts *opts; - DeviceState *dev; - - opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, errp); - if (!opts) { - return; - } - if (!monitor_cur_is_qmp() && qdev_device_help(opts)) { - qemu_opts_del(opts); - return; - } - dev = qdev_device_add(opts, errp); - - /* - * Drain all pending RCU callbacks. This is done because - * some bus related operations can delay a device removal - * (in this case this can happen if device is added and then - * removed due to a configuration error) - * to a RCU callback, but user might expect that this interface - * will finish its job completely once qmp command returns result - * to the user - */ - drain_call_rcu(); - - if (!dev) { - qemu_opts_del(opts); - return; - } - object_unref(OBJECT(dev)); -} - -static DeviceState *find_device_state(const char *id, Error **errp) -{ - Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); - DeviceState *dev; - - if (!obj) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", id); - return NULL; - } - - dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE); - if (!dev) { - error_setg(errp, "%s is not a hotpluggable device", id); - return NULL; - } - - return dev; -} - -void qdev_unplug(DeviceState *dev, Error **errp) -{ - DeviceClass *dc = DEVICE_GET_CLASS(dev); - HotplugHandler *hotplug_ctrl; - HotplugHandlerClass *hdc; - Error *local_err = NULL; - - if (qdev_unplug_blocked(dev, errp)) { - return; - } - - if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); - return; - } - - if (!dc->hotpluggable) { - error_setg(errp, QERR_DEVICE_NO_HOTPLUG, - object_get_typename(OBJECT(dev))); - return; - } - - if (!migration_is_idle() && !dev->allow_unplug_during_migration) { - error_setg(errp, "device_del not allowed while migrating"); - return; - } - - qdev_hot_removed = true; - - hotplug_ctrl = qdev_get_hotplug_handler(dev); - /* hotpluggable device MUST have 
HotplugHandler, if it doesn't - * then something is very wrong with it */ - g_assert(hotplug_ctrl); - - /* If device supports async unplug just request it to be done, - * otherwise just remove it synchronously */ - hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); - if (hdc->unplug_request) { - hotplug_handler_unplug_request(hotplug_ctrl, dev, &local_err); - } else { - hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); - if (!local_err) { - object_unparent(OBJECT(dev)); - } - } - error_propagate(errp, local_err); -} - -void qmp_device_del(const char *id, Error **errp) -{ - DeviceState *dev = find_device_state(id, errp); - if (dev != NULL) { - if (dev->pending_deleted_event && - (dev->pending_deleted_expires_ms == 0 || - dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { - error_setg(errp, "Device %s is already in the " - "process of unplug", id); - return; - } - - qdev_unplug(dev, errp); - } -} - -void hmp_device_add(Monitor *mon, const QDict *qdict) -{ - Error *err = NULL; - - qmp_device_add((QDict *)qdict, NULL, &err); - hmp_handle_error(mon, err); -} - -void hmp_device_del(Monitor *mon, const QDict *qdict) -{ - const char *id = qdict_get_str(qdict, "id"); - Error *err = NULL; - - qmp_device_del(id, &err); - hmp_handle_error(mon, err); -} - -void device_add_completion(ReadLineState *rs, int nb_args, const char *str) -{ - GSList *list, *elt; - size_t len; - - if (nb_args != 2) { - return; - } - - len = strlen(str); - readline_set_completion_index(rs, len); - list = elt = object_class_get_list(TYPE_DEVICE, false); - while (elt) { - DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data, - TYPE_DEVICE); - - if (dc->user_creatable) { - readline_add_completion_of(rs, str, - object_class_get_name(OBJECT_CLASS(dc))); - } - elt = elt->next; - } - g_slist_free(list); -} - -static int qdev_add_hotpluggable_device(Object *obj, void *opaque) -{ - GSList **list = opaque; - DeviceState *dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE); 
- - if (dev == NULL) { - return 0; - } - - if (dev->realized && object_property_get_bool(obj, "hotpluggable", NULL)) { - *list = g_slist_append(*list, dev); - } - - return 0; -} - -static GSList *qdev_build_hotpluggable_device_list(Object *peripheral) -{ - GSList *list = NULL; - - object_child_foreach(peripheral, qdev_add_hotpluggable_device, &list); - - return list; -} - -static void peripheral_device_del_completion(ReadLineState *rs, - const char *str) -{ - Object *peripheral = container_get(qdev_get_machine(), "/peripheral"); - GSList *list, *item; - - list = qdev_build_hotpluggable_device_list(peripheral); - if (!list) { - return; - } - - for (item = list; item; item = g_slist_next(item)) { - DeviceState *dev = item->data; - - if (dev->id) { - readline_add_completion_of(rs, str, dev->id); - } - } - - g_slist_free(list); -} - -void device_del_completion(ReadLineState *rs, int nb_args, const char *str) -{ - if (nb_args != 2) { - return; - } - - readline_set_completion_index(rs, strlen(str)); - peripheral_device_del_completion(rs, str); -} - -BlockBackend *blk_by_qdev_id(const char *id, Error **errp) -{ - DeviceState *dev; - BlockBackend *blk; - - GLOBAL_STATE_CODE(); - - dev = find_device_state(id, errp); - if (dev == NULL) { - return NULL; - } - - blk = blk_by_dev(dev); - if (!blk) { - error_setg(errp, "Device does not have a block device backend"); - } - return blk; -} - -QemuOptsList qemu_device_opts = { - .name = "device", - .implied_opt_name = "driver", - .head = QTAILQ_HEAD_INITIALIZER(qemu_device_opts.head), - .desc = { - /* - * no elements => accept any - * sanity checking will happen later - * when setting device properties - */ - { /* end of list */ } - }, -}; - -QemuOptsList qemu_global_opts = { - .name = "global", - .head = QTAILQ_HEAD_INITIALIZER(qemu_global_opts.head), - .desc = { - { - .name = "driver", - .type = QEMU_OPT_STRING, - },{ - .name = "property", - .type = QEMU_OPT_STRING, - },{ - .name = "value", - .type = QEMU_OPT_STRING, - }, - { /* 
end of list */ } - }, -}; - -int qemu_global_option(const char *str) -{ - char driver[64], property[64]; - QemuOpts *opts; - int rc, offset; - - rc = sscanf(str, "%63[^.=].%63[^=]%n", driver, property, &offset); - if (rc == 2 && str[offset] == '=') { - opts = qemu_opts_create(&qemu_global_opts, NULL, 0, &error_abort); - qemu_opt_set(opts, "driver", driver, &error_abort); - qemu_opt_set(opts, "property", property, &error_abort); - qemu_opt_set(opts, "value", str + offset + 1, &error_abort); - return 0; - } - - opts = qemu_opts_parse_noisily(&qemu_global_opts, str, false); - if (!opts) { - return -1; - } - if (!qemu_opt_get(opts, "driver") - || !qemu_opt_get(opts, "property") - || !qemu_opt_get(opts, "value")) { - error_report("options 'driver', 'property', and 'value'" - " are required"); - return -1; - } - - return 0; -} - -bool qmp_command_available(const QmpCommand *cmd, Error **errp) -{ - if (!phase_check(PHASE_MACHINE_READY) && - !(cmd->options & QCO_ALLOW_PRECONFIG)) { - error_setg(errp, "The command '%s' is permitted only after machine initialization has completed", - cmd->name); - return false; - } - return true; -} diff --git a/softmmu/qemu-seccomp.c b/softmmu/qemu-seccomp.c deleted file mode 100644 index 4d7439e..0000000 --- a/softmmu/qemu-seccomp.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * QEMU seccomp mode 2 support with libseccomp - * - * Copyright IBM, Corp. 2012 - * - * Authors: - * Eduardo Otubo - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. 
- */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu/config-file.h" -#include "qemu/option.h" -#include "qemu/module.h" -#include -#include -#include "sysemu/seccomp.h" -#include - -/* For some architectures (notably ARM) cacheflush is not supported until - * libseccomp 2.2.3, but configure enforces that we are using a more recent - * version on those hosts, so it is OK for this check to be less strict. - */ -#if SCMP_VER_MAJOR >= 3 - #define HAVE_CACHEFLUSH -#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2 - #define HAVE_CACHEFLUSH -#endif - -struct QemuSeccompSyscall { - int32_t num; - uint8_t set; - uint8_t narg; - const struct scmp_arg_cmp *arg_cmp; - uint32_t action; -}; - -const struct scmp_arg_cmp sched_setscheduler_arg[] = { - /* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */ - { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE } -}; - -/* - * See 'NOTES' in 'man 2 clone' - s390 & cross have 'flags' in - * different position to other architectures - */ -#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS) -#define CLONE_FLAGS_ARG 1 -#else -#define CLONE_FLAGS_ARG 0 -#endif - -#ifndef CLONE_PIDFD -# define CLONE_PIDFD 0x00001000 -#endif - -#define REQUIRE_CLONE_FLAG(flag) \ - const struct scmp_arg_cmp clone_arg ## flag[] = { \ - { .arg = CLONE_FLAGS_ARG, \ - .op = SCMP_CMP_MASKED_EQ, \ - .datum_a = flag, .datum_b = 0 } } - -#define FORBID_CLONE_FLAG(flag) \ - const struct scmp_arg_cmp clone_arg ## flag[] = { \ - { .arg = CLONE_FLAGS_ARG, \ - .op = SCMP_CMP_MASKED_EQ, \ - .datum_a = flag, .datum_b = flag } } - -#define RULE_CLONE_FLAG(flag) \ - { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, \ - ARRAY_SIZE(clone_arg ## flag), clone_arg ## flag, SCMP_ACT_TRAP } - -/* If no CLONE_* flags are set, except CSIGNAL, deny */ -const struct scmp_arg_cmp clone_arg_none[] = { - { .arg = CLONE_FLAGS_ARG, - .op = SCMP_CMP_MASKED_EQ, - .datum_a = ~(CSIGNAL), .datum_b = 0 } -}; - -/* - * pthread_create should always 
set all of these. - */ -REQUIRE_CLONE_FLAG(CLONE_VM); -REQUIRE_CLONE_FLAG(CLONE_FS); -REQUIRE_CLONE_FLAG(CLONE_FILES); -REQUIRE_CLONE_FLAG(CLONE_SIGHAND); -REQUIRE_CLONE_FLAG(CLONE_THREAD); -REQUIRE_CLONE_FLAG(CLONE_SYSVSEM); -REQUIRE_CLONE_FLAG(CLONE_SETTLS); -REQUIRE_CLONE_FLAG(CLONE_PARENT_SETTID); -REQUIRE_CLONE_FLAG(CLONE_CHILD_CLEARTID); -/* - * Musl sets this in pthread_create too, but it is - * obsolete and harmless since its behaviour is - * subsumed under CLONE_THREAD - */ -/*REQUIRE_CLONE_FLAG(CLONE_DETACHED);*/ - - -/* - * These all indicate an attempt to spawn a process - * instead of a thread, or other undesirable scenarios - */ -FORBID_CLONE_FLAG(CLONE_PIDFD); -FORBID_CLONE_FLAG(CLONE_PTRACE); -FORBID_CLONE_FLAG(CLONE_VFORK); -FORBID_CLONE_FLAG(CLONE_PARENT); -FORBID_CLONE_FLAG(CLONE_NEWNS); -FORBID_CLONE_FLAG(CLONE_UNTRACED); -FORBID_CLONE_FLAG(CLONE_NEWCGROUP); -FORBID_CLONE_FLAG(CLONE_NEWUTS); -FORBID_CLONE_FLAG(CLONE_NEWIPC); -FORBID_CLONE_FLAG(CLONE_NEWUSER); -FORBID_CLONE_FLAG(CLONE_NEWPID); -FORBID_CLONE_FLAG(CLONE_NEWNET); -FORBID_CLONE_FLAG(CLONE_IO); - - -static const struct QemuSeccompSyscall denylist[] = { - /* default set of syscalls that should get blocked */ - { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(getpmsg), 
QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT, - 0, NULL, SCMP_ACT_TRAP }, - /* obsolete */ - { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE, - 0, NULL, SCMP_ACT_TRAP }, - /* privileged */ - { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setpgid), 
QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED, - 0, NULL, SCMP_ACT_TRAP }, - /* spawn */ - { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN, - 0, NULL, SCMP_ACT_TRAP }, - { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, - ARRAY_SIZE(clone_arg_none), clone_arg_none, SCMP_ACT_TRAP }, - RULE_CLONE_FLAG(CLONE_VM), - RULE_CLONE_FLAG(CLONE_FS), - RULE_CLONE_FLAG(CLONE_FILES), - RULE_CLONE_FLAG(CLONE_SIGHAND), - RULE_CLONE_FLAG(CLONE_THREAD), - RULE_CLONE_FLAG(CLONE_SYSVSEM), - RULE_CLONE_FLAG(CLONE_SETTLS), - RULE_CLONE_FLAG(CLONE_PARENT_SETTID), - RULE_CLONE_FLAG(CLONE_CHILD_CLEARTID), - /*RULE_CLONE_FLAG(CLONE_DETACHED),*/ - RULE_CLONE_FLAG(CLONE_PIDFD), - RULE_CLONE_FLAG(CLONE_PTRACE), - RULE_CLONE_FLAG(CLONE_VFORK), - RULE_CLONE_FLAG(CLONE_PARENT), - RULE_CLONE_FLAG(CLONE_NEWNS), - RULE_CLONE_FLAG(CLONE_UNTRACED), - RULE_CLONE_FLAG(CLONE_NEWCGROUP), - RULE_CLONE_FLAG(CLONE_NEWUTS), - RULE_CLONE_FLAG(CLONE_NEWIPC), - RULE_CLONE_FLAG(CLONE_NEWUSER), - RULE_CLONE_FLAG(CLONE_NEWPID), - RULE_CLONE_FLAG(CLONE_NEWNET), - RULE_CLONE_FLAG(CLONE_IO), -#ifdef __SNR_clone3 - { SCMP_SYS(clone3), QEMU_SECCOMP_SET_SPAWN, - 0, NULL, SCMP_ACT_ERRNO(ENOSYS) }, -#endif -#ifdef __SNR_execveat - { SCMP_SYS(execveat), QEMU_SECCOMP_SET_SPAWN }, -#endif - { SCMP_SYS(setns), QEMU_SECCOMP_SET_SPAWN }, - { SCMP_SYS(unshare), 
QEMU_SECCOMP_SET_SPAWN }, - /* resource control */ - { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL, - 0, NULL, SCMP_ACT_ERRNO(EPERM) }, - { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL, - 0, NULL, SCMP_ACT_ERRNO(EPERM) }, - { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL, - ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg, - SCMP_ACT_ERRNO(EPERM) }, - { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL, - 0, NULL, SCMP_ACT_ERRNO(EPERM) }, -}; - -static inline __attribute__((unused)) int -qemu_seccomp(unsigned int operation, unsigned int flags, void *args) -{ -#ifdef __NR_seccomp - return syscall(__NR_seccomp, operation, flags, args); -#else - errno = ENOSYS; - return -1; -#endif -} - -static uint32_t qemu_seccomp_update_action(uint32_t action) -{ -#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ - defined(SECCOMP_RET_KILL_PROCESS) - if (action == SCMP_ACT_TRAP) { - static int kill_process = -1; - if (kill_process == -1) { - uint32_t testaction = SECCOMP_RET_KILL_PROCESS; - - if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &testaction) == 0) { - kill_process = 1; - } else { - kill_process = 0; - } - } - if (kill_process == 1) { - return SCMP_ACT_KILL_PROCESS; - } - } -#endif - return action; -} - - -static int seccomp_start(uint32_t seccomp_opts, Error **errp) -{ - int rc = -1; - unsigned int i = 0; - scmp_filter_ctx ctx; - - ctx = seccomp_init(SCMP_ACT_ALLOW); - if (ctx == NULL) { - error_setg(errp, "failed to initialize seccomp context"); - goto seccomp_return; - } - -#if defined(CONFIG_SECCOMP_SYSRAWRC) - /* - * This must be the first seccomp_attr_set() call to have full - * error propagation from subsequent seccomp APIs. 
- */ - rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1); - if (rc != 0) { - error_setg_errno(errp, -rc, - "failed to set seccomp rawrc attribute"); - goto seccomp_return; - } -#endif - - rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); - if (rc != 0) { - error_setg_errno(errp, -rc, - "failed to set seccomp thread synchronization"); - goto seccomp_return; - } - - for (i = 0; i < ARRAY_SIZE(denylist); i++) { - uint32_t action; - if (!(seccomp_opts & denylist[i].set)) { - continue; - } - - action = qemu_seccomp_update_action(denylist[i].action); - rc = seccomp_rule_add_array(ctx, action, denylist[i].num, - denylist[i].narg, denylist[i].arg_cmp); - if (rc < 0) { - error_setg_errno(errp, -rc, - "failed to add seccomp denylist rules"); - goto seccomp_return; - } - } - - rc = seccomp_load(ctx); - if (rc < 0) { - error_setg_errno(errp, -rc, - "failed to load seccomp syscall filter in kernel"); - } - - seccomp_return: - seccomp_release(ctx); - return rc < 0 ? -1 : 0; -} - -int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) -{ - if (qemu_opt_get_bool(opts, "enable", false)) { - uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT - | QEMU_SECCOMP_SET_OBSOLETE; - const char *value = NULL; - - value = qemu_opt_get(opts, "obsolete"); - if (value) { - if (g_str_equal(value, "allow")) { - seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; - } else if (g_str_equal(value, "deny")) { - /* this is the default option, this if is here - * to provide a little bit of consistency for - * the command line */ - } else { - error_setg(errp, "invalid argument for obsolete"); - return -1; - } - } - - value = qemu_opt_get(opts, "elevateprivileges"); - if (value) { - if (g_str_equal(value, "deny")) { - seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; - } else if (g_str_equal(value, "children")) { - seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; - - /* calling prctl directly because we're - * not sure if host has CAP_SYS_ADMIN set*/ - if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { - error_setg(errp, 
"failed to set no_new_privs aborting"); - return -1; - } - } else if (g_str_equal(value, "allow")) { - /* default value */ - } else { - error_setg(errp, "invalid argument for elevateprivileges"); - return -1; - } - } - - value = qemu_opt_get(opts, "spawn"); - if (value) { - if (g_str_equal(value, "deny")) { - seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; - } else if (g_str_equal(value, "allow")) { - /* default value */ - } else { - error_setg(errp, "invalid argument for spawn"); - return -1; - } - } - - value = qemu_opt_get(opts, "resourcecontrol"); - if (value) { - if (g_str_equal(value, "deny")) { - seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; - } else if (g_str_equal(value, "allow")) { - /* default value */ - } else { - error_setg(errp, "invalid argument for resourcecontrol"); - return -1; - } - } - - if (seccomp_start(seccomp_opts, errp) < 0) { - return -1; - } - } - - return 0; -} - -static QemuOptsList qemu_sandbox_opts = { - .name = "sandbox", - .implied_opt_name = "enable", - .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head), - .desc = { - { - .name = "enable", - .type = QEMU_OPT_BOOL, - }, - { - .name = "obsolete", - .type = QEMU_OPT_STRING, - }, - { - .name = "elevateprivileges", - .type = QEMU_OPT_STRING, - }, - { - .name = "spawn", - .type = QEMU_OPT_STRING, - }, - { - .name = "resourcecontrol", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static void seccomp_register(void) -{ - bool add = false; - - /* FIXME: use seccomp_api_get() >= 2 check when released */ - -#if defined(SECCOMP_FILTER_FLAG_TSYNC) - int check; - - /* check host TSYNC capability, it returns errno == ENOSYS if unavailable */ - check = qemu_seccomp(SECCOMP_SET_MODE_FILTER, - SECCOMP_FILTER_FLAG_TSYNC, NULL); - if (check < 0 && errno == EFAULT) { - add = true; - } -#endif - - if (add) { - qemu_add_opts(&qemu_sandbox_opts); - } -} -opts_init(seccomp_register); diff --git a/softmmu/qtest.c b/softmmu/qtest.c deleted file mode 100644 index 35b643a..0000000 --- 
a/softmmu/qtest.c +++ /dev/null @@ -1,1070 +0,0 @@ -/* - * Test Server - * - * Copyright IBM, Corp. 2011 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "sysemu/qtest.h" -#include "sysemu/runstate.h" -#include "chardev/char-fe.h" -#include "exec/ioport.h" -#include "exec/memory.h" -#include "exec/tswap.h" -#include "hw/qdev-core.h" -#include "hw/irq.h" -#include "qemu/accel.h" -#include "sysemu/cpu-timers.h" -#include "qemu/config-file.h" -#include "qemu/option.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qemu/cutils.h" -#include "qom/object_interfaces.h" - -#define MAX_IRQ 256 - -#define TYPE_QTEST "qtest" - -OBJECT_DECLARE_SIMPLE_TYPE(QTest, QTEST) - -struct QTest { - Object parent; - - bool has_machine_link; - char *chr_name; - Chardev *chr; - CharBackend qtest_chr; - char *log; -}; - -bool qtest_allowed; - -static DeviceState *irq_intercept_dev; -static FILE *qtest_log_fp; -static QTest *qtest; -static GString *inbuf; -static int irq_levels[MAX_IRQ]; -static GTimer *timer; -static bool qtest_opened; -static void (*qtest_server_send)(void*, const char*); -static void *qtest_server_send_opaque; - -#define FMT_timeval "%.06f" - -/** - * DOC: QTest Protocol - * - * Line based protocol, request/response based. Server can send async messages - * so clients should always handle many async messages before the response - * comes in. - * - * Valid requests - * ^^^^^^^^^^^^^^ - * - * Clock management: - * """"""""""""""""" - * - * The qtest client is completely in charge of the QEMU_CLOCK_VIRTUAL. qtest commands - * let you adjust the value of the clock (monotonically). All the commands - * return the current value of the clock in nanoseconds. - * - * .. code-block:: none - * - * > clock_step - * < OK VALUE - * - * Advance the clock to the next deadline. 
Useful when waiting for - * asynchronous events. - * - * .. code-block:: none - * - * > clock_step NS - * < OK VALUE - * - * Advance the clock by NS nanoseconds. - * - * .. code-block:: none - * - * > clock_set NS - * < OK VALUE - * - * Advance the clock to NS nanoseconds (do nothing if it's already past). - * - * PIO and memory access: - * """""""""""""""""""""" - * - * .. code-block:: none - * - * > outb ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > outw ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > outl ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > inb ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > inw ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > inl ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > writeb ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > writew ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > writel ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > writeq ADDR VALUE - * < OK - * - * .. code-block:: none - * - * > readb ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > readw ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > readl ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > readq ADDR - * < OK VALUE - * - * .. code-block:: none - * - * > read ADDR SIZE - * < OK DATA - * - * .. code-block:: none - * - * > write ADDR SIZE DATA - * < OK - * - * .. code-block:: none - * - * > b64read ADDR SIZE - * < OK B64_DATA - * - * .. code-block:: none - * - * > b64write ADDR SIZE B64_DATA - * < OK - * - * .. code-block:: none - * - * > memset ADDR SIZE VALUE - * < OK - * - * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0. - * For 'memset' a zero size is permitted and does nothing. - * - * DATA is an arbitrarily long hex number prefixed with '0x'. If it's smaller - * than the expected size, the value will be zero filled at the end of the data - * sequence. 
- * - * B64_DATA is an arbitrarily long base64 encoded string. - * If the sizes do not match, the data will be truncated. - * - * IRQ management: - * """"""""""""""" - * - * .. code-block:: none - * - * > irq_intercept_in QOM-PATH - * < OK - * - * .. code-block:: none - * - * > irq_intercept_out QOM-PATH - * < OK - * - * Attach to the gpio-in (resp. gpio-out) pins exported by the device at - * QOM-PATH. When the pin is triggered, one of the following async messages - * will be printed to the qtest stream:: - * - * IRQ raise NUM - * IRQ lower NUM - * - * where NUM is an IRQ number. For the PC, interrupts can be intercepted - * simply with "irq_intercept_in ioapic" (note that IRQ0 comes out with - * NUM=0 even though it is remapped to GSI 2). - * - * Setting interrupt level: - * """""""""""""""""""""""" - * - * .. code-block:: none - * - * > set_irq_in QOM-PATH NAME NUM LEVEL - * < OK - * - * where NAME is the name of the irq/gpio list, NUM is an IRQ number and - * LEVEL is an signed integer IRQ level. - * - * Forcibly set the given interrupt pin to the given level. - * - */ - -static int hex2nib(char ch) -{ - if (ch >= '0' && ch <= '9') { - return ch - '0'; - } else if (ch >= 'a' && ch <= 'f') { - return 10 + (ch - 'a'); - } else if (ch >= 'A' && ch <= 'F') { - return 10 + (ch - 'A'); - } else { - return -1; - } -} - -void qtest_send_prefix(CharBackend *chr) -{ - if (!qtest_log_fp || !qtest_opened) { - return; - } - - fprintf(qtest_log_fp, "[S +" FMT_timeval "] ", g_timer_elapsed(timer, NULL)); -} - -static void G_GNUC_PRINTF(1, 2) qtest_log_send(const char *fmt, ...) 
-{ - va_list ap; - - if (!qtest_log_fp || !qtest_opened) { - return; - } - - qtest_send_prefix(NULL); - - va_start(ap, fmt); - vfprintf(qtest_log_fp, fmt, ap); - va_end(ap); -} - -static void qtest_server_char_be_send(void *opaque, const char *str) -{ - size_t len = strlen(str); - CharBackend* chr = (CharBackend *)opaque; - qemu_chr_fe_write_all(chr, (uint8_t *)str, len); - if (qtest_log_fp && qtest_opened) { - fprintf(qtest_log_fp, "%s", str); - } -} - -static void qtest_send(CharBackend *chr, const char *str) -{ - qtest_server_send(qtest_server_send_opaque, str); -} - -void qtest_sendf(CharBackend *chr, const char *fmt, ...) -{ - va_list ap; - gchar *buffer; - - va_start(ap, fmt); - buffer = g_strdup_vprintf(fmt, ap); - qtest_send(chr, buffer); - g_free(buffer); - va_end(ap); -} - -static void qtest_irq_handler(void *opaque, int n, int level) -{ - qemu_irq old_irq = *(qemu_irq *)opaque; - qemu_set_irq(old_irq, level); - - if (irq_levels[n] != level) { - CharBackend *chr = &qtest->qtest_chr; - irq_levels[n] = level; - qtest_send_prefix(chr); - qtest_sendf(chr, "IRQ %s %d\n", - level ? 
"raise" : "lower", n); - } -} - -static int64_t qtest_clock_counter; - -int64_t qtest_get_virtual_clock(void) -{ - return qatomic_read_i64(&qtest_clock_counter); -} - -static void qtest_set_virtual_clock(int64_t count) -{ - qatomic_set_i64(&qtest_clock_counter, count); -} - -static void qtest_clock_warp(int64_t dest) -{ - int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - AioContext *aio_context; - assert(qtest_enabled()); - aio_context = qemu_get_aio_context(); - while (clock < dest) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - int64_t warp = qemu_soonest_timeout(dest - clock, deadline); - - qtest_set_virtual_clock(qtest_get_virtual_clock() + warp); - - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); - timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - } - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); -} - -static bool (*process_command_cb)(CharBackend *chr, gchar **words); - -void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)) -{ - assert(!process_command_cb); /* Switch to a list if we need more than one */ - - process_command_cb = pc_cb; -} - -static void qtest_install_gpio_out_intercept(DeviceState *dev, const char *name, int n) -{ - qemu_irq *disconnected = g_new0(qemu_irq, 1); - qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler, - disconnected, n); - - *disconnected = qdev_intercept_gpio_out(dev, icpt, name, n); -} - -static void qtest_process_command(CharBackend *chr, gchar **words) -{ - const gchar *command; - - g_assert(words); - - command = words[0]; - - if (qtest_log_fp) { - int i; - - fprintf(qtest_log_fp, "[R +" FMT_timeval "]", g_timer_elapsed(timer, NULL)); - for (i = 0; words[i]; i++) { - fprintf(qtest_log_fp, " %s", words[i]); - } - fprintf(qtest_log_fp, "\n"); - } - - g_assert(command); - if (strcmp(words[0], "irq_intercept_out") == 0 - || strcmp(words[0], "irq_intercept_in") == 0) { - DeviceState *dev; - 
NamedGPIOList *ngl; - bool is_named; - bool is_outbound; - bool interception_succeeded = false; - - g_assert(words[1]); - is_named = words[2] != NULL; - is_outbound = words[0][14] == 'o'; - dev = DEVICE(object_resolve_path(words[1], NULL)); - if (!dev) { - qtest_send_prefix(chr); - qtest_send(chr, "FAIL Unknown device\n"); - return; - } - - if (is_named && !is_outbound) { - qtest_send_prefix(chr); - qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n"); - return; - } - - if (irq_intercept_dev) { - qtest_send_prefix(chr); - if (irq_intercept_dev != dev) { - qtest_send(chr, "FAIL IRQ intercept already enabled\n"); - } else { - qtest_send(chr, "OK\n"); - } - return; - } - - QLIST_FOREACH(ngl, &dev->gpios, node) { - /* We don't support inbound interception of named GPIOs yet */ - if (is_outbound) { - /* NULL is valid and matchable, for "unnamed GPIO" */ - if (g_strcmp0(ngl->name, words[2]) == 0) { - int i; - for (i = 0; i < ngl->num_out; ++i) { - qtest_install_gpio_out_intercept(dev, ngl->name, i); - } - interception_succeeded = true; - } - } else { - qemu_irq_intercept_in(ngl->in, qtest_irq_handler, - ngl->num_in); - interception_succeeded = true; - } - } - - qtest_send_prefix(chr); - if (interception_succeeded) { - irq_intercept_dev = dev; - qtest_send(chr, "OK\n"); - } else { - qtest_send(chr, "FAIL No intercepts installed\n"); - } - } else if (strcmp(words[0], "set_irq_in") == 0) { - DeviceState *dev; - qemu_irq irq; - char *name; - int ret; - int num; - int level; - - g_assert(words[1] && words[2] && words[3] && words[4]); - - dev = DEVICE(object_resolve_path(words[1], NULL)); - if (!dev) { - qtest_send_prefix(chr); - qtest_send(chr, "FAIL Unknown device\n"); - return; - } - - if (strcmp(words[2], "unnamed-gpio-in") == 0) { - name = NULL; - } else { - name = words[2]; - } - - ret = qemu_strtoi(words[3], NULL, 0, &num); - g_assert(!ret); - ret = qemu_strtoi(words[4], NULL, 0, &level); - g_assert(!ret); - - irq = qdev_get_gpio_in_named(dev, 
name, num); - - qemu_set_irq(irq, level); - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "outb") == 0 || - strcmp(words[0], "outw") == 0 || - strcmp(words[0], "outl") == 0) { - unsigned long addr; - unsigned long value; - int ret; - - g_assert(words[1] && words[2]); - ret = qemu_strtoul(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtoul(words[2], NULL, 0, &value); - g_assert(ret == 0); - g_assert(addr <= 0xffff); - - if (words[0][3] == 'b') { - cpu_outb(addr, value); - } else if (words[0][3] == 'w') { - cpu_outw(addr, value); - } else if (words[0][3] == 'l') { - cpu_outl(addr, value); - } - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "inb") == 0 || - strcmp(words[0], "inw") == 0 || - strcmp(words[0], "inl") == 0) { - unsigned long addr; - uint32_t value = -1U; - int ret; - - g_assert(words[1]); - ret = qemu_strtoul(words[1], NULL, 0, &addr); - g_assert(ret == 0); - g_assert(addr <= 0xffff); - - if (words[0][2] == 'b') { - value = cpu_inb(addr); - } else if (words[0][2] == 'w') { - value = cpu_inw(addr); - } else if (words[0][2] == 'l') { - value = cpu_inl(addr); - } - qtest_send_prefix(chr); - qtest_sendf(chr, "OK 0x%04x\n", value); - } else if (strcmp(words[0], "writeb") == 0 || - strcmp(words[0], "writew") == 0 || - strcmp(words[0], "writel") == 0 || - strcmp(words[0], "writeq") == 0) { - uint64_t addr; - uint64_t value; - int ret; - - g_assert(words[1] && words[2]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &value); - g_assert(ret == 0); - - if (words[0][5] == 'b') { - uint8_t data = value; - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 1); - } else if (words[0][5] == 'w') { - uint16_t data = value; - tswap16s(&data); - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 2); - } else if (words[0][5] == 'l') { - uint32_t data = value; - tswap32s(&data); 
- address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 4); - } else if (words[0][5] == 'q') { - uint64_t data = value; - tswap64s(&data); - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 8); - } - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "readb") == 0 || - strcmp(words[0], "readw") == 0 || - strcmp(words[0], "readl") == 0 || - strcmp(words[0], "readq") == 0) { - uint64_t addr; - uint64_t value = UINT64_C(-1); - int ret; - - g_assert(words[1]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - - if (words[0][4] == 'b') { - uint8_t data; - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 1); - value = data; - } else if (words[0][4] == 'w') { - uint16_t data; - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 2); - value = tswap16(data); - } else if (words[0][4] == 'l') { - uint32_t data; - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &data, 4); - value = tswap32(data); - } else if (words[0][4] == 'q') { - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - &value, 8); - tswap64s(&value); - } - qtest_send_prefix(chr); - qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value); - } else if (strcmp(words[0], "read") == 0) { - uint64_t addr, len, i; - uint8_t *data; - char *enc; - int ret; - - g_assert(words[1] && words[2]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &len); - g_assert(ret == 0); - /* We'd send garbage to libqtest if len is 0 */ - g_assert(len); - - data = g_malloc(len); - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, - len); - - enc = g_malloc(2 * len + 1); - for (i = 0; i < len; i++) { - sprintf(&enc[i * 2], "%02x", data[i]); - } - - qtest_send_prefix(chr); - qtest_sendf(chr, "OK 0x%s\n", enc); - - g_free(data); - g_free(enc); - } else if (strcmp(words[0], "b64read") == 0) { - 
uint64_t addr, len; - uint8_t *data; - gchar *b64_data; - int ret; - - g_assert(words[1] && words[2]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &len); - g_assert(ret == 0); - - data = g_malloc(len); - address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, - len); - b64_data = g_base64_encode(data, len); - qtest_send_prefix(chr); - qtest_sendf(chr, "OK %s\n", b64_data); - - g_free(data); - g_free(b64_data); - } else if (strcmp(words[0], "write") == 0) { - uint64_t addr, len, i; - uint8_t *data; - size_t data_len; - int ret; - - g_assert(words[1] && words[2] && words[3]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &len); - g_assert(ret == 0); - - data_len = strlen(words[3]); - if (data_len < 3) { - qtest_send(chr, "ERR invalid argument size\n"); - return; - } - - data = g_malloc(len); - for (i = 0; i < len; i++) { - if ((i * 2 + 4) <= data_len) { - data[i] = hex2nib(words[3][i * 2 + 2]) << 4; - data[i] |= hex2nib(words[3][i * 2 + 3]); - } else { - data[i] = 0; - } - } - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, - len); - g_free(data); - - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "memset") == 0) { - uint64_t addr, len; - uint8_t *data; - unsigned long pattern; - int ret; - - g_assert(words[1] && words[2] && words[3]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &len); - g_assert(ret == 0); - ret = qemu_strtoul(words[3], NULL, 0, &pattern); - g_assert(ret == 0); - - if (len) { - data = g_malloc(len); - memset(data, pattern, len); - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, - data, len); - g_free(data); - } - - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "b64write") == 0) { - uint64_t addr, len; - uint8_t *data; - size_t 
data_len; - gsize out_len; - int ret; - - g_assert(words[1] && words[2] && words[3]); - ret = qemu_strtou64(words[1], NULL, 0, &addr); - g_assert(ret == 0); - ret = qemu_strtou64(words[2], NULL, 0, &len); - g_assert(ret == 0); - - data_len = strlen(words[3]); - if (data_len < 3) { - qtest_send(chr, "ERR invalid argument size\n"); - return; - } - - data = g_base64_decode_inplace(words[3], &out_len); - if (out_len != len) { - qtest_log_send("b64write: data length mismatch (told %"PRIu64", " - "found %zu)\n", - len, out_len); - out_len = MIN(out_len, len); - } - - address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, - len); - - qtest_send_prefix(chr); - qtest_send(chr, "OK\n"); - } else if (strcmp(words[0], "endianness") == 0) { - qtest_send_prefix(chr); - if (target_words_bigendian()) { - qtest_sendf(chr, "OK big\n"); - } else { - qtest_sendf(chr, "OK little\n"); - } - } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) { - int64_t ns; - - if (words[1]) { - int ret = qemu_strtoi64(words[1], NULL, 0, &ns); - g_assert(ret == 0); - } else { - ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - } - qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns); - qtest_send_prefix(chr); - qtest_sendf(chr, "OK %"PRIi64"\n", - (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - } else if (strcmp(words[0], "module_load") == 0) { - Error *local_err = NULL; - int rv; - g_assert(words[1] && words[2]); - - qtest_send_prefix(chr); - rv = module_load(words[1], words[2], &local_err); - if (rv > 0) { - qtest_sendf(chr, "OK\n"); - } else { - if (rv < 0) { - error_report_err(local_err); - } - qtest_sendf(chr, "FAIL\n"); - } - } else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) { - int64_t ns; - int ret; - - g_assert(words[1]); - ret = qemu_strtoi64(words[1], NULL, 0, &ns); - g_assert(ret == 0); - qtest_clock_warp(ns); - qtest_send_prefix(chr); - qtest_sendf(chr, "OK %"PRIi64"\n", - 
(int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - } else if (process_command_cb && process_command_cb(chr, words)) { - /* Command got consumed by the callback handler */ - } else { - qtest_send_prefix(chr); - qtest_sendf(chr, "FAIL Unknown command '%s'\n", words[0]); - } -} - -static void qtest_process_inbuf(CharBackend *chr, GString *inbuf) -{ - char *end; - - while ((end = strchr(inbuf->str, '\n')) != NULL) { - size_t offset; - GString *cmd; - gchar **words; - - offset = end - inbuf->str; - - cmd = g_string_new_len(inbuf->str, offset); - g_string_erase(inbuf, 0, offset + 1); - - words = g_strsplit(cmd->str, " ", 0); - qtest_process_command(chr, words); - g_strfreev(words); - - g_string_free(cmd, TRUE); - } -} - -static void qtest_read(void *opaque, const uint8_t *buf, int size) -{ - CharBackend *chr = opaque; - - g_string_append_len(inbuf, (const gchar *)buf, size); - qtest_process_inbuf(chr, inbuf); -} - -static int qtest_can_read(void *opaque) -{ - return 1024; -} - -static void qtest_event(void *opaque, QEMUChrEvent event) -{ - int i; - - switch (event) { - case CHR_EVENT_OPENED: - /* - * We used to call qemu_system_reset() here, hoping we could - * use the same process for multiple tests that way. Never - * used. Injects an extra reset even when it's not used, and - * that can mess up tests, e.g. -boot once. 
- */ - for (i = 0; i < ARRAY_SIZE(irq_levels); i++) { - irq_levels[i] = 0; - } - - g_clear_pointer(&timer, g_timer_destroy); - timer = g_timer_new(); - qtest_opened = true; - if (qtest_log_fp) { - fprintf(qtest_log_fp, "[I " FMT_timeval "] OPENED\n", g_timer_elapsed(timer, NULL)); - } - break; - case CHR_EVENT_CLOSED: - qtest_opened = false; - if (qtest_log_fp) { - fprintf(qtest_log_fp, "[I +" FMT_timeval "] CLOSED\n", g_timer_elapsed(timer, NULL)); - } - g_clear_pointer(&timer, g_timer_destroy); - break; - default: - break; - } -} - -void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) -{ - ERRP_GUARD(); - Chardev *chr; - Object *qtest; - - chr = qemu_chr_new("qtest", qtest_chrdev, NULL); - if (chr == NULL) { - error_setg(errp, "Failed to initialize device for qtest: \"%s\"", - qtest_chrdev); - return; - } - - qtest = object_new(TYPE_QTEST); - object_property_set_str(qtest, "chardev", chr->label, &error_abort); - if (qtest_log) { - object_property_set_str(qtest, "log", qtest_log, &error_abort); - } - object_property_add_child(qdev_get_machine(), "qtest", qtest); - user_creatable_complete(USER_CREATABLE(qtest), errp); - if (*errp) { - object_unparent(qtest); - } - object_unref(OBJECT(chr)); - object_unref(qtest); -} - -static bool qtest_server_start(QTest *q, Error **errp) -{ - Chardev *chr = q->chr; - const char *qtest_log = q->log; - - if (qtest_log) { - if (strcmp(qtest_log, "none") != 0) { - qtest_log_fp = fopen(qtest_log, "w+"); - } - } else { - qtest_log_fp = stderr; - } - - if (!qemu_chr_fe_init(&q->qtest_chr, chr, errp)) { - return false; - } - qemu_chr_fe_set_handlers(&q->qtest_chr, qtest_can_read, qtest_read, - qtest_event, NULL, &q->qtest_chr, NULL, true); - qemu_chr_fe_set_echo(&q->qtest_chr, true); - - inbuf = g_string_new(""); - - if (!qtest_server_send) { - qtest_server_set_send_handler(qtest_server_char_be_send, &q->qtest_chr); - } - qtest = q; - return true; -} - -void qtest_server_set_send_handler(void 
(*send)(void*, const char*), - void *opaque) -{ - qtest_server_send = send; - qtest_server_send_opaque = opaque; -} - -bool qtest_driver(void) -{ - return qtest && qtest->qtest_chr.chr != NULL; -} - -void qtest_server_inproc_recv(void *dummy, const char *buf) -{ - static GString *gstr; - if (!gstr) { - gstr = g_string_new(NULL); - } - g_string_append(gstr, buf); - if (gstr->str[gstr->len - 1] == '\n') { - qtest_process_inbuf(NULL, gstr); - g_string_truncate(gstr, 0); - } -} - -static void qtest_complete(UserCreatable *uc, Error **errp) -{ - QTest *q = QTEST(uc); - if (qtest) { - error_setg(errp, "Only one instance of qtest can be created"); - return; - } - if (!q->chr_name) { - error_setg(errp, "No backend specified"); - return; - } - - if (OBJECT(uc)->parent != qdev_get_machine()) { - q->has_machine_link = true; - object_property_add_const_link(qdev_get_machine(), "qtest", OBJECT(uc)); - } else { - /* -qtest was used. */ - } - - qtest_server_start(q, errp); -} - -static void qtest_unparent(Object *obj) -{ - QTest *q = QTEST(obj); - - if (qtest == q) { - qemu_chr_fe_disconnect(&q->qtest_chr); - assert(!qtest_opened); - qemu_chr_fe_deinit(&q->qtest_chr, false); - if (qtest_log_fp) { - fclose(qtest_log_fp); - qtest_log_fp = NULL; - } - qtest = NULL; - } - - if (q->has_machine_link) { - object_property_del(qdev_get_machine(), "qtest"); - q->has_machine_link = false; - } -} - -static void qtest_set_log(Object *obj, const char *value, Error **errp) -{ - QTest *q = QTEST(obj); - - if (qtest == q) { - error_setg(errp, "Property 'log' can not be set now"); - } else { - g_free(q->log); - q->log = g_strdup(value); - } -} - -static char *qtest_get_log(Object *obj, Error **errp) -{ - QTest *q = QTEST(obj); - - return g_strdup(q->log); -} - -static void qtest_set_chardev(Object *obj, const char *value, Error **errp) -{ - QTest *q = QTEST(obj); - Chardev *chr; - - if (qtest == q) { - error_setg(errp, "Property 'chardev' can not be set now"); - return; - } - - chr = 
qemu_chr_find(value); - if (!chr) { - error_setg(errp, "Cannot find character device '%s'", value); - return; - } - - g_free(q->chr_name); - q->chr_name = g_strdup(value); - - if (q->chr) { - object_unref(q->chr); - } - q->chr = chr; - object_ref(chr); -} - -static char *qtest_get_chardev(Object *obj, Error **errp) -{ - QTest *q = QTEST(obj); - - return g_strdup(q->chr_name); -} - -static void qtest_class_init(ObjectClass *oc, void *data) -{ - UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); - - oc->unparent = qtest_unparent; - ucc->complete = qtest_complete; - - object_class_property_add_str(oc, "chardev", - qtest_get_chardev, qtest_set_chardev); - object_class_property_add_str(oc, "log", - qtest_get_log, qtest_set_log); -} - -static const TypeInfo qtest_info = { - .name = TYPE_QTEST, - .parent = TYPE_OBJECT, - .class_init = qtest_class_init, - .instance_size = sizeof(QTest), - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } -}; - -static void register_types(void) -{ - type_register_static(&qtest_info); -} - -type_init(register_types); diff --git a/softmmu/rtc.c b/softmmu/rtc.c deleted file mode 100644 index 4904581..0000000 --- a/softmmu/rtc.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * RTC configuration and clock read - * - * Copyright (c) 2003-2020 QEMU contributors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/cutils.h" -#include "qapi/error.h" -#include "qapi/qmp/qerror.h" -#include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/timer.h" -#include "qom/object.h" -#include "sysemu/replay.h" -#include "sysemu/sysemu.h" -#include "sysemu/rtc.h" -#include "hw/rtc/mc146818rtc.h" - -static enum { - RTC_BASE_UTC, - RTC_BASE_LOCALTIME, - RTC_BASE_DATETIME, -} rtc_base_type = RTC_BASE_UTC; -static time_t rtc_ref_start_datetime; -static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ -static int rtc_host_datetime_offset = -1; /* valid & used only with - RTC_BASE_DATETIME */ -QEMUClockType rtc_clock; -/***********************************************************/ -/* RTC reference time/date access */ -static time_t qemu_ref_timedate(QEMUClockType clock) -{ - time_t value = qemu_clock_get_ms(clock) / 1000; - switch (clock) { - case QEMU_CLOCK_REALTIME: - value -= rtc_realtime_clock_offset; - /* fall through */ - case QEMU_CLOCK_VIRTUAL: - value += rtc_ref_start_datetime; - break; - case QEMU_CLOCK_HOST: - if (rtc_base_type == RTC_BASE_DATETIME) { - value -= rtc_host_datetime_offset; - } - break; - default: - assert(0); - } - return value; -} - -void qemu_get_timedate(struct tm *tm, time_t offset) -{ - time_t ti = qemu_ref_timedate(rtc_clock); - - ti += offset; - - switch (rtc_base_type) { - case RTC_BASE_DATETIME: - case RTC_BASE_UTC: - gmtime_r(&ti, tm); - break; - case RTC_BASE_LOCALTIME: - localtime_r(&ti, 
tm); - break; - } -} - -time_t qemu_timedate_diff(struct tm *tm) -{ - time_t seconds; - - switch (rtc_base_type) { - case RTC_BASE_DATETIME: - case RTC_BASE_UTC: - seconds = mktimegm(tm); - break; - case RTC_BASE_LOCALTIME: - { - struct tm tmp = *tm; - tmp.tm_isdst = -1; /* use timezone to figure it out */ - seconds = mktime(&tmp); - break; - } - default: - abort(); - } - - return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); -} - -static void configure_rtc_base_datetime(const char *startdate) -{ - time_t rtc_start_datetime; - struct tm tm; - - if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon, - &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 6) { - /* OK */ - } else if (sscanf(startdate, "%d-%d-%d", - &tm.tm_year, &tm.tm_mon, &tm.tm_mday) == 3) { - tm.tm_hour = 0; - tm.tm_min = 0; - tm.tm_sec = 0; - } else { - goto date_fail; - } - tm.tm_year -= 1900; - tm.tm_mon--; - rtc_start_datetime = mktimegm(&tm); - if (rtc_start_datetime == -1) { - date_fail: - error_report("invalid datetime format"); - error_printf("valid formats: " - "'2006-06-17T16:01:21' or '2006-06-17'\n"); - exit(1); - } - rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime; - rtc_ref_start_datetime = rtc_start_datetime; -} - -void configure_rtc(QemuOpts *opts) -{ - const char *value; - - /* Set defaults */ - rtc_clock = QEMU_CLOCK_HOST; - rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; - rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; - - value = qemu_opt_get(opts, "base"); - if (value) { - if (!strcmp(value, "utc")) { - rtc_base_type = RTC_BASE_UTC; - } else if (!strcmp(value, "localtime")) { - rtc_base_type = RTC_BASE_LOCALTIME; - replay_add_blocker("-rtc base=localtime"); - } else { - rtc_base_type = RTC_BASE_DATETIME; - configure_rtc_base_datetime(value); - } - } - value = qemu_opt_get(opts, "clock"); - if (value) { - if (!strcmp(value, "host")) { - rtc_clock = QEMU_CLOCK_HOST; - } else if (!strcmp(value, 
"rt")) { - rtc_clock = QEMU_CLOCK_REALTIME; - } else if (!strcmp(value, "vm")) { - rtc_clock = QEMU_CLOCK_VIRTUAL; - } else { - error_report("invalid option value '%s'", value); - exit(1); - } - } - value = qemu_opt_get(opts, "driftfix"); - if (value) { - if (!strcmp(value, "slew")) { - object_register_sugar_prop(TYPE_MC146818_RTC, - "lost_tick_policy", - "slew", - false); - if (!object_class_by_name(TYPE_MC146818_RTC)) { - warn_report("driftfix 'slew' is not available with this machine"); - } - } else if (!strcmp(value, "none")) { - /* discard is default */ - } else { - error_report("invalid option value '%s'", value); - exit(1); - } - } -} diff --git a/softmmu/runstate-action.c b/softmmu/runstate-action.c deleted file mode 100644 index ae0761a..0000000 --- a/softmmu/runstate-action.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2020 Oracle and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "sysemu/runstate-action.h" -#include "sysemu/watchdog.h" -#include "qemu/config-file.h" -#include "qapi/error.h" -#include "qemu/option_int.h" - -RebootAction reboot_action = REBOOT_ACTION_RESET; -ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF; -PanicAction panic_action = PANIC_ACTION_SHUTDOWN; - -/* - * Receives actions to be applied for specific guest events - * and sets the internal state as requested. 
- */ -void qmp_set_action(bool has_reboot, RebootAction reboot, - bool has_shutdown, ShutdownAction shutdown, - bool has_panic, PanicAction panic, - bool has_watchdog, WatchdogAction watchdog, - Error **errp) -{ - if (has_reboot) { - reboot_action = reboot; - } - - if (has_panic) { - panic_action = panic; - } - - if (has_watchdog) { - qmp_watchdog_set_action(watchdog, errp); - } - - /* Process shutdown last, in case the panic action needs to be altered */ - if (has_shutdown) { - shutdown_action = shutdown; - } -} diff --git a/softmmu/runstate-hmp-cmds.c b/softmmu/runstate-hmp-cmds.c deleted file mode 100644 index 2df670f..0000000 --- a/softmmu/runstate-hmp-cmds.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * HMP commands related to run state - * - * Copyright IBM, Corp. 2011 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "exec/cpu-common.h" -#include "monitor/hmp.h" -#include "monitor/monitor.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-run-state.h" -#include "qapi/qmp/qdict.h" -#include "qemu/accel.h" - -void hmp_info_status(Monitor *mon, const QDict *qdict) -{ - StatusInfo *info; - - info = qmp_query_status(NULL); - - monitor_printf(mon, "VM status: %s", - info->running ? 
"running" : "paused"); - - if (!info->running && info->status != RUN_STATE_PAUSED) { - monitor_printf(mon, " (%s)", RunState_str(info->status)); - } - - monitor_printf(mon, "\n"); - - qapi_free_StatusInfo(info); -} - -void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict) -{ - const char *option = qdict_get_try_str(qdict, "option"); - AccelState *accel = current_accel(); - bool newval; - - if (!object_property_find(OBJECT(accel), "one-insn-per-tb")) { - monitor_printf(mon, - "This accelerator does not support setting one-insn-per-tb\n"); - return; - } - - if (!option || !strcmp(option, "on")) { - newval = true; - } else if (!strcmp(option, "off")) { - newval = false; - } else { - monitor_printf(mon, "unexpected option %s\n", option); - return; - } - /* If the property exists then setting it can never fail */ - object_property_set_bool(OBJECT(accel), "one-insn-per-tb", - newval, &error_abort); -} - -void hmp_watchdog_action(Monitor *mon, const QDict *qdict) -{ - Error *err = NULL; - WatchdogAction action; - char *qapi_value; - - qapi_value = g_ascii_strdown(qdict_get_str(qdict, "action"), -1); - action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, &err); - g_free(qapi_value); - if (err) { - hmp_handle_error(mon, err); - return; - } - qmp_watchdog_set_action(action, &error_abort); -} - -void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str) -{ - int i; - - if (nb_args != 2) { - return; - } - readline_set_completion_index(rs, strlen(str)); - for (i = 0; i < WATCHDOG_ACTION__MAX; i++) { - readline_add_completion_of(rs, str, WatchdogAction_str(i)); - } -} diff --git a/softmmu/runstate.c b/softmmu/runstate.c deleted file mode 100644 index 1652ed0..0000000 --- a/softmmu/runstate.c +++ /dev/null @@ -1,871 +0,0 @@ -/* - * QEMU main system emulation loop - * - * Copyright (c) 2003-2020 QEMU contributors - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated 
documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "audio/audio.h" -#include "block/block.h" -#include "block/export.h" -#include "chardev/char.h" -#include "crypto/cipher.h" -#include "crypto/init.h" -#include "exec/cpu-common.h" -#include "gdbstub/syscalls.h" -#include "hw/boards.h" -#include "migration/misc.h" -#include "migration/postcopy-ram.h" -#include "monitor/monitor.h" -#include "net/net.h" -#include "net/vhost_net.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-run-state.h" -#include "qapi/qapi-events-run-state.h" -#include "qemu/accel.h" -#include "qemu/error-report.h" -#include "qemu/job.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/plugin.h" -#include "qemu/sockets.h" -#include "qemu/timer.h" -#include "qemu/thread.h" -#include "qom/object.h" -#include "qom/object_interfaces.h" -#include "sysemu/cpus.h" -#include "sysemu/qtest.h" -#include "sysemu/replay.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" -#include "sysemu/runstate-action.h" -#include 
"sysemu/sysemu.h" -#include "sysemu/tpm.h" -#include "trace.h" - -static NotifierList exit_notifiers = - NOTIFIER_LIST_INITIALIZER(exit_notifiers); - -static RunState current_run_state = RUN_STATE_PRELAUNCH; - -/* We use RUN_STATE__MAX but any invalid value will do */ -static RunState vmstop_requested = RUN_STATE__MAX; -static QemuMutex vmstop_lock; - -typedef struct { - RunState from; - RunState to; -} RunStateTransition; - -static const RunStateTransition runstate_transitions_def[] = { - { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, - - { RUN_STATE_DEBUG, RUN_STATE_RUNNING }, - { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH }, - - { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR }, - { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR }, - { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, - { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, - { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN }, - { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, - { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, - { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, - { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH }, - { RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE }, - { RUN_STATE_INMIGRATE, RUN_STATE_COLO }, - - { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, - { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH }, - - { RUN_STATE_IO_ERROR, RUN_STATE_RUNNING }, - { RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH }, - - { RUN_STATE_PAUSED, RUN_STATE_RUNNING }, - { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE }, - { RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH }, - { RUN_STATE_PAUSED, RUN_STATE_COLO}, - - { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, - { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH }, - - { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, - 
{ RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, - - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG }, - { RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED }, - - { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, - { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, - - { RUN_STATE_COLO, RUN_STATE_RUNNING }, - { RUN_STATE_COLO, RUN_STATE_PRELAUNCH }, - { RUN_STATE_COLO, RUN_STATE_SHUTDOWN}, - - { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, - { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, - { RUN_STATE_RUNNING, RUN_STATE_IO_ERROR }, - { RUN_STATE_RUNNING, RUN_STATE_PAUSED }, - { RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM }, - { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM }, - { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN }, - { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG }, - { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED }, - { RUN_STATE_RUNNING, RUN_STATE_COLO}, - - { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, - - { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED }, - { RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH }, - { RUN_STATE_SHUTDOWN, RUN_STATE_COLO }, - - { RUN_STATE_DEBUG, RUN_STATE_SUSPENDED }, - { RUN_STATE_RUNNING, RUN_STATE_SUSPENDED }, - { RUN_STATE_SUSPENDED, RUN_STATE_RUNNING }, - { RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH }, - { RUN_STATE_SUSPENDED, RUN_STATE_COLO}, - - { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, - { RUN_STATE_WATCHDOG, 
RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH }, - { RUN_STATE_WATCHDOG, RUN_STATE_COLO}, - - { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING }, - { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE }, - { RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH }, - - { RUN_STATE__MAX, RUN_STATE__MAX }, -}; - -static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX]; - -bool runstate_check(RunState state) -{ - return current_run_state == state; -} - -static void runstate_init(void) -{ - const RunStateTransition *p; - - memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions)); - for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) { - runstate_valid_transitions[p->from][p->to] = true; - } - - qemu_mutex_init(&vmstop_lock); -} - -/* This function will abort() on invalid state transitions */ -void runstate_set(RunState new_state) -{ - assert(new_state < RUN_STATE__MAX); - - trace_runstate_set(current_run_state, RunState_str(current_run_state), - new_state, RunState_str(new_state)); - - if (current_run_state == new_state) { - return; - } - - if (!runstate_valid_transitions[current_run_state][new_state]) { - error_report("invalid runstate transition: '%s' -> '%s'", - RunState_str(current_run_state), - RunState_str(new_state)); - abort(); - } - - current_run_state = new_state; -} - -RunState runstate_get(void) -{ - return current_run_state; -} - -bool runstate_is_running(void) -{ - return runstate_check(RUN_STATE_RUNNING); -} - -bool runstate_needs_reset(void) -{ - return runstate_check(RUN_STATE_INTERNAL_ERROR) || - runstate_check(RUN_STATE_SHUTDOWN); -} - -StatusInfo *qmp_query_status(Error **errp) -{ - StatusInfo *info = g_malloc0(sizeof(*info)); - AccelState *accel = current_accel(); - - /* - * We ignore errors, which will happen if the accelerator - * is not TCG. "singlestep" is meaningless for other accelerators, - * so we will set the StatusInfo field to false for those. 
- */ - info->singlestep = object_property_get_bool(OBJECT(accel), - "one-insn-per-tb", NULL); - info->running = runstate_is_running(); - info->status = current_run_state; - - return info; -} - -bool qemu_vmstop_requested(RunState *r) -{ - qemu_mutex_lock(&vmstop_lock); - *r = vmstop_requested; - vmstop_requested = RUN_STATE__MAX; - qemu_mutex_unlock(&vmstop_lock); - return *r < RUN_STATE__MAX; -} - -void qemu_system_vmstop_request_prepare(void) -{ - qemu_mutex_lock(&vmstop_lock); -} - -void qemu_system_vmstop_request(RunState state) -{ - vmstop_requested = state; - qemu_mutex_unlock(&vmstop_lock); - qemu_notify_event(); -} -struct VMChangeStateEntry { - VMChangeStateHandler *cb; - VMChangeStateHandler *prepare_cb; - void *opaque; - QTAILQ_ENTRY(VMChangeStateEntry) entries; - int priority; -}; - -static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head = - QTAILQ_HEAD_INITIALIZER(vm_change_state_head); - -/** - * qemu_add_vm_change_state_handler_prio: - * @cb: the callback to invoke - * @opaque: user data passed to the callback - * @priority: low priorities execute first when the vm runs and the reverse is - * true when the vm stops - * - * Register a callback function that is invoked when the vm starts or stops - * running. - * - * Returns: an entry to be freed using qemu_del_vm_change_state_handler() - */ -VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( - VMChangeStateHandler *cb, void *opaque, int priority) -{ - return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, - priority); -} - -/** - * qemu_add_vm_change_state_handler_prio_full: - * @cb: the main callback to invoke - * @prepare_cb: a callback to invoke before the main callback - * @opaque: user data passed to the callbacks - * @priority: low priorities execute first when the vm runs and the reverse is - * true when the vm stops - * - * Register a main callback function and an optional prepare callback function - * that are invoked when the vm starts or stops running. 
The main callback and - * the prepare callback are called in two separate phases: First all prepare - * callbacks are called and only then all main callbacks are called. As its - * name suggests, the prepare callback can be used to do some preparatory work - * before invoking the main callback. - * - * Returns: an entry to be freed using qemu_del_vm_change_state_handler() - */ -VMChangeStateEntry * -qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, - VMChangeStateHandler *prepare_cb, - void *opaque, int priority) -{ - VMChangeStateEntry *e; - VMChangeStateEntry *other; - - e = g_malloc0(sizeof(*e)); - e->cb = cb; - e->prepare_cb = prepare_cb; - e->opaque = opaque; - e->priority = priority; - - /* Keep list sorted in ascending priority order */ - QTAILQ_FOREACH(other, &vm_change_state_head, entries) { - if (priority < other->priority) { - QTAILQ_INSERT_BEFORE(other, e, entries); - return e; - } - } - - QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries); - return e; -} - -VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, - void *opaque) -{ - return qemu_add_vm_change_state_handler_prio(cb, opaque, 0); -} - -void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) -{ - QTAILQ_REMOVE(&vm_change_state_head, e, entries); - g_free(e); -} - -void vm_state_notify(bool running, RunState state) -{ - VMChangeStateEntry *e, *next; - - trace_vm_state_notify(running, state, RunState_str(state)); - - if (running) { - QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { - if (e->prepare_cb) { - e->prepare_cb(e->opaque, running, state); - } - } - - QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); - } - } else { - QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { - if (e->prepare_cb) { - e->prepare_cb(e->opaque, running, state); - } - } - - QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); - } 
- } -} - -static ShutdownCause reset_requested; -static ShutdownCause shutdown_requested; -static int shutdown_signal; -static pid_t shutdown_pid; -static int powerdown_requested; -static int debug_requested; -static int suspend_requested; -static WakeupReason wakeup_reason; -static NotifierList powerdown_notifiers = - NOTIFIER_LIST_INITIALIZER(powerdown_notifiers); -static NotifierList suspend_notifiers = - NOTIFIER_LIST_INITIALIZER(suspend_notifiers); -static NotifierList wakeup_notifiers = - NOTIFIER_LIST_INITIALIZER(wakeup_notifiers); -static NotifierList shutdown_notifiers = - NOTIFIER_LIST_INITIALIZER(shutdown_notifiers); -static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE); - -ShutdownCause qemu_shutdown_requested_get(void) -{ - return shutdown_requested; -} - -ShutdownCause qemu_reset_requested_get(void) -{ - return reset_requested; -} - -static int qemu_shutdown_requested(void) -{ - return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE); -} - -static void qemu_kill_report(void) -{ - if (!qtest_driver() && shutdown_signal) { - if (shutdown_pid == 0) { - /* This happens for eg ^C at the terminal, so it's worth - * avoiding printing an odd message in that case. - */ - error_report("terminating on signal %d", shutdown_signal); - } else { - char *shutdown_cmd = qemu_get_pid_name(shutdown_pid); - - error_report("terminating on signal %d from pid " FMT_pid " (%s)", - shutdown_signal, shutdown_pid, - shutdown_cmd ? 
shutdown_cmd : ""); - g_free(shutdown_cmd); - } - shutdown_signal = 0; - } -} - -static ShutdownCause qemu_reset_requested(void) -{ - ShutdownCause r = reset_requested; - - if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) { - reset_requested = SHUTDOWN_CAUSE_NONE; - return r; - } - return SHUTDOWN_CAUSE_NONE; -} - -static int qemu_suspend_requested(void) -{ - int r = suspend_requested; - if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) { - suspend_requested = 0; - return r; - } - return false; -} - -static WakeupReason qemu_wakeup_requested(void) -{ - return wakeup_reason; -} - -static int qemu_powerdown_requested(void) -{ - int r = powerdown_requested; - powerdown_requested = 0; - return r; -} - -static int qemu_debug_requested(void) -{ - int r = debug_requested; - debug_requested = 0; - return r; -} - -/* - * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE. - */ -void qemu_system_reset(ShutdownCause reason) -{ - MachineClass *mc; - - mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; - - cpu_synchronize_all_states(); - - if (mc && mc->reset) { - mc->reset(current_machine, reason); - } else { - qemu_devices_reset(reason); - } - switch (reason) { - case SHUTDOWN_CAUSE_NONE: - case SHUTDOWN_CAUSE_SUBSYSTEM_RESET: - case SHUTDOWN_CAUSE_SNAPSHOT_LOAD: - break; - default: - qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); - } - cpu_synchronize_all_post_reset(); -} - -/* - * Wake the VM after suspend. - */ -static void qemu_system_wakeup(void) -{ - MachineClass *mc; - - mc = current_machine ? 
MACHINE_GET_CLASS(current_machine) : NULL; - - if (mc && mc->wakeup) { - mc->wakeup(current_machine); - } -} - -void qemu_system_guest_panicked(GuestPanicInformation *info) -{ - qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); - - if (current_cpu) { - current_cpu->crash_occurred = true; - } - /* - * TODO: Currently the available panic actions are: none, pause, and - * shutdown, but in principle debug and reset could be supported as well. - * Investigate any potential use cases for the unimplemented actions. - */ - if (panic_action == PANIC_ACTION_PAUSE - || (panic_action == PANIC_ACTION_SHUTDOWN && shutdown_action == SHUTDOWN_ACTION_PAUSE)) { - qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info); - vm_stop(RUN_STATE_GUEST_PANICKED); - } else if (panic_action == PANIC_ACTION_SHUTDOWN || - panic_action == PANIC_ACTION_EXIT_FAILURE) { - qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info); - vm_stop(RUN_STATE_GUEST_PANICKED); - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC); - } else { - qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info); - } - - if (info) { - if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) { - qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64 - " %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n", - info->u.hyper_v.arg1, - info->u.hyper_v.arg2, - info->u.hyper_v.arg3, - info->u.hyper_v.arg4, - info->u.hyper_v.arg5); - } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) { - qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n" - "PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n", - info->u.s390.core, - S390CrashReason_str(info->u.s390.reason), - info->u.s390.psw_mask, - info->u.s390.psw_addr); - } - qapi_free_GuestPanicInformation(info); - } -} - -void qemu_system_guest_crashloaded(GuestPanicInformation *info) -{ - qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded"); - qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info); - qapi_free_GuestPanicInformation(info); -} - -void 
qemu_system_reset_request(ShutdownCause reason) -{ - if (reboot_action == REBOOT_ACTION_SHUTDOWN && - reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { - shutdown_requested = reason; - } else if (!cpus_are_resettable()) { - error_report("cpus are not resettable, terminating"); - shutdown_requested = reason; - } else { - reset_requested = reason; - } - cpu_stop_current(); - qemu_notify_event(); -} - -static void qemu_system_suspend(void) -{ - pause_all_vcpus(); - notifier_list_notify(&suspend_notifiers, NULL); - runstate_set(RUN_STATE_SUSPENDED); - qapi_event_send_suspend(); -} - -void qemu_system_suspend_request(void) -{ - if (runstate_check(RUN_STATE_SUSPENDED)) { - return; - } - suspend_requested = 1; - cpu_stop_current(); - qemu_notify_event(); -} - -void qemu_register_suspend_notifier(Notifier *notifier) -{ - notifier_list_add(&suspend_notifiers, notifier); -} - -void qemu_system_wakeup_request(WakeupReason reason, Error **errp) -{ - trace_system_wakeup_request(reason); - - if (!runstate_check(RUN_STATE_SUSPENDED)) { - error_setg(errp, - "Unable to wake up: guest is not in suspended state"); - return; - } - if (!(wakeup_reason_mask & (1 << reason))) { - return; - } - runstate_set(RUN_STATE_RUNNING); - wakeup_reason = reason; - qemu_notify_event(); -} - -void qemu_system_wakeup_enable(WakeupReason reason, bool enabled) -{ - if (enabled) { - wakeup_reason_mask |= (1 << reason); - } else { - wakeup_reason_mask &= ~(1 << reason); - } -} - -void qemu_register_wakeup_notifier(Notifier *notifier) -{ - notifier_list_add(&wakeup_notifiers, notifier); -} - -static bool wakeup_suspend_enabled; - -void qemu_register_wakeup_support(void) -{ - wakeup_suspend_enabled = true; -} - -bool qemu_wakeup_suspend_enabled(void) -{ - return wakeup_suspend_enabled; -} - -void qemu_system_killed(int signal, pid_t pid) -{ - shutdown_signal = signal; - shutdown_pid = pid; - shutdown_action = SHUTDOWN_ACTION_POWEROFF; - - /* Cannot call qemu_system_shutdown_request directly because - * we are in 
a signal handler. - */ - shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL; - qemu_notify_event(); -} - -void qemu_system_shutdown_request(ShutdownCause reason) -{ - trace_qemu_system_shutdown_request(reason); - replay_shutdown_request(reason); - shutdown_requested = reason; - qemu_notify_event(); -} - -static void qemu_system_powerdown(void) -{ - qapi_event_send_powerdown(); - notifier_list_notify(&powerdown_notifiers, NULL); -} - -static void qemu_system_shutdown(ShutdownCause cause) -{ - qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause); - notifier_list_notify(&shutdown_notifiers, &cause); -} - -void qemu_system_powerdown_request(void) -{ - trace_qemu_system_powerdown_request(); - powerdown_requested = 1; - qemu_notify_event(); -} - -void qemu_register_powerdown_notifier(Notifier *notifier) -{ - notifier_list_add(&powerdown_notifiers, notifier); -} - -void qemu_register_shutdown_notifier(Notifier *notifier) -{ - notifier_list_add(&shutdown_notifiers, notifier); -} - -void qemu_system_debug_request(void) -{ - debug_requested = 1; - qemu_notify_event(); -} - -static bool main_loop_should_exit(int *status) -{ - RunState r; - ShutdownCause request; - - if (qemu_debug_requested()) { - vm_stop(RUN_STATE_DEBUG); - } - if (qemu_suspend_requested()) { - qemu_system_suspend(); - } - request = qemu_shutdown_requested(); - if (request) { - qemu_kill_report(); - qemu_system_shutdown(request); - if (shutdown_action == SHUTDOWN_ACTION_PAUSE) { - vm_stop(RUN_STATE_SHUTDOWN); - } else { - if (request == SHUTDOWN_CAUSE_GUEST_PANIC && - panic_action == PANIC_ACTION_EXIT_FAILURE) { - *status = EXIT_FAILURE; - } - return true; - } - } - request = qemu_reset_requested(); - if (request) { - pause_all_vcpus(); - qemu_system_reset(request); - resume_all_vcpus(); - /* - * runstate can change in pause_all_vcpus() - * as iothread mutex is unlocked - */ - if (!runstate_check(RUN_STATE_RUNNING) && - !runstate_check(RUN_STATE_INMIGRATE) && - 
!runstate_check(RUN_STATE_FINISH_MIGRATE)) { - runstate_set(RUN_STATE_PRELAUNCH); - } - } - if (qemu_wakeup_requested()) { - pause_all_vcpus(); - qemu_system_wakeup(); - notifier_list_notify(&wakeup_notifiers, &wakeup_reason); - wakeup_reason = QEMU_WAKEUP_REASON_NONE; - resume_all_vcpus(); - qapi_event_send_wakeup(); - } - if (qemu_powerdown_requested()) { - qemu_system_powerdown(); - } - if (qemu_vmstop_requested(&r)) { - vm_stop(r); - } - return false; -} - -int qemu_main_loop(void) -{ - int status = EXIT_SUCCESS; - - while (!main_loop_should_exit(&status)) { - main_loop_wait(false); - } - - return status; -} - -void qemu_add_exit_notifier(Notifier *notify) -{ - notifier_list_add(&exit_notifiers, notify); -} - -void qemu_remove_exit_notifier(Notifier *notify) -{ - notifier_remove(notify); -} - -static void qemu_run_exit_notifiers(void) -{ - notifier_list_notify(&exit_notifiers, NULL); -} - -void qemu_init_subsystems(void) -{ - Error *err = NULL; - - os_set_line_buffering(); - - module_call_init(MODULE_INIT_TRACE); - - qemu_init_cpu_list(); - qemu_init_cpu_loop(); - qemu_mutex_lock_iothread(); - - atexit(qemu_run_exit_notifiers); - - module_call_init(MODULE_INIT_QOM); - module_call_init(MODULE_INIT_MIGRATION); - - runstate_init(); - precopy_infrastructure_init(); - postcopy_infrastructure_init(); - monitor_init_globals(); - - if (qcrypto_init(&err) < 0) { - error_reportf_err(err, "cannot initialize crypto: "); - exit(1); - } - - os_setup_early_signal_handling(); - - bdrv_init_with_whitelist(); - socket_init(); -} - - -void qemu_cleanup(void) -{ - gdb_exit(0); - - /* - * cleaning up the migration object cancels any existing migration - * try to do this early so that it also stops using devices. - */ - migration_shutdown(); - - /* - * Close the exports before draining the block layer. The export - * drivers may have coroutines yielding on it, so we need to clean - * them up before the drain, as otherwise they may be get stuck in - * blk_wait_while_drained(). 
- */ - blk_exp_close_all(); - - - /* No more vcpu or device emulation activity beyond this point */ - vm_shutdown(); - replay_finish(); - - /* - * We must cancel all block jobs while the block layer is drained, - * or cancelling will be affected by throttling and thus may block - * for an extended period of time. - * Begin the drained section after vm_shutdown() to avoid requests being - * stuck in the BlockBackend's request queue. - * We do not need to end this section, because we do not want any - * requests happening from here on anyway. - */ - bdrv_drain_all_begin(); - job_cancel_sync_all(); - bdrv_close_all(); - - /* vhost-user must be cleaned up before chardevs. */ - tpm_cleanup(); - net_cleanup(); - audio_cleanup(); - monitor_cleanup(); - qemu_chr_cleanup(); - user_creatable_cleanup(); - /* TODO: unref root container, check all devices are ok */ -} diff --git a/softmmu/timers-state.h b/softmmu/timers-state.h deleted file mode 100644 index 94bb739..0000000 --- a/softmmu/timers-state.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#ifndef TIMERS_STATE_H -#define TIMERS_STATE_H - -/* timers state, for sharing between icount and cpu-timers */ - -typedef struct TimersState { - /* Protected by BQL. */ - int64_t cpu_ticks_prev; - int64_t cpu_ticks_offset; - - /* - * Protect fields that can be respectively read outside the - * BQL, and written from multiple threads. - */ - QemuSeqLock vm_clock_seqlock; - QemuSpin vm_clock_lock; - - int16_t cpu_ticks_enabled; - - /* Conversion factor from emulated instructions to virtual clock ticks. */ - int16_t icount_time_shift; - /* Icount delta used for shift auto adjust. */ - int64_t last_delta; - - /* Compensate for varying guest execution speed. */ - aligned_int64_t qemu_icount_bias; - - int64_t vm_clock_warp_start; - int64_t cpu_clock_offset; - - /* Only written by TCG thread */ - int64_t qemu_icount; - - /* for adjusting icount */ - QEMUTimer *icount_rt_timer; - QEMUTimer *icount_vm_timer; - QEMUTimer *icount_warp_timer; -} TimersState; - -extern TimersState timers_state; - -/* - * icount needs this internal from cpu-timers when adjusting the icount shift. - */ -int64_t cpu_get_clock_locked(void); - -#endif /* TIMERS_STATE_H */ diff --git a/softmmu/tpm-hmp-cmds.c b/softmmu/tpm-hmp-cmds.c deleted file mode 100644 index 9ed6ad6..0000000 --- a/softmmu/tpm-hmp-cmds.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * HMP commands related to TPM - * - * This work is licensed under the terms of the GNU GPL, version 2 or - * (at your option) any later version. 
- */ - -#include "qemu/osdep.h" -#include "qapi/qapi-commands-tpm.h" -#include "monitor/monitor.h" -#include "monitor/hmp.h" -#include "qapi/error.h" - -void hmp_info_tpm(Monitor *mon, const QDict *qdict) -{ -#ifdef CONFIG_TPM - TPMInfoList *info_list, *info; - Error *err = NULL; - unsigned int c = 0; - TPMPassthroughOptions *tpo; - TPMEmulatorOptions *teo; - - info_list = qmp_query_tpm(&err); - if (err) { - monitor_printf(mon, "TPM device not supported\n"); - error_free(err); - return; - } - - if (info_list) { - monitor_printf(mon, "TPM device:\n"); - } - - for (info = info_list; info; info = info->next) { - TPMInfo *ti = info->value; - monitor_printf(mon, " tpm%d: model=%s\n", - c, TpmModel_str(ti->model)); - - monitor_printf(mon, " \\ %s: type=%s", - ti->id, TpmType_str(ti->options->type)); - - switch (ti->options->type) { - case TPM_TYPE_PASSTHROUGH: - tpo = ti->options->u.passthrough.data; - monitor_printf(mon, "%s%s%s%s", - tpo->path ? ",path=" : "", - tpo->path ?: "", - tpo->cancel_path ? ",cancel-path=" : "", - tpo->cancel_path ?: ""); - break; - case TPM_TYPE_EMULATOR: - teo = ti->options->u.emulator.data; - monitor_printf(mon, ",chardev=%s", teo->chardev); - break; - case TPM_TYPE__MAX: - break; - } - monitor_printf(mon, "\n"); - c++; - } - qapi_free_TPMInfoList(info_list); -#else - monitor_printf(mon, "TPM device not supported\n"); -#endif /* CONFIG_TPM */ -} diff --git a/softmmu/tpm.c b/softmmu/tpm.c deleted file mode 100644 index 578563f..0000000 --- a/softmmu/tpm.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * TPM configuration - * - * Copyright (C) 2011-2013 IBM Corporation - * - * Authors: - * Stefan Berger - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - * Based on net.c - */ - -#include "qemu/osdep.h" - -#include "qapi/error.h" -#include "qapi/qapi-commands-tpm.h" -#include "qapi/qmp/qerror.h" -#include "sysemu/tpm_backend.h" -#include "sysemu/tpm.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" - -static QLIST_HEAD(, TPMBackend) tpm_backends = - QLIST_HEAD_INITIALIZER(tpm_backends); - -static const TPMBackendClass * -tpm_be_find_by_type(enum TpmType type) -{ - ObjectClass *oc; - char *typename = g_strdup_printf("tpm-%s", TpmType_str(type)); - - oc = object_class_by_name(typename); - g_free(typename); - - if (!object_class_dynamic_cast(oc, TYPE_TPM_BACKEND)) { - return NULL; - } - - return TPM_BACKEND_CLASS(oc); -} - -/* - * Walk the list of available TPM backend drivers and display them on the - * screen. - */ -static void tpm_display_backend_drivers(void) -{ - bool got_one = false; - int i; - - for (i = 0; i < TPM_TYPE__MAX; i++) { - const TPMBackendClass *bc = tpm_be_find_by_type(i); - if (!bc) { - continue; - } - if (!got_one) { - error_printf("Supported TPM types (choose only one):\n"); - got_one = true; - } - error_printf("%12s %s\n", TpmType_str(i), bc->desc); - } - if (!got_one) { - error_printf("No TPM backend types are available\n"); - } -} - -/* - * Find the TPM with the given Id - */ -TPMBackend *qemu_find_tpm_be(const char *id) -{ - TPMBackend *drv; - - if (id) { - QLIST_FOREACH(drv, &tpm_backends, list) { - if (!strcmp(drv->id, id)) { - return drv; - } - } - } - - return NULL; -} - -static int tpm_init_tpmdev(void *dummy, QemuOpts *opts, Error **errp) -{ - /* - * Use of error_report() in a function with an Error ** parameter - * is suspicious. It is okay here. The parameter only exists to - * make the function usable with qemu_opts_foreach(). It is not - * actually used. 
- */ - const char *value; - const char *id; - const TPMBackendClass *be; - TPMBackend *drv; - Error *local_err = NULL; - int i; - - if (!QLIST_EMPTY(&tpm_backends)) { - error_report("Only one TPM is allowed."); - return 1; - } - - id = qemu_opts_id(opts); - if (id == NULL) { - error_report(QERR_MISSING_PARAMETER, "id"); - return 1; - } - - value = qemu_opt_get(opts, "type"); - if (!value) { - error_report(QERR_MISSING_PARAMETER, "type"); - tpm_display_backend_drivers(); - return 1; - } - - i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL); - be = i >= 0 ? tpm_be_find_by_type(i) : NULL; - if (be == NULL) { - error_report(QERR_INVALID_PARAMETER_VALUE, - "type", "a TPM backend type"); - tpm_display_backend_drivers(); - return 1; - } - - /* validate backend specific opts */ - if (!qemu_opts_validate(opts, be->opts, &local_err)) { - error_report_err(local_err); - return 1; - } - - drv = be->create(opts); - if (!drv) { - return 1; - } - - drv->id = g_strdup(id); - QLIST_INSERT_HEAD(&tpm_backends, drv, list); - - return 0; -} - -/* - * Walk the list of TPM backend drivers that are in use and call their - * destroy function to have them cleaned up. - */ -void tpm_cleanup(void) -{ - TPMBackend *drv, *next; - - QLIST_FOREACH_SAFE(drv, &tpm_backends, list, next) { - QLIST_REMOVE(drv, list); - object_unref(OBJECT(drv)); - } -} - -/* - * Initialize the TPM. Process the tpmdev command line options describing the - * TPM backend. - */ -int tpm_init(void) -{ - if (qemu_opts_foreach(qemu_find_opts("tpmdev"), - tpm_init_tpmdev, NULL, NULL)) { - return -1; - } - - return 0; -} - -/* - * Parse the TPM configuration options. 
- * To display all available TPM backends the user may use '-tpmdev help' - */ -int tpm_config_parse(QemuOptsList *opts_list, const char *optarg) -{ - QemuOpts *opts; - - if (!strcmp(optarg, "help")) { - tpm_display_backend_drivers(); - return -1; - } - opts = qemu_opts_parse_noisily(opts_list, optarg, true); - if (!opts) { - return -1; - } - return 0; -} - -/* - * Walk the list of active TPM backends and collect information about them. - */ -TPMInfoList *qmp_query_tpm(Error **errp) -{ - TPMBackend *drv; - TPMInfoList *head = NULL, **tail = &head; - - QLIST_FOREACH(drv, &tpm_backends, list) { - if (!drv->tpmif) { - continue; - } - - QAPI_LIST_APPEND(tail, tpm_backend_query_tpm(drv)); - } - - return head; -} - -TpmTypeList *qmp_query_tpm_types(Error **errp) -{ - unsigned int i = 0; - TpmTypeList *head = NULL, **tail = &head; - - for (i = 0; i < TPM_TYPE__MAX; i++) { - if (!tpm_be_find_by_type(i)) { - continue; - } - QAPI_LIST_APPEND(tail, i); - } - - return head; -} -TpmModelList *qmp_query_tpm_models(Error **errp) -{ - TpmModelList *head = NULL, **tail = &head; - GSList *e, *l = object_class_get_list(TYPE_TPM_IF, false); - - for (e = l; e; e = e->next) { - TPMIfClass *c = TPM_IF_CLASS(e->data); - - QAPI_LIST_APPEND(tail, c->model); - } - g_slist_free(l); - - return head; -} diff --git a/softmmu/trace-events b/softmmu/trace-events deleted file mode 100644 index 69c9044..0000000 --- a/softmmu/trace-events +++ /dev/null @@ -1,40 +0,0 @@ -# See docs/devel/tracing.rst for syntax documentation. - -# balloon.c -# Since requests are raised via monitor, not many tracepoints are needed. 
-balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu" - -# ioport.c -cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" -cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" - -# memory.c -memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'" -memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'" -memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" -memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)" -flatview_new(void *view, void *root) "%p (root %p)" -flatview_destroy(void *view, void *root) "%p (root %p)" -flatview_destroy_rcu(void *view, void *root) "%p (root %p)" -global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32 - -# cpus.c -vm_stop_flush_all(int ret) "ret %d" - -# vl.c -vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)" -load_file(const char *name, const char *path) "name %s location %s" -runstate_set(int current_state, const char *current_state_str, int new_state, const char *new_state_str) "current_run_state %d (%s) new_state %d (%s)" 
-system_wakeup_request(int reason) "reason=%d" -qemu_system_shutdown_request(int reason) "reason=%d" -qemu_system_powerdown_request(void) "" - -#dirtylimit.c -dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d" -dirtylimit_state_finalize(void) -dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" -dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64 -dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us" diff --git a/softmmu/trace.h b/softmmu/trace.h deleted file mode 100644 index 2ad1011..0000000 --- a/softmmu/trace.h +++ /dev/null @@ -1 +0,0 @@ -#include "trace/trace-softmmu.h" diff --git a/softmmu/vl.c b/softmmu/vl.c deleted file mode 100644 index 98e071e..0000000 --- a/softmmu/vl.c +++ /dev/null @@ -1,3730 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/help-texts.h" -#include "qemu/datadir.h" -#include "qemu/units.h" -#include "exec/cpu-common.h" -#include "exec/page-vary.h" -#include "hw/qdev-properties.h" -#include "qapi/compat-policy.h" -#include "qapi/error.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qstring.h" -#include "qapi/qmp/qjson.h" -#include "qemu-version.h" -#include "qemu/cutils.h" -#include "qemu/help_option.h" -#include "qemu/hw-version.h" -#include "qemu/uuid.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" -#include "sysemu/runstate-action.h" -#include "sysemu/seccomp.h" -#include "sysemu/tcg.h" -#include "sysemu/xen.h" - -#include "qemu/error-report.h" -#include "qemu/sockets.h" -#include "qemu/accel.h" -#include "qemu/async-teardown.h" -#include "hw/usb.h" -#include "hw/isa/isa.h" -#include "hw/scsi/scsi.h" -#include "hw/display/vga.h" -#include "hw/firmware/smbios.h" -#include "hw/acpi/acpi.h" -#include "hw/xen/xen.h" -#include "hw/loader.h" -#include "monitor/qdev.h" -#include "net/net.h" -#include "net/slirp.h" -#include "monitor/monitor.h" -#include "ui/console.h" -#include "ui/input.h" -#include "sysemu/sysemu.h" -#include "sysemu/numa.h" -#include "sysemu/hostmem.h" -#include "exec/gdbstub.h" -#include "qemu/timer.h" -#include "chardev/char.h" -#include "qemu/bitmap.h" -#include "qemu/log.h" -#include "sysemu/blockdev.h" -#include "hw/block/block.h" -#include "hw/i386/x86.h" -#include "hw/i386/pc.h" -#include "migration/misc.h" -#include "migration/snapshot.h" -#include "sysemu/tpm.h" -#include "sysemu/dma.h" -#include "hw/audio/soundhw.h" -#include "audio/audio.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" -#include "migration/colo.h" 
-#include "migration/postcopy-ram.h" -#include "sysemu/kvm.h" -#include "qapi/qobject-input-visitor.h" -#include "qemu/option.h" -#include "qemu/config-file.h" -#include "qemu/main-loop.h" -#ifdef CONFIG_VIRTFS -#include "fsdev/qemu-fsdev.h" -#endif -#include "sysemu/qtest.h" -#ifdef CONFIG_TCG -#include "accel/tcg/perf.h" -#endif - -#include "disas/disas.h" - -#include "trace.h" -#include "trace/control.h" -#include "qemu/plugin.h" -#include "qemu/queue.h" -#include "sysemu/arch_init.h" -#include "exec/confidential-guest-support.h" - -#include "ui/qemu-spice.h" -#include "qapi/string-input-visitor.h" -#include "qapi/opts-visitor.h" -#include "qapi/clone-visitor.h" -#include "qom/object_interfaces.h" -#include "semihosting/semihost.h" -#include "crypto/init.h" -#include "sysemu/replay.h" -#include "qapi/qapi-events-run-state.h" -#include "qapi/qapi-types-audio.h" -#include "qapi/qapi-visit-audio.h" -#include "qapi/qapi-visit-block-core.h" -#include "qapi/qapi-visit-compat.h" -#include "qapi/qapi-visit-machine.h" -#include "qapi/qapi-visit-ui.h" -#include "qapi/qapi-commands-block-core.h" -#include "qapi/qapi-commands-migration.h" -#include "qapi/qapi-commands-misc.h" -#include "qapi/qapi-visit-qom.h" -#include "qapi/qapi-commands-ui.h" -#include "block/qdict.h" -#include "qapi/qmp/qerror.h" -#include "sysemu/iothread.h" -#include "qemu/guest-random.h" -#include "qemu/keyval.h" - -#define MAX_VIRTIO_CONSOLES 1 - -typedef struct BlockdevOptionsQueueEntry { - BlockdevOptions *bdo; - Location loc; - QSIMPLEQ_ENTRY(BlockdevOptionsQueueEntry) entry; -} BlockdevOptionsQueueEntry; - -typedef QSIMPLEQ_HEAD(, BlockdevOptionsQueueEntry) BlockdevOptionsQueue; - -typedef struct ObjectOption { - ObjectOptions *opts; - QTAILQ_ENTRY(ObjectOption) next; -} ObjectOption; - -typedef struct DeviceOption { - QDict *opts; - Location loc; - QTAILQ_ENTRY(DeviceOption) next; -} DeviceOption; - -static const char *cpu_option; -static const char *mem_path; -static const char *incoming; 
-static const char *loadvm; -static const char *accelerators; -static bool have_custom_ram_size; -static const char *ram_memdev_id; -static QDict *machine_opts_dict; -static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts); -static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts); -static int display_remote; -static int snapshot; -static bool preconfig_requested; -static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list); -static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue); -static bool nographic = false; -static int mem_prealloc; /* force preallocation of physical target memory */ -static const char *vga_model = NULL; -static DisplayOptions dpy; -static int num_serial_hds; -static Chardev **serial_hds; -static const char *log_mask; -static const char *log_file; -static bool list_data_dirs; -static const char *qtest_chrdev; -static const char *qtest_log; -static bool opt_one_insn_per_tb; - -static int has_defaults = 1; -static int default_serial = 1; -static int default_parallel = 1; -static int default_monitor = 1; -static int default_floppy = 1; -static int default_cdrom = 1; -static int default_sdcard = 1; -static int default_vga = 1; -static int default_net = 1; - -static struct { - const char *driver; - int *flag; -} default_list[] = { - { .driver = "isa-serial", .flag = &default_serial }, - { .driver = "isa-parallel", .flag = &default_parallel }, - { .driver = "isa-fdc", .flag = &default_floppy }, - { .driver = "floppy", .flag = &default_floppy }, - { .driver = "ide-cd", .flag = &default_cdrom }, - { .driver = "ide-hd", .flag = &default_cdrom }, - { .driver = "scsi-cd", .flag = &default_cdrom }, - { .driver = "scsi-hd", .flag = &default_cdrom }, - { .driver = "VGA", .flag = &default_vga }, - { .driver = "isa-vga", .flag = &default_vga }, - { .driver = "cirrus-vga", .flag = &default_vga }, - { .driver = "isa-cirrus-vga", .flag = &default_vga }, - { .driver = 
"vmware-svga", .flag = &default_vga }, - { .driver = "qxl-vga", .flag = &default_vga }, - { .driver = "virtio-vga", .flag = &default_vga }, - { .driver = "ati-vga", .flag = &default_vga }, - { .driver = "vhost-user-vga", .flag = &default_vga }, - { .driver = "virtio-vga-gl", .flag = &default_vga }, -}; - -static QemuOptsList qemu_rtc_opts = { - .name = "rtc", - .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head), - .merge_lists = true, - .desc = { - { - .name = "base", - .type = QEMU_OPT_STRING, - },{ - .name = "clock", - .type = QEMU_OPT_STRING, - },{ - .name = "driftfix", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_option_rom_opts = { - .name = "option-rom", - .implied_opt_name = "romfile", - .head = QTAILQ_HEAD_INITIALIZER(qemu_option_rom_opts.head), - .desc = { - { - .name = "bootindex", - .type = QEMU_OPT_NUMBER, - }, { - .name = "romfile", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_accel_opts = { - .name = "accel", - .implied_opt_name = "accel", - .head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head), - .desc = { - /* - * no elements => accept any - * sanity checking will happen later - * when setting accelerator properties - */ - { } - }, -}; - -static QemuOptsList qemu_boot_opts = { - .name = "boot-opts", - .implied_opt_name = "order", - .merge_lists = true, - .head = QTAILQ_HEAD_INITIALIZER(qemu_boot_opts.head), - .desc = { - { - .name = "order", - .type = QEMU_OPT_STRING, - }, { - .name = "once", - .type = QEMU_OPT_STRING, - }, { - .name = "menu", - .type = QEMU_OPT_BOOL, - }, { - .name = "splash", - .type = QEMU_OPT_STRING, - }, { - .name = "splash-time", - .type = QEMU_OPT_NUMBER, - }, { - .name = "reboot-timeout", - .type = QEMU_OPT_NUMBER, - }, { - .name = "strict", - .type = QEMU_OPT_BOOL, - }, - { /*End of list */ } - }, -}; - -static QemuOptsList qemu_add_fd_opts = { - .name = "add-fd", - .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head), - .desc 
= { - { - .name = "fd", - .type = QEMU_OPT_NUMBER, - .help = "file descriptor of which a duplicate is added to fd set", - },{ - .name = "set", - .type = QEMU_OPT_NUMBER, - .help = "ID of the fd set to add fd to", - },{ - .name = "opaque", - .type = QEMU_OPT_STRING, - .help = "free-form string used to describe fd", - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_object_opts = { - .name = "object", - .implied_opt_name = "qom-type", - .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head), - .desc = { - { } - }, -}; - -static QemuOptsList qemu_tpmdev_opts = { - .name = "tpmdev", - .implied_opt_name = "type", - .head = QTAILQ_HEAD_INITIALIZER(qemu_tpmdev_opts.head), - .desc = { - /* options are defined in the TPM backends */ - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_overcommit_opts = { - .name = "overcommit", - .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head), - .desc = { - { - .name = "mem-lock", - .type = QEMU_OPT_BOOL, - }, - { - .name = "cpu-pm", - .type = QEMU_OPT_BOOL, - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_msg_opts = { - .name = "msg", - .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head), - .desc = { - { - .name = "timestamp", - .type = QEMU_OPT_BOOL, - }, - { - .name = "guest-name", - .type = QEMU_OPT_BOOL, - .help = "Prepends guest name for error messages but only if " - "-name guest is set otherwise option is ignored\n", - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_name_opts = { - .name = "name", - .implied_opt_name = "guest", - .merge_lists = true, - .head = QTAILQ_HEAD_INITIALIZER(qemu_name_opts.head), - .desc = { - { - .name = "guest", - .type = QEMU_OPT_STRING, - .help = "Sets the name of the guest.\n" - "This name will be displayed in the SDL window caption.\n" - "The name will also be used for the VNC server", - }, { - .name = "process", - .type = QEMU_OPT_STRING, - .help = "Sets the name of the QEMU process, as shown in top etc", - }, { - .name = 
"debug-threads", - .type = QEMU_OPT_BOOL, - .help = "When enabled, name the individual threads; defaults off.\n" - "NOTE: The thread names are for debugging and not a\n" - "stable API.", - }, - { /* End of list */ } - }, -}; - -static QemuOptsList qemu_mem_opts = { - .name = "memory", - .implied_opt_name = "size", - .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_opts.head), - .merge_lists = true, - .desc = { - { - .name = "size", - .type = QEMU_OPT_SIZE, - }, - { - .name = "slots", - .type = QEMU_OPT_NUMBER, - }, - { - .name = "maxmem", - .type = QEMU_OPT_SIZE, - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_icount_opts = { - .name = "icount", - .implied_opt_name = "shift", - .merge_lists = true, - .head = QTAILQ_HEAD_INITIALIZER(qemu_icount_opts.head), - .desc = { - { - .name = "shift", - .type = QEMU_OPT_STRING, - }, { - .name = "align", - .type = QEMU_OPT_BOOL, - }, { - .name = "sleep", - .type = QEMU_OPT_BOOL, - }, { - .name = "rr", - .type = QEMU_OPT_STRING, - }, { - .name = "rrfile", - .type = QEMU_OPT_STRING, - }, { - .name = "rrsnapshot", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_fw_cfg_opts = { - .name = "fw_cfg", - .implied_opt_name = "name", - .head = QTAILQ_HEAD_INITIALIZER(qemu_fw_cfg_opts.head), - .desc = { - { - .name = "name", - .type = QEMU_OPT_STRING, - .help = "Sets the fw_cfg name of the blob to be inserted", - }, { - .name = "file", - .type = QEMU_OPT_STRING, - .help = "Sets the name of the file from which " - "the fw_cfg blob will be loaded", - }, { - .name = "string", - .type = QEMU_OPT_STRING, - .help = "Sets content of the blob to be inserted from a string", - }, { - .name = "gen_id", - .type = QEMU_OPT_STRING, - .help = "Sets id of the object generating the fw_cfg blob " - "to be inserted", - }, - { /* end of list */ } - }, -}; - -static QemuOptsList qemu_action_opts = { - .name = "action", - .merge_lists = true, - .head = QTAILQ_HEAD_INITIALIZER(qemu_action_opts.head), - .desc 
= { - { - .name = "shutdown", - .type = QEMU_OPT_STRING, - },{ - .name = "reboot", - .type = QEMU_OPT_STRING, - },{ - .name = "panic", - .type = QEMU_OPT_STRING, - },{ - .name = "watchdog", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -const char *qemu_get_vm_name(void) -{ - return qemu_name; -} - -static void default_driver_disable(const char *driver) -{ - int i; - - if (!driver) { - return; - } - - for (i = 0; i < ARRAY_SIZE(default_list); i++) { - if (strcmp(default_list[i].driver, driver) != 0) - continue; - *(default_list[i].flag) = 0; - } -} - -static int default_driver_check(void *opaque, QemuOpts *opts, Error **errp) -{ - const char *driver = qemu_opt_get(opts, "driver"); - - default_driver_disable(driver); - return 0; -} - -static void default_driver_check_json(void) -{ - DeviceOption *opt; - - QTAILQ_FOREACH(opt, &device_opts, next) { - const char *driver = qdict_get_try_str(opt->opts, "driver"); - default_driver_disable(driver); - } -} - -static int parse_name(void *opaque, QemuOpts *opts, Error **errp) -{ - const char *proc_name; - - if (qemu_opt_get(opts, "debug-threads")) { - qemu_thread_naming(qemu_opt_get_bool(opts, "debug-threads", false)); - } - qemu_name = qemu_opt_get(opts, "guest"); - - proc_name = qemu_opt_get(opts, "process"); - if (proc_name) { - os_set_proc_name(proc_name); - } - - return 0; -} - -bool defaults_enabled(void) -{ - return has_defaults; -} - -#ifndef _WIN32 -static int parse_add_fd(void *opaque, QemuOpts *opts, Error **errp) -{ - int fd, dupfd, flags; - int64_t fdset_id; - const char *fd_opaque = NULL; - AddfdInfo *fdinfo; - - fd = qemu_opt_get_number(opts, "fd", -1); - fdset_id = qemu_opt_get_number(opts, "set", -1); - fd_opaque = qemu_opt_get(opts, "opaque"); - - if (fd < 0) { - error_setg(errp, "fd option is required and must be non-negative"); - return -1; - } - - if (fd <= STDERR_FILENO) { - error_setg(errp, "fd cannot be a standard I/O stream"); - return -1; - } - - /* - * All fds inherited across 
exec() necessarily have FD_CLOEXEC - * clear, while qemu sets FD_CLOEXEC on all other fds used internally. - */ - flags = fcntl(fd, F_GETFD); - if (flags == -1 || (flags & FD_CLOEXEC)) { - error_setg(errp, "fd is not valid or already in use"); - return -1; - } - - if (fdset_id < 0) { - error_setg(errp, "set option is required and must be non-negative"); - return -1; - } - -#ifdef F_DUPFD_CLOEXEC - dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0); -#else - dupfd = dup(fd); - if (dupfd != -1) { - qemu_set_cloexec(dupfd); - } -#endif - if (dupfd == -1) { - error_setg(errp, "error duplicating fd: %s", strerror(errno)); - return -1; - } - - /* add the duplicate fd, and optionally the opaque string, to the fd set */ - fdinfo = monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque, - &error_abort); - g_free(fdinfo); - - return 0; -} - -static int cleanup_add_fd(void *opaque, QemuOpts *opts, Error **errp) -{ - int fd; - - fd = qemu_opt_get_number(opts, "fd", -1); - close(fd); - - return 0; -} -#endif - -/***********************************************************/ -/* QEMU Block devices */ - -#define HD_OPTS "media=disk" -#define CDROM_OPTS "media=cdrom" -#define FD_OPTS "" -#define PFLASH_OPTS "" -#define MTD_OPTS "" -#define SD_OPTS "" - -static int drive_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - BlockInterfaceType *block_default_type = opaque; - - return drive_new(opts, *block_default_type, errp) == NULL; -} - -static int drive_enable_snapshot(void *opaque, QemuOpts *opts, Error **errp) -{ - if (qemu_opt_get(opts, "snapshot") == NULL) { - qemu_opt_set(opts, "snapshot", "on", &error_abort); - } - return 0; -} - -static void default_drive(int enable, int snapshot, BlockInterfaceType type, - int index, const char *optstr) -{ - QemuOpts *opts; - DriveInfo *dinfo; - - if (!enable || drive_get_by_index(type, index)) { - return; - } - - opts = drive_add(type, index, NULL, optstr); - if (snapshot) { - drive_enable_snapshot(NULL, opts, NULL); - } - - dinfo = 
drive_new(opts, type, &error_abort); - dinfo->is_default = true; - -} - -static void configure_blockdev(BlockdevOptionsQueue *bdo_queue, - MachineClass *machine_class, int snapshot) -{ - /* - * If the currently selected machine wishes to override the - * units-per-bus property of its default HBA interface type, do so - * now. - */ - if (machine_class->units_per_default_bus) { - override_max_devs(machine_class->block_default_type, - machine_class->units_per_default_bus); - } - - /* open the virtual block devices */ - while (!QSIMPLEQ_EMPTY(bdo_queue)) { - BlockdevOptionsQueueEntry *bdo = QSIMPLEQ_FIRST(bdo_queue); - - QSIMPLEQ_REMOVE_HEAD(bdo_queue, entry); - loc_push_restore(&bdo->loc); - qmp_blockdev_add(bdo->bdo, &error_fatal); - loc_pop(&bdo->loc); - qapi_free_BlockdevOptions(bdo->bdo); - g_free(bdo); - } - if (snapshot) { - qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, - NULL, NULL); - } - if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, - &machine_class->block_default_type, &error_fatal)) { - /* We printed help */ - exit(0); - } - - default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2, - CDROM_OPTS); - default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); - default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - -} - -static QemuOptsList qemu_smp_opts = { - .name = "smp-opts", - .implied_opt_name = "cpus", - .merge_lists = true, - .head = QTAILQ_HEAD_INITIALIZER(qemu_smp_opts.head), - .desc = { - { - .name = "cpus", - .type = QEMU_OPT_NUMBER, - }, { - .name = "sockets", - .type = QEMU_OPT_NUMBER, - }, { - .name = "dies", - .type = QEMU_OPT_NUMBER, - }, { - .name = "clusters", - .type = QEMU_OPT_NUMBER, - }, { - .name = "cores", - .type = QEMU_OPT_NUMBER, - }, { - .name = "threads", - .type = QEMU_OPT_NUMBER, - }, { - .name = "maxcpus", - .type = QEMU_OPT_NUMBER, - }, - { /*End of list */ } - }, -}; - -#if defined(CONFIG_POSIX) -static QemuOptsList qemu_run_with_opts = { - .name = 
"run-with", - .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), - .desc = { -#if defined(CONFIG_LINUX) - { - .name = "async-teardown", - .type = QEMU_OPT_BOOL, - }, -#endif - { - .name = "chroot", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -#define qemu_add_run_with_opts() qemu_add_opts(&qemu_run_with_opts) - -#else - -#define qemu_add_run_with_opts() - -#endif /* CONFIG_POSIX */ - -static void realtime_init(void) -{ - if (enable_mlock) { - if (os_mlock() < 0) { - error_report("locking memory failed"); - exit(1); - } - } -} - - -static void configure_msg(QemuOpts *opts) -{ - message_with_timestamp = qemu_opt_get_bool(opts, "timestamp", false); - error_with_guestname = qemu_opt_get_bool(opts, "guest-name", false); -} - - -/***********************************************************/ -/* USB devices */ - -static int usb_device_add(const char *devname) -{ - USBDevice *dev = NULL; - - if (!machine_usb(current_machine)) { - return -1; - } - - dev = usbdevice_create(devname); - if (!dev) - return -1; - - return 0; -} - -static int usb_parse(const char *cmdline) -{ - int r; - r = usb_device_add(cmdline); - if (r < 0) { - error_report("could not add USB device '%s'", cmdline); - } - return r; -} - -/***********************************************************/ -/* machine registration */ - -static MachineClass *find_machine(const char *name, GSList *machines) -{ - GSList *el; - - for (el = machines; el; el = el->next) { - MachineClass *mc = el->data; - - if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) { - return mc; - } - } - - return NULL; -} - -static MachineClass *find_default_machine(GSList *machines) -{ - GSList *el; - MachineClass *default_machineclass = NULL; - - for (el = machines; el; el = el->next) { - MachineClass *mc = el->data; - - if (mc->is_default) { - assert(default_machineclass == NULL && "Multiple default machines"); - default_machineclass = mc; - } - } - - return default_machineclass; -} - -static void 
version(void) -{ - printf("QEMU emulator version " QEMU_FULL_VERSION "\n" - QEMU_COPYRIGHT "\n"); -} - -static void help(int exitcode) -{ - version(); - printf("usage: %s [options] [disk_image]\n\n" - "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", - g_get_prgname()); - -#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ - if ((arch_mask) & arch_type) \ - fputs(opt_help, stdout); - -#define ARCHHEADING(text, arch_mask) \ - if ((arch_mask) & arch_type) \ - puts(stringify(text)); - -#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL) - -#include "qemu-options.def" - - printf("\nDuring emulation, the following keys are useful:\n" - "ctrl-alt-f toggle full screen\n" - "ctrl-alt-n switch to virtual console 'n'\n" - "ctrl-alt toggle mouse and keyboard grab\n" - "\n" - "When using -nographic, press 'ctrl-a h' to get some help.\n" - "\n" - QEMU_HELP_BOTTOM "\n"); - - exit(exitcode); -} - -enum { - -#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ - opt_enum, -#define DEFHEADING(text) -#define ARCHHEADING(text, arch_mask) - -#include "qemu-options.def" -}; - -#define HAS_ARG 0x0001 - -typedef struct QEMUOption { - const char *name; - int flags; - int index; - uint32_t arch_mask; -} QEMUOption; - -static const QEMUOption qemu_options[] = { - { "h", 0, QEMU_OPTION_h, QEMU_ARCH_ALL }, - -#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ - { option, opt_arg, opt_enum, arch_mask }, -#define DEFHEADING(text) -#define ARCHHEADING(text, arch_mask) - -#include "qemu-options.def" - { /* end of list */ } -}; - -typedef struct VGAInterfaceInfo { - const char *opt_name; /* option name */ - const char *name; /* human-readable name */ - /* Class names indicating that support is available. 
- * If no class is specified, the interface is always available */ - const char *class_names[2]; -} VGAInterfaceInfo; - -static const VGAInterfaceInfo vga_interfaces[VGA_TYPE_MAX] = { - [VGA_NONE] = { - .opt_name = "none", - .name = "no graphic card", - }, - [VGA_STD] = { - .opt_name = "std", - .name = "standard VGA", - .class_names = { "VGA", "isa-vga" }, - }, - [VGA_CIRRUS] = { - .opt_name = "cirrus", - .name = "Cirrus VGA", - .class_names = { "cirrus-vga", "isa-cirrus-vga" }, - }, - [VGA_VMWARE] = { - .opt_name = "vmware", - .name = "VMWare SVGA", - .class_names = { "vmware-svga" }, - }, - [VGA_VIRTIO] = { - .opt_name = "virtio", - .name = "Virtio VGA", - .class_names = { "virtio-vga" }, - }, - [VGA_QXL] = { - .opt_name = "qxl", - .name = "QXL VGA", - .class_names = { "qxl-vga" }, - }, - [VGA_TCX] = { - .opt_name = "tcx", - .name = "TCX framebuffer", - .class_names = { "sun-tcx" }, - }, - [VGA_CG3] = { - .opt_name = "cg3", - .name = "CG3 framebuffer", - .class_names = { "cgthree" }, - }, -#ifdef CONFIG_XEN_BACKEND - [VGA_XENFB] = { - .opt_name = "xenfb", - .name = "Xen paravirtualized framebuffer", - }, -#endif -}; - -static bool vga_interface_available(VGAInterfaceType t) -{ - const VGAInterfaceInfo *ti = &vga_interfaces[t]; - - assert(t < VGA_TYPE_MAX); - return !ti->class_names[0] || - module_object_class_by_name(ti->class_names[0]) || - module_object_class_by_name(ti->class_names[1]); -} - -static const char * -get_default_vga_model(const MachineClass *machine_class) -{ - if (machine_class->default_display) { - for (int t = 0; t < VGA_TYPE_MAX; t++) { - const VGAInterfaceInfo *ti = &vga_interfaces[t]; - - if (ti->opt_name && vga_interface_available(t) && - g_str_equal(ti->opt_name, machine_class->default_display)) { - return machine_class->default_display; - } - } - - warn_report_once("Default display '%s' is not available in this binary", - machine_class->default_display); - return NULL; - } else if (vga_interface_available(VGA_CIRRUS)) { - return "cirrus"; 
- } else if (vga_interface_available(VGA_STD)) { - return "std"; - } - - return NULL; -} - -static void select_vgahw(const MachineClass *machine_class, const char *p) -{ - const char *opts; - int t; - - if (g_str_equal(p, "help")) { - const char *def = get_default_vga_model(machine_class); - - for (t = 0; t < VGA_TYPE_MAX; t++) { - const VGAInterfaceInfo *ti = &vga_interfaces[t]; - - if (vga_interface_available(t) && ti->opt_name) { - printf("%-20s %s%s\n", ti->opt_name, ti->name ?: "", - (def && g_str_equal(ti->opt_name, def)) ? - " (default)" : ""); - } - } - exit(0); - } - - assert(vga_interface_type == VGA_NONE); - for (t = 0; t < VGA_TYPE_MAX; t++) { - const VGAInterfaceInfo *ti = &vga_interfaces[t]; - if (ti->opt_name && strstart(p, ti->opt_name, &opts)) { - if (!vga_interface_available(t)) { - error_report("%s not available", ti->name); - exit(1); - } - vga_interface_type = t; - break; - } - } - if (t == VGA_TYPE_MAX) { - invalid_vga: - error_report("unknown vga type: %s", p); - exit(1); - } - while (*opts) { - const char *nextopt; - - if (strstart(opts, ",retrace=", &nextopt)) { - opts = nextopt; - if (strstart(opts, "dumb", &nextopt)) - vga_retrace_method = VGA_RETRACE_DUMB; - else if (strstart(opts, "precise", &nextopt)) - vga_retrace_method = VGA_RETRACE_PRECISE; - else goto invalid_vga; - } else goto invalid_vga; - opts = nextopt; - } -} - -static void parse_display_qapi(const char *optarg) -{ - DisplayOptions *opts; - Visitor *v; - - v = qobject_input_visitor_new_str(optarg, "type", &error_fatal); - - visit_type_DisplayOptions(v, NULL, &opts, &error_fatal); - QAPI_CLONE_MEMBERS(DisplayOptions, &dpy, opts); - - qapi_free_DisplayOptions(opts); - visit_free(v); -} - -DisplayOptions *qmp_query_display_options(Error **errp) -{ - return QAPI_CLONE(DisplayOptions, &dpy); -} - -static void parse_display(const char *p) -{ - const char *opts; - - if (is_help_option(p)) { - qemu_display_help(); - exit(0); - } - - if (strstart(p, "vnc", &opts)) { - /* - * vnc 
isn't a (local) DisplayType but a protocol for remote - * display access. - */ - if (*opts == '=') { - vnc_parse(opts + 1); - } else { - error_report("VNC requires a display argument vnc="); - exit(1); - } - } else { - parse_display_qapi(p); - } -} - -static inline bool nonempty_str(const char *str) -{ - return str && *str; -} - -static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) -{ - gchar *buf; - size_t size; - const char *name, *file, *str, *gen_id; - FWCfgState *fw_cfg = (FWCfgState *) opaque; - - if (fw_cfg == NULL) { - error_setg(errp, "fw_cfg device not available"); - return -1; - } - name = qemu_opt_get(opts, "name"); - file = qemu_opt_get(opts, "file"); - str = qemu_opt_get(opts, "string"); - gen_id = qemu_opt_get(opts, "gen_id"); - - /* we need the name, and exactly one of: file, content string, gen_id */ - if (!nonempty_str(name) || - nonempty_str(file) + nonempty_str(str) + nonempty_str(gen_id) != 1) { - error_setg(errp, "name, plus exactly one of file," - " string and gen_id, are needed"); - return -1; - } - if (strlen(name) > FW_CFG_MAX_FILE_PATH - 1) { - error_setg(errp, "name too long (max. %d char)", - FW_CFG_MAX_FILE_PATH - 1); - return -1; - } - if (nonempty_str(gen_id)) { - /* - * In this particular case where the content is populated - * internally, the "etc/" namespace protection is relaxed, - * so do not emit a warning. 
- */ - } else if (strncmp(name, "opt/", 4) != 0) { - warn_report("externally provided fw_cfg item names " - "should be prefixed with \"opt/\""); - } - if (nonempty_str(str)) { - size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */ - buf = g_memdup(str, size); - } else if (nonempty_str(gen_id)) { - if (!fw_cfg_add_from_generator(fw_cfg, name, gen_id, errp)) { - return -1; - } - return 0; - } else { - GError *err = NULL; - if (!g_file_get_contents(file, &buf, &size, &err)) { - error_setg(errp, "can't load %s: %s", file, err->message); - g_error_free(err); - return -1; - } - } - /* For legacy, keep user files in a specific global order. */ - fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER); - fw_cfg_add_file(fw_cfg, name, buf, size); - fw_cfg_reset_order_override(fw_cfg); - return 0; -} - -static int device_help_func(void *opaque, QemuOpts *opts, Error **errp) -{ - return qdev_device_help(opts); -} - -static int device_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - DeviceState *dev; - - dev = qdev_device_add(opts, errp); - if (!dev && *errp) { - error_report_err(*errp); - return -1; - } else if (dev) { - object_unref(OBJECT(dev)); - } - return 0; -} - -static int chardev_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - Error *local_err = NULL; - - if (!qemu_chr_new_from_opts(opts, NULL, &local_err)) { - if (local_err) { - error_propagate(errp, local_err); - return -1; - } - exit(0); - } - return 0; -} - -#ifdef CONFIG_VIRTFS -static int fsdev_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - return qemu_fsdev_add(opts, errp); -} -#endif - -static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - return monitor_init_opts(opts, errp); -} - -static void monitor_parse(const char *optarg, const char *mode, bool pretty) -{ - static int monitor_device_index = 0; - QemuOpts *opts; - const char *p; - char label[32]; - - if (strstart(optarg, "chardev:", &p)) { - snprintf(label, sizeof(label), "%s", p); 
- } else { - snprintf(label, sizeof(label), "compat_monitor%d", - monitor_device_index); - opts = qemu_chr_parse_compat(label, optarg, true); - if (!opts) { - error_report("parse error: %s", optarg); - exit(1); - } - } - - opts = qemu_opts_create(qemu_find_opts("mon"), label, 1, &error_fatal); - qemu_opt_set(opts, "mode", mode, &error_abort); - qemu_opt_set(opts, "chardev", label, &error_abort); - if (!strcmp(mode, "control")) { - qemu_opt_set_bool(opts, "pretty", pretty, &error_abort); - } else { - assert(pretty == false); - } - monitor_device_index++; -} - -struct device_config { - enum { - DEV_USB, /* -usbdevice */ - DEV_SERIAL, /* -serial */ - DEV_PARALLEL, /* -parallel */ - DEV_DEBUGCON, /* -debugcon */ - DEV_GDB, /* -gdb, -s */ - DEV_SCLP, /* s390 sclp */ - } type; - const char *cmdline; - Location loc; - QTAILQ_ENTRY(device_config) next; -}; - -static QTAILQ_HEAD(, device_config) device_configs = - QTAILQ_HEAD_INITIALIZER(device_configs); - -static void add_device_config(int type, const char *cmdline) -{ - struct device_config *conf; - - conf = g_malloc0(sizeof(*conf)); - conf->type = type; - conf->cmdline = cmdline; - loc_save(&conf->loc); - QTAILQ_INSERT_TAIL(&device_configs, conf, next); -} - -static int foreach_device_config(int type, int (*func)(const char *cmdline)) -{ - struct device_config *conf; - int rc; - - QTAILQ_FOREACH(conf, &device_configs, next) { - if (conf->type != type) - continue; - loc_push_restore(&conf->loc); - rc = func(conf->cmdline); - loc_pop(&conf->loc); - if (rc) { - return rc; - } - } - return 0; -} - -static void qemu_disable_default_devices(void) -{ - MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); - - default_driver_check_json(); - qemu_opts_foreach(qemu_find_opts("device"), - default_driver_check, NULL, NULL); - qemu_opts_foreach(qemu_find_opts("global"), - default_driver_check, NULL, NULL); - - if (!vga_model && !default_vga) { - vga_interface_type = VGA_DEVICE; - vga_interface_created = true; - } - if 
(!has_defaults || machine_class->no_serial) { - default_serial = 0; - } - if (!has_defaults || machine_class->no_parallel) { - default_parallel = 0; - } - if (!has_defaults || machine_class->no_floppy) { - default_floppy = 0; - } - if (!has_defaults || machine_class->no_cdrom) { - default_cdrom = 0; - } - if (!has_defaults || machine_class->no_sdcard) { - default_sdcard = 0; - } - if (!has_defaults) { - default_monitor = 0; - default_net = 0; - default_vga = 0; - } else { - if (default_net && machine_class->default_nic && - !module_object_class_by_name(machine_class->default_nic)) { - warn_report("Default NIC '%s' is not available in this binary", - machine_class->default_nic); - default_net = 0; - } - } -} - -static void qemu_create_default_devices(void) -{ - MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); - - if (is_daemonized()) { - /* According to documentation and historically, -nographic redirects - * serial port, parallel port and monitor to stdio, which does not work - * with -daemonize. We can redirect these to null instead, but since - * -nographic is legacy, let's just error out. - * We disallow -nographic only if all other ports are not redirected - * explicitly, to not break existing legacy setups which uses - * -nographic _and_ redirects all ports explicitly - this is valid - * usage, -nographic is just a no-op in this case. 
- */ - if (nographic - && (default_parallel || default_serial || default_monitor)) { - error_report("-nographic cannot be used with -daemonize"); - exit(1); - } - } - - if (nographic) { - if (default_parallel) - add_device_config(DEV_PARALLEL, "null"); - if (default_serial && default_monitor) { - add_device_config(DEV_SERIAL, "mon:stdio"); - } else { - if (default_serial) - add_device_config(DEV_SERIAL, "stdio"); - if (default_monitor) - monitor_parse("stdio", "readline", false); - } - } else { - if (default_serial) - add_device_config(DEV_SERIAL, "vc:80Cx24C"); - if (default_parallel) - add_device_config(DEV_PARALLEL, "vc:80Cx24C"); - if (default_monitor) - monitor_parse("vc:80Cx24C", "readline", false); - } - - if (default_net) { - QemuOptsList *net = qemu_find_opts("net"); - qemu_opts_parse(net, "nic", true, &error_abort); -#ifdef CONFIG_SLIRP - qemu_opts_parse(net, "user", true, &error_abort); -#endif - } - -#if defined(CONFIG_VNC) - if (!QTAILQ_EMPTY(&(qemu_find_opts("vnc")->head))) { - display_remote++; - } -#endif - if (dpy.type == DISPLAY_TYPE_DEFAULT && !display_remote) { - if (!qemu_display_find_default(&dpy)) { - dpy.type = DISPLAY_TYPE_NONE; -#if defined(CONFIG_VNC) - vnc_parse("localhost:0,to=99,id=default"); -#endif - } - } - if (dpy.type == DISPLAY_TYPE_DEFAULT) { - dpy.type = DISPLAY_TYPE_NONE; - } - - /* If no default VGA is requested, the default is "none". 
*/ - if (default_vga) { - vga_model = get_default_vga_model(machine_class); - } - if (vga_model) { - select_vgahw(machine_class, vga_model); - } -} - -static int serial_parse(const char *devname) -{ - int index = num_serial_hds; - char label[32]; - - if (strcmp(devname, "none") == 0) - return 0; - snprintf(label, sizeof(label), "serial%d", index); - serial_hds = g_renew(Chardev *, serial_hds, index + 1); - - serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); - if (!serial_hds[index]) { - error_report("could not connect serial device" - " to character backend '%s'", devname); - return -1; - } - num_serial_hds++; - return 0; -} - -Chardev *serial_hd(int i) -{ - assert(i >= 0); - if (i < num_serial_hds) { - return serial_hds[i]; - } - return NULL; -} - -static int parallel_parse(const char *devname) -{ - static int index = 0; - char label[32]; - - if (strcmp(devname, "none") == 0) - return 0; - if (index == MAX_PARALLEL_PORTS) { - error_report("too many parallel ports"); - exit(1); - } - snprintf(label, sizeof(label), "parallel%d", index); - parallel_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); - if (!parallel_hds[index]) { - error_report("could not connect parallel device" - " to character backend '%s'", devname); - return -1; - } - index++; - return 0; -} - -static int debugcon_parse(const char *devname) -{ - QemuOpts *opts; - - if (!qemu_chr_new_mux_mon("debugcon", devname, NULL)) { - error_report("invalid character backend '%s'", devname); - exit(1); - } - opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL); - if (!opts) { - error_report("already have a debugcon device"); - exit(1); - } - qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort); - qemu_opt_set(opts, "chardev", "debugcon", &error_abort); - return 0; -} - -static gint machine_class_cmp(gconstpointer a, gconstpointer b) -{ - const MachineClass *mc1 = a, *mc2 = b; - int res; - - if (mc1->family == NULL) { - if (mc2->family == NULL) { - /* Compare 
standalone machine types against each other; they sort - * in increasing order. - */ - return strcmp(object_class_get_name(OBJECT_CLASS(mc1)), - object_class_get_name(OBJECT_CLASS(mc2))); - } - - /* Standalone machine types sort after families. */ - return 1; - } - - if (mc2->family == NULL) { - /* Families sort before standalone machine types. */ - return -1; - } - - /* Families sort between each other alphabetically increasingly. */ - res = strcmp(mc1->family, mc2->family); - if (res != 0) { - return res; - } - - /* Within the same family, machine types sort in decreasing order. */ - return strcmp(object_class_get_name(OBJECT_CLASS(mc2)), - object_class_get_name(OBJECT_CLASS(mc1))); -} - -static void machine_help_func(const QDict *qdict) -{ - GSList *machines, *el; - const char *type = qdict_get_try_str(qdict, "type"); - - machines = object_class_get_list(TYPE_MACHINE, false); - if (type) { - ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, machines)); - if (machine_class) { - type_print_class_properties(object_class_get_name(machine_class)); - return; - } - } - - printf("Supported machines are:\n"); - machines = g_slist_sort(machines, machine_class_cmp); - for (el = machines; el; el = el->next) { - MachineClass *mc = el->data; - if (mc->alias) { - printf("%-20s %s (alias of %s)\n", mc->alias, mc->desc, mc->name); - } - printf("%-20s %s%s%s\n", mc->name, mc->desc, - mc->is_default ? " (default)" : "", - mc->deprecation_reason ? " (deprecated)" : ""); - } -} - -static void -machine_merge_property(const char *propname, QDict *prop, Error **errp) -{ - QDict *opts; - - opts = qdict_new(); - /* Preserve the caller's reference to prop. 
*/ - qobject_ref(prop); - qdict_put(opts, propname, prop); - keyval_merge(machine_opts_dict, opts, errp); - qobject_unref(opts); -} - -static void -machine_parse_property_opt(QemuOptsList *opts_list, const char *propname, - const char *arg) -{ - QDict *prop = NULL; - bool help = false; - - prop = keyval_parse(arg, opts_list->implied_opt_name, &help, &error_fatal); - if (help) { - qemu_opts_print_help(opts_list, true); - exit(0); - } - machine_merge_property(propname, prop, &error_fatal); - qobject_unref(prop); -} - -static const char *pid_file; -struct UnlinkPidfileNotifier { - Notifier notifier; - char *pid_file_realpath; -}; -static struct UnlinkPidfileNotifier qemu_unlink_pidfile_notifier; - -static void qemu_unlink_pidfile(Notifier *n, void *data) -{ - struct UnlinkPidfileNotifier *upn; - - upn = DO_UPCAST(struct UnlinkPidfileNotifier, notifier, n); - unlink(upn->pid_file_realpath); -} - -static const QEMUOption *lookup_opt(int argc, char **argv, - const char **poptarg, int *poptind) -{ - const QEMUOption *popt; - int optind = *poptind; - char *r = argv[optind]; - const char *optarg; - - loc_set_cmdline(argv, optind, 1); - optind++; - /* Treat --foo the same as -foo. 
*/ - if (r[1] == '-') - r++; - popt = qemu_options; - for(;;) { - if (!popt->name) { - error_report("invalid option"); - exit(1); - } - if (!strcmp(popt->name, r + 1)) - break; - popt++; - } - if (popt->flags & HAS_ARG) { - if (optind >= argc) { - error_report("requires an argument"); - exit(1); - } - optarg = argv[optind++]; - loc_set_cmdline(argv, optind - 2, 2); - } else { - optarg = NULL; - } - - *poptarg = optarg; - *poptind = optind; - - return popt; -} - -static MachineClass *select_machine(QDict *qdict, Error **errp) -{ - const char *optarg = qdict_get_try_str(qdict, "type"); - GSList *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *machine_class; - Error *local_err = NULL; - - if (optarg) { - machine_class = find_machine(optarg, machines); - qdict_del(qdict, "type"); - if (!machine_class) { - error_setg(&local_err, "unsupported machine type"); - } - } else { - machine_class = find_default_machine(machines); - if (!machine_class) { - error_setg(&local_err, "No machine specified, and there is no default"); - } - } - - g_slist_free(machines); - if (local_err) { - error_append_hint(&local_err, "Use -machine help to list supported machines\n"); - error_propagate(errp, local_err); - } - return machine_class; -} - -static int object_parse_property_opt(Object *obj, - const char *name, const char *value, - const char *skip, Error **errp) -{ - if (g_str_equal(name, skip)) { - return 0; - } - - if (!object_property_parse(obj, name, value, errp)) { - return -1; - } - - return 0; -} - -/* *Non*recursively replace underscores with dashes in QDict keys. 
*/ -static void keyval_dashify(QDict *qdict, Error **errp) -{ - const QDictEntry *ent, *next; - char *p; - - for (ent = qdict_first(qdict); ent; ent = next) { - g_autofree char *new_key = NULL; - - next = qdict_next(qdict, ent); - if (!strchr(ent->key, '_')) { - continue; - } - new_key = g_strdup(ent->key); - for (p = new_key; *p; p++) { - if (*p == '_') { - *p = '-'; - } - } - if (qdict_haskey(qdict, new_key)) { - error_setg(errp, "Conflict between '%s' and '%s'", ent->key, new_key); - return; - } - qobject_ref(ent->value); - qdict_put_obj(qdict, new_key, ent->value); - qdict_del(qdict, ent->key); - } -} - -static void qemu_apply_legacy_machine_options(QDict *qdict) -{ - const char *value; - QObject *prop; - - keyval_dashify(qdict, &error_fatal); - - /* Legacy options do not correspond to MachineState properties. */ - value = qdict_get_try_str(qdict, "accel"); - if (value) { - accelerators = g_strdup(value); - qdict_del(qdict, "accel"); - } - - value = qdict_get_try_str(qdict, "igd-passthru"); - if (value) { - object_register_sugar_prop(ACCEL_CLASS_NAME("xen"), "igd-passthru", value, - false); - qdict_del(qdict, "igd-passthru"); - } - - value = qdict_get_try_str(qdict, "kvm-shadow-mem"); - if (value) { - object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kvm-shadow-mem", value, - false); - qdict_del(qdict, "kvm-shadow-mem"); - } - - value = qdict_get_try_str(qdict, "kernel-irqchip"); - if (value) { - object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kernel-irqchip", value, - false); - object_register_sugar_prop(ACCEL_CLASS_NAME("whpx"), "kernel-irqchip", value, - false); - qdict_del(qdict, "kernel-irqchip"); - } - - value = qdict_get_try_str(qdict, "memory-backend"); - if (value) { - if (mem_path) { - error_report("'-mem-path' can't be used together with" - "'-machine memory-backend'"); - exit(EXIT_FAILURE); - } - - /* Resolved later. 
*/ - ram_memdev_id = g_strdup(value); - qdict_del(qdict, "memory-backend"); - } - - prop = qdict_get(qdict, "memory"); - if (prop) { - have_custom_ram_size = - qobject_type(prop) == QTYPE_QDICT && - qdict_haskey(qobject_to(QDict, prop), "size"); - } -} - -static void object_option_foreach_add(bool (*type_opt_predicate)(const char *)) -{ - ObjectOption *opt, *next; - - QTAILQ_FOREACH_SAFE(opt, &object_opts, next, next) { - const char *type = ObjectType_str(opt->opts->qom_type); - if (type_opt_predicate(type)) { - user_creatable_add_qapi(opt->opts, &error_fatal); - qapi_free_ObjectOptions(opt->opts); - QTAILQ_REMOVE(&object_opts, opt, next); - g_free(opt); - } - } -} - -static void object_option_add_visitor(Visitor *v) -{ - ObjectOption *opt = g_new0(ObjectOption, 1); - visit_type_ObjectOptions(v, NULL, &opt->opts, &error_fatal); - QTAILQ_INSERT_TAIL(&object_opts, opt, next); -} - -static void object_option_parse(const char *optarg) -{ - QemuOpts *opts; - const char *type; - Visitor *v; - - if (optarg[0] == '{') { - QObject *obj = qobject_from_json(optarg, &error_fatal); - - v = qobject_input_visitor_new(obj); - qobject_unref(obj); - } else { - opts = qemu_opts_parse_noisily(qemu_find_opts("object"), - optarg, true); - if (!opts) { - exit(1); - } - - type = qemu_opt_get(opts, "qom-type"); - if (!type) { - error_setg(&error_fatal, QERR_MISSING_PARAMETER, "qom-type"); - } - if (user_creatable_print_help(type, opts)) { - exit(0); - } - - v = opts_visitor_new(opts); - } - - object_option_add_visitor(v); - visit_free(v); -} - -/* - * Very early object creation, before the sandbox options have been activated. - */ -static bool object_create_pre_sandbox(const char *type) -{ - /* - * Objects should in general not get initialized "too early" without - * a reason. If you add one, state the reason in a comment! - */ - - /* - * Reason: -sandbox on,resourcecontrol=deny disallows setting CPU - * affinity of threads. 
- */ - if (g_str_equal(type, "thread-context")) { - return true; - } - - return false; -} - -/* - * Initial object creation happens before all other - * QEMU data types are created. The majority of objects - * can be created at this point. The rng-egd object - * cannot be created here, as it depends on the chardev - * already existing. - */ -static bool object_create_early(const char *type) -{ - /* - * Objects should not be made "delayed" without a reason. If you - * add one, state the reason in a comment! - */ - - /* Reason: already created. */ - if (object_create_pre_sandbox(type)) { - return false; - } - - /* Reason: property "chardev" */ - if (g_str_equal(type, "rng-egd") || - g_str_equal(type, "qtest")) { - return false; - } - -#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - /* Reason: cryptodev-vhost-user property "chardev" */ - if (g_str_equal(type, "cryptodev-vhost-user")) { - return false; - } -#endif - - /* Reason: vhost-user-blk-server property "node-name" */ - if (g_str_equal(type, "vhost-user-blk-server")) { - return false; - } - /* - * Reason: filter-* property "netdev" etc. - */ - if (g_str_equal(type, "filter-buffer") || - g_str_equal(type, "filter-dump") || - g_str_equal(type, "filter-mirror") || - g_str_equal(type, "filter-redirector") || - g_str_equal(type, "colo-compare") || - g_str_equal(type, "filter-rewriter") || - g_str_equal(type, "filter-replay")) { - return false; - } - - /* - * Allocation of large amounts of memory may delay - * chardev initialization for too long, and trigger timeouts - * on software that waits for a monitor socket to be created - * (e.g. libvirt). 
- */ - if (g_str_has_prefix(type, "memory-backend-")) { - return false; - } - - return true; -} - -static void qemu_apply_machine_options(QDict *qdict) -{ - object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal); - - if (semihosting_enabled(false) && !semihosting_get_argc()) { - /* fall back to the -kernel/-append */ - semihosting_arg_fallback(current_machine->kernel_filename, current_machine->kernel_cmdline); - } - - if (current_machine->smp.cpus > 1) { - replay_add_blocker("smp"); - } -} - -static void qemu_create_early_backends(void) -{ - MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); -#if defined(CONFIG_SDL) - const bool use_sdl = (dpy.type == DISPLAY_TYPE_SDL); -#else - const bool use_sdl = false; -#endif -#if defined(CONFIG_GTK) - const bool use_gtk = (dpy.type == DISPLAY_TYPE_GTK); -#else - const bool use_gtk = false; -#endif - - if (dpy.has_window_close && !use_gtk && !use_sdl) { - error_report("window-close is only valid for GTK and SDL, " - "ignoring option"); - } - - qemu_display_early_init(&dpy); - qemu_console_early_init(); - - if (dpy.has_gl && dpy.gl != DISPLAYGL_MODE_OFF && display_opengl == 0) { -#if defined(CONFIG_OPENGL) - error_report("OpenGL is not supported by the display"); -#else - error_report("OpenGL support is disabled"); -#endif - exit(1); - } - - object_option_foreach_add(object_create_early); - - /* spice needs the timers to be initialized by this point */ - /* spice must initialize before audio as it changes the default audiodev */ - /* spice must initialize before chardevs (for spicevmc and spiceport) */ - qemu_spice.init(); - - qemu_opts_foreach(qemu_find_opts("chardev"), - chardev_init_func, NULL, &error_fatal); - -#ifdef CONFIG_VIRTFS - qemu_opts_foreach(qemu_find_opts("fsdev"), - fsdev_init_func, NULL, &error_fatal); -#endif - - /* - * Note: we need to create audio and block backends before - * setting machine properties, so they can be referred to. 
- */ - configure_blockdev(&bdo_queue, machine_class, snapshot); - audio_init_audiodevs(); -} - - -/* - * The remainder of object creation happens after the - * creation of chardev, fsdev, net clients and device data types. - */ -static bool object_create_late(const char *type) -{ - return !object_create_early(type) && !object_create_pre_sandbox(type); -} - -static void qemu_create_late_backends(void) -{ - if (qtest_chrdev) { - qtest_server_init(qtest_chrdev, qtest_log, &error_fatal); - } - - net_init_clients(); - - object_option_foreach_add(object_create_late); - - if (tpm_init() < 0) { - exit(1); - } - - qemu_opts_foreach(qemu_find_opts("mon"), - mon_init_func, NULL, &error_fatal); - - if (foreach_device_config(DEV_SERIAL, serial_parse) < 0) - exit(1); - if (foreach_device_config(DEV_PARALLEL, parallel_parse) < 0) - exit(1); - if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) - exit(1); - - /* now chardevs have been created we may have semihosting to connect */ - qemu_semihosting_chardev_init(); -} - -static void qemu_resolve_machine_memdev(void) -{ - if (ram_memdev_id) { - Object *backend; - ram_addr_t backend_size; - - backend = object_resolve_path_type(ram_memdev_id, - TYPE_MEMORY_BACKEND, NULL); - if (!backend) { - error_report("Memory backend '%s' not found", ram_memdev_id); - exit(EXIT_FAILURE); - } - if (!have_custom_ram_size) { - backend_size = object_property_get_uint(backend, "size", &error_abort); - current_machine->ram_size = backend_size; - } - object_property_set_link(OBJECT(current_machine), - "memory-backend", backend, &error_fatal); - } -} - -static void parse_memory_options(void) -{ - QemuOpts *opts = qemu_find_opts_singleton("memory"); - QDict *dict, *prop; - const char *mem_str; - Location loc; - - loc_push_none(&loc); - qemu_opts_loc_restore(opts); - - prop = qdict_new(); - - if (qemu_opt_get_size(opts, "size", 0) != 0) { - /* Fix up legacy suffix-less format */ - mem_str = qemu_opt_get(opts, "size"); - if 
(g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) { - g_autofree char *mib_str = g_strdup_printf("%sM", mem_str); - qdict_put_str(prop, "size", mib_str); - } else { - qdict_put_str(prop, "size", mem_str); - } - } - - if (qemu_opt_get(opts, "maxmem")) { - qdict_put_str(prop, "max-size", qemu_opt_get(opts, "maxmem")); - } - if (qemu_opt_get(opts, "slots")) { - qdict_put_str(prop, "slots", qemu_opt_get(opts, "slots")); - } - - dict = qdict_new(); - qdict_put(dict, "memory", prop); - keyval_merge(machine_opts_dict, dict, &error_fatal); - qobject_unref(dict); - loc_pop(&loc); -} - -static void qemu_create_machine(QDict *qdict) -{ - MachineClass *machine_class = select_machine(qdict, &error_fatal); - object_set_machine_compat_props(machine_class->compat_props); - - current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class))); - object_property_add_child(object_get_root(), "machine", - OBJECT(current_machine)); - object_property_add_child(container_get(OBJECT(current_machine), - "/unattached"), - "sysbus", OBJECT(sysbus_get_default())); - - if (machine_class->minimum_page_bits) { - if (!set_preferred_target_page_bits(machine_class->minimum_page_bits)) { - /* This would be a board error: specifying a minimum smaller than - * a target's compile-time fixed setting. - */ - g_assert_not_reached(); - } - } - - cpu_exec_init_all(); - page_size_init(); - - if (machine_class->hw_version) { - qemu_set_hw_version(machine_class->hw_version); - } - - /* - * Get the default machine options from the machine if it is not already - * specified either by the configuration file or by the command line. 
- */ - if (machine_class->default_machine_opts) { - QDict *default_opts = - keyval_parse(machine_class->default_machine_opts, NULL, NULL, - &error_abort); - qemu_apply_legacy_machine_options(default_opts); - object_set_properties_from_keyval(OBJECT(current_machine), default_opts, - false, &error_abort); - qobject_unref(default_opts); - } -} - -static int global_init_func(void *opaque, QemuOpts *opts, Error **errp) -{ - GlobalProperty *g; - - g = g_malloc0(sizeof(*g)); - g->driver = qemu_opt_get(opts, "driver"); - g->property = qemu_opt_get(opts, "property"); - g->value = qemu_opt_get(opts, "value"); - qdev_prop_register_global(g); - return 0; -} - -/* - * Return whether configuration group @group is stored in QemuOpts, or - * recorded as one or more QDicts by qemu_record_config_group. - */ -static bool is_qemuopts_group(const char *group) -{ - if (g_str_equal(group, "object") || - g_str_equal(group, "audiodev") || - g_str_equal(group, "machine") || - g_str_equal(group, "smp-opts") || - g_str_equal(group, "boot-opts")) { - return false; - } - return true; -} - -static void qemu_record_config_group(const char *group, QDict *dict, - bool from_json, Error **errp) -{ - if (g_str_equal(group, "object")) { - Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict)); - object_option_add_visitor(v); - visit_free(v); - - } else if (g_str_equal(group, "audiodev")) { - Audiodev *dev = NULL; - Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict)); - if (visit_type_Audiodev(v, NULL, &dev, errp)) { - audio_define(dev); - } - visit_free(v); - - } else if (g_str_equal(group, "machine")) { - /* - * Cannot merge string-valued and type-safe dictionaries, so JSON - * is not accepted yet for -M. 
- */ - assert(!from_json); - keyval_merge(machine_opts_dict, dict, errp); - } else if (g_str_equal(group, "smp-opts")) { - machine_merge_property("smp", dict, &error_fatal); - } else if (g_str_equal(group, "boot-opts")) { - machine_merge_property("boot", dict, &error_fatal); - } else { - abort(); - } -} - -/* - * Parse non-QemuOpts config file groups, pass the rest to - * qemu_config_do_parse. - */ -static void qemu_parse_config_group(const char *group, QDict *qdict, - void *opaque, Error **errp) -{ - QObject *crumpled; - if (is_qemuopts_group(group)) { - qemu_config_do_parse(group, qdict, opaque, errp); - return; - } - - crumpled = qdict_crumple(qdict, errp); - if (!crumpled) { - return; - } - switch (qobject_type(crumpled)) { - case QTYPE_QDICT: - qemu_record_config_group(group, qobject_to(QDict, crumpled), false, errp); - break; - case QTYPE_QLIST: - error_setg(errp, "Lists cannot be at top level of a configuration section"); - break; - default: - g_assert_not_reached(); - } - qobject_unref(crumpled); -} - -static void qemu_read_default_config_file(Error **errp) -{ - ERRP_GUARD(); - int ret; - g_autofree char *file = get_relocated_path(CONFIG_QEMU_CONFDIR "/qemu.conf"); - - ret = qemu_read_config_file(file, qemu_parse_config_group, errp); - if (ret < 0) { - if (ret == -ENOENT) { - error_free(*errp); - *errp = NULL; - } - } -} - -static void qemu_set_option(const char *str, Error **errp) -{ - char group[64], id[64], arg[64]; - QemuOptsList *list; - QemuOpts *opts; - int rc, offset; - - rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); - if (rc < 3 || str[offset] != '=') { - error_setg(errp, "can't parse: \"%s\"", str); - return; - } - - if (!is_qemuopts_group(group)) { - error_setg(errp, "-set is not supported with %s", group); - } else { - list = qemu_find_opts_err(group, errp); - if (list) { - opts = qemu_opts_find(list, id); - if (!opts) { - error_setg(errp, "there is no %s \"%s\" defined", group, id); - return; - } - qemu_opt_set(opts, 
arg, str + offset + 1, errp); - } - } -} - -static void user_register_global_props(void) -{ - qemu_opts_foreach(qemu_find_opts("global"), - global_init_func, NULL, NULL); -} - -static int do_configure_icount(void *opaque, QemuOpts *opts, Error **errp) -{ - icount_configure(opts, errp); - return 0; -} - -static int accelerator_set_property(void *opaque, - const char *name, const char *value, - Error **errp) -{ - return object_parse_property_opt(opaque, name, value, "accel", errp); -} - -static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp) -{ - bool *p_init_failed = opaque; - const char *acc = qemu_opt_get(opts, "accel"); - AccelClass *ac = accel_find(acc); - AccelState *accel; - int ret; - bool qtest_with_kvm; - - if (!acc) { - error_setg(errp, QERR_MISSING_PARAMETER, "accel"); - goto bad; - } - - qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL; - - if (!ac) { - if (!qtest_with_kvm) { - error_report("invalid accelerator %s", acc); - } - goto bad; - } - accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac))); - object_apply_compat_props(OBJECT(accel)); - qemu_opt_foreach(opts, accelerator_set_property, - accel, - &error_fatal); - /* - * If legacy -singlestep option is set, honour it for TCG and - * silently ignore for any other accelerator (which is how this - * option has always behaved). - */ - if (opt_one_insn_per_tb) { - /* - * This will always succeed for TCG, and we want to ignore - * the error from trying to set a nonexistent property - * on any other accelerator. 
- */ - object_property_set_bool(OBJECT(accel), "one-insn-per-tb", true, NULL); - } - ret = accel_init_machine(accel, current_machine); - if (ret < 0) { - if (!qtest_with_kvm || ret != -ENOENT) { - error_report("failed to initialize %s: %s", acc, strerror(-ret)); - } - goto bad; - } - - return 1; - -bad: - *p_init_failed = true; - return 0; -} - -static void configure_accelerators(const char *progname) -{ - bool init_failed = false; - - qemu_opts_foreach(qemu_find_opts("icount"), - do_configure_icount, NULL, &error_fatal); - - if (QTAILQ_EMPTY(&qemu_accel_opts.head)) { - char **accel_list, **tmp; - - if (accelerators == NULL) { - /* Select the default accelerator */ - bool have_tcg = accel_find("tcg"); - bool have_kvm = accel_find("kvm"); - - if (have_tcg && have_kvm) { - if (g_str_has_suffix(progname, "kvm")) { - /* If the program name ends with "kvm", we prefer KVM */ - accelerators = "kvm:tcg"; - } else { - accelerators = "tcg:kvm"; - } - } else if (have_kvm) { - accelerators = "kvm"; - } else if (have_tcg) { - accelerators = "tcg"; - } else { - error_report("No accelerator selected and" - " no default accelerator available"); - exit(1); - } - } - accel_list = g_strsplit(accelerators, ":", 0); - - for (tmp = accel_list; *tmp; tmp++) { - /* - * Filter invalid accelerators here, to prevent obscenities - * such as "-machine accel=tcg,,thread=single". 
- */ - if (accel_find(*tmp)) { - qemu_opts_parse_noisily(qemu_find_opts("accel"), *tmp, true); - } else { - init_failed = true; - error_report("invalid accelerator %s", *tmp); - } - } - g_strfreev(accel_list); - } else { - if (accelerators != NULL) { - error_report("The -accel and \"-machine accel=\" options are incompatible"); - exit(1); - } - } - - if (!qemu_opts_foreach(qemu_find_opts("accel"), - do_configure_accelerator, &init_failed, &error_fatal)) { - if (!init_failed) { - error_report("no accelerator found"); - } - exit(1); - } - - if (init_failed && !qtest_chrdev) { - error_report("falling back to %s", current_accel_name()); - } - - if (icount_enabled() && !tcg_enabled()) { - error_report("-icount is not allowed with hardware virtualization"); - exit(1); - } -} - -static void qemu_validate_options(const QDict *machine_opts) -{ - const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel"); - const char *initrd_filename = qdict_get_try_str(machine_opts, "initrd"); - const char *kernel_cmdline = qdict_get_try_str(machine_opts, "append"); - - if (kernel_filename == NULL) { - if (kernel_cmdline != NULL) { - error_report("-append only allowed with -kernel option"); - exit(1); - } - - if (initrd_filename != NULL) { - error_report("-initrd only allowed with -kernel option"); - exit(1); - } - } - - if (loadvm && preconfig_requested) { - error_report("'preconfig' and 'loadvm' options are " - "mutually exclusive"); - exit(EXIT_FAILURE); - } - if (incoming && preconfig_requested && strcmp(incoming, "defer") != 0) { - error_report("'preconfig' supports '-incoming defer' only"); - exit(EXIT_FAILURE); - } - -#ifdef CONFIG_CURSES - if (is_daemonized() && dpy.type == DISPLAY_TYPE_CURSES) { - error_report("curses display cannot be used with -daemonize"); - exit(1); - } -#endif -} - -static void qemu_process_sugar_options(void) -{ - if (mem_prealloc) { - QObject *smp = qdict_get(machine_opts_dict, "smp"); - if (smp && qobject_type(smp) == QTYPE_QDICT) { - QObject 
*cpus = qdict_get(qobject_to(QDict, smp), "cpus"); - if (cpus && qobject_type(cpus) == QTYPE_QSTRING) { - const char *val = qstring_get_str(qobject_to(QString, cpus)); - object_register_sugar_prop("memory-backend", "prealloc-threads", - val, false); - } - } - object_register_sugar_prop("memory-backend", "prealloc", "on", false); - } -} - -/* -action processing */ - -/* - * Process all the -action parameters parsed from cmdline. - */ -static int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp) -{ - Error *local_err = NULL; - QDict *qdict = qemu_opts_to_qdict(opts, NULL); - QObject *ret = NULL; - qmp_marshal_set_action(qdict, &ret, &local_err); - qobject_unref(ret); - qobject_unref(qdict); - if (local_err) { - error_propagate(errp, local_err); - return 1; - } - return 0; -} - -static void qemu_process_early_options(void) -{ - qemu_opts_foreach(qemu_find_opts("name"), - parse_name, NULL, &error_fatal); - - object_option_foreach_add(object_create_pre_sandbox); - -#ifdef CONFIG_SECCOMP - QemuOptsList *olist = qemu_find_opts_err("sandbox", NULL); - if (olist) { - qemu_opts_foreach(olist, parse_sandbox, NULL, &error_fatal); - } -#endif - - if (qemu_opts_foreach(qemu_find_opts("action"), - process_runstate_actions, NULL, &error_fatal)) { - exit(1); - } - -#ifndef _WIN32 - qemu_opts_foreach(qemu_find_opts("add-fd"), - parse_add_fd, NULL, &error_fatal); - - qemu_opts_foreach(qemu_find_opts("add-fd"), - cleanup_add_fd, NULL, &error_fatal); -#endif - - /* Open the logfile at this point and set the log mask if necessary. 
*/ - { - int mask = 0; - if (log_mask) { - mask = qemu_str_to_log_mask(log_mask); - if (!mask) { - qemu_print_log_usage(stdout); - exit(1); - } - } - qemu_set_log_filename_flags(log_file, mask, &error_fatal); - } - - qemu_add_default_firmwarepath(); -} - -static void qemu_process_help_options(void) -{ - /* - * Check for -cpu help and -device help before we call select_machine(), - * which will return an error if the architecture has no default machine - * type and the user did not specify one, so that the user doesn't need - * to say '-cpu help -machine something'. - */ - if (cpu_option && is_help_option(cpu_option)) { - list_cpus(); - exit(0); - } - - if (qemu_opts_foreach(qemu_find_opts("device"), - device_help_func, NULL, NULL)) { - exit(0); - } - - /* -L help lists the data directories and exits. */ - if (list_data_dirs) { - qemu_list_data_dirs(); - exit(0); - } -} - -static void qemu_maybe_daemonize(const char *pid_file) -{ - Error *err = NULL; - - os_daemonize(); - rcu_disable_atfork(); - - if (pid_file) { - char *pid_file_realpath = NULL; - - if (!qemu_write_pidfile(pid_file, &err)) { - error_reportf_err(err, "cannot create PID file: "); - exit(1); - } - - pid_file_realpath = g_malloc0(PATH_MAX); - if (!realpath(pid_file, pid_file_realpath)) { - if (errno != ENOENT) { - warn_report("not removing PID file on exit: cannot resolve PID " - "file path: %s: %s", pid_file, strerror(errno)); - } - return; - } - - qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) { - .notifier = { - .notify = qemu_unlink_pidfile, - }, - .pid_file_realpath = pid_file_realpath, - }; - qemu_add_exit_notifier(&qemu_unlink_pidfile_notifier.notifier); - } -} - -static void qemu_init_displays(void) -{ - DisplayState *ds; - - /* init local displays */ - ds = init_displaystate(); - qemu_display_init(ds, &dpy); - - /* must be after terminal init, SDL library changes signal handlers */ - os_setup_signal_handling(); - - /* init remote displays */ -#ifdef CONFIG_VNC - 
qemu_opts_foreach(qemu_find_opts("vnc"), - vnc_init_func, NULL, &error_fatal); -#endif - - if (using_spice) { - qemu_spice.display_init(); - } -} - -static void qemu_init_board(void) -{ - /* process plugin before CPUs are created, but once -smp has been parsed */ - qemu_plugin_load_list(&plugin_list, &error_fatal); - - /* From here on we enter MACHINE_PHASE_INITIALIZED. */ - machine_run_board_init(current_machine, mem_path, &error_fatal); - - drive_check_orphaned(); - - realtime_init(); -} - -static void qemu_create_cli_devices(void) -{ - DeviceOption *opt; - - soundhw_init(); - - qemu_opts_foreach(qemu_find_opts("fw_cfg"), - parse_fw_cfg, fw_cfg_find(), &error_fatal); - - /* init USB devices */ - if (machine_usb(current_machine)) { - if (foreach_device_config(DEV_USB, usb_parse) < 0) - exit(1); - } - - /* init generic devices */ - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); - qemu_opts_foreach(qemu_find_opts("device"), - device_init_func, NULL, &error_fatal); - QTAILQ_FOREACH(opt, &device_opts, next) { - DeviceState *dev; - loc_push_restore(&opt->loc); - /* - * TODO Eventually we should call qmp_device_add() here to make sure it - * behaves the same, but QMP still has to accept incorrectly typed - * options until libvirt is fixed and we want to be strict on the CLI - * from the start, so call qdev_device_add_from_qdict() directly for - * now. - */ - dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); - object_unref(OBJECT(dev)); - loc_pop(&opt->loc); - } - rom_reset_order_override(); -} - -static void qemu_machine_creation_done(void) -{ - MachineState *machine = MACHINE(qdev_get_machine()); - - /* Did we create any drives that we failed to create a device for? */ - drive_check_orphaned(); - - /* Don't warn about the default network setup that you get if - * no command line -net or -netdev options are specified. 
There - * are two cases that we would otherwise complain about: - * (1) board doesn't support a NIC but the implicit "-net nic" - * requested one - * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" - * sets up a nic that isn't connected to anything. - */ - if (!default_net && (!qtest_enabled() || has_defaults)) { - net_check_clients(); - } - - qdev_prop_check_globals(); - - qdev_machine_creation_done(); - - if (machine->cgs) { - /* - * Verify that Confidential Guest Support has actually been initialized - */ - assert(machine->cgs->ready); - } - - if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) { - exit(1); - } - if (!vga_interface_created && !default_vga && - vga_interface_type != VGA_NONE) { - warn_report("A -vga option was passed but this machine " - "type does not use that option; " - "No VGA device has been created"); - } -} - -void qmp_x_exit_preconfig(Error **errp) -{ - if (phase_check(PHASE_MACHINE_INITIALIZED)) { - error_setg(errp, "The command is permitted only before machine initialization"); - return; - } - - qemu_init_board(); - qemu_create_cli_devices(); - qemu_machine_creation_done(); - - if (loadvm) { - load_snapshot(loadvm, NULL, false, NULL, &error_fatal); - } - if (replay_mode != REPLAY_MODE_NONE) { - replay_vmstate_init(); - } - - if (incoming) { - Error *local_err = NULL; - if (strcmp(incoming, "defer") != 0) { - qmp_migrate_incoming(incoming, &local_err); - if (local_err) { - error_reportf_err(local_err, "-incoming %s: ", incoming); - exit(1); - } - } - } else if (autostart) { - qmp_cont(NULL); - } -} - -void qemu_init(int argc, char **argv) -{ - QemuOpts *opts; - QemuOpts *icount_opts = NULL, *accel_opts = NULL; - QemuOptsList *olist; - int optind; - const char *optarg; - MachineClass *machine_class; - bool userconfig = true; - FILE *vmstate_dump_file = NULL; - - qemu_add_opts(&qemu_drive_opts); - qemu_add_drive_opts(&qemu_legacy_drive_opts); - qemu_add_drive_opts(&qemu_common_drive_opts); - 
qemu_add_drive_opts(&qemu_drive_opts); - qemu_add_drive_opts(&bdrv_runtime_opts); - qemu_add_opts(&qemu_chardev_opts); - qemu_add_opts(&qemu_device_opts); - qemu_add_opts(&qemu_netdev_opts); - qemu_add_opts(&qemu_nic_opts); - qemu_add_opts(&qemu_net_opts); - qemu_add_opts(&qemu_rtc_opts); - qemu_add_opts(&qemu_global_opts); - qemu_add_opts(&qemu_mon_opts); - qemu_add_opts(&qemu_trace_opts); - qemu_plugin_add_opts(); - qemu_add_opts(&qemu_option_rom_opts); - qemu_add_opts(&qemu_accel_opts); - qemu_add_opts(&qemu_mem_opts); - qemu_add_opts(&qemu_smp_opts); - qemu_add_opts(&qemu_boot_opts); - qemu_add_opts(&qemu_add_fd_opts); - qemu_add_opts(&qemu_object_opts); - qemu_add_opts(&qemu_tpmdev_opts); - qemu_add_opts(&qemu_overcommit_opts); - qemu_add_opts(&qemu_msg_opts); - qemu_add_opts(&qemu_name_opts); - qemu_add_opts(&qemu_numa_opts); - qemu_add_opts(&qemu_icount_opts); - qemu_add_opts(&qemu_semihosting_config_opts); - qemu_add_opts(&qemu_fw_cfg_opts); - qemu_add_opts(&qemu_action_opts); - qemu_add_run_with_opts(); - module_call_init(MODULE_INIT_OPTS); - - error_init(argv[0]); - qemu_init_exec_dir(argv[0]); - - qemu_init_arch_modules(); - - qemu_init_subsystems(); - - /* first pass of option parsing */ - optind = 1; - while (optind < argc) { - if (argv[optind][0] != '-') { - /* disk image */ - optind++; - } else { - const QEMUOption *popt; - - popt = lookup_opt(argc, argv, &optarg, &optind); - switch (popt->index) { - case QEMU_OPTION_nouserconfig: - userconfig = false; - break; - } - } - } - - machine_opts_dict = qdict_new(); - if (userconfig) { - qemu_read_default_config_file(&error_fatal); - } - - /* second pass of option parsing */ - optind = 1; - for(;;) { - if (optind >= argc) - break; - if (argv[optind][0] != '-') { - loc_set_cmdline(argv, optind, 1); - drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); - } else { - const QEMUOption *popt; - - popt = lookup_opt(argc, argv, &optarg, &optind); - if (!(popt->arch_mask & arch_type)) { - error_report("Option not 
supported for this target"); - exit(1); - } - switch(popt->index) { - case QEMU_OPTION_cpu: - /* hw initialization will check this */ - cpu_option = optarg; - break; - case QEMU_OPTION_hda: - case QEMU_OPTION_hdb: - case QEMU_OPTION_hdc: - case QEMU_OPTION_hdd: - drive_add(IF_DEFAULT, popt->index - QEMU_OPTION_hda, optarg, - HD_OPTS); - break; - case QEMU_OPTION_blockdev: - { - Visitor *v; - BlockdevOptionsQueueEntry *bdo; - - v = qobject_input_visitor_new_str(optarg, "driver", - &error_fatal); - - bdo = g_new(BlockdevOptionsQueueEntry, 1); - visit_type_BlockdevOptions(v, NULL, &bdo->bdo, - &error_fatal); - visit_free(v); - loc_save(&bdo->loc); - QSIMPLEQ_INSERT_TAIL(&bdo_queue, bdo, entry); - break; - } - case QEMU_OPTION_drive: - opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), - optarg, false); - if (opts == NULL) { - exit(1); - } - break; - case QEMU_OPTION_set: - qemu_set_option(optarg, &error_fatal); - break; - case QEMU_OPTION_global: - if (qemu_global_option(optarg) != 0) - exit(1); - break; - case QEMU_OPTION_mtdblock: - drive_add(IF_MTD, -1, optarg, MTD_OPTS); - break; - case QEMU_OPTION_sd: - drive_add(IF_SD, -1, optarg, SD_OPTS); - break; - case QEMU_OPTION_pflash: - drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS); - break; - case QEMU_OPTION_snapshot: - snapshot = 1; - replay_add_blocker("-snapshot"); - break; - case QEMU_OPTION_numa: - opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), - optarg, true); - if (!opts) { - exit(1); - } - break; - case QEMU_OPTION_display: - parse_display(optarg); - break; - case QEMU_OPTION_nographic: - qdict_put_str(machine_opts_dict, "graphics", "off"); - nographic = true; - dpy.type = DISPLAY_TYPE_NONE; - break; - case QEMU_OPTION_portrait: - graphic_rotate = 90; - break; - case QEMU_OPTION_rotate: - graphic_rotate = strtol(optarg, (char **) &optarg, 10); - if (graphic_rotate != 0 && graphic_rotate != 90 && - graphic_rotate != 180 && graphic_rotate != 270) { - error_report("only 90, 180, 270 deg rotation is 
available"); - exit(1); - } - break; - case QEMU_OPTION_kernel: - qdict_put_str(machine_opts_dict, "kernel", optarg); - break; - case QEMU_OPTION_initrd: - qdict_put_str(machine_opts_dict, "initrd", optarg); - break; - case QEMU_OPTION_append: - qdict_put_str(machine_opts_dict, "append", optarg); - break; - case QEMU_OPTION_dtb: - qdict_put_str(machine_opts_dict, "dtb", optarg); - break; - case QEMU_OPTION_cdrom: - drive_add(IF_DEFAULT, 2, optarg, CDROM_OPTS); - break; - case QEMU_OPTION_boot: - machine_parse_property_opt(qemu_find_opts("boot-opts"), "boot", optarg); - break; - case QEMU_OPTION_fda: - case QEMU_OPTION_fdb: - drive_add(IF_FLOPPY, popt->index - QEMU_OPTION_fda, - optarg, FD_OPTS); - break; - case QEMU_OPTION_no_fd_bootchk: - fd_bootchk = 0; - break; - case QEMU_OPTION_netdev: - default_net = 0; - if (netdev_is_modern(optarg)) { - netdev_parse_modern(optarg); - } else { - net_client_parse(qemu_find_opts("netdev"), optarg); - } - break; - case QEMU_OPTION_nic: - default_net = 0; - net_client_parse(qemu_find_opts("nic"), optarg); - break; - case QEMU_OPTION_net: - default_net = 0; - net_client_parse(qemu_find_opts("net"), optarg); - break; -#ifdef CONFIG_LIBISCSI - case QEMU_OPTION_iscsi: - opts = qemu_opts_parse_noisily(qemu_find_opts("iscsi"), - optarg, false); - if (!opts) { - exit(1); - } - break; -#endif - case QEMU_OPTION_audiodev: - audio_parse_option(optarg); - break; - case QEMU_OPTION_audio: { - bool help; - char *model; - Audiodev *dev = NULL; - Visitor *v; - QDict *dict = keyval_parse(optarg, "driver", &help, &error_fatal); - if (help || (qdict_haskey(dict, "driver") && - is_help_option(qdict_get_str(dict, "driver")))) { - audio_help(); - exit(EXIT_SUCCESS); - } - if (!qdict_haskey(dict, "id")) { - qdict_put_str(dict, "id", "audiodev0"); - } - if (!qdict_haskey(dict, "model")) { - error_setg(&error_fatal, "Parameter 'model' is missing"); - } - model = g_strdup(qdict_get_str(dict, "model")); - qdict_del(dict, "model"); - if 
(is_help_option(model)) { - show_valid_soundhw(); - exit(0); - } - v = qobject_input_visitor_new_keyval(QOBJECT(dict)); - qobject_unref(dict); - visit_type_Audiodev(v, NULL, &dev, &error_fatal); - visit_free(v); - audio_define(dev); - select_soundhw(model, dev->id); - g_free(model); - break; - } - case QEMU_OPTION_h: - help(0); - break; - case QEMU_OPTION_version: - version(); - exit(0); - break; - case QEMU_OPTION_m: - opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true); - if (opts == NULL) { - exit(1); - } - break; -#ifdef CONFIG_TPM - case QEMU_OPTION_tpmdev: - if (tpm_config_parse(qemu_find_opts("tpmdev"), optarg) < 0) { - exit(1); - } - break; -#endif - case QEMU_OPTION_mempath: - mem_path = optarg; - break; - case QEMU_OPTION_mem_prealloc: - mem_prealloc = 1; - break; - case QEMU_OPTION_d: - log_mask = optarg; - break; - case QEMU_OPTION_D: - log_file = optarg; - break; - case QEMU_OPTION_DFILTER: - qemu_set_dfilter_ranges(optarg, &error_fatal); - break; -#if defined(CONFIG_TCG) && defined(CONFIG_LINUX) - case QEMU_OPTION_perfmap: - perf_enable_perfmap(); - break; - case QEMU_OPTION_jitdump: - perf_enable_jitdump(); - break; -#endif - case QEMU_OPTION_seed: - qemu_guest_random_seed_main(optarg, &error_fatal); - break; - case QEMU_OPTION_s: - add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT); - break; - case QEMU_OPTION_gdb: - add_device_config(DEV_GDB, optarg); - break; - case QEMU_OPTION_L: - if (is_help_option(optarg)) { - list_data_dirs = true; - } else { - qemu_add_data_dir(g_strdup(optarg)); - } - break; - case QEMU_OPTION_bios: - qdict_put_str(machine_opts_dict, "firmware", optarg); - break; - case QEMU_OPTION_singlestep: - opt_one_insn_per_tb = true; - break; - case QEMU_OPTION_S: - autostart = 0; - break; - case QEMU_OPTION_k: - keyboard_layout = optarg; - break; - case QEMU_OPTION_vga: - vga_model = optarg; - default_vga = 0; - break; - case QEMU_OPTION_g: - { - const char *p; - int w, h, depth; - p = optarg; - w = strtol(p, 
(char **)&p, 10); - if (w <= 0) { - graphic_error: - error_report("invalid resolution or depth"); - exit(1); - } - if (*p != 'x') - goto graphic_error; - p++; - h = strtol(p, (char **)&p, 10); - if (h <= 0) - goto graphic_error; - if (*p == 'x') { - p++; - depth = strtol(p, (char **)&p, 10); - if (depth != 1 && depth != 2 && depth != 4 && - depth != 8 && depth != 15 && depth != 16 && - depth != 24 && depth != 32) - goto graphic_error; - } else if (*p == '\0') { - depth = graphic_depth; - } else { - goto graphic_error; - } - - graphic_width = w; - graphic_height = h; - graphic_depth = depth; - } - break; - case QEMU_OPTION_echr: - { - char *r; - term_escape_char = strtol(optarg, &r, 0); - if (r == optarg) - printf("Bad argument to echr\n"); - break; - } - case QEMU_OPTION_monitor: - default_monitor = 0; - if (strncmp(optarg, "none", 4)) { - monitor_parse(optarg, "readline", false); - } - break; - case QEMU_OPTION_qmp: - monitor_parse(optarg, "control", false); - default_monitor = 0; - break; - case QEMU_OPTION_qmp_pretty: - monitor_parse(optarg, "control", true); - default_monitor = 0; - break; - case QEMU_OPTION_mon: - opts = qemu_opts_parse_noisily(qemu_find_opts("mon"), optarg, - true); - if (!opts) { - exit(1); - } - default_monitor = 0; - break; - case QEMU_OPTION_chardev: - opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), - optarg, true); - if (!opts) { - exit(1); - } - break; - case QEMU_OPTION_fsdev: - olist = qemu_find_opts("fsdev"); - if (!olist) { - error_report("fsdev support is disabled"); - exit(1); - } - opts = qemu_opts_parse_noisily(olist, optarg, true); - if (!opts) { - exit(1); - } - break; - case QEMU_OPTION_virtfs: { - QemuOpts *fsdev; - QemuOpts *device; - const char *writeout, *sock_fd, *socket, *path, *security_model, - *multidevs; - - olist = qemu_find_opts("virtfs"); - if (!olist) { - error_report("virtfs support is disabled"); - exit(1); - } - opts = qemu_opts_parse_noisily(olist, optarg, true); - if (!opts) { - exit(1); - } - - 
if (qemu_opt_get(opts, "fsdriver") == NULL || - qemu_opt_get(opts, "mount_tag") == NULL) { - error_report("Usage: -virtfs fsdriver,mount_tag=tag"); - exit(1); - } - fsdev = qemu_opts_create(qemu_find_opts("fsdev"), - qemu_opts_id(opts) ?: - qemu_opt_get(opts, "mount_tag"), - 1, NULL); - if (!fsdev) { - error_report("duplicate or invalid fsdev id: %s", - qemu_opt_get(opts, "mount_tag")); - exit(1); - } - - writeout = qemu_opt_get(opts, "writeout"); - if (writeout) { -#ifdef CONFIG_SYNC_FILE_RANGE - qemu_opt_set(fsdev, "writeout", writeout, &error_abort); -#else - error_report("writeout=immediate not supported " - "on this platform"); - exit(1); -#endif - } - qemu_opt_set(fsdev, "fsdriver", - qemu_opt_get(opts, "fsdriver"), &error_abort); - path = qemu_opt_get(opts, "path"); - if (path) { - qemu_opt_set(fsdev, "path", path, &error_abort); - } - security_model = qemu_opt_get(opts, "security_model"); - if (security_model) { - qemu_opt_set(fsdev, "security_model", security_model, - &error_abort); - } - socket = qemu_opt_get(opts, "socket"); - if (socket) { - qemu_opt_set(fsdev, "socket", socket, &error_abort); - } - sock_fd = qemu_opt_get(opts, "sock_fd"); - if (sock_fd) { - qemu_opt_set(fsdev, "sock_fd", sock_fd, &error_abort); - } - - qemu_opt_set_bool(fsdev, "readonly", - qemu_opt_get_bool(opts, "readonly", 0), - &error_abort); - multidevs = qemu_opt_get(opts, "multidevs"); - if (multidevs) { - qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); - } - device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, - &error_abort); - qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort); - qemu_opt_set(device, "fsdev", - qemu_opts_id(fsdev), &error_abort); - qemu_opt_set(device, "mount_tag", - qemu_opt_get(opts, "mount_tag"), &error_abort); - break; - } - case QEMU_OPTION_serial: - add_device_config(DEV_SERIAL, optarg); - default_serial = 0; - if (strncmp(optarg, "mon:", 4) == 0) { - default_monitor = 0; - } - break; - case QEMU_OPTION_action: - olist = 
qemu_find_opts("action"); - if (!qemu_opts_parse_noisily(olist, optarg, false)) { - exit(1); - } - break; - case QEMU_OPTION_watchdog_action: { - opts = qemu_opts_create(qemu_find_opts("action"), NULL, 0, &error_abort); - qemu_opt_set(opts, "watchdog", optarg, &error_abort); - break; - } - case QEMU_OPTION_parallel: - add_device_config(DEV_PARALLEL, optarg); - default_parallel = 0; - if (strncmp(optarg, "mon:", 4) == 0) { - default_monitor = 0; - } - break; - case QEMU_OPTION_debugcon: - add_device_config(DEV_DEBUGCON, optarg); - break; - case QEMU_OPTION_loadvm: - loadvm = optarg; - break; - case QEMU_OPTION_full_screen: - dpy.has_full_screen = true; - dpy.full_screen = true; - break; - case QEMU_OPTION_pidfile: - pid_file = optarg; - break; - case QEMU_OPTION_win2k_hack: - win2k_install_hack = 1; - break; - case QEMU_OPTION_acpitable: - opts = qemu_opts_parse_noisily(qemu_find_opts("acpi"), - optarg, true); - if (!opts) { - exit(1); - } - acpi_table_add(opts, &error_fatal); - break; - case QEMU_OPTION_smbios: - opts = qemu_opts_parse_noisily(qemu_find_opts("smbios"), - optarg, false); - if (!opts) { - exit(1); - } - smbios_entry_add(opts, &error_fatal); - break; - case QEMU_OPTION_fwcfg: - opts = qemu_opts_parse_noisily(qemu_find_opts("fw_cfg"), - optarg, true); - if (opts == NULL) { - exit(1); - } - break; - case QEMU_OPTION_preconfig: - preconfig_requested = true; - break; - case QEMU_OPTION_enable_kvm: - qdict_put_str(machine_opts_dict, "accel", "kvm"); - break; - case QEMU_OPTION_M: - case QEMU_OPTION_machine: - { - bool help; - - keyval_parse_into(machine_opts_dict, optarg, "type", &help, &error_fatal); - if (help) { - machine_help_func(machine_opts_dict); - exit(EXIT_SUCCESS); - } - break; - } - case QEMU_OPTION_accel: - accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"), - optarg, true); - optarg = qemu_opt_get(accel_opts, "accel"); - if (!optarg || is_help_option(optarg)) { - printf("Accelerators supported in QEMU binary:\n"); - GSList *el, 
*accel_list = object_class_get_list(TYPE_ACCEL, - false); - for (el = accel_list; el; el = el->next) { - gchar *typename = g_strdup(object_class_get_name( - OBJECT_CLASS(el->data))); - /* omit qtest which is used for tests only */ - if (g_strcmp0(typename, ACCEL_CLASS_NAME("qtest")) && - g_str_has_suffix(typename, ACCEL_CLASS_SUFFIX)) { - gchar **optname = g_strsplit(typename, - ACCEL_CLASS_SUFFIX, 0); - printf("%s\n", optname[0]); - g_strfreev(optname); - } - g_free(typename); - } - g_slist_free(accel_list); - exit(0); - } - break; - case QEMU_OPTION_usb: - qdict_put_str(machine_opts_dict, "usb", "on"); - break; - case QEMU_OPTION_usbdevice: - qdict_put_str(machine_opts_dict, "usb", "on"); - add_device_config(DEV_USB, optarg); - break; - case QEMU_OPTION_device: - if (optarg[0] == '{') { - QObject *obj = qobject_from_json(optarg, &error_fatal); - DeviceOption *opt = g_new0(DeviceOption, 1); - opt->opts = qobject_to(QDict, obj); - loc_save(&opt->loc); - assert(opt->opts != NULL); - QTAILQ_INSERT_TAIL(&device_opts, opt, next); - } else { - if (!qemu_opts_parse_noisily(qemu_find_opts("device"), - optarg, true)) { - exit(1); - } - } - break; - case QEMU_OPTION_smp: - machine_parse_property_opt(qemu_find_opts("smp-opts"), - "smp", optarg); - break; - case QEMU_OPTION_vnc: - vnc_parse(optarg); - break; - case QEMU_OPTION_no_acpi: - warn_report("-no-acpi is deprecated, use '-machine acpi=off' instead"); - qdict_put_str(machine_opts_dict, "acpi", "off"); - break; - case QEMU_OPTION_no_hpet: - warn_report("-no-hpet is deprecated, use '-machine hpet=off' instead"); - qdict_put_str(machine_opts_dict, "hpet", "off"); - break; - case QEMU_OPTION_no_reboot: - olist = qemu_find_opts("action"); - qemu_opts_parse_noisily(olist, "reboot=shutdown", false); - break; - case QEMU_OPTION_no_shutdown: - olist = qemu_find_opts("action"); - qemu_opts_parse_noisily(olist, "shutdown=pause", false); - break; - case QEMU_OPTION_uuid: - if (qemu_uuid_parse(optarg, &qemu_uuid) < 0) { - 
error_report("failed to parse UUID string: wrong format"); - exit(1); - } - qemu_uuid_set = true; - break; - case QEMU_OPTION_option_rom: - if (nb_option_roms >= MAX_OPTION_ROMS) { - error_report("too many option ROMs"); - exit(1); - } - opts = qemu_opts_parse_noisily(qemu_find_opts("option-rom"), - optarg, true); - if (!opts) { - exit(1); - } - option_rom[nb_option_roms].name = qemu_opt_get(opts, "romfile"); - option_rom[nb_option_roms].bootindex = - qemu_opt_get_number(opts, "bootindex", -1); - if (!option_rom[nb_option_roms].name) { - error_report("Option ROM file is not specified"); - exit(1); - } - nb_option_roms++; - break; - case QEMU_OPTION_semihosting: - qemu_semihosting_enable(); - break; - case QEMU_OPTION_semihosting_config: - if (qemu_semihosting_config_options(optarg) != 0) { - exit(1); - } - break; - case QEMU_OPTION_name: - opts = qemu_opts_parse_noisily(qemu_find_opts("name"), - optarg, true); - if (!opts) { - exit(1); - } - /* Capture guest name if -msg guest-name is used later */ - error_guest_name = qemu_opt_get(opts, "guest"); - break; - case QEMU_OPTION_prom_env: - if (nb_prom_envs >= MAX_PROM_ENVS) { - error_report("too many prom variables"); - exit(1); - } - prom_envs[nb_prom_envs] = optarg; - nb_prom_envs++; - break; - case QEMU_OPTION_old_param: - old_param = 1; - break; - case QEMU_OPTION_rtc: - opts = qemu_opts_parse_noisily(qemu_find_opts("rtc"), optarg, - false); - if (!opts) { - exit(1); - } - break; - case QEMU_OPTION_icount: - icount_opts = qemu_opts_parse_noisily(qemu_find_opts("icount"), - optarg, true); - if (!icount_opts) { - exit(1); - } - break; - case QEMU_OPTION_incoming: - if (!incoming) { - runstate_set(RUN_STATE_INMIGRATE); - } - incoming = optarg; - break; - case QEMU_OPTION_only_migratable: - only_migratable = 1; - break; - case QEMU_OPTION_nodefaults: - has_defaults = 0; - break; - case QEMU_OPTION_xen_domid: - if (!(accel_find("xen")) && !(accel_find("kvm"))) { - error_report("Option not supported for this target"); - 
exit(1); - } - xen_domid = atoi(optarg); - break; - case QEMU_OPTION_xen_attach: - if (!(accel_find("xen"))) { - error_report("Option not supported for this target"); - exit(1); - } - xen_mode = XEN_ATTACH; - break; - case QEMU_OPTION_xen_domid_restrict: - if (!(accel_find("xen"))) { - error_report("Option not supported for this target"); - exit(1); - } - xen_domid_restrict = true; - break; - case QEMU_OPTION_trace: - trace_opt_parse(optarg); - break; - case QEMU_OPTION_plugin: - qemu_plugin_opt_parse(optarg, &plugin_list); - break; - case QEMU_OPTION_readconfig: - qemu_read_config_file(optarg, qemu_parse_config_group, &error_fatal); - break; -#ifdef CONFIG_SPICE - case QEMU_OPTION_spice: - olist = qemu_find_opts_err("spice", NULL); - if (!olist) { - error_report("spice support is disabled"); - exit(1); - } - opts = qemu_opts_parse_noisily(olist, optarg, false); - if (!opts) { - exit(1); - } - display_remote++; - break; -#endif - case QEMU_OPTION_qtest: - qtest_chrdev = optarg; - break; - case QEMU_OPTION_qtest_log: - qtest_log = optarg; - break; - case QEMU_OPTION_sandbox: - olist = qemu_find_opts("sandbox"); - if (!olist) { -#ifndef CONFIG_SECCOMP - error_report("-sandbox support is not enabled " - "in this QEMU binary"); -#endif - exit(1); - } - - opts = qemu_opts_parse_noisily(olist, optarg, true); - if (!opts) { - exit(1); - } - break; - case QEMU_OPTION_add_fd: -#ifndef _WIN32 - opts = qemu_opts_parse_noisily(qemu_find_opts("add-fd"), - optarg, false); - if (!opts) { - exit(1); - } -#else - error_report("File descriptor passing is disabled on this " - "platform"); - exit(1); -#endif - break; - case QEMU_OPTION_object: - object_option_parse(optarg); - break; - case QEMU_OPTION_overcommit: - opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"), - optarg, false); - if (!opts) { - exit(1); - } - enable_mlock = qemu_opt_get_bool(opts, "mem-lock", false); - enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false); - break; - case QEMU_OPTION_compat: - { - 
CompatPolicy *opts_policy; - Visitor *v; - - v = qobject_input_visitor_new_str(optarg, NULL, - &error_fatal); - - visit_type_CompatPolicy(v, NULL, &opts_policy, &error_fatal); - QAPI_CLONE_MEMBERS(CompatPolicy, &compat_policy, opts_policy); - - qapi_free_CompatPolicy(opts_policy); - visit_free(v); - break; - } - case QEMU_OPTION_msg: - opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg, - false); - if (!opts) { - exit(1); - } - configure_msg(opts); - break; - case QEMU_OPTION_dump_vmstate: - if (vmstate_dump_file) { - error_report("only one '-dump-vmstate' " - "option may be given"); - exit(1); - } - vmstate_dump_file = fopen(optarg, "w"); - if (vmstate_dump_file == NULL) { - error_report("open %s: %s", optarg, strerror(errno)); - exit(1); - } - break; - case QEMU_OPTION_enable_sync_profile: - qsp_enable(); - break; - case QEMU_OPTION_nouserconfig: - /* Nothing to be parsed here. Especially, do not error out below. */ - break; -#if defined(CONFIG_POSIX) - case QEMU_OPTION_runas: - if (!os_set_runas(optarg)) { - error_report("User \"%s\" doesn't exist" - " (and is not <uid>:<gid>)", - optarg); - exit(1); - } - break; - case QEMU_OPTION_chroot: - warn_report("option is deprecated," - " use '-run-with chroot=...'
instead"); - os_set_chroot(optarg); - break; - case QEMU_OPTION_daemonize: - os_set_daemonize(true); - break; -#if defined(CONFIG_LINUX) - /* deprecated */ - case QEMU_OPTION_asyncteardown: - init_async_teardown(); - break; -#endif - case QEMU_OPTION_run_with: { - const char *str; - opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), - optarg, false); - if (!opts) { - exit(1); - } -#if defined(CONFIG_LINUX) - if (qemu_opt_get_bool(opts, "async-teardown", false)) { - init_async_teardown(); - } -#endif - str = qemu_opt_get(opts, "chroot"); - if (str) { - os_set_chroot(str); - } - break; - } -#endif /* CONFIG_POSIX */ - - default: - error_report("Option not supported in this build"); - exit(1); - } - } - } - /* - * Clear error location left behind by the loop. - * Best done right after the loop. Do not insert code here! - */ - loc_set_none(); - - qemu_validate_options(machine_opts_dict); - qemu_process_sugar_options(); - - /* - * These options affect everything else and should be processed - * before daemonizing. - */ - qemu_process_early_options(); - - qemu_process_help_options(); - qemu_maybe_daemonize(pid_file); - - /* - * The trace backend must be initialized after daemonizing. - * trace_init_backends() will call st_init(), which will create the - * trace thread in the parent, and also register st_flush_trace_buffer() - * in atexit(). This function will force the parent to wait for the - * writeout thread to finish, which will not occur, and the parent - * process will be left in the host. 
- */ - if (!trace_init_backends()) { - exit(1); - } - trace_init_file(); - - qemu_init_main_loop(&error_fatal); - cpu_timers_init(); - - user_register_global_props(); - replay_configure(icount_opts); - - configure_rtc(qemu_find_opts_singleton("rtc")); - - /* Transfer QemuOpts options into machine options */ - parse_memory_options(); - - qemu_create_machine(machine_opts_dict); - - suspend_mux_open(); - - qemu_disable_default_devices(); - qemu_create_default_devices(); - qemu_create_early_backends(); - - qemu_apply_legacy_machine_options(machine_opts_dict); - qemu_apply_machine_options(machine_opts_dict); - qobject_unref(machine_opts_dict); - phase_advance(PHASE_MACHINE_CREATED); - - /* - * Note: uses machine properties such as kernel-irqchip, must run - * after qemu_apply_machine_options. - */ - configure_accelerators(argv[0]); - phase_advance(PHASE_ACCEL_CREATED); - - /* - * Beware, QOM objects created before this point miss global and - * compat properties. - * - * Global properties get set up by qdev_prop_register_global(), - * called from user_register_global_props(), and certain option - * desugaring. Also in CPU feature desugaring (buried in - * parse_cpu_option()), which happens below this point, but may - * only target the CPU type, which can only be created after - * parse_cpu_option() returned the type. - * - * Machine compat properties: object_set_machine_compat_props(). - * Accelerator compat props: object_set_accelerator_compat_props(), - * called from do_configure_accelerator(). - */ - - machine_class = MACHINE_GET_CLASS(current_machine); - if (!qtest_enabled() && machine_class->deprecation_reason) { - warn_report("Machine type '%s' is deprecated: %s", - machine_class->name, machine_class->deprecation_reason); - } - - /* - * Create backends before creating migration objects, so that it can - * check against compatibilities on the backend memories (e.g. postcopy - * over memory-backend-file objects). 
- */ - qemu_create_late_backends(); - - /* - * Note: creates a QOM object, must run only after global and - * compat properties have been set up. - */ - migration_object_init(); - - /* parse features once if machine provides default cpu_type */ - current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_option) { - current_machine->cpu_type = parse_cpu_option(cpu_option); - } - /* NB: for machine none cpu_type could STILL be NULL here! */ - - qemu_resolve_machine_memdev(); - parse_numa_opts(current_machine); - - if (vmstate_dump_file) { - /* dump and exit */ - module_load_qom_all(); - dump_vmstate_json_to_file(vmstate_dump_file); - exit(0); - } - - if (!preconfig_requested) { - qmp_x_exit_preconfig(&error_fatal); - } - qemu_init_displays(); - accel_setup_post(current_machine); - os_setup_post(); - resume_mux_open(); -} diff --git a/softmmu/watchpoint.c b/softmmu/watchpoint.c deleted file mode 100644 index 45d1f12..0000000 --- a/softmmu/watchpoint.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * CPU watchpoints - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */ - -#include "qemu/osdep.h" -#include "qemu/main-loop.h" -#include "qemu/error-report.h" -#include "exec/exec-all.h" -#include "exec/translate-all.h" -#include "sysemu/tcg.h" -#include "sysemu/replay.h" -#include "hw/core/tcg-cpu-ops.h" -#include "hw/core/cpu.h" - -/* Add a watchpoint. */ -int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, - int flags, CPUWatchpoint **watchpoint) -{ - CPUWatchpoint *wp; - vaddr in_page; - - /* forbid ranges which are empty or run off the end of the address space */ - if (len == 0 || (addr + len - 1) < addr) { - error_report("tried to set invalid watchpoint at %" - VADDR_PRIx ", len=%" VADDR_PRIu, addr, len); - return -EINVAL; - } - wp = g_malloc(sizeof(*wp)); - - wp->vaddr = addr; - wp->len = len; - wp->flags = flags; - - /* keep all GDB-injected watchpoints in front */ - if (flags & BP_GDB) { - QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry); - } else { - QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry); - } - - in_page = -(addr | TARGET_PAGE_MASK); - if (len <= in_page) { - tlb_flush_page(cpu, addr); - } else { - tlb_flush(cpu); - } - - if (watchpoint) { - *watchpoint = wp; - } - return 0; -} - -/* Remove a specific watchpoint. */ -int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, - int flags) -{ - CPUWatchpoint *wp; - - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (addr == wp->vaddr && len == wp->len - && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) { - cpu_watchpoint_remove_by_ref(cpu, wp); - return 0; - } - } - return -ENOENT; -} - -/* Remove a specific watchpoint by reference. */ -void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint) -{ - QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry); - - tlb_flush_page(cpu, watchpoint->vaddr); - - g_free(watchpoint); -} - -/* Remove all matching watchpoints. 
*/ -void cpu_watchpoint_remove_all(CPUState *cpu, int mask) -{ - CPUWatchpoint *wp, *next; - - QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) { - if (wp->flags & mask) { - cpu_watchpoint_remove_by_ref(cpu, wp); - } - } -} - -#ifdef CONFIG_TCG - -/* - * Return true if this watchpoint address matches the specified - * access (ie the address range covered by the watchpoint overlaps - * partially or completely with the address range covered by the - * access). - */ -static inline bool watchpoint_address_matches(CPUWatchpoint *wp, - vaddr addr, vaddr len) -{ - /* - * We know the lengths are non-zero, but a little caution is - * required to avoid errors in the case where the range ends - * exactly at the top of the address space and so addr + len - * wraps round to zero. - */ - vaddr wpend = wp->vaddr + wp->len - 1; - vaddr addrend = addr + len - 1; - - return !(addr > wpend || wp->vaddr > addrend); -} - -/* Return flags for watchpoints that match addr + prot. */ -int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len) -{ - CPUWatchpoint *wp; - int ret = 0; - - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (watchpoint_address_matches(wp, addr, len)) { - ret |= wp->flags; - } - } - return ret; -} - -/* Generate a debug exception if a watchpoint has been hit. */ -void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, - MemTxAttrs attrs, int flags, uintptr_t ra) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - CPUWatchpoint *wp; - - assert(tcg_enabled()); - if (cpu->watchpoint_hit) { - /* - * We re-entered the check after replacing the TB. - * Now raise the debug interrupt so that it will - * trigger after the current instruction. 
- */ - qemu_mutex_lock_iothread(); - cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG); - qemu_mutex_unlock_iothread(); - return; - } - - if (cc->tcg_ops->adjust_watchpoint_address) { - /* this is currently used only by ARM BE32 */ - addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len); - } - - assert((flags & ~BP_MEM_ACCESS) == 0); - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - int hit_flags = wp->flags & flags; - - if (hit_flags && watchpoint_address_matches(wp, addr, len)) { - if (replay_running_debug()) { - /* - * replay_breakpoint reads icount. - * Force recompile to succeed, because icount may - * be read only at the end of the block. - */ - if (!cpu->neg.can_do_io) { - /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ - | curr_cflags(cpu); - cpu_loop_exit_restore(cpu, ra); - } - /* - * Don't process the watchpoints when we are - * in a reverse debugging operation. - */ - replay_breakpoint(); - return; - } - - wp->flags |= hit_flags << BP_HIT_SHIFT; - wp->hitaddr = MAX(addr, wp->vaddr); - wp->hitattrs = attrs; - - if (wp->flags & BP_CPU - && cc->tcg_ops->debug_check_watchpoint - && !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) { - wp->flags &= ~BP_WATCHPOINT_HIT; - continue; - } - cpu->watchpoint_hit = wp; - - mmap_lock(); - /* This call also restores vCPU state */ - tb_check_watchpoint(cpu, ra); - if (wp->flags & BP_STOP_BEFORE_ACCESS) { - cpu->exception_index = EXCP_DEBUG; - mmap_unlock(); - cpu_loop_exit(cpu); - } else { - /* Force execution of one insn next time. 
*/ - cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ - | curr_cflags(cpu); - mmap_unlock(); - cpu_loop_exit_noexc(cpu); - } - } else { - wp->flags &= ~BP_WATCHPOINT_HIT; - } - } -} - -#endif /* CONFIG_TCG */ diff --git a/system/arch_init.c b/system/arch_init.c new file mode 100644 index 0000000..79716f9 --- /dev/null +++ b/system/arch_init.c @@ -0,0 +1,50 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "sysemu/arch_init.h" + +#ifdef TARGET_SPARC +int graphic_width = 1024; +int graphic_height = 768; +int graphic_depth = 8; +#elif defined(TARGET_M68K) +int graphic_width = 800; +int graphic_height = 600; +int graphic_depth = 8; +#else +int graphic_width = 800; +int graphic_height = 600; +int graphic_depth = 32; +#endif + +const uint32_t arch_type = QEMU_ARCH; + +void qemu_init_arch_modules(void) +{ +#ifdef CONFIG_MODULES + module_init_info(qemu_modinfo); + module_allow_arch(TARGET_NAME); +#endif +} diff --git a/system/async-teardown.c b/system/async-teardown.c new file mode 100644 index 0000000..396963c --- /dev/null +++ b/system/async-teardown.c @@ -0,0 +1,143 @@ +/* + * Asynchronous teardown + * + * Copyright IBM, Corp. 2022 + * + * Authors: + * Claudio Imbrenda + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include <dirent.h> +#include <sys/prctl.h> +#include <sched.h> + +#include "qemu/async-teardown.h" + +#ifdef _SC_THREAD_STACK_MIN +#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN) +#else +#define CLONE_STACK_SIZE 16384 +#endif + +static pid_t the_ppid; + +/* + * Close all open file descriptors. + */ +static void close_all_open_fd(void) +{ + struct dirent *de; + int fd, dfd; + DIR *dir; + +#ifdef CONFIG_CLOSE_RANGE + int r = close_range(0, ~0U, 0); + if (!r) { + /* Success, no need to try other ways. */ + return; + } +#endif + + dir = opendir("/proc/self/fd"); + if (!dir) { + /* If /proc is not mounted, there is nothing that can be done. */ + return; + } + /* Avoid closing the directory. */ + dfd = dirfd(dir); + + for (de = readdir(dir); de; de = readdir(dir)) { + fd = atoi(de->d_name); + if (fd != dfd) { + close(fd); + } + } + closedir(dir); +} + +static void hup_handler(int signal) +{ + /* Check every second if this process has been reparented.
*/ + while (the_ppid == getppid()) { + /* sleep() is safe to use in a signal handler. */ + sleep(1); + } + + /* At this point the parent process has terminated completely. */ + _exit(0); +} + +static int async_teardown_fn(void *arg) +{ + struct sigaction sa = { .sa_handler = hup_handler }; + sigset_t hup_signal; + char name[16]; + + /* Set a meaningful name for this process. */ + snprintf(name, 16, "cleanup/%d", the_ppid); + prctl(PR_SET_NAME, (unsigned long)name); + + /* + * Close all file descriptors that might have been inherited from the + * main qemu process when doing clone, needed to make libvirt happy. + * Not using close_range for increased compatibility with older kernels. + */ + close_all_open_fd(); + + /* Set up a handler for SIGHUP and unblock SIGHUP. */ + sigaction(SIGHUP, &sa, NULL); + sigemptyset(&hup_signal); + sigaddset(&hup_signal, SIGHUP); + sigprocmask(SIG_UNBLOCK, &hup_signal, NULL); + + /* Ask to receive SIGHUP when the parent dies. */ + prctl(PR_SET_PDEATHSIG, SIGHUP); + + /* + * Sleep forever, unless the parent process has already terminated. The + * only interruption can come from the SIGHUP signal, which in normal + * operation is received when the parent process dies. + */ + if (the_ppid == getppid()) { + pause(); + } + + /* At this point the parent process has terminated completely. */ + _exit(0); +} + +/* + * Allocate a new stack of a reasonable size, and return a pointer to its top. + */ +static void *new_stack_for_clone(void) +{ + size_t stack_size = CLONE_STACK_SIZE; + char *stack_ptr; + + /* Allocate a new stack and get a pointer to its top. */ + stack_ptr = qemu_alloc_stack(&stack_size); + stack_ptr += stack_size; + + return stack_ptr; +} + +/* + * Block all signals, start (clone) a new process sharing the address space + * with qemu (CLONE_VM), then restore signals. 
+ */ +void init_async_teardown(void) +{ + sigset_t all_signals, old_signals; + + the_ppid = getpid(); + + sigfillset(&all_signals); + sigprocmask(SIG_BLOCK, &all_signals, &old_signals); + clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); + sigprocmask(SIG_SETMASK, &old_signals, NULL); +} diff --git a/system/balloon.c b/system/balloon.c new file mode 100644 index 0000000..e0e8969 --- /dev/null +++ b/system/balloon.c @@ -0,0 +1,106 @@ +/* + * Generic Balloon handlers and management + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (C) 2011 Red Hat, Inc. + * Copyright (C) 2011 Amit Shah + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/atomic.h" +#include "sysemu/kvm.h" +#include "sysemu/balloon.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qmp/qerror.h" +#include "trace.h" + +static QEMUBalloonEvent *balloon_event_fn; +static QEMUBalloonStatus *balloon_stat_fn; +static void *balloon_opaque; + +static bool have_balloon(Error **errp) +{ + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_set(errp, ERROR_CLASS_KVM_MISSING_CAP, + "Using KVM without synchronous MMU, balloon unavailable"); + return false; + } + if (!balloon_event_fn) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, + "No balloon device has been activated"); + return false; + } + return true; +} + +int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, + QEMUBalloonStatus *stat_func, void *opaque) +{ + if (balloon_event_fn || balloon_stat_fn || balloon_opaque) { + /* We're already registered one balloon handler. How many can + * a guest really have? + */ + return -1; + } + balloon_event_fn = event_func; + balloon_stat_fn = stat_func; + balloon_opaque = opaque; + return 0; +} + +void qemu_remove_balloon_handler(void *opaque) +{ + if (balloon_opaque != opaque) { + return; + } + balloon_event_fn = NULL; + balloon_stat_fn = NULL; + balloon_opaque = NULL; +} + +BalloonInfo *qmp_query_balloon(Error **errp) +{ + BalloonInfo *info; + + if (!have_balloon(errp)) { + return NULL; + } + + info = g_malloc0(sizeof(*info)); + balloon_stat_fn(balloon_opaque, info); + return info; +} + +void qmp_balloon(int64_t target, Error **errp) +{ + if (!have_balloon(errp)) { + return; + } + + if (target <= 0) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size"); + return; + } + + trace_balloon_event(balloon_opaque, target); + balloon_event_fn(balloon_opaque, target); +} diff --git a/system/bootdevice.c b/system/bootdevice.c new file mode 100644 index 0000000..2106f10 --- /dev/null +++ b/system/bootdevice.c @@ -0,0 +1,430 @@ +/* + * QEMU Boot Device 
Implement + * + * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "sysemu/reset.h" +#include "hw/qdev-core.h" +#include "hw/boards.h" + +typedef struct FWBootEntry FWBootEntry; + +struct FWBootEntry { + QTAILQ_ENTRY(FWBootEntry) link; + int32_t bootindex; + DeviceState *dev; + char *suffix; +}; + +static QTAILQ_HEAD(, FWBootEntry) fw_boot_order = + QTAILQ_HEAD_INITIALIZER(fw_boot_order); +static QEMUBootSetHandler *boot_set_handler; +static void *boot_set_opaque; + +void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque) +{ + boot_set_handler = func; + boot_set_opaque = opaque; +} + +void qemu_boot_set(const char *boot_order, Error **errp) +{ + Error *local_err = NULL; + + if (!boot_set_handler) { + error_setg(errp, "no function defined to set boot device list for" + " this architecture"); + return; + } + + validate_bootdevices(boot_order, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + boot_set_handler(boot_set_opaque, boot_order, errp); +} + +void validate_bootdevices(const char *devices, Error **errp) +{ + /* We just do some generic consistency checks */ + const char *p; + int bitmap = 0; + + for (p = devices; *p != '\0'; p++) { + /* Allowed boot devices are: + * a-b: floppy disk drives + * c-f: IDE disk drives + * g-m: machine implementation dependent drives + * n-p: network devices + * It's up to each machine implementation to check if the given boot + * devices match the actual hardware implementation and firmware + * features. 
+ */ + if (*p < 'a' || *p > 'p') { + error_setg(errp, "Invalid boot device '%c'", *p); + return; + } + if (bitmap & (1 << (*p - 'a'))) { + error_setg(errp, "Boot device '%c' was given twice", *p); + return; + } + bitmap |= 1 << (*p - 'a'); + } +} + +void restore_boot_order(void *opaque) +{ + char *normal_boot_order = opaque; + static int first = 1; + + /* Restore boot order and remove ourselves after the first boot */ + if (first) { + first = 0; + return; + } + + if (boot_set_handler) { + qemu_boot_set(normal_boot_order, &error_abort); + } + + qemu_unregister_reset(restore_boot_order, normal_boot_order); + g_free(normal_boot_order); +} + +void check_boot_index(int32_t bootindex, Error **errp) +{ + FWBootEntry *i; + + if (bootindex >= 0) { + QTAILQ_FOREACH(i, &fw_boot_order, link) { + if (i->bootindex == bootindex) { + error_setg(errp, "The bootindex %d has already been used", + bootindex); + return; + } + } + } +} + +void del_boot_device_path(DeviceState *dev, const char *suffix) +{ + FWBootEntry *i; + + if (dev == NULL) { + return; + } + + QTAILQ_FOREACH(i, &fw_boot_order, link) { + if ((!suffix || !g_strcmp0(i->suffix, suffix)) && + i->dev == dev) { + QTAILQ_REMOVE(&fw_boot_order, i, link); + g_free(i->suffix); + g_free(i); + + break; + } + } +} + +void add_boot_device_path(int32_t bootindex, DeviceState *dev, + const char *suffix) +{ + FWBootEntry *node, *i; + + if (bootindex < 0) { + del_boot_device_path(dev, suffix); + return; + } + + assert(dev != NULL || suffix != NULL); + + del_boot_device_path(dev, suffix); + + node = g_new0(FWBootEntry, 1); + node->bootindex = bootindex; + node->suffix = g_strdup(suffix); + node->dev = dev; + + QTAILQ_FOREACH(i, &fw_boot_order, link) { + if (i->bootindex == bootindex) { + error_report("Two devices with same boot index %d", bootindex); + exit(1); + } else if (i->bootindex < bootindex) { + continue; + } + QTAILQ_INSERT_BEFORE(i, node, link); + return; + } + QTAILQ_INSERT_TAIL(&fw_boot_order, node, link); +} + +DeviceState 
*get_boot_device(uint32_t position) +{ + uint32_t counter = 0; + FWBootEntry *i = NULL; + DeviceState *res = NULL; + + if (!QTAILQ_EMPTY(&fw_boot_order)) { + QTAILQ_FOREACH(i, &fw_boot_order, link) { + if (counter == position) { + res = i->dev; + break; + } + counter++; + } + } + return res; +} + +static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes, + const char *suffix) +{ + char *devpath = NULL, *s = NULL, *d, *bootpath; + + if (dev) { + devpath = qdev_get_fw_dev_path(dev); + assert(devpath); + } + + if (!ignore_suffixes) { + if (dev) { + d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev); + if (d) { + assert(!suffix); + s = d; + } else { + s = g_strdup(suffix); + } + } else { + s = g_strdup(suffix); + } + } + + bootpath = g_strdup_printf("%s%s", + devpath ? devpath : "", + s ? s : ""); + g_free(devpath); + g_free(s); + + return bootpath; +} + +/* + * This function returns null terminated string that consist of new line + * separated device paths. + * + * memory pointed by "size" is assigned total length of the array in bytes + * + */ +char *get_boot_devices_list(size_t *size) +{ + FWBootEntry *i; + size_t total = 0; + char *list = NULL; + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + bool ignore_suffixes = mc->ignore_boot_device_suffixes; + + QTAILQ_FOREACH(i, &fw_boot_order, link) { + char *bootpath; + size_t len; + + bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix); + + if (total) { + list[total-1] = '\n'; + } + len = strlen(bootpath) + 1; + list = g_realloc(list, total + len); + memcpy(&list[total], bootpath, len); + total += len; + g_free(bootpath); + } + + *size = total; + + if (current_machine->boot_config.has_strict && + current_machine->boot_config.strict && *size > 0) { + list[total-1] = '\n'; + list = g_realloc(list, total + 5); + memcpy(&list[total], "HALT", 5); + *size = total + 5; + } + return list; +} + +typedef struct { + int32_t *bootindex; + const char *suffix; + DeviceState 
*dev; +} BootIndexProperty; + +static void device_get_bootindex(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + BootIndexProperty *prop = opaque; + visit_type_int32(v, name, prop->bootindex, errp); +} + +static void device_set_bootindex(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + BootIndexProperty *prop = opaque; + int32_t boot_index; + Error *local_err = NULL; + + if (!visit_type_int32(v, name, &boot_index, errp)) { + return; + } + /* check whether bootindex is present in fw_boot_order list */ + check_boot_index(boot_index, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + /* change bootindex to a new one */ + *prop->bootindex = boot_index; + + add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix); +} + +static void property_release_bootindex(Object *obj, const char *name, + void *opaque) + +{ + BootIndexProperty *prop = opaque; + + del_boot_device_path(prop->dev, prop->suffix); + g_free(prop); +} + +void device_add_bootindex_property(Object *obj, int32_t *bootindex, + const char *name, const char *suffix, + DeviceState *dev) +{ + BootIndexProperty *prop = g_malloc0(sizeof(*prop)); + + prop->bootindex = bootindex; + prop->suffix = suffix; + prop->dev = dev; + + object_property_add(obj, name, "int32", + device_get_bootindex, + device_set_bootindex, + property_release_bootindex, + prop); + + /* initialize devices' bootindex property to -1 */ + object_property_set_int(obj, name, -1, NULL); +} + +typedef struct FWLCHSEntry FWLCHSEntry; + +struct FWLCHSEntry { + QTAILQ_ENTRY(FWLCHSEntry) link; + DeviceState *dev; + char *suffix; + uint32_t lcyls; + uint32_t lheads; + uint32_t lsecs; +}; + +static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs = + QTAILQ_HEAD_INITIALIZER(fw_lchs); + +void add_boot_device_lchs(DeviceState *dev, const char *suffix, + uint32_t lcyls, uint32_t lheads, uint32_t lsecs) +{ + FWLCHSEntry *node; + + if (!lcyls && !lheads && !lsecs) { + return; + } + + 
assert(dev != NULL || suffix != NULL); + + node = g_new0(FWLCHSEntry, 1); + node->suffix = g_strdup(suffix); + node->dev = dev; + node->lcyls = lcyls; + node->lheads = lheads; + node->lsecs = lsecs; + + QTAILQ_INSERT_TAIL(&fw_lchs, node, link); +} + +void del_boot_device_lchs(DeviceState *dev, const char *suffix) +{ + FWLCHSEntry *i; + + if (dev == NULL) { + return; + } + + QTAILQ_FOREACH(i, &fw_lchs, link) { + if ((!suffix || !g_strcmp0(i->suffix, suffix)) && + i->dev == dev) { + QTAILQ_REMOVE(&fw_lchs, i, link); + g_free(i->suffix); + g_free(i); + + break; + } + } +} + +char *get_boot_devices_lchs_list(size_t *size) +{ + FWLCHSEntry *i; + size_t total = 0; + char *list = NULL; + + QTAILQ_FOREACH(i, &fw_lchs, link) { + char *bootpath; + char *chs_string; + size_t len; + + bootpath = get_boot_device_path(i->dev, false, i->suffix); + chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32, + bootpath, i->lcyls, i->lheads, i->lsecs); + + if (total) { + list[total - 1] = '\n'; + } + len = strlen(chs_string) + 1; + list = g_realloc(list, total + len); + memcpy(&list[total], chs_string, len); + total += len; + g_free(chs_string); + g_free(bootpath); + } + + *size = total; + + return list; +} diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c new file mode 100644 index 0000000..d9bb30a --- /dev/null +++ b/system/cpu-throttle.c @@ -0,0 +1,128 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "hw/core/cpu.h" +#include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "sysemu/cpu-throttle.h" + +/* vcpu throttling controls */ +static QEMUTimer *throttle_timer; +static unsigned int throttle_percentage; + +#define CPU_THROTTLE_PCT_MIN 1 +#define CPU_THROTTLE_PCT_MAX 99 +#define CPU_THROTTLE_TIMESLICE_NS 10000000 + +static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) +{ + double pct; + double throttle_ratio; + int64_t sleeptime_ns, endtime_ns; + + if (!cpu_throttle_get_percentage()) { + return; + } + + pct = (double)cpu_throttle_get_percentage() / 100; + throttle_ratio = pct / (1 - pct); + /* Add 1ns to fix double's rounding error (like 0.9999999...) 
*/ + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); + endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; + while (sleeptime_ns > 0 && !cpu->stop) { + if (sleeptime_ns > SCALE_MS) { + qemu_cond_timedwait_iothread(cpu->halt_cond, + sleeptime_ns / SCALE_MS); + } else { + qemu_mutex_unlock_iothread(); + g_usleep(sleeptime_ns / SCALE_US); + qemu_mutex_lock_iothread(); + } + sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + } + qatomic_set(&cpu->throttle_thread_scheduled, 0); +} + +static void cpu_throttle_timer_tick(void *opaque) +{ + CPUState *cpu; + double pct; + + /* Stop the timer if needed */ + if (!cpu_throttle_get_percentage()) { + return; + } + CPU_FOREACH(cpu) { + if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) { + async_run_on_cpu(cpu, cpu_throttle_thread, + RUN_ON_CPU_NULL); + } + } + + pct = (double)cpu_throttle_get_percentage() / 100; + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); +} + +void cpu_throttle_set(int new_throttle_pct) +{ + /* + * boolean to store whether throttle is already active or not, + * before modifying throttle_percentage + */ + bool throttle_active = cpu_throttle_active(); + + /* Ensure throttle percentage is within valid range */ + new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); + new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); + + qatomic_set(&throttle_percentage, new_throttle_pct); + + if (!throttle_active) { + cpu_throttle_timer_tick(NULL); + } +} + +void cpu_throttle_stop(void) +{ + qatomic_set(&throttle_percentage, 0); +} + +bool cpu_throttle_active(void) +{ + return (cpu_throttle_get_percentage() != 0); +} + +int cpu_throttle_get_percentage(void) +{ + return qatomic_read(&throttle_percentage); +} + +void cpu_throttle_init(void) +{ + throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + cpu_throttle_timer_tick, NULL); +} diff --git a/system/cpu-timers.c 
b/system/cpu-timers.c new file mode 100644 index 0000000..7452d97 --- /dev/null +++ b/system/cpu-timers.c @@ -0,0 +1,277 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "sysemu/cpus.h" +#include "qemu/main-loop.h" +#include "qemu/option.h" +#include "qemu/seqlock.h" +#include "sysemu/replay.h" +#include "sysemu/runstate.h" +#include "hw/core/cpu.h" +#include "sysemu/cpu-timers.h" +#include "sysemu/cpu-throttle.h" +#include "sysemu/cpu-timers-internal.h" + +/* clock and ticks */ + +static int64_t cpu_get_ticks_locked(void) +{ + int64_t ticks = timers_state.cpu_ticks_offset; + if (timers_state.cpu_ticks_enabled) { + ticks += cpu_get_host_ticks(); + } + + if (timers_state.cpu_ticks_prev > ticks) { + /* Non increasing ticks may happen if the host uses software suspend. */ + timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; + ticks = timers_state.cpu_ticks_prev; + } + + timers_state.cpu_ticks_prev = ticks; + return ticks; +} + +/* + * return the time elapsed in VM between vm_start and vm_stop. + * cpu_get_ticks() uses units of the host CPU cycle counter. + */ +int64_t cpu_get_ticks(void) +{ + int64_t ticks; + + qemu_spin_lock(&timers_state.vm_clock_lock); + ticks = cpu_get_ticks_locked(); + qemu_spin_unlock(&timers_state.vm_clock_lock); + return ticks; +} + +int64_t cpu_get_clock_locked(void) +{ + int64_t time; + + time = timers_state.cpu_clock_offset; + if (timers_state.cpu_ticks_enabled) { + time += get_clock(); + } + + return time; +} + +/* + * Return the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop + */ +int64_t cpu_get_clock(void) +{ + int64_t ti; + unsigned start; + + do { + start = seqlock_read_begin(&timers_state.vm_clock_seqlock); + ti = cpu_get_clock_locked(); + } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); + + return ti; +} + +/* + * enable cpu_get_ticks() + * Caller must hold BQL which serves as mutex for vm_clock_seqlock. 
/*
 * disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 *
 * Does nothing (apart from taking and releasing the seqlock) when ticks
 * are already disabled.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        /*
         * Fold the current host tick count into the offset;
         * cpu_get_ticks_locked() stops adding it once ticks are disabled.
         */
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        /*
         * Must be sampled while cpu_ticks_enabled is still set:
         * cpu_get_clock_locked() only adds get_clock() when enabled.
         */
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
1, + .needed = adjust_timers_state_needed, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(icount_rt_timer, TimersState), + VMSTATE_TIMER_PTR(icount_vm_timer, TimersState), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription icount_vmstate_shift = { + .name = "timer/icount/shift", + .version_id = 2, + .minimum_version_id = 2, + .needed = icount_shift_state_needed, + .fields = (VMStateField[]) { + VMSTATE_INT16(icount_time_shift, TimersState), + VMSTATE_INT64(last_delta, TimersState), + VMSTATE_END_OF_LIST() + } +}; + +/* + * This is a subsection for icount migration. + */ +static const VMStateDescription icount_vmstate_timers = { + .name = "timer/icount", + .version_id = 1, + .minimum_version_id = 1, + .needed = icount_state_needed, + .fields = (VMStateField[]) { + VMSTATE_INT64(qemu_icount_bias, TimersState), + VMSTATE_INT64(qemu_icount, TimersState), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &icount_vmstate_warp_timer, + &icount_vmstate_adjust_timers, + &icount_vmstate_shift, + NULL + } +}; + +static const VMStateDescription vmstate_timers = { + .name = "timer", + .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT64(cpu_ticks_offset, TimersState), + VMSTATE_UNUSED(8), + VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &icount_vmstate_timers, + NULL + } +}; + +static void do_nothing(CPUState *cpu, run_on_cpu_data unused) +{ +} + +void qemu_timer_notify_cb(void *opaque, QEMUClockType type) +{ + if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) { + qemu_notify_event(); + return; + } + + if (qemu_in_vcpu_thread()) { + /* + * A CPU is currently running; kick it back out to the + * tcg_cpu_exec() loop so it will recalculate its + * icount deadline immediately. 
+ */ + qemu_cpu_kick(current_cpu); + } else if (first_cpu) { + /* + * qemu_cpu_kick is not enough to kick a halted CPU out of + * qemu_tcg_wait_io_event. async_run_on_cpu, instead, + * causes cpu_thread_is_idle to return false. This way, + * handle_icount_deadline can run. + * If we have no CPUs at all for some reason, we don't + * need to do anything. + */ + async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); + } +} + +TimersState timers_state; + +/* initialize timers state and the cpu throttle for convenience */ +void cpu_timers_init(void) +{ + seqlock_init(&timers_state.vm_clock_seqlock); + qemu_spin_init(&timers_state.vm_clock_lock); + vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + + cpu_throttle_init(); +} diff --git a/system/cpus.c b/system/cpus.c new file mode 100644 index 0000000..0848e0d --- /dev/null +++ b/system/cpus.c @@ -0,0 +1,822 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "monitor/monitor.h" +#include "qemu/coroutine-tls.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-commands-misc.h" +#include "qapi/qapi-events-run-state.h" +#include "qapi/qmp/qerror.h" +#include "exec/gdbstub.h" +#include "sysemu/hw_accel.h" +#include "exec/cpu-common.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" +#include "qemu/plugin.h" +#include "sysemu/cpus.h" +#include "qemu/guest-random.h" +#include "hw/nmi.h" +#include "sysemu/replay.h" +#include "sysemu/runstate.h" +#include "sysemu/cpu-timers.h" +#include "sysemu/whpx.h" +#include "hw/boards.h" +#include "hw/hw.h" +#include "trace.h" + +#ifdef CONFIG_LINUX + +#include + +#ifndef PR_MCE_KILL +#define PR_MCE_KILL 33 +#endif + +#ifndef PR_MCE_KILL_SET +#define PR_MCE_KILL_SET 1 +#endif + +#ifndef PR_MCE_KILL_EARLY +#define PR_MCE_KILL_EARLY 1 +#endif + +#endif /* CONFIG_LINUX */ + +static QemuMutex qemu_global_mutex; + +/* + * The chosen accelerator is supposed to register this. 
/*
 * Report whether the vCPU thread for @cpu has nothing to do.
 *
 * The checks are evaluated in this order, and the first match wins:
 *  1. a stop request is pending or work is queued          -> not idle
 *  2. the vCPU is stopped or the VM is not running          -> idle
 *  3. the vCPU is not halted, or cpu_has_work() reports work -> not idle
 *  4. the accelerator provides a cpu_thread_is_idle hook     -> its answer
 *  5. otherwise                                              -> idle
 *
 * cpus_accel is dereferenced without a NULL check from step 4 on.
 */
bool cpu_thread_is_idle(CPUState *cpu)
{
    /* Pending stop request or queued work always needs servicing. */
    if (cpu->stop || !cpu_work_list_empty(cpu)) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu)) {
        return false;
    }
    /* Optional accelerator-specific refinement for a halted vCPU. */
    if (cpus_accel->cpu_thread_is_idle) {
        return cpus_accel->cpu_thread_is_idle(cpu);
    }
    return true;
}
/*
 * Common implementation of stopping the VM.
 *
 * When the VM is running: switch to run state @state, stop the tick
 * counters, pause all vCPUs, run the VM state change notifiers and, if
 * @send_stop is true, emit the QMP STOP event.
 *
 * Regardless of the previous run state, block devices are then drained
 * and flushed.
 *
 * Returns the result of bdrv_flush_all().
 */
static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        runstate_set(state);
        cpu_disable_ticks();
        pause_all_vcpus();
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    /* Done even when the VM was already stopped. */
    bdrv_drain_all();
    ret = bdrv_flush_all();
    trace_vm_stop_flush_all(ret);

    return ret;
}
Historically clients + * did not expect a QMP STOP event and so we need to retain compatibility. + */ +int vm_shutdown(void) +{ + return do_vm_stop(RUN_STATE_SHUTDOWN, false); +} + +bool cpu_can_run(CPUState *cpu) +{ + if (cpu->stop) { + return false; + } + if (cpu_is_stopped(cpu)) { + return false; + } + return true; +} + +void cpu_handle_guest_debug(CPUState *cpu) +{ + if (replay_running_debug()) { + if (!cpu->singlestep_enabled) { + /* + * Report about the breakpoint and + * make a single step to skip it + */ + replay_breakpoint(); + cpu_single_step(cpu, SSTEP_ENABLE); + } else { + cpu_single_step(cpu, 0); + } + } else { + gdb_set_stop_cpu(cpu); + qemu_system_debug_request(); + cpu->stopped = true; + } +} + +#ifdef CONFIG_LINUX +static void sigbus_reraise(void) +{ + sigset_t set; + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + if (!sigaction(SIGBUS, &action, NULL)) { + raise(SIGBUS); + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); + } + perror("Failed to re-raise SIGBUS!"); + abort(); +} + +static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) +{ + if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { + sigbus_reraise(); + } + + if (current_cpu) { + /* Called asynchronously in VCPU thread. */ + if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } else { + /* Called synchronously (via signalfd) in main thread. */ + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } +} + +static void qemu_init_sigbus(void) +{ + struct sigaction action; + + /* + * ALERT: when modifying this, take care that SIGBUS forwarding in + * qemu_prealloc_mem() will continue working as expected. 
+ */ + memset(&action, 0, sizeof(action)); + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = sigbus_handler; + sigaction(SIGBUS, &action, NULL); + + prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); +} +#else /* !CONFIG_LINUX */ +static void qemu_init_sigbus(void) +{ +} +#endif /* !CONFIG_LINUX */ + +static QemuThread io_thread; + +/* cpu creation */ +static QemuCond qemu_cpu_cond; +/* system init */ +static QemuCond qemu_pause_cond; + +void qemu_init_cpu_loop(void) +{ + qemu_init_sigbus(); + qemu_cond_init(&qemu_cpu_cond); + qemu_cond_init(&qemu_pause_cond); + qemu_mutex_init(&qemu_global_mutex); + + qemu_thread_get_self(&io_thread); +} + +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) +{ + do_run_on_cpu(cpu, func, data, &qemu_global_mutex); +} + +static void qemu_cpu_stop(CPUState *cpu, bool exit) +{ + g_assert(qemu_cpu_is_self(cpu)); + cpu->stop = false; + cpu->stopped = true; + if (exit) { + cpu_exit(cpu); + } + qemu_cond_broadcast(&qemu_pause_cond); +} + +void qemu_wait_io_event_common(CPUState *cpu) +{ + qatomic_set_mb(&cpu->thread_kicked, false); + if (cpu->stop) { + qemu_cpu_stop(cpu, false); + } + process_queued_cpu_work(cpu); +} + +void qemu_wait_io_event(CPUState *cpu) +{ + bool slept = false; + + while (cpu_thread_is_idle(cpu)) { + if (!slept) { + slept = true; + qemu_plugin_vcpu_idle_cb(cpu); + } + qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); + } + if (slept) { + qemu_plugin_vcpu_resume_cb(cpu); + } + + qemu_wait_io_event_common(cpu); +} + +void cpus_kick_thread(CPUState *cpu) +{ + if (cpu->thread_kicked) { + return; + } + cpu->thread_kicked = true; + +#ifndef _WIN32 + int err = pthread_kill(cpu->thread->thread, SIG_IPI); + if (err && err != ESRCH) { + fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); + exit(1); + } +#else + qemu_sem_post(&cpu->sem); +#endif +} + +void qemu_cpu_kick(CPUState *cpu) +{ + qemu_cond_broadcast(cpu->halt_cond); + if (cpus_accel->kick_vcpu_thread) { + 
cpus_accel->kick_vcpu_thread(cpu); + } else { /* default */ + cpus_kick_thread(cpu); + } +} + +void qemu_cpu_kick_self(void) +{ + assert(current_cpu); + cpus_kick_thread(current_cpu); +} + +bool qemu_cpu_is_self(CPUState *cpu) +{ + return qemu_thread_is_self(cpu->thread); +} + +bool qemu_in_vcpu_thread(void) +{ + return current_cpu && qemu_cpu_is_self(current_cpu); +} + +QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) + +bool qemu_mutex_iothread_locked(void) +{ + return get_iothread_locked(); +} + +bool qemu_in_main_thread(void) +{ + return qemu_mutex_iothread_locked(); +} + +/* + * The BQL is taken from so many places that it is worth profiling the + * callers directly, instead of funneling them all through a single function. + */ +void qemu_mutex_lock_iothread_impl(const char *file, int line) +{ + QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func); + + g_assert(!qemu_mutex_iothread_locked()); + bql_lock(&qemu_global_mutex, file, line); + set_iothread_locked(true); +} + +void qemu_mutex_unlock_iothread(void) +{ + g_assert(qemu_mutex_iothread_locked()); + set_iothread_locked(false); + qemu_mutex_unlock(&qemu_global_mutex); +} + +void qemu_cond_wait_iothread(QemuCond *cond) +{ + qemu_cond_wait(cond, &qemu_global_mutex); +} + +void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) +{ + qemu_cond_timedwait(cond, &qemu_global_mutex, ms); +} + +/* signal CPU creation */ +void cpu_thread_signal_created(CPUState *cpu) +{ + cpu->created = true; + qemu_cond_signal(&qemu_cpu_cond); +} + +/* signal CPU destruction */ +void cpu_thread_signal_destroyed(CPUState *cpu) +{ + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); +} + + +static bool all_vcpus_paused(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (!cpu->stopped) { + return false; + } + } + + return true; +} + +void pause_all_vcpus(void) +{ + CPUState *cpu; + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); + CPU_FOREACH(cpu) { + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); 
/*
 * Ask the thread of @cpu to stop and unplug, then wait for it to exit.
 *
 * The iothread lock is released around qemu_thread_join() and retaken
 * afterwards.  qemu_mutex_unlock_iothread() asserts that the lock is held,
 * so the caller must hold it on entry.
 * NOTE(review): presumably the lock is dropped so the vCPU thread can take
 * it while shutting down -- confirm against the accelerator thread loops.
 */
void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    /* Wake the vCPU thread so it notices the flags set above. */
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
/*
 * Stop the VM and enter run state @state.
 *
 * From a vCPU thread the stop is only requested: a vmstop request is
 * recorded, the current vCPU is told to stop and leave its execution
 * loop, and 0 is returned at once.
 *
 * From any other thread the stop is performed synchronously through
 * do_vm_stop(), with a QMP STOP event, and its result is returned.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}
+ */ + if (cpus_accel->synchronize_pre_resume) { + cpus_accel->synchronize_pre_resume(step_pending); + } + + /* We are sending this now, but the CPUs will be resumed shortly later */ + qapi_event_send_resume(); + + cpu_enable_ticks(); + runstate_set(RUN_STATE_RUNNING); + vm_state_notify(1, RUN_STATE_RUNNING); + return 0; +} + +void vm_start(void) +{ + if (!vm_prepare_start(false)) { + resume_all_vcpus(); + } +} + +/* does a state transition even if the VM is already stopped, + current state is forgotten forever */ +int vm_stop_force_state(RunState state) +{ + if (runstate_is_running()) { + return vm_stop(state); + } else { + int ret; + runstate_set(state); + + bdrv_drain_all(); + /* Make sure to return an error if the flush in a previous vm_stop() + * failed. */ + ret = bdrv_flush_all(); + trace_vm_stop_flush_all(ret); + return ret; + } +} + +void qmp_memsave(int64_t addr, int64_t size, const char *filename, + bool has_cpu, int64_t cpu_index, Error **errp) +{ + FILE *f; + uint32_t l; + CPUState *cpu; + uint8_t buf[1024]; + int64_t orig_addr = addr, orig_size = size; + + if (!has_cpu) { + cpu_index = 0; + } + + cpu = qemu_get_cpu(cpu_index); + if (cpu == NULL) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index", + "a CPU number"); + return; + } + + f = fopen(filename, "wb"); + if (!f) { + error_setg_file_open(errp, errno, filename); + return; + } + + while (size != 0) { + l = sizeof(buf); + if (l > size) + l = size; + if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { + error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 + " specified", orig_addr, orig_size); + goto exit; + } + if (fwrite(buf, 1, l, f) != l) { + error_setg(errp, QERR_IO_ERROR); + goto exit; + } + addr += l; + size -= l; + } + +exit: + fclose(f); +} + +void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, + Error **errp) +{ + FILE *f; + uint32_t l; + uint8_t buf[1024]; + + f = fopen(filename, "wb"); + if (!f) { + error_setg_file_open(errp, errno, filename); + 
return; + } + + while (size != 0) { + l = sizeof(buf); + if (l > size) + l = size; + cpu_physical_memory_read(addr, buf, l); + if (fwrite(buf, 1, l, f) != l) { + error_setg(errp, QERR_IO_ERROR); + goto exit; + } + addr += l; + size -= l; + } + +exit: + fclose(f); +} + +void qmp_inject_nmi(Error **errp) +{ + nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp); +} + diff --git a/system/datadir.c b/system/datadir.c new file mode 100644 index 0000000..c9237cb --- /dev/null +++ b/system/datadir.c @@ -0,0 +1,110 @@ +/* + * QEMU firmware and keymap file search + * + * Copyright (c) 2003-2020 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/datadir.h" +#include "qemu/cutils.h" +#include "trace.h" + +static const char *data_dir[16]; +static int data_dir_idx; + +char *qemu_find_file(int type, const char *name) +{ + int i; + const char *subdir; + char *buf; + + /* Try the name as a straight path first */ + if (access(name, R_OK) == 0) { + trace_load_file(name, name); + return g_strdup(name); + } + + switch (type) { + case QEMU_FILE_TYPE_BIOS: + subdir = ""; + break; + case QEMU_FILE_TYPE_KEYMAP: + subdir = "keymaps/"; + break; + default: + abort(); + } + + for (i = 0; i < data_dir_idx; i++) { + buf = g_strdup_printf("%s/%s%s", data_dir[i], subdir, name); + if (access(buf, R_OK) == 0) { + trace_load_file(name, buf); + return buf; + } + g_free(buf); + } + return NULL; +} + +void qemu_add_data_dir(char *path) +{ + int i; + + if (path == NULL) { + return; + } + if (data_dir_idx == ARRAY_SIZE(data_dir)) { + return; + } + for (i = 0; i < data_dir_idx; i++) { + if (strcmp(data_dir[i], path) == 0) { + g_free(path); /* duplicate */ + return; + } + } + data_dir[data_dir_idx++] = path; +} + +void qemu_add_default_firmwarepath(void) +{ + static const char * const dirs[] = { + CONFIG_QEMU_FIRMWAREPATH + NULL + }; + + size_t i; + + /* add configured firmware directories */ + for (i = 0; dirs[i] != NULL; i++) { + qemu_add_data_dir(get_relocated_path(dirs[i])); + } + + /* try to find datadir relative to the executable path */ + qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR)); +} + +void qemu_list_data_dirs(void) +{ + int i; + for (i = 0; i < data_dir_idx; i++) { + printf("%s\n", data_dir[i]); + } +} diff --git a/system/device_tree.c b/system/device_tree.c new file mode 100644 index 0000000..eb5166c --- /dev/null +++ b/system/device_tree.c @@ -0,0 +1,703 @@ +/* + * Functions to help device tree manipulation using libfdt. + * It also provides functions to read entries from device tree proc + * interface. + * + * Copyright 2008 IBM Corporation. 
/*
 * Load a flattened device tree blob from a file into a newly allocated,
 * enlarged buffer.
 *
 * @filename_path: path of the device tree file
 * @sizep:         receives the size of the returned buffer, or 0 on failure
 *
 * The buffer is sized (file size + 10000) * 2 to leave room for later
 * modification, and the blob is re-opened into that full size with
 * fdt_open_into().
 *
 * Returns the buffer on success.  On any failure an error is reported,
 * the buffer is freed and NULL is returned.
 */
void *load_device_tree(const char *filename_path, int *sizep)
{
    int dt_size;
    int dt_file_load_size;
    int ret;
    void *fdt = NULL;

    *sizep = 0;
    dt_size = get_image_size(filename_path);
    if (dt_size < 0) {
        error_report("Unable to get size of device tree file '%s'",
                     filename_path);
        goto fail;
    }
    /* Guard the (dt_size + 10000) * 2 computation below against int overflow */
    if (dt_size > INT_MAX / 2 - 10000) {
        error_report("Device tree file '%s' is too large", filename_path);
        goto fail;
    }

    /* Expand to 2x size to give enough room for manipulation.  */
    dt_size += 10000;
    dt_size *= 2;
    /* First allocate space in qemu for device tree */
    fdt = g_malloc0(dt_size);

    dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
    if (dt_file_load_size < 0) {
        error_report("Unable to open device tree file '%s'",
                     filename_path);
        goto fail;
    }

    /* Re-open in place so the blob's recorded size covers the whole buffer */
    ret = fdt_open_into(fdt, fdt, dt_size);
    if (ret) {
        error_report("%s: Unable to copy device tree into memory: %s",
                     __func__, fdt_strerror(ret));
        goto fail;
    }

    /* Check sanity of device tree */
    if (fdt_check_header(fdt)) {
        error_report("Device tree file loaded into memory is invalid: %s",
                     filename_path);
        goto fail;
    }
    *sizep = dt_size;
    return fdt;

fail:
    /* fdt is still NULL if we failed before the allocation */
    g_free(fdt);
    return NULL;
}
/*
 * load_device_tree_from_sysfs: build a device tree blob from the host's
 * device tree directory (SYSFS_DT_BASEDIR).
 *
 * An empty tree is created with create_device_tree() and then populated by
 * read_fstree().  The process exits with status 1 if the resulting blob
 * fails fdt_check_header().
 *
 * Returns the populated blob.  Its buffer size is obtained from
 * create_device_tree() but is not passed on to the caller.
 */
void *load_device_tree_from_sysfs(void)
{
    void *host_fdt;
    int host_fdt_size;

    host_fdt = create_device_tree(&host_fdt_size);
    read_fstree(host_fdt, SYSFS_DT_BASEDIR);
    if (fdt_check_header(host_fdt)) {
        error_report("%s host device tree extracted into memory is invalid",
                     __func__);
        exit(1);
    }
    return host_fdt;
}
int offset; + + offset = fdt_path_offset(fdt, node_path); + if (offset < 0) { + error_report("%s Couldn't find node %s: %s", __func__, node_path, + fdt_strerror(offset)); + exit(1); + } + + return offset; +} + +char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp) +{ + char *prefix = g_strdup_printf("%s@", name); + unsigned int path_len = 16, n = 0; + GSList *path_list = NULL, *iter; + const char *iter_name; + int offset, len, ret; + char **path_array; + + offset = fdt_next_node(fdt, -1, NULL); + + while (offset >= 0) { + iter_name = fdt_get_name(fdt, offset, &len); + if (!iter_name) { + offset = len; + break; + } + if (!strcmp(iter_name, name) || g_str_has_prefix(iter_name, prefix)) { + char *path; + + path = g_malloc(path_len); + while ((ret = fdt_get_path(fdt, offset, path, path_len)) + == -FDT_ERR_NOSPACE) { + path_len += 16; + path = g_realloc(path, path_len); + } + path_list = g_slist_prepend(path_list, path); + n++; + } + offset = fdt_next_node(fdt, offset, NULL); + } + g_free(prefix); + + if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { + error_setg(errp, "%s: abort parsing dt for %s node units: %s", + __func__, name, fdt_strerror(offset)); + for (iter = path_list; iter; iter = iter->next) { + g_free(iter->data); + } + g_slist_free(path_list); + return NULL; + } + + path_array = g_new(char *, n + 1); + path_array[n--] = NULL; + + for (iter = path_list; iter; iter = iter->next) { + path_array[n--] = iter->data; + } + + g_slist_free(path_list); + + return path_array; +} + +char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat, + Error **errp) +{ + int offset, len, ret; + const char *iter_name; + unsigned int path_len = 16, n = 0; + GSList *path_list = NULL, *iter; + char **path_array; + + offset = fdt_node_offset_by_compatible(fdt, -1, compat); + + while (offset >= 0) { + iter_name = fdt_get_name(fdt, offset, &len); + if (!iter_name) { + offset = len; + break; + } + if (!name || !strcmp(iter_name, name)) { + char *path; 
+ + path = g_malloc(path_len); + while ((ret = fdt_get_path(fdt, offset, path, path_len)) + == -FDT_ERR_NOSPACE) { + path_len += 16; + path = g_realloc(path, path_len); + } + path_list = g_slist_prepend(path_list, path); + n++; + } + offset = fdt_node_offset_by_compatible(fdt, offset, compat); + } + + if (offset < 0 && offset != -FDT_ERR_NOTFOUND) { + error_setg(errp, "%s: abort parsing dt for %s/%s: %s", + __func__, name, compat, fdt_strerror(offset)); + for (iter = path_list; iter; iter = iter->next) { + g_free(iter->data); + } + g_slist_free(path_list); + return NULL; + } + + path_array = g_new(char *, n + 1); + path_array[n--] = NULL; + + for (iter = path_list; iter; iter = iter->next) { + path_array[n--] = iter->data; + } + + g_slist_free(path_list); + + return path_array; +} + +int qemu_fdt_setprop(void *fdt, const char *node_path, + const char *property, const void *val, int size) +{ + int r; + + r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size); + if (r < 0) { + error_report("%s: Couldn't set %s/%s: %s", __func__, node_path, + property, fdt_strerror(r)); + exit(1); + } + + return r; +} + +int qemu_fdt_setprop_cell(void *fdt, const char *node_path, + const char *property, uint32_t val) +{ + int r; + + r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val); + if (r < 0) { + error_report("%s: Couldn't set %s/%s = %#08x: %s", __func__, + node_path, property, val, fdt_strerror(r)); + exit(1); + } + + return r; +} + +int qemu_fdt_setprop_u64(void *fdt, const char *node_path, + const char *property, uint64_t val) +{ + val = cpu_to_be64(val); + return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val)); +} + +int qemu_fdt_setprop_string(void *fdt, const char *node_path, + const char *property, const char *string) +{ + int r; + + r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string); + if (r < 0) { + error_report("%s: Couldn't set %s/%s = %s: %s", __func__, + node_path, property, string, 
fdt_strerror(r)); + exit(1); + } + + return r; +} + +/* + * libfdt doesn't allow us to add string arrays directly but they are + * test a series of null terminated strings with a length. We build + * the string up here so we can calculate the final length. + */ +int qemu_fdt_setprop_string_array(void *fdt, const char *node_path, + const char *prop, char **array, int len) +{ + int ret, i, total_len = 0; + char *str, *p; + for (i = 0; i < len; i++) { + total_len += strlen(array[i]) + 1; + } + p = str = g_malloc0(total_len); + for (i = 0; i < len; i++) { + int offset = strlen(array[i]) + 1; + pstrcpy(p, offset, array[i]); + p += offset; + } + + ret = qemu_fdt_setprop(fdt, node_path, prop, str, total_len); + g_free(str); + return ret; +} + +const void *qemu_fdt_getprop(void *fdt, const char *node_path, + const char *property, int *lenp, Error **errp) +{ + int len; + const void *r; + + if (!lenp) { + lenp = &len; + } + r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp); + if (!r) { + error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__, + node_path, property, fdt_strerror(*lenp)); + } + return r; +} + +uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path, + const char *property, int *lenp, Error **errp) +{ + int len; + const uint32_t *p; + + if (!lenp) { + lenp = &len; + } + p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp); + if (!p) { + return 0; + } else if (*lenp != 4) { + error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)", + __func__, node_path, property); + *lenp = -EINVAL; + return 0; + } + return be32_to_cpu(*p); +} + +uint32_t qemu_fdt_get_phandle(void *fdt, const char *path) +{ + uint32_t r; + + r = fdt_get_phandle(fdt, findnode_nofail(fdt, path)); + if (r == 0) { + error_report("%s: Couldn't get phandle for %s: %s", __func__, + path, fdt_strerror(r)); + exit(1); + } + + return r; +} + +int qemu_fdt_setprop_phandle(void *fdt, const char *node_path, + const char *property, + const char *target_node_path) +{ + 
uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path); + return qemu_fdt_setprop_cell(fdt, node_path, property, phandle); +} + +uint32_t qemu_fdt_alloc_phandle(void *fdt) +{ + static int phandle = 0x0; + + /* + * We need to find out if the user gave us special instruction at + * which phandle id to start allocating phandles. + */ + if (!phandle) { + phandle = machine_phandle_start(current_machine); + } + + if (!phandle) { + /* + * None or invalid phandle given on the command line, so fall back to + * default starting point. + */ + phandle = 0x8000; + } + + return phandle++; +} + +int qemu_fdt_nop_node(void *fdt, const char *node_path) +{ + int r; + + r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path)); + if (r < 0) { + error_report("%s: Couldn't nop node %s: %s", __func__, node_path, + fdt_strerror(r)); + exit(1); + } + + return r; +} + +int qemu_fdt_add_subnode(void *fdt, const char *name) +{ + char *dupname = g_strdup(name); + char *basename = strrchr(dupname, '/'); + int retval; + int parent = 0; + + if (!basename) { + g_free(dupname); + return -1; + } + + basename[0] = '\0'; + basename++; + + if (dupname[0]) { + parent = findnode_nofail(fdt, dupname); + } + + retval = fdt_add_subnode(fdt, parent, basename); + if (retval < 0) { + error_report("%s: Failed to create subnode %s: %s", + __func__, name, fdt_strerror(retval)); + exit(1); + } + + g_free(dupname); + return retval; +} + +/* + * qemu_fdt_add_path: Like qemu_fdt_add_subnode(), but will add + * all missing subnodes from the given path. + */ +int qemu_fdt_add_path(void *fdt, const char *path) +{ + const char *name; + int namelen, retval; + int parent = 0; + + if (path[0] != '/') { + return -1; + } + + do { + name = path + 1; + path = strchr(name, '/'); + namelen = path != NULL ? 
path - name : strlen(name); + + retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen); + if (retval < 0 && retval != -FDT_ERR_NOTFOUND) { + error_report("%s: Unexpected error in finding subnode %.*s: %s", + __func__, namelen, name, fdt_strerror(retval)); + exit(1); + } else if (retval == -FDT_ERR_NOTFOUND) { + retval = fdt_add_subnode_namelen(fdt, parent, name, namelen); + if (retval < 0) { + error_report("%s: Failed to create subnode %.*s: %s", + __func__, namelen, name, fdt_strerror(retval)); + exit(1); + } + } + + parent = retval; + } while (path); + + return retval; +} + +void qemu_fdt_dumpdtb(void *fdt, int size) +{ + const char *dumpdtb = current_machine->dumpdtb; + + if (dumpdtb) { + /* Dump the dtb to a file and quit */ + if (g_file_set_contents(dumpdtb, fdt, size, NULL)) { + info_report("dtb dumped to %s. Exiting.", dumpdtb); + exit(0); + } + error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb); + exit(1); + } +} + +int qemu_fdt_setprop_sized_cells_from_array(void *fdt, + const char *node_path, + const char *property, + int numvalues, + uint64_t *values) +{ + uint32_t *propcells; + uint64_t value; + int cellnum, vnum, ncells; + uint32_t hival; + int ret; + + propcells = g_new0(uint32_t, numvalues * 2); + + cellnum = 0; + for (vnum = 0; vnum < numvalues; vnum++) { + ncells = values[vnum * 2]; + if (ncells != 1 && ncells != 2) { + ret = -1; + goto out; + } + value = values[vnum * 2 + 1]; + hival = cpu_to_be32(value >> 32); + if (ncells > 1) { + propcells[cellnum++] = hival; + } else if (hival != 0) { + ret = -1; + goto out; + } + propcells[cellnum++] = cpu_to_be32(value); + } + + ret = qemu_fdt_setprop(fdt, node_path, property, propcells, + cellnum * sizeof(uint32_t)); +out: + g_free(propcells); + return ret; +} + +void qmp_dumpdtb(const char *filename, Error **errp) +{ + g_autoptr(GError) err = NULL; + uint32_t size; + + if (!current_machine->fdt) { + error_setg(errp, "This machine doesn't have a FDT"); + return; + } + + size = 
fdt_totalsize(current_machine->fdt); + + g_assert(size > 0); + + if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) { + error_setg(errp, "Error saving FDT to file %s: %s", + filename, err->message); + } +} + +void hmp_dumpdtb(Monitor *mon, const QDict *qdict) +{ + const char *filename = qdict_get_str(qdict, "filename"); + Error *local_err = NULL; + + qmp_dumpdtb(filename, &local_err); + + if (hmp_handle_error(mon, local_err)) { + return; + } + + info_report("dtb dumped to %s", filename); +} + +void qemu_fdt_randomize_seeds(void *fdt) +{ + int noffset, poffset, len; + const char *name; + uint8_t *data; + + for (noffset = fdt_next_node(fdt, 0, NULL); + noffset >= 0; + noffset = fdt_next_node(fdt, noffset, NULL)) { + for (poffset = fdt_first_property_offset(fdt, noffset); + poffset >= 0; + poffset = fdt_next_property_offset(fdt, poffset)) { + data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len); + if (!data || strcmp(name, "rng-seed")) + continue; + qemu_guest_getrandom_nofail(data, len); + } + } +} diff --git a/system/dirtylimit.c b/system/dirtylimit.c new file mode 100644 index 0000000..fa959d7 --- /dev/null +++ b/system/dirtylimit.c @@ -0,0 +1,678 @@ +/* + * Dirty page rate limit implementation code + * + * Copyright (c) 2022 CHINA TELECOM CO.,LTD. + * + * Authors: + * Hyman Huang(黄勇) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qapi/qapi-commands-migration.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" +#include "sysemu/dirtyrate.h" +#include "sysemu/dirtylimit.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" +#include "exec/memory.h" +#include "exec/target_page.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "trace.h" +#include "migration/misc.h" +#include "migration/migration.h" +#include "migration/options.h" + +/* + * Dirtylimit stop working if dirty page rate error + * value less than DIRTYLIMIT_TOLERANCE_RANGE + */ +#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ +/* + * Plus or minus vcpu sleep time linearly if dirty + * page rate error value percentage over + * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. + * Otherwise, plus or minus a fixed vcpu sleep time. + */ +#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 +/* + * Max vcpu sleep time percentage during a cycle + * composed of dirty ring full and sleep time. + */ +#define DIRTYLIMIT_THROTTLE_PCT_MAX 99 + +struct { + VcpuStat stat; + bool running; + QemuThread thread; +} *vcpu_dirty_rate_stat; + +typedef struct VcpuDirtyLimitState { + int cpu_index; + bool enabled; + /* + * Quota dirty page rate, unit is MB/s + * zero if not enabled. 
+ */ + uint64_t quota; +} VcpuDirtyLimitState; + +struct { + VcpuDirtyLimitState *states; + /* Max cpus number configured by user */ + int max_cpus; + /* Number of vcpu under dirtylimit */ + int limited_nvcpu; +} *dirtylimit_state; + +/* protect dirtylimit_state */ +static QemuMutex dirtylimit_mutex; + +/* dirtylimit thread quit if dirtylimit_quit is true */ +static bool dirtylimit_quit; + +static void vcpu_dirty_rate_stat_collect(void) +{ + MigrationState *s = migrate_get_current(); + VcpuStat stat; + int i = 0; + int64_t period = DIRTYLIMIT_CALC_TIME_MS; + + if (migrate_dirty_limit() && + migration_is_active(s)) { + period = s->parameters.x_vcpu_dirty_limit_period; + } + + /* calculate vcpu dirtyrate */ + vcpu_calculate_dirtyrate(period, + &stat, + GLOBAL_DIRTY_LIMIT, + false); + + for (i = 0; i < stat.nvcpu; i++) { + vcpu_dirty_rate_stat->stat.rates[i].id = i; + vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = + stat.rates[i].dirty_rate; + } + + g_free(stat.rates); +} + +static void *vcpu_dirty_rate_stat_thread(void *opaque) +{ + rcu_register_thread(); + + /* start log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); + + while (qatomic_read(&vcpu_dirty_rate_stat->running)) { + vcpu_dirty_rate_stat_collect(); + if (dirtylimit_in_service()) { + dirtylimit_process(); + } + } + + /* stop log sync */ + global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); + + rcu_unregister_thread(); + return NULL; +} + +int64_t vcpu_dirty_rate_get(int cpu_index) +{ + DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; + return qatomic_read_i64(&rates[cpu_index].dirty_rate); +} + +void vcpu_dirty_rate_stat_start(void) +{ + if (qatomic_read(&vcpu_dirty_rate_stat->running)) { + return; + } + + qatomic_set(&vcpu_dirty_rate_stat->running, 1); + qemu_thread_create(&vcpu_dirty_rate_stat->thread, + "dirtyrate-stat", + vcpu_dirty_rate_stat_thread, + NULL, + QEMU_THREAD_JOINABLE); +} + +void vcpu_dirty_rate_stat_stop(void) +{ + qatomic_set(&vcpu_dirty_rate_stat->running, 0); + 
dirtylimit_state_unlock(); + qemu_mutex_unlock_iothread(); + qemu_thread_join(&vcpu_dirty_rate_stat->thread); + qemu_mutex_lock_iothread(); + dirtylimit_state_lock(); +} + +void vcpu_dirty_rate_stat_initialize(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + + vcpu_dirty_rate_stat = + g_malloc0(sizeof(*vcpu_dirty_rate_stat)); + + vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; + vcpu_dirty_rate_stat->stat.rates = + g_new0(DirtyRateVcpu, max_cpus); + + vcpu_dirty_rate_stat->running = false; +} + +void vcpu_dirty_rate_stat_finalize(void) +{ + g_free(vcpu_dirty_rate_stat->stat.rates); + vcpu_dirty_rate_stat->stat.rates = NULL; + + g_free(vcpu_dirty_rate_stat); + vcpu_dirty_rate_stat = NULL; +} + +void dirtylimit_state_lock(void) +{ + qemu_mutex_lock(&dirtylimit_mutex); +} + +void dirtylimit_state_unlock(void) +{ + qemu_mutex_unlock(&dirtylimit_mutex); +} + +static void +__attribute__((__constructor__)) dirtylimit_mutex_init(void) +{ + qemu_mutex_init(&dirtylimit_mutex); +} + +static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) +{ + return &dirtylimit_state->states[cpu_index]; +} + +void dirtylimit_state_initialize(void) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + int i; + + dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); + + dirtylimit_state->states = + g_new0(VcpuDirtyLimitState, max_cpus); + + for (i = 0; i < max_cpus; i++) { + dirtylimit_state->states[i].cpu_index = i; + } + + dirtylimit_state->max_cpus = max_cpus; + trace_dirtylimit_state_initialize(max_cpus); +} + +void dirtylimit_state_finalize(void) +{ + g_free(dirtylimit_state->states); + dirtylimit_state->states = NULL; + + g_free(dirtylimit_state); + dirtylimit_state = NULL; + + trace_dirtylimit_state_finalize(); +} + +bool dirtylimit_in_service(void) +{ + return !!dirtylimit_state; +} + +bool dirtylimit_vcpu_index_valid(int cpu_index) +{ + MachineState *ms = MACHINE(qdev_get_machine()); 
+ + return !(cpu_index < 0 || + cpu_index >= ms->smp.max_cpus); +} + +static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) +{ + static uint64_t max_dirtyrate; + uint64_t dirty_ring_size_MiB; + + dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size()); + + if (max_dirtyrate < dirtyrate) { + max_dirtyrate = dirtyrate; + } + + return dirty_ring_size_MiB * 1000000 / max_dirtyrate; +} + +static inline bool dirtylimit_done(uint64_t quota, + uint64_t current) +{ + uint64_t min, max; + + min = MIN(quota, current); + max = MAX(quota, current); + + return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; +} + +static inline bool +dirtylimit_need_linear_adjustment(uint64_t quota, + uint64_t current) +{ + uint64_t min, max; + + min = MIN(quota, current); + max = MAX(quota, current); + + return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; +} + +static void dirtylimit_set_throttle(CPUState *cpu, + uint64_t quota, + uint64_t current) +{ + int64_t ring_full_time_us = 0; + uint64_t sleep_pct = 0; + uint64_t throttle_us = 0; + + if (current == 0) { + cpu->throttle_us_per_full = 0; + return; + } + + ring_full_time_us = dirtylimit_dirty_ring_full_time(current); + + if (dirtylimit_need_linear_adjustment(quota, current)) { + if (quota < current) { + sleep_pct = (current - quota) * 100 / current; + throttle_us = + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full += throttle_us; + } else { + sleep_pct = (quota - current) * 100 / quota; + throttle_us = + ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); + cpu->throttle_us_per_full -= throttle_us; + } + + trace_dirtylimit_throttle_pct(cpu->cpu_index, + sleep_pct, + throttle_us); + } else { + if (quota < current) { + cpu->throttle_us_per_full += ring_full_time_us / 10; + } else { + cpu->throttle_us_per_full -= ring_full_time_us / 10; + } + } + + /* + * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), + * current 
dirty page rate may never reach the quota, we should stop + * increasing sleep time? + */ + cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, + ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); + + cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); +} + +static void dirtylimit_adjust_throttle(CPUState *cpu) +{ + uint64_t quota = 0; + uint64_t current = 0; + int cpu_index = cpu->cpu_index; + + quota = dirtylimit_vcpu_get_state(cpu_index)->quota; + current = vcpu_dirty_rate_get(cpu_index); + + if (!dirtylimit_done(quota, current)) { + dirtylimit_set_throttle(cpu, quota, current); + } + + return; +} + +void dirtylimit_process(void) +{ + CPUState *cpu; + + if (!qatomic_read(&dirtylimit_quit)) { + dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + dirtylimit_state_unlock(); + return; + } + + CPU_FOREACH(cpu) { + if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { + continue; + } + dirtylimit_adjust_throttle(cpu); + } + dirtylimit_state_unlock(); + } +} + +void dirtylimit_change(bool start) +{ + if (start) { + qatomic_set(&dirtylimit_quit, 0); + } else { + qatomic_set(&dirtylimit_quit, 1); + } +} + +void dirtylimit_set_vcpu(int cpu_index, + uint64_t quota, + bool enable) +{ + trace_dirtylimit_set_vcpu(cpu_index, quota); + + if (enable) { + dirtylimit_state->states[cpu_index].quota = quota; + if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { + dirtylimit_state->limited_nvcpu++; + } + } else { + dirtylimit_state->states[cpu_index].quota = 0; + if (dirtylimit_state->states[cpu_index].enabled) { + dirtylimit_state->limited_nvcpu--; + } + } + + dirtylimit_state->states[cpu_index].enabled = enable; +} + +void dirtylimit_set_all(uint64_t quota, + bool enable) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + int max_cpus = ms->smp.max_cpus; + int i; + + for (i = 0; i < max_cpus; i++) { + dirtylimit_set_vcpu(i, quota, enable); + } +} + +void dirtylimit_vcpu_execute(CPUState *cpu) +{ + if (dirtylimit_in_service() && + 
dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && + cpu->throttle_us_per_full) { + trace_dirtylimit_vcpu_execute(cpu->cpu_index, + cpu->throttle_us_per_full); + usleep(cpu->throttle_us_per_full); + } +} + +static void dirtylimit_init(void) +{ + dirtylimit_state_initialize(); + dirtylimit_change(true); + vcpu_dirty_rate_stat_initialize(); + vcpu_dirty_rate_stat_start(); +} + +static void dirtylimit_cleanup(void) +{ + vcpu_dirty_rate_stat_stop(); + vcpu_dirty_rate_stat_finalize(); + dirtylimit_change(false); + dirtylimit_state_finalize(); +} + +/* + * dirty page rate limit is not allowed to set if migration + * is running with dirty-limit capability enabled. + */ +static bool dirtylimit_is_allowed(void) +{ + MigrationState *ms = migrate_get_current(); + + if (migration_is_running(ms->state) && + (!qemu_thread_is_self(&ms->thread)) && + migrate_dirty_limit() && + dirtylimit_in_service()) { + return false; + } + return true; +} + +void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, + int64_t cpu_index, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + return; + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (!dirtylimit_is_allowed()) { + error_setg(errp, "can't cancel dirty page rate limit while" + " migration is running"); + return; + } + + if (!dirtylimit_in_service()) { + return; + } + + dirtylimit_state_lock(); + + if (has_cpu_index) { + dirtylimit_set_vcpu(cpu_index, 0, false); + } else { + dirtylimit_set_all(0, false); + } + + if (!dirtylimit_state->limited_nvcpu) { + dirtylimit_cleanup(); + } + + dirtylimit_state_unlock(); +} + +void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); + Error *err = NULL; + + qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + monitor_printf(mon, 
"[Please use 'info vcpu_dirty_limit' to query " + "dirty limit for virtual CPU]\n"); +} + +void qmp_set_vcpu_dirty_limit(bool has_cpu_index, + int64_t cpu_index, + uint64_t dirty_rate, + Error **errp) +{ + if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { + error_setg(errp, "dirty page limit feature requires KVM with" + " accelerator property 'dirty-ring-size' set'"); + return; + } + + if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { + error_setg(errp, "incorrect cpu index specified"); + return; + } + + if (!dirtylimit_is_allowed()) { + error_setg(errp, "can't set dirty page rate limit while" + " migration is running"); + return; + } + + if (!dirty_rate) { + qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); + return; + } + + dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + dirtylimit_init(); + } + + if (has_cpu_index) { + dirtylimit_set_vcpu(cpu_index, dirty_rate, true); + } else { + dirtylimit_set_all(dirty_rate, true); + } + + dirtylimit_state_unlock(); +} + +void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); + int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); + Error *err = NULL; + + if (dirty_rate < 0) { + error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate); + goto out; + } + + qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); + +out: + hmp_handle_error(mon, err); +} + +/* Return the max throttle time of each virtual CPU */ +uint64_t dirtylimit_throttle_time_per_round(void) +{ + CPUState *cpu; + int64_t max = 0; + + CPU_FOREACH(cpu) { + if (cpu->throttle_us_per_full > max) { + max = cpu->throttle_us_per_full; + } + } + + return max; +} + +/* + * Estimate average dirty ring full time of each virtaul CPU. + * Return 0 if guest doesn't dirty memory. 
+ */ +uint64_t dirtylimit_ring_full_time(void) +{ + CPUState *cpu; + uint64_t curr_rate = 0; + int nvcpus = 0; + + CPU_FOREACH(cpu) { + if (cpu->running) { + nvcpus++; + curr_rate += vcpu_dirty_rate_get(cpu->cpu_index); + } + } + + if (!curr_rate || !nvcpus) { + return 0; + } + + return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus); +} + +static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) +{ + DirtyLimitInfo *info = NULL; + + info = g_malloc0(sizeof(*info)); + info->cpu_index = cpu_index; + info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; + info->current_rate = vcpu_dirty_rate_get(cpu_index); + + return info; +} + +static struct DirtyLimitInfoList *dirtylimit_query_all(void) +{ + int i, index; + DirtyLimitInfo *info = NULL; + DirtyLimitInfoList *head = NULL, **tail = &head; + + dirtylimit_state_lock(); + + if (!dirtylimit_in_service()) { + dirtylimit_state_unlock(); + return NULL; + } + + for (i = 0; i < dirtylimit_state->max_cpus; i++) { + index = dirtylimit_state->states[i].cpu_index; + if (dirtylimit_vcpu_get_state(index)->enabled) { + info = dirtylimit_query_vcpu(index); + QAPI_LIST_APPEND(tail, info); + } + } + + dirtylimit_state_unlock(); + + return head; +} + +struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) +{ + if (!dirtylimit_in_service()) { + return NULL; + } + + return dirtylimit_query_all(); +} + +void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) +{ + DirtyLimitInfoList *info; + g_autoptr(DirtyLimitInfoList) head = NULL; + Error *err = NULL; + + if (!dirtylimit_in_service()) { + monitor_printf(mon, "Dirty page limit not enabled!\n"); + return; + } + + head = qmp_query_vcpu_dirty_limit(&err); + if (err) { + hmp_handle_error(mon, err); + return; + } + + for (info = head; info != NULL; info = info->next) { + monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," + " current rate %"PRIi64 " (MB/s)\n", + info->value->cpu_index, + info->value->limit_rate, + 
info->value->current_rate); + } +} diff --git a/system/dma-helpers.c b/system/dma-helpers.c new file mode 100644 index 0000000..36211ac --- /dev/null +++ b/system/dma-helpers.c @@ -0,0 +1,347 @@ +/* + * DMA helper functions + * + * Copyright (c) 2009,2020 Red Hat + * + * This work is licensed under the terms of the GNU General Public License + * (GNU GPL), version 2 or later. + */ + +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "sysemu/dma.h" +#include "trace/trace-root.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" +#include "sysemu/cpu-timers.h" +#include "qemu/range.h" + +/* #define DEBUG_IOMMU */ + +MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, + uint8_t c, dma_addr_t len, MemTxAttrs attrs) +{ + dma_barrier(as, DMA_DIRECTION_FROM_DEVICE); + + return address_space_set(as, addr, c, len, attrs); +} + +void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, + AddressSpace *as) +{ + qsg->sg = g_new(ScatterGatherEntry, alloc_hint); + qsg->nsg = 0; + qsg->nalloc = alloc_hint; + qsg->size = 0; + qsg->as = as; + qsg->dev = dev; + object_ref(OBJECT(dev)); +} + +void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) +{ + if (qsg->nsg == qsg->nalloc) { + qsg->nalloc = 2 * qsg->nalloc + 1; + qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc); + } + qsg->sg[qsg->nsg].base = base; + qsg->sg[qsg->nsg].len = len; + qsg->size += len; + ++qsg->nsg; +} + +void qemu_sglist_destroy(QEMUSGList *qsg) +{ + object_unref(OBJECT(qsg->dev)); + g_free(qsg->sg); + memset(qsg, 0, sizeof(*qsg)); +} + +typedef struct { + BlockAIOCB common; + AioContext *ctx; + BlockAIOCB *acb; + QEMUSGList *sg; + uint32_t align; + uint64_t offset; + DMADirection dir; + int sg_cur_index; + dma_addr_t sg_cur_byte; + QEMUIOVector iov; + QEMUBH *bh; + DMAIOFunc *io_func; + void *io_func_opaque; +} DMAAIOCB; + +static void dma_blk_cb(void *opaque, int ret); + +static void reschedule_dma(void *opaque) +{ + DMAAIOCB *dbs = 
(DMAAIOCB *)opaque; + + assert(!dbs->acb && dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + dma_blk_cb(dbs, 0); +} + +static void dma_blk_unmap(DMAAIOCB *dbs) +{ + int i; + + for (i = 0; i < dbs->iov.niov; ++i) { + dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base, + dbs->iov.iov[i].iov_len, dbs->dir, + dbs->iov.iov[i].iov_len); + } + qemu_iovec_reset(&dbs->iov); +} + +static void dma_complete(DMAAIOCB *dbs, int ret) +{ + trace_dma_complete(dbs, ret, dbs->common.cb); + + assert(!dbs->acb && !dbs->bh); + dma_blk_unmap(dbs); + if (dbs->common.cb) { + dbs->common.cb(dbs->common.opaque, ret); + } + qemu_iovec_destroy(&dbs->iov); + qemu_aio_unref(dbs); +} + +static void dma_blk_cb(void *opaque, int ret) +{ + DMAAIOCB *dbs = (DMAAIOCB *)opaque; + AioContext *ctx = dbs->ctx; + dma_addr_t cur_addr, cur_len; + void *mem; + + trace_dma_blk_cb(dbs, ret); + + aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); + goto out; + } + dma_blk_unmap(dbs); + + while (dbs->sg_cur_index < dbs->sg->nsg) { + cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; + cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; + mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir, + MEMTXATTRS_UNSPECIFIED); + /* + * Make reads deterministic in icount mode. Windows sometimes issues + * disk read requests with overlapping SGs. It leads + * to non-determinism, because resulting buffer contents may be mixed + * from several sectors. This code splits all SGs into several + * groups. SGs in every group do not overlap. 
+ */ + if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { + int i; + for (i = 0 ; i < dbs->iov.niov ; ++i) { + if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, + dbs->iov.iov[i].iov_len, (intptr_t)mem, + cur_len)) { + dma_memory_unmap(dbs->sg->as, mem, cur_len, + dbs->dir, cur_len); + mem = NULL; + break; + } + } + } + if (!mem) + break; + qemu_iovec_add(&dbs->iov, mem, cur_len); + dbs->sg_cur_byte += cur_len; + if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) { + dbs->sg_cur_byte = 0; + ++dbs->sg_cur_index; + } + } + + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); + dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); + goto out; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { + qemu_iovec_discard_back(&dbs->iov, + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); + } + + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); + assert(dbs->acb); +out: + aio_context_release(ctx); +} + +static void dma_aio_cancel(BlockAIOCB *acb) +{ + DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); + + trace_dma_aio_cancel(dbs); + + assert(!(dbs->acb && dbs->bh)); + if (dbs->acb) { + /* This will invoke dma_blk_cb. 
*/ + blk_aio_cancel_async(dbs->acb); + return; + } + + if (dbs->bh) { + cpu_unregister_map_client(dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + } + if (dbs->common.cb) { + dbs->common.cb(dbs->common.opaque, -ECANCELED); + } +} + +static const AIOCBInfo dma_aiocb_info = { + .aiocb_size = sizeof(DMAAIOCB), + .cancel_async = dma_aio_cancel, +}; + +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, uint32_t align, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, + void *opaque, DMADirection dir) +{ + DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); + + trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); + + dbs->acb = NULL; + dbs->sg = sg; + dbs->ctx = ctx; + dbs->offset = offset; + dbs->align = align; + dbs->sg_cur_index = 0; + dbs->sg_cur_byte = 0; + dbs->dir = dir; + dbs->io_func = io_func; + dbs->io_func_opaque = io_func_opaque; + dbs->bh = NULL; + qemu_iovec_init(&dbs->iov, sg->nsg); + dma_blk_cb(dbs, 0); + return &dbs->common; +} + + +static +BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + BlockBackend *blk = opaque; + return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); +} + +BlockAIOCB *dma_blk_read(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + void (*cb)(void *opaque, int ret), void *opaque) +{ + return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + dma_blk_read_io_func, blk, cb, opaque, + DMA_DIRECTION_FROM_DEVICE); +} + +static +BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + BlockBackend *blk = opaque; + return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); +} + +BlockAIOCB *dma_blk_write(BlockBackend *blk, + QEMUSGList *sg, uint64_t offset, uint32_t align, + void (*cb)(void *opaque, int ret), void *opaque) +{ + return 
dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + dma_blk_write_io_func, blk, cb, opaque, + DMA_DIRECTION_TO_DEVICE); +} + + +static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, DMADirection dir, + MemTxAttrs attrs) +{ + uint8_t *ptr = buf; + dma_addr_t xresidual; + int sg_cur_index; + MemTxResult res = MEMTX_OK; + + xresidual = sg->size; + sg_cur_index = 0; + len = MIN(len, xresidual); + while (len > 0) { + ScatterGatherEntry entry = sg->sg[sg_cur_index++]; + dma_addr_t xfer = MIN(len, entry.len); + res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs); + ptr += xfer; + len -= xfer; + xresidual -= xfer; + } + + if (residual) { + *residual = xresidual; + } + return res; +} + +MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs) +{ + return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs); +} + +MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, + QEMUSGList *sg, MemTxAttrs attrs) +{ + return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs); +} + +void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, + QEMUSGList *sg, enum BlockAcctType type) +{ + block_acct_start(blk_get_stats(blk), cookie, sg->size, type); +} + +uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits) +{ + uint64_t max_mask = UINT64_MAX, addr_mask = end - start; + uint64_t alignment_mask, size_mask; + + if (max_addr_bits != 64) { + max_mask = (1ULL << max_addr_bits) - 1; + } + + alignment_mask = start ? 
(start & -start) - 1 : max_mask; + alignment_mask = MIN(alignment_mask, max_mask); + size_mask = MIN(addr_mask, max_mask); + + if (alignment_mask <= size_mask) { + /* Increase the alignment of start */ + return alignment_mask; + } else { + /* Find the largest page mask from size */ + if (addr_mask == UINT64_MAX) { + return UINT64_MAX; + } + return (1ULL << (63 - clz64(addr_mask + 1))) - 1; + } +} + diff --git a/system/globals.c b/system/globals.c new file mode 100644 index 0000000..e83b542 --- /dev/null +++ b/system/globals.c @@ -0,0 +1,70 @@ +/* + * Global variables that (mostly) should not exist + * + * Copyright (c) 2003-2020 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "exec/cpu-common.h" +#include "hw/display/vga.h" +#include "hw/loader.h" +#include "hw/xen/xen.h" +#include "net/net.h" +#include "sysemu/cpus.h" +#include "sysemu/sysemu.h" + +enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB; +int display_opengl; +const char* keyboard_layout; +bool enable_mlock; +bool enable_cpu_pm; +int nb_nics; +NICInfo nd_table[MAX_NICS]; +int autostart = 1; +int vga_interface_type = VGA_NONE; +bool vga_interface_created; +Chardev *parallel_hds[MAX_PARALLEL_PORTS]; +int win2k_install_hack; +int fd_bootchk = 1; +int graphic_rotate; +QEMUOptionRom option_rom[MAX_OPTION_ROMS]; +int nb_option_roms; +int old_param; +const char *qemu_name; +unsigned int nb_prom_envs; +const char *prom_envs[MAX_PROM_ENVS]; +uint8_t *boot_splash_filedata; +int only_migratable; /* turn it off unless user states otherwise */ +int icount_align_option; + +/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the + * little-endian "wire format" described in the SMBIOS 2.6 specification. 
+ */ +QemuUUID qemu_uuid; +bool qemu_uuid_set; + +uint32_t xen_domid; +enum xen_mode xen_mode = XEN_DISABLED; +bool xen_domid_restrict; +struct evtchn_backend_ops *xen_evtchn_ops; +struct gnttab_backend_ops *xen_gnttab_ops; +struct foreignmem_backend_ops *xen_foreignmem_ops; +struct xenstore_backend_ops *xen_xenstore_ops; diff --git a/system/ioport.c b/system/ioport.c new file mode 100644 index 0000000..1824aa8 --- /dev/null +++ b/system/ioport.c @@ -0,0 +1,346 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * split out ioport related stuffs from vl.c. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/ioport.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "trace.h" + +struct MemoryRegionPortioList { + Object obj; + + MemoryRegion mr; + void *portio_opaque; + MemoryRegionPortio *ports; +}; + +#define TYPE_MEMORY_REGION_PORTIO_LIST "memory-region-portio-list" +OBJECT_DECLARE_SIMPLE_TYPE(MemoryRegionPortioList, MEMORY_REGION_PORTIO_LIST) + +static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size) +{ + return -1ULL; +} + +static void unassigned_io_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ +} + +const MemoryRegionOps unassigned_io_ops = { + .read = unassigned_io_read, + .write = unassigned_io_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +void cpu_outb(uint32_t addr, uint8_t val) +{ + trace_cpu_out(addr, 'b', val); + address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, + &val, 1); +} + +void cpu_outw(uint32_t addr, uint16_t val) +{ + uint8_t buf[2]; + + trace_cpu_out(addr, 'w', val); + stw_p(buf, val); + address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, + buf, 2); +} + +void cpu_outl(uint32_t addr, uint32_t val) +{ + uint8_t buf[4]; + + trace_cpu_out(addr, 'l', val); + stl_p(buf, val); + address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, + buf, 4); +} + +uint8_t cpu_inb(uint32_t addr) +{ + uint8_t val; + + address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, + &val, 1); + trace_cpu_in(addr, 'b', val); + return val; +} + +uint16_t cpu_inw(uint32_t addr) +{ + uint8_t buf[2]; + uint16_t val; + + address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2); + val = lduw_p(buf); + trace_cpu_in(addr, 'w', val); + return val; +} + +uint32_t cpu_inl(uint32_t addr) +{ + uint8_t buf[4]; + uint32_t val; + + address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4); + val = ldl_p(buf); + trace_cpu_in(addr, 'l', val); + return val; +} + +void 
portio_list_init(PortioList *piolist, + Object *owner, + const MemoryRegionPortio *callbacks, + void *opaque, const char *name) +{ + unsigned n = 0; + + while (callbacks[n].size) { + ++n; + } + + piolist->ports = callbacks; + piolist->nr = 0; + piolist->regions = g_new0(MemoryRegion *, n); + piolist->address_space = NULL; + piolist->opaque = opaque; + piolist->owner = owner; + piolist->name = name; + piolist->flush_coalesced_mmio = false; +} + +void portio_list_set_flush_coalesced(PortioList *piolist) +{ + piolist->flush_coalesced_mmio = true; +} + +void portio_list_destroy(PortioList *piolist) +{ + MemoryRegionPortioList *mrpio; + unsigned i; + + for (i = 0; i < piolist->nr; ++i) { + mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr); + object_unparent(OBJECT(&mrpio->mr)); + object_unref(mrpio); + } + g_free(piolist->regions); +} + +static const MemoryRegionPortio *find_portio(MemoryRegionPortioList *mrpio, + uint64_t offset, unsigned size, + bool write) +{ + const MemoryRegionPortio *mrp; + + for (mrp = mrpio->ports; mrp->size; ++mrp) { + if (offset >= mrp->offset && offset < mrp->offset + mrp->len && + size == mrp->size && + (write ? 
(bool)mrp->write : (bool)mrp->read)) { + return mrp; + } + } + return NULL; +} + +static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size) +{ + MemoryRegionPortioList *mrpio = opaque; + const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, false); + uint64_t data; + + data = ((uint64_t)1 << (size * 8)) - 1; + if (mrp) { + data = mrp->read(mrpio->portio_opaque, mrp->base + addr); + } else if (size == 2) { + mrp = find_portio(mrpio, addr, 1, false); + if (mrp) { + data = mrp->read(mrpio->portio_opaque, mrp->base + addr); + if (addr + 1 < mrp->offset + mrp->len) { + data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8; + } else { + data |= 0xff00; + } + } + } + return data; +} + +static void portio_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + MemoryRegionPortioList *mrpio = opaque; + const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true); + + if (mrp) { + mrp->write(mrpio->portio_opaque, mrp->base + addr, data); + } else if (size == 2) { + mrp = find_portio(mrpio, addr, 1, true); + if (mrp) { + mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff); + if (addr + 1 < mrp->offset + mrp->len) { + mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8); + } + } + } +} + +static const MemoryRegionOps portio_ops = { + .read = portio_read, + .write = portio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.unaligned = true, + .impl.unaligned = true, +}; + +static void portio_list_add_1(PortioList *piolist, + const MemoryRegionPortio *pio_init, + unsigned count, unsigned start, + unsigned off_low, unsigned off_high) +{ + MemoryRegionPortioList *mrpio; + Object *owner; + char *name; + unsigned i; + + /* Copy the sub-list and null-terminate it. 
*/ + mrpio = MEMORY_REGION_PORTIO_LIST( + object_new(TYPE_MEMORY_REGION_PORTIO_LIST)); + mrpio->portio_opaque = piolist->opaque; + mrpio->ports = g_malloc0(sizeof(MemoryRegionPortio) * (count + 1)); + memcpy(mrpio->ports, pio_init, sizeof(MemoryRegionPortio) * count); + memset(mrpio->ports + count, 0, sizeof(MemoryRegionPortio)); + + /* Adjust the offsets to all be zero-based for the region. */ + for (i = 0; i < count; ++i) { + mrpio->ports[i].offset -= off_low; + mrpio->ports[i].base = start + off_low; + } + + /* + * The MemoryRegion owner is the MemoryRegionPortioList since that manages + * the lifecycle via the refcount + */ + memory_region_init_io(&mrpio->mr, OBJECT(mrpio), &portio_ops, mrpio, + piolist->name, off_high - off_low); + + /* Reparent the MemoryRegion to the piolist owner */ + object_ref(&mrpio->mr); + object_unparent(OBJECT(&mrpio->mr)); + if (!piolist->owner) { + owner = container_get(qdev_get_machine(), "/unattached"); + } else { + owner = piolist->owner; + } + name = g_strdup_printf("%s[*]", piolist->name); + object_property_add_child(owner, name, OBJECT(&mrpio->mr)); + g_free(name); + + if (piolist->flush_coalesced_mmio) { + memory_region_set_flush_coalesced(&mrpio->mr); + } + memory_region_add_subregion(piolist->address_space, + start + off_low, &mrpio->mr); + piolist->regions[piolist->nr] = &mrpio->mr; + ++piolist->nr; +} + +void portio_list_add(PortioList *piolist, + MemoryRegion *address_space, + uint32_t start) +{ + const MemoryRegionPortio *pio, *pio_start = piolist->ports; + unsigned int off_low, off_high, off_last, count; + + piolist->address_space = address_space; + + /* Handle the first entry specially. */ + off_last = off_low = pio_start->offset; + off_high = off_low + pio_start->len + pio_start->size - 1; + count = 1; + + for (pio = pio_start + 1; pio->size != 0; pio++, count++) { + /* All entries must be sorted by offset. */ + assert(pio->offset >= off_last); + off_last = pio->offset; + + /* If we see a hole, break the region. 
*/ + if (off_last > off_high) { + portio_list_add_1(piolist, pio_start, count, start, off_low, + off_high); + /* ... and start collecting anew. */ + pio_start = pio; + off_low = off_last; + off_high = off_low + pio->len + pio_start->size - 1; + count = 0; + } else if (off_last + pio->len > off_high) { + off_high = off_last + pio->len + pio_start->size - 1; + } + } + + /* There will always be an open sub-list. */ + portio_list_add_1(piolist, pio_start, count, start, off_low, off_high); +} + +void portio_list_del(PortioList *piolist) +{ + MemoryRegionPortioList *mrpio; + unsigned i; + + for (i = 0; i < piolist->nr; ++i) { + mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr); + memory_region_del_subregion(piolist->address_space, &mrpio->mr); + } +} + +static void memory_region_portio_list_finalize(Object *obj) +{ + MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj); + + object_unref(&mrpio->mr); + g_free(mrpio->ports); +} + +static const TypeInfo memory_region_portio_list_info = { + .parent = TYPE_OBJECT, + .name = TYPE_MEMORY_REGION_PORTIO_LIST, + .instance_size = sizeof(MemoryRegionPortioList), + .instance_finalize = memory_region_portio_list_finalize, +}; + +static void ioport_register_types(void) +{ + type_register_static(&memory_region_portio_list_info); +} + +type_init(ioport_register_types) diff --git a/system/main.c b/system/main.c new file mode 100644 index 0000000..694388b --- /dev/null +++ b/system/main.c @@ -0,0 +1,49 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2020 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to 
the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-main.h" +#include "sysemu/sysemu.h" + +#ifdef CONFIG_SDL +#include <SDL.h> +#endif + +int qemu_default_main(void) +{ + int status; + + status = qemu_main_loop(); + qemu_cleanup(); + + return status; +} + +int (*qemu_main)(void) = qemu_default_main; + +int main(int argc, char **argv) +{ + qemu_init(argc, argv); + return qemu_main(); +} diff --git a/system/memory.c b/system/memory.c new file mode 100644 index 0000000..fa1c99f --- /dev/null +++ b/system/memory.c @@ -0,0 +1,3683 @@ +/* + * Physical memory management + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/memory.h" +#include "qapi/visitor.h" +#include "qemu/bitops.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/qemu-print.h" +#include "qom/object.h" +#include "trace.h" + +#include "exec/memory-internal.h" +#include "exec/ram_addr.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" +#include "sysemu/tcg.h" +#include "qemu/accel.h" +#include "hw/boards.h" +#include "migration/vmstate.h" +#include "exec/address-spaces.h" + +//#define DEBUG_UNASSIGNED + +static unsigned memory_region_transaction_depth; +static bool memory_region_update_pending; +static bool ioeventfd_update_pending; +unsigned int global_dirty_tracking; + +static QTAILQ_HEAD(, MemoryListener) memory_listeners + = QTAILQ_HEAD_INITIALIZER(memory_listeners); + +static QTAILQ_HEAD(, AddressSpace) address_spaces + = QTAILQ_HEAD_INITIALIZER(address_spaces); + +static GHashTable *flat_views; + +typedef struct AddrRange AddrRange; + +/* + * Note that signed integers are needed for negative offsetting in aliases + * (large MemoryRegion::alias_offset). 
+ */ +struct AddrRange { + Int128 start; + Int128 size; +}; + +static AddrRange addrrange_make(Int128 start, Int128 size) +{ + return (AddrRange) { start, size }; +} + +static bool addrrange_equal(AddrRange r1, AddrRange r2) +{ + return int128_eq(r1.start, r2.start) && int128_eq(r1.size, r2.size); +} + +static Int128 addrrange_end(AddrRange r) +{ + return int128_add(r.start, r.size); +} + +static AddrRange addrrange_shift(AddrRange range, Int128 delta) +{ + int128_addto(&range.start, delta); + return range; +} + +static bool addrrange_contains(AddrRange range, Int128 addr) +{ + return int128_ge(addr, range.start) + && int128_lt(addr, addrrange_end(range)); +} + +static bool addrrange_intersects(AddrRange r1, AddrRange r2) +{ + return addrrange_contains(r1, r2.start) + || addrrange_contains(r2, r1.start); +} + +static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2) +{ + Int128 start = int128_max(r1.start, r2.start); + Int128 end = int128_min(addrrange_end(r1), addrrange_end(r2)); + return addrrange_make(start, int128_sub(end, start)); +} + +enum ListenerDirection { Forward, Reverse }; + +#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \ + do { \ + MemoryListener *_listener; \ + \ + switch (_direction) { \ + case Forward: \ + QTAILQ_FOREACH(_listener, &memory_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + case Reverse: \ + QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + default: \ + abort(); \ + } \ + } while (0) + +#define MEMORY_LISTENER_CALL(_as, _callback, _direction, _section, _args...) 
\ + do { \ + MemoryListener *_listener; \ + \ + switch (_direction) { \ + case Forward: \ + QTAILQ_FOREACH(_listener, &(_as)->listeners, link_as) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, _section, ##_args); \ + } \ + } \ + break; \ + case Reverse: \ + QTAILQ_FOREACH_REVERSE(_listener, &(_as)->listeners, link_as) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, _section, ##_args); \ + } \ + } \ + break; \ + default: \ + abort(); \ + } \ + } while (0) + +/* No need to ref/unref .mr, the FlatRange keeps it alive. */ +#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \ + do { \ + MemoryRegionSection mrs = section_from_flat_range(fr, \ + address_space_to_flatview(as)); \ + MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \ + } while(0) + +struct CoalescedMemoryRange { + AddrRange addr; + QTAILQ_ENTRY(CoalescedMemoryRange) link; +}; + +struct MemoryRegionIoeventfd { + AddrRange addr; + bool match_data; + uint64_t data; + EventNotifier *e; +}; + +static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a, + MemoryRegionIoeventfd *b) +{ + if (int128_lt(a->addr.start, b->addr.start)) { + return true; + } else if (int128_gt(a->addr.start, b->addr.start)) { + return false; + } else if (int128_lt(a->addr.size, b->addr.size)) { + return true; + } else if (int128_gt(a->addr.size, b->addr.size)) { + return false; + } else if (a->match_data < b->match_data) { + return true; + } else if (a->match_data > b->match_data) { + return false; + } else if (a->match_data) { + if (a->data < b->data) { + return true; + } else if (a->data > b->data) { + return false; + } + } + if (a->e < b->e) { + return true; + } else if (a->e > b->e) { + return false; + } + return false; +} + +static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a, + MemoryRegionIoeventfd *b) +{ + if (int128_eq(a->addr.start, b->addr.start) && + (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) || + (int128_eq(a->addr.size, 
b->addr.size) && + (a->match_data == b->match_data) && + ((a->match_data && (a->data == b->data)) || !a->match_data) && + (a->e == b->e)))) + return true; + + return false; +} + +/* Range of memory in the global map. Addresses are absolute. */ +struct FlatRange { + MemoryRegion *mr; + hwaddr offset_in_region; + AddrRange addr; + uint8_t dirty_log_mask; + bool romd_mode; + bool readonly; + bool nonvolatile; +}; + +#define FOR_EACH_FLAT_RANGE(var, view) \ + for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var) + +static inline MemoryRegionSection +section_from_flat_range(FlatRange *fr, FlatView *fv) +{ + return (MemoryRegionSection) { + .mr = fr->mr, + .fv = fv, + .offset_within_region = fr->offset_in_region, + .size = fr->addr.size, + .offset_within_address_space = int128_get64(fr->addr.start), + .readonly = fr->readonly, + .nonvolatile = fr->nonvolatile, + }; +} + +static bool flatrange_equal(FlatRange *a, FlatRange *b) +{ + return a->mr == b->mr + && addrrange_equal(a->addr, b->addr) + && a->offset_in_region == b->offset_in_region + && a->romd_mode == b->romd_mode + && a->readonly == b->readonly + && a->nonvolatile == b->nonvolatile; +} + +static FlatView *flatview_new(MemoryRegion *mr_root) +{ + FlatView *view; + + view = g_new0(FlatView, 1); + view->ref = 1; + view->root = mr_root; + memory_region_ref(mr_root); + trace_flatview_new(view, mr_root); + + return view; +} + +/* Insert a range into a given position. Caller is responsible for maintaining + * sorting order. 
+ */ +static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range) +{ + if (view->nr == view->nr_allocated) { + view->nr_allocated = MAX(2 * view->nr, 10); + view->ranges = g_realloc(view->ranges, + view->nr_allocated * sizeof(*view->ranges)); + } + memmove(view->ranges + pos + 1, view->ranges + pos, + (view->nr - pos) * sizeof(FlatRange)); + view->ranges[pos] = *range; + memory_region_ref(range->mr); + ++view->nr; +} + +static void flatview_destroy(FlatView *view) +{ + int i; + + trace_flatview_destroy(view, view->root); + if (view->dispatch) { + address_space_dispatch_free(view->dispatch); + } + for (i = 0; i < view->nr; i++) { + memory_region_unref(view->ranges[i].mr); + } + g_free(view->ranges); + memory_region_unref(view->root); + g_free(view); +} + +static bool flatview_ref(FlatView *view) +{ + return qatomic_fetch_inc_nonzero(&view->ref) > 0; +} + +void flatview_unref(FlatView *view) +{ + if (qatomic_fetch_dec(&view->ref) == 1) { + trace_flatview_destroy_rcu(view, view->root); + assert(view->root); + call_rcu(view, flatview_destroy, rcu); + } +} + +static bool can_merge(FlatRange *r1, FlatRange *r2) +{ + return int128_eq(addrrange_end(r1->addr), r2->addr.start) + && r1->mr == r2->mr + && int128_eq(int128_add(int128_make64(r1->offset_in_region), + r1->addr.size), + int128_make64(r2->offset_in_region)) + && r1->dirty_log_mask == r2->dirty_log_mask + && r1->romd_mode == r2->romd_mode + && r1->readonly == r2->readonly + && r1->nonvolatile == r2->nonvolatile; +} + +/* Attempt to simplify a view by merging adjacent ranges */ +static void flatview_simplify(FlatView *view) +{ + unsigned i, j, k; + + i = 0; + while (i < view->nr) { + j = i + 1; + while (j < view->nr + && can_merge(&view->ranges[j-1], &view->ranges[j])) { + int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size); + ++j; + } + ++i; + for (k = i; k < j; k++) { + memory_region_unref(view->ranges[k].mr); + } + memmove(&view->ranges[i], &view->ranges[j], + (view->nr - j) * 
sizeof(view->ranges[j])); + view->nr -= j - i; + } +} + +static bool memory_region_big_endian(MemoryRegion *mr) +{ +#if TARGET_BIG_ENDIAN + return mr->ops->endianness != DEVICE_LITTLE_ENDIAN; +#else + return mr->ops->endianness == DEVICE_BIG_ENDIAN; +#endif +} + +static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op) +{ + if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) { + switch (op & MO_SIZE) { + case MO_8: + break; + case MO_16: + *data = bswap16(*data); + break; + case MO_32: + *data = bswap32(*data); + break; + case MO_64: + *data = bswap64(*data); + break; + default: + g_assert_not_reached(); + } + } +} + +static inline void memory_region_shift_read_access(uint64_t *value, + signed shift, + uint64_t mask, + uint64_t tmp) +{ + if (shift >= 0) { + *value |= (tmp & mask) << shift; + } else { + *value |= (tmp & mask) >> -shift; + } +} + +static inline uint64_t memory_region_shift_write_access(uint64_t *value, + signed shift, + uint64_t mask) +{ + uint64_t tmp; + + if (shift >= 0) { + tmp = (*value >> shift) & mask; + } else { + tmp = (*value << -shift) & mask; + } + + return tmp; +} + +static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset) +{ + MemoryRegion *root; + hwaddr abs_addr = offset; + + abs_addr += mr->addr; + for (root = mr; root->container; ) { + root = root->container; + abs_addr += root->addr; + } + + return abs_addr; +} + +static int get_cpu_index(void) +{ + if (current_cpu) { + return current_cpu->cpu_index; + } + return -1; +} + +static MemTxResult memory_region_read_accessor(MemoryRegion *mr, + hwaddr addr, + uint64_t *value, + unsigned size, + signed shift, + uint64_t mask, + MemTxAttrs attrs) +{ + uint64_t tmp; + + tmp = mr->ops->read(mr->opaque, addr, size); + if (mr->subpage) { + trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); + } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) { + hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); + 
trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size, + memory_region_name(mr)); + } + memory_region_shift_read_access(value, shift, mask, tmp); + return MEMTX_OK; +} + +static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr, + hwaddr addr, + uint64_t *value, + unsigned size, + signed shift, + uint64_t mask, + MemTxAttrs attrs) +{ + uint64_t tmp = 0; + MemTxResult r; + + r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs); + if (mr->subpage) { + trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size); + } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) { + hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); + trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size, + memory_region_name(mr)); + } + memory_region_shift_read_access(value, shift, mask, tmp); + return r; +} + +static MemTxResult memory_region_write_accessor(MemoryRegion *mr, + hwaddr addr, + uint64_t *value, + unsigned size, + signed shift, + uint64_t mask, + MemTxAttrs attrs) +{ + uint64_t tmp = memory_region_shift_write_access(value, shift, mask); + + if (mr->subpage) { + trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); + } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) { + hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr); + trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size, + memory_region_name(mr)); + } + mr->ops->write(mr->opaque, addr, tmp, size); + return MEMTX_OK; +} + +static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr, + hwaddr addr, + uint64_t *value, + unsigned size, + signed shift, + uint64_t mask, + MemTxAttrs attrs) +{ + uint64_t tmp = memory_region_shift_write_access(value, shift, mask); + + if (mr->subpage) { + trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size); + } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) { + hwaddr 
abs_addr = memory_region_to_absolute_addr(mr, addr); + trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size, + memory_region_name(mr)); + } + return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs); +} + +static MemTxResult access_with_adjusted_size(hwaddr addr, + uint64_t *value, + unsigned size, + unsigned access_size_min, + unsigned access_size_max, + MemTxResult (*access_fn) + (MemoryRegion *mr, + hwaddr addr, + uint64_t *value, + unsigned size, + signed shift, + uint64_t mask, + MemTxAttrs attrs), + MemoryRegion *mr, + MemTxAttrs attrs) +{ + uint64_t access_mask; + unsigned access_size; + unsigned i; + MemTxResult r = MEMTX_OK; + bool reentrancy_guard_applied = false; + + if (!access_size_min) { + access_size_min = 1; + } + if (!access_size_max) { + access_size_max = 4; + } + + /* Do not allow more than one simultaneous access to a device's IO Regions */ + if (mr->dev && !mr->disable_reentrancy_guard && + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { + warn_report_once("Blocked re-entrant IO on MemoryRegion: " + "%s at addr: 0x%" HWADDR_PRIX, + memory_region_name(mr), addr); + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; + reentrancy_guard_applied = true; + } + + /* FIXME: support unaligned access? 
*/ + access_size = MAX(MIN(size, access_size_max), access_size_min); + access_mask = MAKE_64BIT_MASK(0, access_size * 8); + if (memory_region_big_endian(mr)) { + for (i = 0; i < size; i += access_size) { + r |= access_fn(mr, addr + i, value, access_size, + (size - access_size - i) * 8, access_mask, attrs); + } + } else { + for (i = 0; i < size; i += access_size) { + r |= access_fn(mr, addr + i, value, access_size, i * 8, + access_mask, attrs); + } + } + if (mr->dev && reentrancy_guard_applied) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } + return r; +} + +static AddressSpace *memory_region_to_address_space(MemoryRegion *mr) +{ + AddressSpace *as; + + while (mr->container) { + mr = mr->container; + } + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + if (mr == as->root) { + return as; + } + } + return NULL; +} + +/* Render a memory region into the global view. Ranges in @view obscure + * ranges in @mr. + */ +static void render_memory_region(FlatView *view, + MemoryRegion *mr, + Int128 base, + AddrRange clip, + bool readonly, + bool nonvolatile) +{ + MemoryRegion *subregion; + unsigned i; + hwaddr offset_in_region; + Int128 remain; + Int128 now; + FlatRange fr; + AddrRange tmp; + + if (!mr->enabled) { + return; + } + + int128_addto(&base, int128_make64(mr->addr)); + readonly |= mr->readonly; + nonvolatile |= mr->nonvolatile; + + tmp = addrrange_make(base, mr->size); + + if (!addrrange_intersects(tmp, clip)) { + return; + } + + clip = addrrange_intersection(tmp, clip); + + if (mr->alias) { + int128_subfrom(&base, int128_make64(mr->alias->addr)); + int128_subfrom(&base, int128_make64(mr->alias_offset)); + render_memory_region(view, mr->alias, base, clip, + readonly, nonvolatile); + return; + } + + /* Render subregions in priority order. 
*/ + QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) { + render_memory_region(view, subregion, base, clip, + readonly, nonvolatile); + } + + if (!mr->terminates) { + return; + } + + offset_in_region = int128_get64(int128_sub(clip.start, base)); + base = clip.start; + remain = clip.size; + + fr.mr = mr; + fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr); + fr.romd_mode = mr->romd_mode; + fr.readonly = readonly; + fr.nonvolatile = nonvolatile; + + /* Render the region itself into any gaps left by the current view. */ + for (i = 0; i < view->nr && int128_nz(remain); ++i) { + if (int128_ge(base, addrrange_end(view->ranges[i].addr))) { + continue; + } + if (int128_lt(base, view->ranges[i].addr.start)) { + now = int128_min(remain, + int128_sub(view->ranges[i].addr.start, base)); + fr.offset_in_region = offset_in_region; + fr.addr = addrrange_make(base, now); + flatview_insert(view, i, &fr); + ++i; + int128_addto(&base, now); + offset_in_region += int128_get64(now); + int128_subfrom(&remain, now); + } + now = int128_sub(int128_min(int128_add(base, remain), + addrrange_end(view->ranges[i].addr)), + base); + int128_addto(&base, now); + offset_in_region += int128_get64(now); + int128_subfrom(&remain, now); + } + if (int128_nz(remain)) { + fr.offset_in_region = offset_in_region; + fr.addr = addrrange_make(base, remain); + flatview_insert(view, i, &fr); + } +} + +void flatview_for_each_range(FlatView *fv, flatview_cb cb , void *opaque) +{ + FlatRange *fr; + + assert(fv); + assert(cb); + + FOR_EACH_FLAT_RANGE(fr, fv) { + if (cb(fr->addr.start, fr->addr.size, fr->mr, + fr->offset_in_region, opaque)) { + break; + } + } +} + +static MemoryRegion *memory_region_get_flatview_root(MemoryRegion *mr) +{ + while (mr->enabled) { + if (mr->alias) { + if (!mr->alias_offset && int128_ge(mr->size, mr->alias->size)) { + /* The alias is included in its entirety. Use it as + * the "real" root, so that we can share more FlatViews. 
+ */ + mr = mr->alias; + continue; + } + } else if (!mr->terminates) { + unsigned int found = 0; + MemoryRegion *child, *next = NULL; + QTAILQ_FOREACH(child, &mr->subregions, subregions_link) { + if (child->enabled) { + if (++found > 1) { + next = NULL; + break; + } + if (!child->addr && int128_ge(mr->size, child->size)) { + /* A child is included in its entirety. If it's the only + * enabled one, use it in the hope of finding an alias down the + * way. This will also let us share FlatViews. + */ + next = child; + } + } + } + if (found == 0) { + return NULL; + } + if (next) { + mr = next; + continue; + } + } + + return mr; + } + + return NULL; +} + +/* Render a memory topology into a list of disjoint absolute ranges. */ +static FlatView *generate_memory_topology(MemoryRegion *mr) +{ + int i; + FlatView *view; + + view = flatview_new(mr); + + if (mr) { + render_memory_region(view, mr, int128_zero(), + addrrange_make(int128_zero(), int128_2_64()), + false, false); + } + flatview_simplify(view); + + view->dispatch = address_space_dispatch_new(view); + for (i = 0; i < view->nr; i++) { + MemoryRegionSection mrs = + section_from_flat_range(&view->ranges[i], view); + flatview_add_to_dispatch(view, &mrs); + } + address_space_dispatch_compact(view->dispatch); + g_hash_table_replace(flat_views, mr, view); + + return view; +} + +static void address_space_add_del_ioeventfds(AddressSpace *as, + MemoryRegionIoeventfd *fds_new, + unsigned fds_new_nb, + MemoryRegionIoeventfd *fds_old, + unsigned fds_old_nb) +{ + unsigned iold, inew; + MemoryRegionIoeventfd *fd; + MemoryRegionSection section; + + /* Generate a symmetric difference of the old and new fd sets, adding + * and deleting as necessary. 
+ */ + + iold = inew = 0; + while (iold < fds_old_nb || inew < fds_new_nb) { + if (iold < fds_old_nb + && (inew == fds_new_nb + || memory_region_ioeventfd_before(&fds_old[iold], + &fds_new[inew]))) { + fd = &fds_old[iold]; + section = (MemoryRegionSection) { + .fv = address_space_to_flatview(as), + .offset_within_address_space = int128_get64(fd->addr.start), + .size = fd->addr.size, + }; + MEMORY_LISTENER_CALL(as, eventfd_del, Forward, &section, + fd->match_data, fd->data, fd->e); + ++iold; + } else if (inew < fds_new_nb + && (iold == fds_old_nb + || memory_region_ioeventfd_before(&fds_new[inew], + &fds_old[iold]))) { + fd = &fds_new[inew]; + section = (MemoryRegionSection) { + .fv = address_space_to_flatview(as), + .offset_within_address_space = int128_get64(fd->addr.start), + .size = fd->addr.size, + }; + MEMORY_LISTENER_CALL(as, eventfd_add, Reverse, &section, + fd->match_data, fd->data, fd->e); + ++inew; + } else { + ++iold; + ++inew; + } + } +} + +FlatView *address_space_get_flatview(AddressSpace *as) +{ + FlatView *view; + + RCU_READ_LOCK_GUARD(); + do { + view = address_space_to_flatview(as); + /* If somebody has replaced as->current_map concurrently, + * flatview_ref returns false. + */ + } while (!flatview_ref(view)); + return view; +} + +static void address_space_update_ioeventfds(AddressSpace *as) +{ + FlatView *view; + FlatRange *fr; + unsigned ioeventfd_nb = 0; + unsigned ioeventfd_max; + MemoryRegionIoeventfd *ioeventfds; + AddrRange tmp; + unsigned i; + + if (!as->ioeventfd_notifiers) { + return; + } + + /* + * It is likely that the number of ioeventfds hasn't changed much, so use + * the previous size as the starting value, with some headroom to avoid + * gratuitous reallocations. 
+ */ + ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4); + ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max); + + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + for (i = 0; i < fr->mr->ioeventfd_nb; ++i) { + tmp = addrrange_shift(fr->mr->ioeventfds[i].addr, + int128_sub(fr->addr.start, + int128_make64(fr->offset_in_region))); + if (addrrange_intersects(fr->addr, tmp)) { + ++ioeventfd_nb; + if (ioeventfd_nb > ioeventfd_max) { + ioeventfd_max = MAX(ioeventfd_max * 2, 4); + ioeventfds = g_realloc(ioeventfds, + ioeventfd_max * sizeof(*ioeventfds)); + } + ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i]; + ioeventfds[ioeventfd_nb-1].addr = tmp; + } + } + } + + address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb, + as->ioeventfds, as->ioeventfd_nb); + + g_free(as->ioeventfds); + as->ioeventfds = ioeventfds; + as->ioeventfd_nb = ioeventfd_nb; + flatview_unref(view); +} + +/* + * Notify the memory listeners about the coalesced IO change events of + * range `cmr'. Only the part that has intersection of the specified + * FlatRange will be sent. 
+ */ +static void flat_range_coalesced_io_notify(FlatRange *fr, AddressSpace *as, + CoalescedMemoryRange *cmr, bool add) +{ + AddrRange tmp; + + tmp = addrrange_shift(cmr->addr, + int128_sub(fr->addr.start, + int128_make64(fr->offset_in_region))); + if (!addrrange_intersects(tmp, fr->addr)) { + return; + } + tmp = addrrange_intersection(tmp, fr->addr); + + if (add) { + MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, + int128_get64(tmp.start), + int128_get64(tmp.size)); + } else { + MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, + int128_get64(tmp.start), + int128_get64(tmp.size)); + } +} + +static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) +{ + CoalescedMemoryRange *cmr; + + QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { + flat_range_coalesced_io_notify(fr, as, cmr, false); + } +} + +static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) +{ + MemoryRegion *mr = fr->mr; + CoalescedMemoryRange *cmr; + + if (QTAILQ_EMPTY(&mr->coalesced)) { + return; + } + + QTAILQ_FOREACH(cmr, &mr->coalesced, link) { + flat_range_coalesced_io_notify(fr, as, cmr, true); + } +} + +static void address_space_update_topology_pass(AddressSpace *as, + const FlatView *old_view, + const FlatView *new_view, + bool adding) +{ + unsigned iold, inew; + FlatRange *frold, *frnew; + + /* Generate a symmetric difference of the old and new memory maps. + * Kill ranges in the old map, and instantiate ranges in the new map. + */ + iold = inew = 0; + while (iold < old_view->nr || inew < new_view->nr) { + if (iold < old_view->nr) { + frold = &old_view->ranges[iold]; + } else { + frold = NULL; + } + if (inew < new_view->nr) { + frnew = &new_view->ranges[inew]; + } else { + frnew = NULL; + } + + if (frold + && (!frnew + || int128_lt(frold->addr.start, frnew->addr.start) + || (int128_eq(frold->addr.start, frnew->addr.start) + && !flatrange_equal(frold, frnew)))) { + /* In old but not in new, or in both but attributes changed. 
*/ + + if (!adding) { + flat_range_coalesced_io_del(frold, as); + MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del); + } + + ++iold; + } else if (frold && frnew && flatrange_equal(frold, frnew)) { + /* In both and unchanged (except logging may have changed) */ + + if (adding) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop); + if (frnew->dirty_log_mask & ~frold->dirty_log_mask) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start, + frold->dirty_log_mask, + frnew->dirty_log_mask); + } + if (frold->dirty_log_mask & ~frnew->dirty_log_mask) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop, + frold->dirty_log_mask, + frnew->dirty_log_mask); + } + } + + ++iold; + ++inew; + } else { + /* In new */ + + if (adding) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add); + flat_range_coalesced_io_add(frnew, as); + } + + ++inew; + } + } +} + +static void flatviews_init(void) +{ + static FlatView *empty_view; + + if (flat_views) { + return; + } + + flat_views = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + (GDestroyNotify) flatview_unref); + if (!empty_view) { + empty_view = generate_memory_topology(NULL); + /* We keep it alive forever in the global variable. 
*/ + flatview_ref(empty_view); + } else { + g_hash_table_replace(flat_views, NULL, empty_view); + flatview_ref(empty_view); + } +} + +static void flatviews_reset(void) +{ + AddressSpace *as; + + if (flat_views) { + g_hash_table_unref(flat_views); + flat_views = NULL; + } + flatviews_init(); + + /* Render unique FVs */ + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + MemoryRegion *physmr = memory_region_get_flatview_root(as->root); + + if (g_hash_table_lookup(flat_views, physmr)) { + continue; + } + + generate_memory_topology(physmr); + } +} + +static void address_space_set_flatview(AddressSpace *as) +{ + FlatView *old_view = address_space_to_flatview(as); + MemoryRegion *physmr = memory_region_get_flatview_root(as->root); + FlatView *new_view = g_hash_table_lookup(flat_views, physmr); + + assert(new_view); + + if (old_view == new_view) { + return; + } + + if (old_view) { + flatview_ref(old_view); + } + + flatview_ref(new_view); + + if (!QTAILQ_EMPTY(&as->listeners)) { + FlatView tmpview = { .nr = 0 }, *old_view2 = old_view; + + if (!old_view2) { + old_view2 = &tmpview; + } + address_space_update_topology_pass(as, old_view2, new_view, false); + address_space_update_topology_pass(as, old_view2, new_view, true); + } + + /* Writes are protected by the BQL. */ + qatomic_rcu_set(&as->current_map, new_view); + if (old_view) { + flatview_unref(old_view); + } + + /* Note that all the old MemoryRegions are still alive up to this + * point. This relieves most MemoryListeners from the need to + * ref/unref the MemoryRegions they get---unless they use them + * outside the iothread mutex, in which case precise reference + * counting is necessary. 
+ */ + if (old_view) { + flatview_unref(old_view); + } +} + +static void address_space_update_topology(AddressSpace *as) +{ + MemoryRegion *physmr = memory_region_get_flatview_root(as->root); + + flatviews_init(); + if (!g_hash_table_lookup(flat_views, physmr)) { + generate_memory_topology(physmr); + } + address_space_set_flatview(as); +} + +void memory_region_transaction_begin(void) +{ + qemu_flush_coalesced_mmio_buffer(); + ++memory_region_transaction_depth; +} + +void memory_region_transaction_commit(void) +{ + AddressSpace *as; + + assert(memory_region_transaction_depth); + assert(qemu_mutex_iothread_locked()); + + --memory_region_transaction_depth; + if (!memory_region_transaction_depth) { + if (memory_region_update_pending) { + flatviews_reset(); + + MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); + + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_set_flatview(as); + address_space_update_ioeventfds(as); + } + memory_region_update_pending = false; + ioeventfd_update_pending = false; + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); + } else if (ioeventfd_update_pending) { + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_ioeventfds(as); + } + ioeventfd_update_pending = false; + } + } +} + +static void memory_region_destructor_none(MemoryRegion *mr) +{ +} + +static void memory_region_destructor_ram(MemoryRegion *mr) +{ + qemu_ram_free(mr->ram_block); +} + +static bool memory_region_need_escape(char c) +{ + return c == '/' || c == '[' || c == '\\' || c == ']'; +} + +static char *memory_region_escape_name(const char *name) +{ + const char *p; + char *escaped, *q; + uint8_t c; + size_t bytes = 0; + + for (p = name; *p; p++) { + bytes += memory_region_need_escape(*p) ? 
4 : 1; + } + if (bytes == p - name) { + return g_memdup(name, bytes + 1); + } + + escaped = g_malloc(bytes + 1); + for (p = name, q = escaped; *p; p++) { + c = *p; + if (unlikely(memory_region_need_escape(c))) { + *q++ = '\\'; + *q++ = 'x'; + *q++ = "0123456789abcdef"[c >> 4]; + c = "0123456789abcdef"[c & 15]; + } + *q++ = c; + } + *q = 0; + return escaped; +} + +static void memory_region_do_init(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size) +{ + mr->size = int128_make64(size); + if (size == UINT64_MAX) { + mr->size = int128_2_64(); + } + mr->name = g_strdup(name); + mr->owner = owner; + mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); + mr->ram_block = NULL; + + if (name) { + char *escaped_name = memory_region_escape_name(name); + char *name_array = g_strdup_printf("%s[*]", escaped_name); + + if (!owner) { + owner = container_get(qdev_get_machine(), "/unattached"); + } + + object_property_add_child(owner, name_array, OBJECT(mr)); + object_unref(OBJECT(mr)); + g_free(name_array); + g_free(escaped_name); + } +} + +void memory_region_init(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size) +{ + object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION); + memory_region_do_init(mr, owner, name, size); +} + +static void memory_region_get_container(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + char *path = (char *)""; + + if (mr->container) { + path = object_get_canonical_path(OBJECT(mr->container)); + } + visit_type_str(v, name, &path, errp); + if (mr->container) { + g_free(path); + } +} + +static Object *memory_region_resolve_container(Object *obj, void *opaque, + const char *part) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + + return OBJECT(mr->container); +} + +static void memory_region_get_priority(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + int32_t 
value = mr->priority; + + visit_type_int32(v, name, &value, errp); +} + +static void memory_region_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + uint64_t value = memory_region_size(mr); + + visit_type_uint64(v, name, &value, errp); +} + +static void memory_region_initfn(Object *obj) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + ObjectProperty *op; + + mr->ops = &unassigned_mem_ops; + mr->enabled = true; + mr->romd_mode = true; + mr->destructor = memory_region_destructor_none; + QTAILQ_INIT(&mr->subregions); + QTAILQ_INIT(&mr->coalesced); + + op = object_property_add(OBJECT(mr), "container", + "link<" TYPE_MEMORY_REGION ">", + memory_region_get_container, + NULL, /* memory_region_set_container */ + NULL, NULL); + op->resolve = memory_region_resolve_container; + + object_property_add_uint64_ptr(OBJECT(mr), "addr", + &mr->addr, OBJ_PROP_FLAG_READ); + object_property_add(OBJECT(mr), "priority", "uint32", + memory_region_get_priority, + NULL, /* memory_region_set_priority */ + NULL, NULL); + object_property_add(OBJECT(mr), "size", "uint64", + memory_region_get_size, + NULL, /* memory_region_set_size, */ + NULL, NULL); +} + +static void iommu_memory_region_initfn(Object *obj) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + + mr->is_iommu = true; +} + +static uint64_t unassigned_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem read " HWADDR_FMT_plx "\n", addr); +#endif + return 0; +} + +static void unassigned_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ +#ifdef DEBUG_UNASSIGNED + printf("Unassigned mem write " HWADDR_FMT_plx " = 0x%"PRIx64"\n", addr, val); +#endif +} + +static bool unassigned_mem_accepts(void *opaque, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs) +{ + return false; +} + +const MemoryRegionOps unassigned_mem_ops = { + .valid.accepts = unassigned_mem_accepts, + .endianness = 
DEVICE_NATIVE_ENDIAN, +}; + +static uint64_t memory_region_ram_device_read(void *opaque, + hwaddr addr, unsigned size) +{ + MemoryRegion *mr = opaque; + uint64_t data = (uint64_t)~0; + + switch (size) { + case 1: + data = *(uint8_t *)(mr->ram_block->host + addr); + break; + case 2: + data = *(uint16_t *)(mr->ram_block->host + addr); + break; + case 4: + data = *(uint32_t *)(mr->ram_block->host + addr); + break; + case 8: + data = *(uint64_t *)(mr->ram_block->host + addr); + break; + } + + trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size); + + return data; +} + +static void memory_region_ram_device_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MemoryRegion *mr = opaque; + + trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size); + + switch (size) { + case 1: + *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; + break; + case 2: + *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; + break; + case 4: + *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; + break; + case 8: + *(uint64_t *)(mr->ram_block->host + addr) = data; + break; + } +} + +static const MemoryRegionOps ram_device_mem_ops = { + .read = memory_region_ram_device_read, + .write = memory_region_ram_device_write, + .endianness = DEVICE_HOST_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, +}; + +bool memory_region_access_valid(MemoryRegion *mr, + hwaddr addr, + unsigned size, + bool is_write, + MemTxAttrs attrs) +{ + if (mr->ops->valid.accepts + && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + ", size %u, region '%s', reason: rejected\n", + is_write ? 
"write" : "read", + addr, size, memory_region_name(mr)); + return false; + } + + if (!mr->ops->valid.unaligned && (addr & (size - 1))) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + ", size %u, region '%s', reason: unaligned\n", + is_write ? "write" : "read", + addr, size, memory_region_name(mr)); + return false; + } + + /* Treat zero as compatibility all valid */ + if (!mr->ops->valid.max_access_size) { + return true; + } + + if (size > mr->ops->valid.max_access_size + || size < mr->ops->valid.min_access_size) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + ", size %u, region '%s', reason: invalid size " + "(min:%u max:%u)\n", + is_write ? "write" : "read", + addr, size, memory_region_name(mr), + mr->ops->valid.min_access_size, + mr->ops->valid.max_access_size); + return false; + } + return true; +} + +static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr, + hwaddr addr, + uint64_t *pval, + unsigned size, + MemTxAttrs attrs) +{ + *pval = 0; + + if (mr->ops->read) { + return access_with_adjusted_size(addr, pval, size, + mr->ops->impl.min_access_size, + mr->ops->impl.max_access_size, + memory_region_read_accessor, + mr, attrs); + } else { + return access_with_adjusted_size(addr, pval, size, + mr->ops->impl.min_access_size, + mr->ops->impl.max_access_size, + memory_region_read_with_attrs_accessor, + mr, attrs); + } +} + +MemTxResult memory_region_dispatch_read(MemoryRegion *mr, + hwaddr addr, + uint64_t *pval, + MemOp op, + MemTxAttrs attrs) +{ + unsigned size = memop_size(op); + MemTxResult r; + + if (mr->alias) { + return memory_region_dispatch_read(mr->alias, + mr->alias_offset + addr, + pval, op, attrs); + } + if (!memory_region_access_valid(mr, addr, size, false, attrs)) { + *pval = unassigned_mem_read(mr, addr, size); + return MEMTX_DECODE_ERROR; + } + + r = memory_region_dispatch_read1(mr, addr, pval, size, attrs); + adjust_endianness(mr, pval, op); + return r; +} + +/* Return true if an eventfd 
was signalled */ +static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr, + hwaddr addr, + uint64_t data, + unsigned size, + MemTxAttrs attrs) +{ + MemoryRegionIoeventfd ioeventfd = { + .addr = addrrange_make(int128_make64(addr), int128_make64(size)), + .data = data, + }; + unsigned i; + + for (i = 0; i < mr->ioeventfd_nb; i++) { + ioeventfd.match_data = mr->ioeventfds[i].match_data; + ioeventfd.e = mr->ioeventfds[i].e; + + if (memory_region_ioeventfd_equal(&ioeventfd, &mr->ioeventfds[i])) { + event_notifier_set(ioeventfd.e); + return true; + } + } + + return false; +} + +MemTxResult memory_region_dispatch_write(MemoryRegion *mr, + hwaddr addr, + uint64_t data, + MemOp op, + MemTxAttrs attrs) +{ + unsigned size = memop_size(op); + + if (mr->alias) { + return memory_region_dispatch_write(mr->alias, + mr->alias_offset + addr, + data, op, attrs); + } + if (!memory_region_access_valid(mr, addr, size, true, attrs)) { + unassigned_mem_write(mr, addr, data, size); + return MEMTX_DECODE_ERROR; + } + + adjust_endianness(mr, &data, op); + + if ((!kvm_eventfds_enabled()) && + memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) { + return MEMTX_OK; + } + + if (mr->ops->write) { + return access_with_adjusted_size(addr, &data, size, + mr->ops->impl.min_access_size, + mr->ops->impl.max_access_size, + memory_region_write_accessor, mr, + attrs); + } else { + return + access_with_adjusted_size(addr, &data, size, + mr->ops->impl.min_access_size, + mr->ops->impl.max_access_size, + memory_region_write_with_attrs_accessor, + mr, attrs); + } +} + +void memory_region_init_io(MemoryRegion *mr, + Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size) +{ + memory_region_init(mr, owner, name, size); + mr->ops = ops ? 
ops : &unassigned_mem_ops; + mr->opaque = opaque; + mr->terminates = true; +} + +void memory_region_init_ram_nomigrate(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); +} + +void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint32_t ram_flags, + Error **errp) +{ + Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc(size, ram_flags, mr, &err); + if (err) { + mr->size = int128_zero(); + object_unparent(OBJECT(mr)); + error_propagate(errp, err); + } +} + +void memory_region_init_resizeable_ram(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint64_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + Error **errp) +{ + Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized, + mr, &err); + if (err) { + mr->size = int128_zero(); + object_unparent(OBJECT(mr)); + error_propagate(errp, err); + } +} + +#ifdef CONFIG_POSIX +void memory_region_init_ram_from_file(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint64_t align, + uint32_t ram_flags, + const char *path, + ram_addr_t offset, + Error **errp) +{ + Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->readonly = !!(ram_flags & RAM_READONLY); + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->align = align; + mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, + offset, &err); + if (err) { + mr->size = int128_zero(); + object_unparent(OBJECT(mr)); + error_propagate(errp, err); + } +} 
+ +void memory_region_init_ram_from_fd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint32_t ram_flags, + int fd, + ram_addr_t offset, + Error **errp) +{ + Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->readonly = !!(ram_flags & RAM_READONLY); + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, + &err); + if (err) { + mr->size = int128_zero(); + object_unparent(OBJECT(mr)); + error_propagate(errp, err); + } +} +#endif + +void memory_region_init_ram_ptr(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + void *ptr) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + + /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */ + assert(ptr != NULL); + mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal); +} + +void memory_region_init_ram_device_ptr(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + void *ptr) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->ram_device = true; + mr->ops = &ram_device_mem_ops; + mr->opaque = mr; + mr->destructor = memory_region_destructor_ram; + + /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. 
*/ + assert(ptr != NULL); + mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal); +} + +void memory_region_init_alias(MemoryRegion *mr, + Object *owner, + const char *name, + MemoryRegion *orig, + hwaddr offset, + uint64_t size) +{ + memory_region_init(mr, owner, name, size); + mr->alias = orig; + mr->alias_offset = offset; +} + +void memory_region_init_rom_nomigrate(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); + mr->readonly = true; +} + +void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, + Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size, + Error **errp) +{ + Error *err = NULL; + assert(ops); + memory_region_init(mr, owner, name, size); + mr->ops = ops; + mr->opaque = opaque; + mr->terminates = true; + mr->rom_device = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc(size, 0, mr, &err); + if (err) { + mr->size = int128_zero(); + object_unparent(OBJECT(mr)); + error_propagate(errp, err); + } +} + +void memory_region_init_iommu(void *_iommu_mr, + size_t instance_size, + const char *mrtypename, + Object *owner, + const char *name, + uint64_t size) +{ + struct IOMMUMemoryRegion *iommu_mr; + struct MemoryRegion *mr; + + object_initialize(_iommu_mr, instance_size, mrtypename); + mr = MEMORY_REGION(_iommu_mr); + memory_region_do_init(mr, owner, name, size); + iommu_mr = IOMMU_MEMORY_REGION(mr); + mr->terminates = true; /* then re-forwards */ + QLIST_INIT(&iommu_mr->iommu_notify); + iommu_mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE; +} + +static void memory_region_finalize(Object *obj) +{ + MemoryRegion *mr = MEMORY_REGION(obj); + + assert(!mr->container); + + /* We know the region is not visible in any address space (it + * does not have a container and cannot be a root either because + * it has no references, so we can blindly clear 
mr->enabled. + * memory_region_set_enabled instead could trigger a transaction + * and cause an infinite loop. + */ + mr->enabled = false; + memory_region_transaction_begin(); + while (!QTAILQ_EMPTY(&mr->subregions)) { + MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions); + memory_region_del_subregion(mr, subregion); + } + memory_region_transaction_commit(); + + mr->destructor(mr); + memory_region_clear_coalescing(mr); + g_free((char *)mr->name); + g_free(mr->ioeventfds); +} + +Object *memory_region_owner(MemoryRegion *mr) +{ + Object *obj = OBJECT(mr); + return obj->parent; +} + +void memory_region_ref(MemoryRegion *mr) +{ + /* MMIO callbacks most likely will access data that belongs + * to the owner, hence the need to ref/unref the owner whenever + * the memory region is in use. + * + * The memory region is a child of its owner. As long as the + * owner doesn't call unparent itself on the memory region, + * ref-ing the owner will also keep the memory region alive. + * Memory regions without an owner are supposed to never go away; + * we do not ref/unref them because it slows down DMA sensibly. 
+ */ + if (mr && mr->owner) { + object_ref(mr->owner); + } +} + +void memory_region_unref(MemoryRegion *mr) +{ + if (mr && mr->owner) { + object_unref(mr->owner); + } +} + +uint64_t memory_region_size(MemoryRegion *mr) +{ + if (int128_eq(mr->size, int128_2_64())) { + return UINT64_MAX; + } + return int128_get64(mr->size); +} + +const char *memory_region_name(const MemoryRegion *mr) +{ + if (!mr->name) { + ((MemoryRegion *)mr)->name = + g_strdup(object_get_canonical_path_component(OBJECT(mr))); + } + return mr->name; +} + +bool memory_region_is_ram_device(MemoryRegion *mr) +{ + return mr->ram_device; +} + +bool memory_region_is_protected(MemoryRegion *mr) +{ + return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); +} + +uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) +{ + uint8_t mask = mr->dirty_log_mask; + RAMBlock *rb = mr->ram_block; + + if (global_dirty_tracking && ((rb && qemu_ram_is_migratable(rb)) || + memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + + if (tcg_enabled() && rb) { + /* TCG only cares about dirty memory logging for RAM, not IOMMU. 
*/ + mask |= (1 << DIRTY_MEMORY_CODE); + } + return mask; +} + +bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) +{ + return memory_region_get_dirty_log_mask(mr) & (1 << client); +} + +static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr, + Error **errp) +{ + IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; + IOMMUNotifier *iommu_notifier; + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + int ret = 0; + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + flags |= iommu_notifier->notifier_flags; + } + + if (flags != iommu_mr->iommu_notify_flags && imrc->notify_flag_changed) { + ret = imrc->notify_flag_changed(iommu_mr, + iommu_mr->iommu_notify_flags, + flags, errp); + } + + if (!ret) { + iommu_mr->iommu_notify_flags = flags; + } + return ret; +} + +int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, + uint64_t page_size_mask, + Error **errp) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + int ret = 0; + + if (imrc->iommu_set_page_size_mask) { + ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp); + } + return ret; +} + +int memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n, Error **errp) +{ + IOMMUMemoryRegion *iommu_mr; + int ret; + + if (mr->alias) { + return memory_region_register_iommu_notifier(mr->alias, n, errp); + } + + /* We need to register for at least one bitfield */ + iommu_mr = IOMMU_MEMORY_REGION(mr); + assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); + assert(n->start <= n->end); + assert(n->iommu_idx >= 0 && + n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr)); + + QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node); + ret = memory_region_update_iommu_notify_flags(iommu_mr, errp); + if (ret) { + QLIST_REMOVE(n, node); + } + return ret; +} + +uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr) +{ + IOMMUMemoryRegionClass *imrc = 
IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + + if (imrc->get_min_page_size) { + return imrc->get_min_page_size(iommu_mr); + } + return TARGET_PAGE_SIZE; +} + +void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) +{ + MemoryRegion *mr = MEMORY_REGION(iommu_mr); + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + hwaddr addr, granularity; + IOMMUTLBEntry iotlb; + + /* If the IOMMU has its own replay callback, override */ + if (imrc->replay) { + imrc->replay(iommu_mr, n); + return; + } + + granularity = memory_region_iommu_get_min_page_size(iommu_mr); + + for (addr = 0; addr < memory_region_size(mr); addr += granularity) { + iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx); + if (iotlb.perm != IOMMU_NONE) { + n->notify(n, &iotlb); + } + + /* if (2^64 - MR size) < granularity, it's possible to get an + * infinite loop here. This should catch such a wraparound */ + if ((addr + granularity) < addr) { + break; + } + } +} + +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n) +{ + IOMMUMemoryRegion *iommu_mr; + + if (mr->alias) { + memory_region_unregister_iommu_notifier(mr->alias, n); + return; + } + QLIST_REMOVE(n, node); + iommu_mr = IOMMU_MEMORY_REGION(mr); + memory_region_update_iommu_notify_flags(iommu_mr, NULL); +} + +void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + IOMMUTLBEvent *event) +{ + IOMMUTLBEntry *entry = &event->entry; + hwaddr entry_end = entry->iova + entry->addr_mask; + IOMMUTLBEntry tmp = *entry; + + if (event->type == IOMMU_NOTIFIER_UNMAP) { + assert(entry->perm == IOMMU_NONE); + } + + /* + * Skip the notification if the notification does not overlap + * with registered range. 
+ */ + if (notifier->start > entry_end || notifier->end < entry->iova) { + return; + } + + if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { + /* Crop (iova, addr_mask) to range */ + tmp.iova = MAX(tmp.iova, notifier->start); + tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; + } else { + assert(entry->iova >= notifier->start && entry_end <= notifier->end); + } + + if (event->type & notifier->notifier_flags) { + notifier->notify(notifier, &tmp); + } +} + +void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *notifier) +{ + IOMMUTLBEvent event; + + event.type = IOMMU_NOTIFIER_UNMAP; + event.entry.target_as = &address_space_memory; + event.entry.iova = notifier->start; + event.entry.perm = IOMMU_NONE; + event.entry.addr_mask = notifier->end - notifier->start; + + memory_region_notify_iommu_one(notifier, &event); +} + +void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, + IOMMUTLBEvent event) +{ + IOMMUNotifier *iommu_notifier; + + assert(memory_region_is_iommu(MEMORY_REGION(iommu_mr))); + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + if (iommu_notifier->iommu_idx == iommu_idx) { + memory_region_notify_iommu_one(iommu_notifier, &event); + } + } +} + +int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, + enum IOMMUMemoryRegionAttr attr, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + + if (!imrc->get_attr) { + return -EINVAL; + } + + return imrc->get_attr(iommu_mr, attr, data); +} + +int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr, + MemTxAttrs attrs) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + + if (!imrc->attrs_to_index) { + return 0; + } + + return imrc->attrs_to_index(iommu_mr, attrs); +} + +int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + + if (!imrc->num_indexes) { + return 1; + } + + return 
imrc->num_indexes(iommu_mr); +} + +RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) +{ + if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) { + return NULL; + } + return mr->rdm; +} + +void memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm) +{ + g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr)); + g_assert(!rdm || !mr->rdm); + mr->rdm = rdm; +} + +uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->get_min_granularity); + return rdmc->get_min_granularity(rdm, mr); +} + +bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *section) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->is_populated); + return rdmc->is_populated(rdm, section); +} + +int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamPopulate replay_fn, + void *opaque) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->replay_populated); + return rdmc->replay_populated(rdm, section, replay_fn, opaque); +} + +void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, + void *opaque) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->replay_discarded); + rdmc->replay_discarded(rdm, section, replay_fn, opaque); +} + +void ram_discard_manager_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->register_listener); + rdmc->register_listener(rdm, rdl, section); +} + +void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, + 
RamDiscardListener *rdl) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + + g_assert(rdmc->unregister_listener); + rdmc->unregister_listener(rdm, rdl); +} + +/* Called with rcu_read_lock held. */ +bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + ram_addr_t *ram_addr, bool *read_only, + bool *mr_has_discard_manager) +{ + MemoryRegion *mr; + hwaddr xlat; + hwaddr len = iotlb->addr_mask + 1; + bool writable = iotlb->perm & IOMMU_WO; + + if (mr_has_discard_manager) { + *mr_has_discard_manager = false; + } + /* + * The IOMMU TLB entry we have just covers translation through + * this IOMMU to its immediate target. We need to translate + * it the rest of the way through to memory. + */ + mr = address_space_translate(&address_space_memory, iotlb->translated_addr, + &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED); + if (!memory_region_is_ram(mr)) { + error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat); + return false; + } else if (memory_region_has_ram_discard_manager(mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); + MemoryRegionSection tmp = { + .mr = mr, + .offset_within_region = xlat, + .size = int128_make64(len), + }; + if (mr_has_discard_manager) { + *mr_has_discard_manager = true; + } + /* + * Malicious VMs can map memory into the IOMMU, which is expected + * to remain discarded. vfio will pin all pages, populating memory. + * Disallow that. vmstate priorities make sure any RamDiscardManager + * were already restored before IOMMUs are restored. + */ + if (!ram_discard_manager_is_populated(rdm, &tmp)) { + error_report("iommu map to discarded memory (e.g., unplugged via" + " virtio-mem): %" HWADDR_PRIx "", + iotlb->translated_addr); + return false; + } + } + + /* + * Translation truncates length to the IOMMU page size, + * check that it did not truncate too much. 
+ */ + if (len & iotlb->addr_mask) { + error_report("iommu has granularity incompatible with target AS"); + return false; + } + + if (vaddr) { + *vaddr = memory_region_get_ram_ptr(mr) + xlat; + } + + if (ram_addr) { + *ram_addr = memory_region_get_ram_addr(mr) + xlat; + } + + if (read_only) { + *read_only = !writable || mr->readonly; + } + + return true; +} + +void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) +{ + uint8_t mask = 1 << client; + uint8_t old_logging; + + assert(client == DIRTY_MEMORY_VGA); + old_logging = mr->vga_logging_count; + mr->vga_logging_count += log ? 1 : -1; + if (!!old_logging == !!mr->vga_logging_count) { + return; + } + + memory_region_transaction_begin(); + mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask); + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); +} + +void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr size) +{ + assert(mr->ram_block); + cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr, + size, + memory_region_get_dirty_log_mask(mr)); +} + +/* + * If memory region `mr' is NULL, do global sync. Otherwise, sync + * dirty bitmap for the specified memory region. + */ +static void memory_region_sync_dirty_bitmap(MemoryRegion *mr, bool last_stage) +{ + MemoryListener *listener; + AddressSpace *as; + FlatView *view; + FlatRange *fr; + + /* If the same address space has multiple log_sync listeners, we + * visit that address space's FlatView multiple times. But because + * log_sync listeners are rare, it's still cheaper than walking each + * address space once. 
+ */ + QTAILQ_FOREACH(listener, &memory_listeners, link) { + if (listener->log_sync) { + as = listener->address_space; + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + if (fr->dirty_log_mask && (!mr || fr->mr == mr)) { + MemoryRegionSection mrs = section_from_flat_range(fr, view); + listener->log_sync(listener, &mrs); + } + } + flatview_unref(view); + trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 0); + } else if (listener->log_sync_global) { + /* + * No matter whether MR is specified, what we can do here + * is to do a global sync, because we are not capable to + * sync in a finer granularity. + */ + listener->log_sync_global(listener, last_stage); + trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 1); + } + } +} + +void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start, + hwaddr len) +{ + MemoryRegionSection mrs; + MemoryListener *listener; + AddressSpace *as; + FlatView *view; + FlatRange *fr; + hwaddr sec_start, sec_end, sec_size; + + QTAILQ_FOREACH(listener, &memory_listeners, link) { + if (!listener->log_clear) { + continue; + } + as = listener->address_space; + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + if (!fr->dirty_log_mask || fr->mr != mr) { + /* + * Clear dirty bitmap operation only applies to those + * regions whose dirty logging is at least enabled + */ + continue; + } + + mrs = section_from_flat_range(fr, view); + + sec_start = MAX(mrs.offset_within_region, start); + sec_end = mrs.offset_within_region + int128_get64(mrs.size); + sec_end = MIN(sec_end, start + len); + + if (sec_start >= sec_end) { + /* + * If this memory region section has no intersection + * with the requested range, skip. 
+ */ + continue; + } + + /* Valid case; shrink the section if needed */ + mrs.offset_within_address_space += + sec_start - mrs.offset_within_region; + mrs.offset_within_region = sec_start; + sec_size = sec_end - sec_start; + mrs.size = int128_make64(sec_size); + listener->log_clear(listener, &mrs); + } + flatview_unref(view); + } +} + +DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, + hwaddr addr, + hwaddr size, + unsigned client) +{ + DirtyBitmapSnapshot *snapshot; + assert(mr->ram_block); + memory_region_sync_dirty_bitmap(mr, false); + snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client); + memory_global_after_dirty_log_sync(); + return snapshot; +} + +bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap, + hwaddr addr, hwaddr size) +{ + assert(mr->ram_block); + return cpu_physical_memory_snapshot_get_dirty(snap, + memory_region_get_ram_addr(mr) + addr, size); +} + +void memory_region_set_readonly(MemoryRegion *mr, bool readonly) +{ + if (mr->readonly != readonly) { + memory_region_transaction_begin(); + mr->readonly = readonly; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); + } +} + +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile) +{ + if (mr->nonvolatile != nonvolatile) { + memory_region_transaction_begin(); + mr->nonvolatile = nonvolatile; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); + } +} + +void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode) +{ + if (mr->romd_mode != romd_mode) { + memory_region_transaction_begin(); + mr->romd_mode = romd_mode; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); + } +} + +void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr size, unsigned client) +{ + assert(mr->ram_block); + cpu_physical_memory_test_and_clear_dirty( + memory_region_get_ram_addr(mr) + addr, size, 
client); +} + +int memory_region_get_fd(MemoryRegion *mr) +{ + RCU_READ_LOCK_GUARD(); + while (mr->alias) { + mr = mr->alias; + } + return mr->ram_block->fd; +} + +void *memory_region_get_ram_ptr(MemoryRegion *mr) +{ + uint64_t offset = 0; + + RCU_READ_LOCK_GUARD(); + while (mr->alias) { + offset += mr->alias_offset; + mr = mr->alias; + } + assert(mr->ram_block); + return qemu_map_ram_ptr(mr->ram_block, offset); +} + +MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset) +{ + RAMBlock *block; + + block = qemu_ram_block_from_host(ptr, false, offset); + if (!block) { + return NULL; + } + + return block->mr; +} + +ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr) +{ + return mr->ram_block ? mr->ram_block->offset : RAM_ADDR_INVALID; +} + +void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp) +{ + assert(mr->ram_block); + + qemu_ram_resize(mr->ram_block, newsize, errp); +} + +void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size) +{ + if (mr->ram_block) { + qemu_ram_msync(mr->ram_block, addr, size); + } +} + +void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) +{ + /* + * Might be extended case needed to cover + * different types of memory regions + */ + if (mr->dirty_log_mask) { + memory_region_msync(mr, addr, size); + } +} + +/* + * Call proper memory listeners about the change on the newly + * added/removed CoalescedMemoryRange. 
+ */ +static void memory_region_update_coalesced_range(MemoryRegion *mr, + CoalescedMemoryRange *cmr, + bool add) +{ + AddressSpace *as; + FlatView *view; + FlatRange *fr; + + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + if (fr->mr == mr) { + flat_range_coalesced_io_notify(fr, as, cmr, add); + } + } + flatview_unref(view); + } +} + +void memory_region_set_coalescing(MemoryRegion *mr) +{ + memory_region_clear_coalescing(mr); + memory_region_add_coalescing(mr, 0, int128_get64(mr->size)); +} + +void memory_region_add_coalescing(MemoryRegion *mr, + hwaddr offset, + uint64_t size) +{ + CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr)); + + cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size)); + QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link); + memory_region_update_coalesced_range(mr, cmr, true); + memory_region_set_flush_coalesced(mr); +} + +void memory_region_clear_coalescing(MemoryRegion *mr) +{ + CoalescedMemoryRange *cmr; + + if (QTAILQ_EMPTY(&mr->coalesced)) { + return; + } + + qemu_flush_coalesced_mmio_buffer(); + mr->flush_coalesced_mmio = false; + + while (!QTAILQ_EMPTY(&mr->coalesced)) { + cmr = QTAILQ_FIRST(&mr->coalesced); + QTAILQ_REMOVE(&mr->coalesced, cmr, link); + memory_region_update_coalesced_range(mr, cmr, false); + g_free(cmr); + } +} + +void memory_region_set_flush_coalesced(MemoryRegion *mr) +{ + mr->flush_coalesced_mmio = true; +} + +void memory_region_clear_flush_coalesced(MemoryRegion *mr) +{ + qemu_flush_coalesced_mmio_buffer(); + if (QTAILQ_EMPTY(&mr->coalesced)) { + mr->flush_coalesced_mmio = false; + } +} + +static bool userspace_eventfd_warning; + +void memory_region_add_eventfd(MemoryRegion *mr, + hwaddr addr, + unsigned size, + bool match_data, + uint64_t data, + EventNotifier *e) +{ + MemoryRegionIoeventfd mrfd = { + .addr.start = int128_make64(addr), + .addr.size = int128_make64(size), + .match_data = match_data, + .data = data, + 
.e = e, + }; + unsigned i; + + if (kvm_enabled() && (!(kvm_eventfds_enabled() || + userspace_eventfd_warning))) { + userspace_eventfd_warning = true; + error_report("Using eventfd without MMIO binding in KVM. " + "Suboptimal performance expected"); + } + + if (size) { + adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); + } + memory_region_transaction_begin(); + for (i = 0; i < mr->ioeventfd_nb; ++i) { + if (memory_region_ioeventfd_before(&mrfd, &mr->ioeventfds[i])) { + break; + } + } + ++mr->ioeventfd_nb; + mr->ioeventfds = g_realloc(mr->ioeventfds, + sizeof(*mr->ioeventfds) * mr->ioeventfd_nb); + memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i], + sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i)); + mr->ioeventfds[i] = mrfd; + ioeventfd_update_pending |= mr->enabled; + memory_region_transaction_commit(); +} + +void memory_region_del_eventfd(MemoryRegion *mr, + hwaddr addr, + unsigned size, + bool match_data, + uint64_t data, + EventNotifier *e) +{ + MemoryRegionIoeventfd mrfd = { + .addr.start = int128_make64(addr), + .addr.size = int128_make64(size), + .match_data = match_data, + .data = data, + .e = e, + }; + unsigned i; + + if (size) { + adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); + } + memory_region_transaction_begin(); + for (i = 0; i < mr->ioeventfd_nb; ++i) { + if (memory_region_ioeventfd_equal(&mrfd, &mr->ioeventfds[i])) { + break; + } + } + assert(i != mr->ioeventfd_nb); + memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1], + sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1))); + --mr->ioeventfd_nb; + mr->ioeventfds = g_realloc(mr->ioeventfds, + sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1); + ioeventfd_update_pending |= mr->enabled; + memory_region_transaction_commit(); +} + +static void memory_region_update_container_subregions(MemoryRegion *subregion) +{ + MemoryRegion *mr = subregion->container; + MemoryRegion *other; + + memory_region_transaction_begin(); + + memory_region_ref(subregion); + QTAILQ_FOREACH(other, 
&mr->subregions, subregions_link) { + if (subregion->priority >= other->priority) { + QTAILQ_INSERT_BEFORE(other, subregion, subregions_link); + goto done; + } + } + QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link); +done: + memory_region_update_pending |= mr->enabled && subregion->enabled; + memory_region_transaction_commit(); +} + +static void memory_region_add_subregion_common(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion) +{ + MemoryRegion *alias; + + assert(!subregion->container); + subregion->container = mr; + for (alias = subregion->alias; alias; alias = alias->alias) { + alias->mapped_via_alias++; + } + subregion->addr = offset; + memory_region_update_container_subregions(subregion); +} + +void memory_region_add_subregion(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion) +{ + subregion->priority = 0; + memory_region_add_subregion_common(mr, offset, subregion); +} + +void memory_region_add_subregion_overlap(MemoryRegion *mr, + hwaddr offset, + MemoryRegion *subregion, + int priority) +{ + subregion->priority = priority; + memory_region_add_subregion_common(mr, offset, subregion); +} + +void memory_region_del_subregion(MemoryRegion *mr, + MemoryRegion *subregion) +{ + MemoryRegion *alias; + + memory_region_transaction_begin(); + assert(subregion->container == mr); + subregion->container = NULL; + for (alias = subregion->alias; alias; alias = alias->alias) { + alias->mapped_via_alias--; + assert(alias->mapped_via_alias >= 0); + } + QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link); + memory_region_unref(subregion); + memory_region_update_pending |= mr->enabled && subregion->enabled; + memory_region_transaction_commit(); +} + +void memory_region_set_enabled(MemoryRegion *mr, bool enabled) +{ + if (enabled == mr->enabled) { + return; + } + memory_region_transaction_begin(); + mr->enabled = enabled; + memory_region_update_pending = true; + memory_region_transaction_commit(); +} + +void 
memory_region_set_size(MemoryRegion *mr, uint64_t size) +{ + Int128 s = int128_make64(size); + + if (size == UINT64_MAX) { + s = int128_2_64(); + } + if (int128_eq(s, mr->size)) { + return; + } + memory_region_transaction_begin(); + mr->size = s; + memory_region_update_pending = true; + memory_region_transaction_commit(); +} + +static void memory_region_readd_subregion(MemoryRegion *mr) +{ + MemoryRegion *container = mr->container; + + if (container) { + memory_region_transaction_begin(); + memory_region_ref(mr); + memory_region_del_subregion(container, mr); + memory_region_add_subregion_common(container, mr->addr, mr); + memory_region_unref(mr); + memory_region_transaction_commit(); + } +} + +void memory_region_set_address(MemoryRegion *mr, hwaddr addr) +{ + if (addr != mr->addr) { + mr->addr = addr; + memory_region_readd_subregion(mr); + } +} + +void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset) +{ + assert(mr->alias); + + if (offset == mr->alias_offset) { + return; + } + + memory_region_transaction_begin(); + mr->alias_offset = offset; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); +} + +uint64_t memory_region_get_alignment(const MemoryRegion *mr) +{ + return mr->align; +} + +static int cmp_flatrange_addr(const void *addr_, const void *fr_) +{ + const AddrRange *addr = addr_; + const FlatRange *fr = fr_; + + if (int128_le(addrrange_end(*addr), fr->addr.start)) { + return -1; + } else if (int128_ge(addr->start, addrrange_end(fr->addr))) { + return 1; + } + return 0; +} + +static FlatRange *flatview_lookup(FlatView *view, AddrRange addr) +{ + return bsearch(&addr, view->ranges, view->nr, + sizeof(FlatRange), cmp_flatrange_addr); +} + +bool memory_region_is_mapped(MemoryRegion *mr) +{ + return !!mr->container || mr->mapped_via_alias; +} + +/* Same as memory_region_find, but it does not add a reference to the + * returned region. It must be called from an RCU critical section. 
+ */ +static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr, + hwaddr addr, uint64_t size) +{ + MemoryRegionSection ret = { .mr = NULL }; + MemoryRegion *root; + AddressSpace *as; + AddrRange range; + FlatView *view; + FlatRange *fr; + + addr += mr->addr; + for (root = mr; root->container; ) { + root = root->container; + addr += root->addr; + } + + as = memory_region_to_address_space(root); + if (!as) { + return ret; + } + range = addrrange_make(int128_make64(addr), int128_make64(size)); + + view = address_space_to_flatview(as); + fr = flatview_lookup(view, range); + if (!fr) { + return ret; + } + + while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) { + --fr; + } + + ret.mr = fr->mr; + ret.fv = view; + range = addrrange_intersection(range, fr->addr); + ret.offset_within_region = fr->offset_in_region; + ret.offset_within_region += int128_get64(int128_sub(range.start, + fr->addr.start)); + ret.size = range.size; + ret.offset_within_address_space = int128_get64(range.start); + ret.readonly = fr->readonly; + ret.nonvolatile = fr->nonvolatile; + return ret; +} + +MemoryRegionSection memory_region_find(MemoryRegion *mr, + hwaddr addr, uint64_t size) +{ + MemoryRegionSection ret; + RCU_READ_LOCK_GUARD(); + ret = memory_region_find_rcu(mr, addr, size); + if (ret.mr) { + memory_region_ref(ret.mr); + } + return ret; +} + +MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s) +{ + MemoryRegionSection *tmp = g_new(MemoryRegionSection, 1); + + *tmp = *s; + if (tmp->mr) { + memory_region_ref(tmp->mr); + } + if (tmp->fv) { + bool ret = flatview_ref(tmp->fv); + + g_assert(ret); + } + return tmp; +} + +void memory_region_section_free_copy(MemoryRegionSection *s) +{ + if (s->fv) { + flatview_unref(s->fv); + } + if (s->mr) { + memory_region_unref(s->mr); + } + g_free(s); +} + +bool memory_region_present(MemoryRegion *container, hwaddr addr) +{ + MemoryRegion *mr; + + RCU_READ_LOCK_GUARD(); + mr = memory_region_find_rcu(container, 
addr, 1).mr; + return mr && mr != container; +} + +void memory_global_dirty_log_sync(bool last_stage) +{ + memory_region_sync_dirty_bitmap(NULL, last_stage); +} + +void memory_global_after_dirty_log_sync(void) +{ + MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward); +} + +/* + * Dirty track stop flags that are postponed due to VM being stopped. Should + * only be used within vmstate_change hook. + */ +static unsigned int postponed_stop_flags; +static VMChangeStateEntry *vmstate_change; +static void memory_global_dirty_log_stop_postponed_run(void); + +void memory_global_dirty_log_start(unsigned int flags) +{ + unsigned int old_flags; + + assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); + + if (vmstate_change) { + /* If there is postponed stop(), operate on it first */ + postponed_stop_flags &= ~flags; + memory_global_dirty_log_stop_postponed_run(); + } + + flags &= ~global_dirty_tracking; + if (!flags) { + return; + } + + old_flags = global_dirty_tracking; + global_dirty_tracking |= flags; + trace_global_dirty_changed(global_dirty_tracking); + + if (!old_flags) { + MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); + memory_region_transaction_begin(); + memory_region_update_pending = true; + memory_region_transaction_commit(); + } +} + +static void memory_global_dirty_log_do_stop(unsigned int flags) +{ + assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); + assert((global_dirty_tracking & flags) == flags); + global_dirty_tracking &= ~flags; + + trace_global_dirty_changed(global_dirty_tracking); + + if (!global_dirty_tracking) { + memory_region_transaction_begin(); + memory_region_update_pending = true; + memory_region_transaction_commit(); + MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); + } +} + +/* + * Execute the postponed dirty log stop operations if there is, then reset + * everything (including the flags and the vmstate change hook). 
+ */ +static void memory_global_dirty_log_stop_postponed_run(void) +{ + /* This must be called with the vmstate handler registered */ + assert(vmstate_change); + + /* Note: postponed_stop_flags can be cleared in log start routine */ + if (postponed_stop_flags) { + memory_global_dirty_log_do_stop(postponed_stop_flags); + postponed_stop_flags = 0; + } + + qemu_del_vm_change_state_handler(vmstate_change); + vmstate_change = NULL; +} + +static void memory_vm_change_state_handler(void *opaque, bool running, + RunState state) +{ + if (running) { + memory_global_dirty_log_stop_postponed_run(); + } +} + +void memory_global_dirty_log_stop(unsigned int flags) +{ + if (!runstate_is_running()) { + /* Postpone the dirty log stop, e.g., to when VM starts again */ + if (vmstate_change) { + /* Batch with previous postponed flags */ + postponed_stop_flags |= flags; + } else { + postponed_stop_flags = flags; + vmstate_change = qemu_add_vm_change_state_handler( + memory_vm_change_state_handler, NULL); + } + return; + } + + memory_global_dirty_log_do_stop(flags); +} + +static void listener_add_address_space(MemoryListener *listener, + AddressSpace *as) +{ + FlatView *view; + FlatRange *fr; + + if (listener->begin) { + listener->begin(listener); + } + if (global_dirty_tracking) { + if (listener->log_global_start) { + listener->log_global_start(listener); + } + } + + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + MemoryRegionSection section = section_from_flat_range(fr, view); + + if (listener->region_add) { + listener->region_add(listener, &section); + } + if (fr->dirty_log_mask && listener->log_start) { + listener->log_start(listener, &section, 0, fr->dirty_log_mask); + } + } + if (listener->commit) { + listener->commit(listener); + } + flatview_unref(view); +} + +static void listener_del_address_space(MemoryListener *listener, + AddressSpace *as) +{ + FlatView *view; + FlatRange *fr; + + if (listener->begin) { + listener->begin(listener); + } + view = 
address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + MemoryRegionSection section = section_from_flat_range(fr, view); + + if (fr->dirty_log_mask && listener->log_stop) { + listener->log_stop(listener, &section, fr->dirty_log_mask, 0); + } + if (listener->region_del) { + listener->region_del(listener, &section); + } + } + if (listener->commit) { + listener->commit(listener); + } + flatview_unref(view); +} + +void memory_listener_register(MemoryListener *listener, AddressSpace *as) +{ + MemoryListener *other = NULL; + + /* Only one of them can be defined for a listener */ + assert(!(listener->log_sync && listener->log_sync_global)); + + listener->address_space = as; + if (QTAILQ_EMPTY(&memory_listeners) + || listener->priority >= QTAILQ_LAST(&memory_listeners)->priority) { + QTAILQ_INSERT_TAIL(&memory_listeners, listener, link); + } else { + QTAILQ_FOREACH(other, &memory_listeners, link) { + if (listener->priority < other->priority) { + break; + } + } + QTAILQ_INSERT_BEFORE(other, listener, link); + } + + if (QTAILQ_EMPTY(&as->listeners) + || listener->priority >= QTAILQ_LAST(&as->listeners)->priority) { + QTAILQ_INSERT_TAIL(&as->listeners, listener, link_as); + } else { + QTAILQ_FOREACH(other, &as->listeners, link_as) { + if (listener->priority < other->priority) { + break; + } + } + QTAILQ_INSERT_BEFORE(other, listener, link_as); + } + + listener_add_address_space(listener, as); + + if (listener->eventfd_add || listener->eventfd_del) { + as->ioeventfd_notifiers++; + } +} + +void memory_listener_unregister(MemoryListener *listener) +{ + if (!listener->address_space) { + return; + } + + if (listener->eventfd_add || listener->eventfd_del) { + listener->address_space->ioeventfd_notifiers--; + } + + listener_del_address_space(listener, listener->address_space); + QTAILQ_REMOVE(&memory_listeners, listener, link); + QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as); + listener->address_space = NULL; +} + +void 
address_space_remove_listeners(AddressSpace *as) +{ + while (!QTAILQ_EMPTY(&as->listeners)) { + memory_listener_unregister(QTAILQ_FIRST(&as->listeners)); + } +} + +void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) +{ + memory_region_ref(root); + as->root = root; + as->current_map = NULL; + as->ioeventfd_nb = 0; + as->ioeventfds = NULL; + QTAILQ_INIT(&as->listeners); + QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); + as->name = g_strdup(name ? name : "anonymous"); + address_space_update_topology(as); + address_space_update_ioeventfds(as); +} + +static void do_address_space_destroy(AddressSpace *as) +{ + assert(QTAILQ_EMPTY(&as->listeners)); + + flatview_unref(as->current_map); + g_free(as->name); + g_free(as->ioeventfds); + memory_region_unref(as->root); +} + +void address_space_destroy(AddressSpace *as) +{ + MemoryRegion *root = as->root; + + /* Flush out anything from MemoryListeners listening in on this */ + memory_region_transaction_begin(); + as->root = NULL; + memory_region_transaction_commit(); + QTAILQ_REMOVE(&address_spaces, as, address_spaces_link); + + /* At this point, as->dispatch and as->current_map are dummy + * entries that the guest should never use. Wait for the old + * values to expire before freeing the data. 
+ */ + as->root = root; + call_rcu(as, do_address_space_destroy, rcu); +} + +static const char *memory_region_type(MemoryRegion *mr) +{ + if (mr->alias) { + return memory_region_type(mr->alias); + } + if (memory_region_is_ram_device(mr)) { + return "ramd"; + } else if (memory_region_is_romd(mr)) { + return "romd"; + } else if (memory_region_is_rom(mr)) { + return "rom"; + } else if (memory_region_is_ram(mr)) { + return "ram"; + } else { + return "i/o"; + } +} + +typedef struct MemoryRegionList MemoryRegionList; + +struct MemoryRegionList { + const MemoryRegion *mr; + QTAILQ_ENTRY(MemoryRegionList) mrqueue; +}; + +typedef QTAILQ_HEAD(, MemoryRegionList) MemoryRegionListHead; + +#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ + int128_sub((size), int128_one())) : 0) +#define MTREE_INDENT " " + +static void mtree_expand_owner(const char *label, Object *obj) +{ + DeviceState *dev = (DeviceState *) object_dynamic_cast(obj, TYPE_DEVICE); + + qemu_printf(" %s:{%s", label, dev ? 
"dev" : "obj"); + if (dev && dev->id) { + qemu_printf(" id=%s", dev->id); + } else { + char *canonical_path = object_get_canonical_path(obj); + if (canonical_path) { + qemu_printf(" path=%s", canonical_path); + g_free(canonical_path); + } else { + qemu_printf(" type=%s", object_get_typename(obj)); + } + } + qemu_printf("}"); +} + +static void mtree_print_mr_owner(const MemoryRegion *mr) +{ + Object *owner = mr->owner; + Object *parent = memory_region_owner((MemoryRegion *)mr); + + if (!owner && !parent) { + qemu_printf(" orphan"); + return; + } + if (owner) { + mtree_expand_owner("owner", owner); + } + if (parent && parent != owner) { + mtree_expand_owner("parent", parent); + } +} + +static void mtree_print_mr(const MemoryRegion *mr, unsigned int level, + hwaddr base, + MemoryRegionListHead *alias_print_queue, + bool owner, bool display_disabled) +{ + MemoryRegionList *new_ml, *ml, *next_ml; + MemoryRegionListHead submr_print_queue; + const MemoryRegion *submr; + unsigned int i; + hwaddr cur_start, cur_end; + + if (!mr) { + return; + } + + cur_start = base + mr->addr; + cur_end = cur_start + MR_SIZE(mr->size); + + /* + * Try to detect overflow of memory region. This should never + * happen normally. When it happens, we dump something to warn the + * user who is observing this. + */ + if (cur_start < base || cur_end < cur_start) { + qemu_printf("[DETECTED OVERFLOW!] 
"); + } + + if (mr->alias) { + bool found = false; + + /* check if the alias is already in the queue */ + QTAILQ_FOREACH(ml, alias_print_queue, mrqueue) { + if (ml->mr == mr->alias) { + found = true; + } + } + + if (!found) { + ml = g_new(MemoryRegionList, 1); + ml->mr = mr->alias; + QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue); + } + if (mr->enabled || display_disabled) { + for (i = 0; i < level; i++) { + qemu_printf(MTREE_INDENT); + } + qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx + " (prio %d, %s%s): alias %s @%s " HWADDR_FMT_plx + "-" HWADDR_FMT_plx "%s", + cur_start, cur_end, + mr->priority, + mr->nonvolatile ? "nv-" : "", + memory_region_type((MemoryRegion *)mr), + memory_region_name(mr), + memory_region_name(mr->alias), + mr->alias_offset, + mr->alias_offset + MR_SIZE(mr->size), + mr->enabled ? "" : " [disabled]"); + if (owner) { + mtree_print_mr_owner(mr); + } + qemu_printf("\n"); + } + } else { + if (mr->enabled || display_disabled) { + for (i = 0; i < level; i++) { + qemu_printf(MTREE_INDENT); + } + qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx + " (prio %d, %s%s): %s%s", + cur_start, cur_end, + mr->priority, + mr->nonvolatile ? "nv-" : "", + memory_region_type((MemoryRegion *)mr), + memory_region_name(mr), + mr->enabled ? 
"" : " [disabled]"); + if (owner) { + mtree_print_mr_owner(mr); + } + qemu_printf("\n"); + } + } + + QTAILQ_INIT(&submr_print_queue); + + QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) { + new_ml = g_new(MemoryRegionList, 1); + new_ml->mr = submr; + QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) { + if (new_ml->mr->addr < ml->mr->addr || + (new_ml->mr->addr == ml->mr->addr && + new_ml->mr->priority > ml->mr->priority)) { + QTAILQ_INSERT_BEFORE(ml, new_ml, mrqueue); + new_ml = NULL; + break; + } + } + if (new_ml) { + QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, mrqueue); + } + } + + QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) { + mtree_print_mr(ml->mr, level + 1, cur_start, + alias_print_queue, owner, display_disabled); + } + + QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, mrqueue, next_ml) { + g_free(ml); + } +} + +struct FlatViewInfo { + int counter; + bool dispatch_tree; + bool owner; + AccelClass *ac; +}; + +static void mtree_print_flatview(gpointer key, gpointer value, + gpointer user_data) +{ + FlatView *view = key; + GArray *fv_address_spaces = value; + struct FlatViewInfo *fvi = user_data; + FlatRange *range = &view->ranges[0]; + MemoryRegion *mr; + int n = view->nr; + int i; + AddressSpace *as; + + qemu_printf("FlatView #%d\n", fvi->counter); + ++fvi->counter; + + for (i = 0; i < fv_address_spaces->len; ++i) { + as = g_array_index(fv_address_spaces, AddressSpace*, i); + qemu_printf(" AS \"%s\", root: %s", + as->name, memory_region_name(as->root)); + if (as->root->alias) { + qemu_printf(", alias %s", memory_region_name(as->root->alias)); + } + qemu_printf("\n"); + } + + qemu_printf(" Root memory region: %s\n", + view->root ? 
memory_region_name(view->root) : "(none)"); + + if (n <= 0) { + qemu_printf(MTREE_INDENT "No rendered FlatView\n\n"); + return; + } + + while (n--) { + mr = range->mr; + if (range->offset_in_region) { + qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx + " (prio %d, %s%s): %s @" HWADDR_FMT_plx, + int128_get64(range->addr.start), + int128_get64(range->addr.start) + + MR_SIZE(range->addr.size), + mr->priority, + range->nonvolatile ? "nv-" : "", + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr), + range->offset_in_region); + } else { + qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx + " (prio %d, %s%s): %s", + int128_get64(range->addr.start), + int128_get64(range->addr.start) + + MR_SIZE(range->addr.size), + mr->priority, + range->nonvolatile ? "nv-" : "", + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr)); + } + if (fvi->owner) { + mtree_print_mr_owner(mr); + } + + if (fvi->ac) { + for (i = 0; i < fv_address_spaces->len; ++i) { + as = g_array_index(fv_address_spaces, AddressSpace*, i); + if (fvi->ac->has_memory(current_machine, as, + int128_get64(range->addr.start), + MR_SIZE(range->addr.size) + 1)) { + qemu_printf(" %s", fvi->ac->name); + } + } + } + qemu_printf("\n"); + range++; + } + +#if !defined(CONFIG_USER_ONLY) + if (fvi->dispatch_tree && view->root) { + mtree_print_dispatch(view->dispatch, view->root); + } +#endif + + qemu_printf("\n"); +} + +static gboolean mtree_info_flatview_free(gpointer key, gpointer value, + gpointer user_data) +{ + FlatView *view = key; + GArray *fv_address_spaces = value; + + g_array_unref(fv_address_spaces); + flatview_unref(view); + + return true; +} + +static void mtree_info_flatview(bool dispatch_tree, bool owner) +{ + struct FlatViewInfo fvi = { + .counter = 0, + .dispatch_tree = dispatch_tree, + .owner = owner, + }; + AddressSpace *as; + FlatView *view; + GArray *fv_address_spaces; + GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal); + 
AccelClass *ac = ACCEL_GET_CLASS(current_accel()); + + if (ac->has_memory) { + fvi.ac = ac; + } + + /* Gather all FVs in one table */ + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + view = address_space_get_flatview(as); + + fv_address_spaces = g_hash_table_lookup(views, view); + if (!fv_address_spaces) { + fv_address_spaces = g_array_new(false, false, sizeof(as)); + g_hash_table_insert(views, view, fv_address_spaces); + } + + g_array_append_val(fv_address_spaces, as); + } + + /* Print */ + g_hash_table_foreach(views, mtree_print_flatview, &fvi); + + /* Free */ + g_hash_table_foreach_remove(views, mtree_info_flatview_free, 0); + g_hash_table_unref(views); +} + +struct AddressSpaceInfo { + MemoryRegionListHead *ml_head; + bool owner; + bool disabled; +}; + +/* Returns negative value if a < b; zero if a = b; positive value if a > b. */ +static gint address_space_compare_name(gconstpointer a, gconstpointer b) +{ + const AddressSpace *as_a = a; + const AddressSpace *as_b = b; + + return g_strcmp0(as_a->name, as_b->name); +} + +static void mtree_print_as_name(gpointer data, gpointer user_data) +{ + AddressSpace *as = data; + + qemu_printf("address-space: %s\n", as->name); +} + +static void mtree_print_as(gpointer key, gpointer value, gpointer user_data) +{ + MemoryRegion *mr = key; + GSList *as_same_root_mr_list = value; + struct AddressSpaceInfo *asi = user_data; + + g_slist_foreach(as_same_root_mr_list, mtree_print_as_name, NULL); + mtree_print_mr(mr, 1, 0, asi->ml_head, asi->owner, asi->disabled); + qemu_printf("\n"); +} + +static gboolean mtree_info_as_free(gpointer key, gpointer value, + gpointer user_data) +{ + GSList *as_same_root_mr_list = value; + + g_slist_free(as_same_root_mr_list); + + return true; +} + +static void mtree_info_as(bool dispatch_tree, bool owner, bool disabled) +{ + MemoryRegionListHead ml_head; + MemoryRegionList *ml, *ml2; + AddressSpace *as; + GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal); + GSList 
*as_same_root_mr_list; + struct AddressSpaceInfo asi = { + .ml_head = &ml_head, + .owner = owner, + .disabled = disabled, + }; + + QTAILQ_INIT(&ml_head); + + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + /* Create hashtable, key=AS root MR, value = list of AS */ + as_same_root_mr_list = g_hash_table_lookup(views, as->root); + as_same_root_mr_list = g_slist_insert_sorted(as_same_root_mr_list, as, + address_space_compare_name); + g_hash_table_insert(views, as->root, as_same_root_mr_list); + } + + /* print address spaces */ + g_hash_table_foreach(views, mtree_print_as, &asi); + g_hash_table_foreach_remove(views, mtree_info_as_free, 0); + g_hash_table_unref(views); + + /* print aliased regions */ + QTAILQ_FOREACH(ml, &ml_head, mrqueue) { + qemu_printf("memory-region: %s\n", memory_region_name(ml->mr)); + mtree_print_mr(ml->mr, 1, 0, &ml_head, owner, disabled); + qemu_printf("\n"); + } + + QTAILQ_FOREACH_SAFE(ml, &ml_head, mrqueue, ml2) { + g_free(ml); + } +} + +void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled) +{ + if (flatview) { + mtree_info_flatview(dispatch_tree, owner); + } else { + mtree_info_as(dispatch_tree, owner, disabled); + } +} + +void memory_region_init_ram(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + Error *err = NULL; + + memory_region_init_ram_nomigrate(mr, owner, name, size, &err); + if (err) { + error_propagate(errp, err); + return; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. 
+ */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); +} + +void memory_region_init_rom(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + Error *err = NULL; + + memory_region_init_rom_nomigrate(mr, owner, name, size, &err); + if (err) { + error_propagate(errp, err); + return; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. + */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); +} + +void memory_region_init_rom_device(MemoryRegion *mr, + Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + Error *err = NULL; + + memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, + name, size, &err); + if (err) { + error_propagate(errp, err); + return; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. 
+ */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); +} + +/* + * Support system builds with CONFIG_FUZZ using a weak symbol and a stub for + * the fuzz_dma_read_cb callback + */ +#ifdef CONFIG_FUZZ +void __attribute__((weak)) fuzz_dma_read_cb(size_t addr, + size_t len, + MemoryRegion *mr) +{ +} +#endif + +static const TypeInfo memory_region_info = { + .parent = TYPE_OBJECT, + .name = TYPE_MEMORY_REGION, + .class_size = sizeof(MemoryRegionClass), + .instance_size = sizeof(MemoryRegion), + .instance_init = memory_region_initfn, + .instance_finalize = memory_region_finalize, +}; + +static const TypeInfo iommu_memory_region_info = { + .parent = TYPE_MEMORY_REGION, + .name = TYPE_IOMMU_MEMORY_REGION, + .class_size = sizeof(IOMMUMemoryRegionClass), + .instance_size = sizeof(IOMMUMemoryRegion), + .instance_init = iommu_memory_region_initfn, + .abstract = true, +}; + +static const TypeInfo ram_discard_manager_info = { + .parent = TYPE_INTERFACE, + .name = TYPE_RAM_DISCARD_MANAGER, + .class_size = sizeof(RamDiscardManagerClass), +}; + +static void memory_register_types(void) +{ + type_register_static(&memory_region_info); + type_register_static(&iommu_memory_region_info); + type_register_static(&ram_discard_manager_info); +} + +type_init(memory_register_types) diff --git a/system/memory_mapping.c b/system/memory_mapping.c new file mode 100644 index 0000000..d7f1d09 --- /dev/null +++ b/system/memory_mapping.c @@ -0,0 +1,377 @@ +/* + * QEMU memory mapping + * + * Copyright Fujitsu, Corp. 2011, 2012 + * + * Authors: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "sysemu/memory_mapping.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "hw/core/cpu.h" + +//#define DEBUG_GUEST_PHYS_REGION_ADD + +static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list, + MemoryMapping *mapping) +{ + MemoryMapping *p; + + QTAILQ_FOREACH(p, &list->head, next) { + if (p->phys_addr >= mapping->phys_addr) { + QTAILQ_INSERT_BEFORE(p, mapping, next); + return; + } + } + QTAILQ_INSERT_TAIL(&list->head, mapping, next); +} + +static void create_new_memory_mapping(MemoryMappingList *list, + hwaddr phys_addr, + hwaddr virt_addr, + ram_addr_t length) +{ + MemoryMapping *memory_mapping; + + memory_mapping = g_new(MemoryMapping, 1); + memory_mapping->phys_addr = phys_addr; + memory_mapping->virt_addr = virt_addr; + memory_mapping->length = length; + list->last_mapping = memory_mapping; + list->num++; + memory_mapping_list_add_mapping_sorted(list, memory_mapping); +} + +static inline bool mapping_contiguous(MemoryMapping *map, + hwaddr phys_addr, + hwaddr virt_addr) +{ + return phys_addr == map->phys_addr + map->length && + virt_addr == map->virt_addr + map->length; +} + +/* + * [map->phys_addr, map->phys_addr + map->length) and + * [phys_addr, phys_addr + length) have intersection? + */ +static inline bool mapping_have_same_region(MemoryMapping *map, + hwaddr phys_addr, + ram_addr_t length) +{ + return !(phys_addr + length < map->phys_addr || + phys_addr >= map->phys_addr + map->length); +} + +/* + * [map->phys_addr, map->phys_addr + map->length) and + * [phys_addr, phys_addr + length) have intersection. The virtual address in the + * intersection are the same? 
+ */ +static inline bool mapping_conflict(MemoryMapping *map, + hwaddr phys_addr, + hwaddr virt_addr) +{ + return virt_addr - map->virt_addr != phys_addr - map->phys_addr; +} + +/* + * [map->virt_addr, map->virt_addr + map->length) and + * [virt_addr, virt_addr + length) have intersection. And the physical address + * in the intersection are the same. + */ +static inline void mapping_merge(MemoryMapping *map, + hwaddr virt_addr, + ram_addr_t length) +{ + if (virt_addr < map->virt_addr) { + map->length += map->virt_addr - virt_addr; + map->virt_addr = virt_addr; + } + + if ((virt_addr + length) > + (map->virt_addr + map->length)) { + map->length = virt_addr + length - map->virt_addr; + } +} + +void memory_mapping_list_add_merge_sorted(MemoryMappingList *list, + hwaddr phys_addr, + hwaddr virt_addr, + ram_addr_t length) +{ + MemoryMapping *memory_mapping, *last_mapping; + + if (QTAILQ_EMPTY(&list->head)) { + create_new_memory_mapping(list, phys_addr, virt_addr, length); + return; + } + + last_mapping = list->last_mapping; + if (last_mapping) { + if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) { + last_mapping->length += length; + return; + } + } + + QTAILQ_FOREACH(memory_mapping, &list->head, next) { + if (mapping_contiguous(memory_mapping, phys_addr, virt_addr)) { + memory_mapping->length += length; + list->last_mapping = memory_mapping; + return; + } + + if (phys_addr + length < memory_mapping->phys_addr) { + /* create a new region before memory_mapping */ + break; + } + + if (mapping_have_same_region(memory_mapping, phys_addr, length)) { + if (mapping_conflict(memory_mapping, phys_addr, virt_addr)) { + continue; + } + + /* merge this region into memory_mapping */ + mapping_merge(memory_mapping, virt_addr, length); + list->last_mapping = memory_mapping; + return; + } + } + + /* this region can not be merged into any existed memory mapping. 
*/ + create_new_memory_mapping(list, phys_addr, virt_addr, length); +} + +void memory_mapping_list_free(MemoryMappingList *list) +{ + MemoryMapping *p, *q; + + QTAILQ_FOREACH_SAFE(p, &list->head, next, q) { + QTAILQ_REMOVE(&list->head, p, next); + g_free(p); + } + + list->num = 0; + list->last_mapping = NULL; +} + +void memory_mapping_list_init(MemoryMappingList *list) +{ + list->num = 0; + list->last_mapping = NULL; + QTAILQ_INIT(&list->head); +} + +void guest_phys_blocks_free(GuestPhysBlockList *list) +{ + GuestPhysBlock *p, *q; + + QTAILQ_FOREACH_SAFE(p, &list->head, next, q) { + QTAILQ_REMOVE(&list->head, p, next); + memory_region_unref(p->mr); + g_free(p); + } + list->num = 0; +} + +void guest_phys_blocks_init(GuestPhysBlockList *list) +{ + list->num = 0; + QTAILQ_INIT(&list->head); +} + +typedef struct GuestPhysListener { + GuestPhysBlockList *list; + MemoryListener listener; +} GuestPhysListener; + +static void guest_phys_block_add_section(GuestPhysListener *g, + MemoryRegionSection *section) +{ + const hwaddr target_start = section->offset_within_address_space; + const hwaddr target_end = target_start + int128_get64(section->size); + uint8_t *host_addr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + GuestPhysBlock *predecessor = NULL; + + /* find continuity in guest physical address space */ + if (!QTAILQ_EMPTY(&g->list->head)) { + hwaddr predecessor_size; + + predecessor = QTAILQ_LAST(&g->list->head); + predecessor_size = predecessor->target_end - predecessor->target_start; + + /* the memory API guarantees monotonically increasing traversal */ + g_assert(predecessor->target_end <= target_start); + + /* we want continuity in both guest-physical and host-virtual memory */ + if (predecessor->target_end < target_start || + predecessor->host_addr + predecessor_size != host_addr || + predecessor->mr != section->mr) { + predecessor = NULL; + } + } + + if (predecessor == NULL) { + /* isolated mapping, allocate it and add it to the 
list */ + GuestPhysBlock *block = g_malloc0(sizeof *block); + + block->target_start = target_start; + block->target_end = target_end; + block->host_addr = host_addr; + block->mr = section->mr; + memory_region_ref(section->mr); + + QTAILQ_INSERT_TAIL(&g->list->head, block, next); + ++g->list->num; + } else { + /* expand predecessor until @target_end; predecessor's start doesn't + * change + */ + predecessor->target_end = target_end; + } + +#ifdef DEBUG_GUEST_PHYS_REGION_ADD + fprintf(stderr, "%s: target_start=" HWADDR_FMT_plx " target_end=" + HWADDR_FMT_plx ": %s (count: %u)\n", __func__, target_start, + target_end, predecessor ? "joined" : "added", g->list->num); +#endif +} + +static int guest_phys_ram_populate_cb(MemoryRegionSection *section, + void *opaque) +{ + GuestPhysListener *g = opaque; + + guest_phys_block_add_section(g, section); + return 0; +} + +static void guest_phys_blocks_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + GuestPhysListener *g = container_of(listener, GuestPhysListener, listener); + + /* we only care about RAM */ + if (!memory_region_is_ram(section->mr) || + memory_region_is_ram_device(section->mr) || + memory_region_is_nonvolatile(section->mr)) { + return; + } + + /* for special sparse regions, only add populated parts */ + if (memory_region_has_ram_discard_manager(section->mr)) { + RamDiscardManager *rdm; + + rdm = memory_region_get_ram_discard_manager(section->mr); + ram_discard_manager_replay_populated(rdm, section, + guest_phys_ram_populate_cb, g); + return; + } + + guest_phys_block_add_section(g, section); +} + +void guest_phys_blocks_append(GuestPhysBlockList *list) +{ + GuestPhysListener g = { 0 }; + + g.list = list; + g.listener.region_add = &guest_phys_blocks_region_add; + memory_listener_register(&g.listener, &address_space_memory); + memory_listener_unregister(&g.listener); +} + +static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if 
(cpu_paging_enabled(cpu)) { + return cpu; + } + } + + return NULL; +} + +void qemu_get_guest_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks, + Error **errp) +{ + CPUState *cpu, *first_paging_enabled_cpu; + GuestPhysBlock *block; + ram_addr_t offset, length; + + first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); + if (first_paging_enabled_cpu) { + for (cpu = first_paging_enabled_cpu; cpu != NULL; + cpu = CPU_NEXT(cpu)) { + Error *err = NULL; + cpu_get_memory_mapping(cpu, list, &err); + if (err) { + error_propagate(errp, err); + return; + } + } + return; + } + + /* + * If the guest doesn't use paging, the virtual address is equal to physical + * address. + */ + QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) { + offset = block->target_start; + length = block->target_end - block->target_start; + create_new_memory_mapping(list, offset, offset, length); + } +} + +void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, + const GuestPhysBlockList *guest_phys_blocks) +{ + GuestPhysBlock *block; + + QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) { + create_new_memory_mapping(list, block->target_start, 0, + block->target_end - block->target_start); + } +} + +void memory_mapping_filter(MemoryMappingList *list, int64_t begin, + int64_t length) +{ + MemoryMapping *cur, *next; + + QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) { + if (cur->phys_addr >= begin + length || + cur->phys_addr + cur->length <= begin) { + QTAILQ_REMOVE(&list->head, cur, next); + g_free(cur); + list->num--; + continue; + } + + if (cur->phys_addr < begin) { + cur->length -= begin - cur->phys_addr; + if (cur->virt_addr) { + cur->virt_addr += begin - cur->phys_addr; + } + cur->phys_addr = begin; + } + + if (cur->phys_addr + cur->length > begin + length) { + cur->length -= cur->phys_addr + cur->length - begin - length; + } + } +} diff --git a/system/meson.build b/system/meson.build new file mode 100644 index 0000000..3a64dd8 --- 
/dev/null +++ b/system/meson.build @@ -0,0 +1,36 @@ +specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files( + 'arch_init.c', + 'ioport.c', + 'memory.c', + 'physmem.c', + 'watchpoint.c', +)]) + +system_ss.add(files( + 'balloon.c', + 'bootdevice.c', + 'cpus.c', + 'cpu-throttle.c', + 'cpu-timers.c', + 'datadir.c', + 'dirtylimit.c', + 'dma-helpers.c', + 'globals.c', + 'memory_mapping.c', + 'qdev-monitor.c', + 'qtest.c', + 'rtc.c', + 'runstate-action.c', + 'runstate-hmp-cmds.c', + 'runstate.c', + 'tpm-hmp-cmds.c', + 'vl.c', +), sdl, libpmem, libdaxctl) + +if have_tpm + system_ss.add(files('tpm.c')) +endif + +system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c')) +system_ss.add(when: fdt, if_true: files('device_tree.c')) +system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c')) diff --git a/system/physmem.c b/system/physmem.c new file mode 100644 index 0000000..edc3ed8 --- /dev/null +++ b/system/physmem.c @@ -0,0 +1,3796 @@ +/* + * RAM allocation and memory access + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "exec/page-vary.h" +#include "qapi/error.h" + +#include "qemu/cutils.h" +#include "qemu/cacheflush.h" +#include "qemu/hbitmap.h" +#include "qemu/madvise.h" + +#ifdef CONFIG_TCG +#include "hw/core/tcg-cpu-ops.h" +#endif /* CONFIG_TCG */ + +#include "exec/exec-all.h" +#include "exec/target_page.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/boards.h" +#include "hw/xen/xen.h" +#include "sysemu/kvm.h" +#include "sysemu/tcg.h" +#include "sysemu/qtest.h" +#include "qemu/timer.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qemu/qemu-print.h" +#include "qemu/log.h" +#include "qemu/memalign.h" +#include "exec/memory.h" +#include "exec/ioport.h" +#include "sysemu/dma.h" +#include "sysemu/hostmem.h" +#include "sysemu/hw_accel.h" +#include "sysemu/xen-mapcache.h" +#include "trace/trace-root.h" + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include +#endif + +#include "qemu/rcu_queue.h" +#include "qemu/main-loop.h" +#include "exec/translate-all.h" +#include "sysemu/replay.h" + +#include "exec/memory-internal.h" +#include "exec/ram_addr.h" + +#include "qemu/pmem.h" + +#include "migration/vmstate.h" + +#include "qemu/range.h" +#ifndef _WIN32 +#include "qemu/mmap-alloc.h" +#endif + +#include "monitor/monitor.h" + +#ifdef CONFIG_LIBDAXCTL +#include +#endif + +//#define DEBUG_SUBPAGE + +/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes + * are protected by the ramlist lock. + */ +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; + +static MemoryRegion *system_memory; +static MemoryRegion *system_io; + +AddressSpace address_space_io; +AddressSpace address_space_memory; + +static MemoryRegion io_mem_unassigned; + +typedef struct PhysPageEntry PhysPageEntry; + +struct PhysPageEntry { + /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. 
/*
 * PhysPageMap: storage behind the multi-level physical page map of an
 * AddressSpaceDispatch -- a growable array of page-table nodes plus the
 * table of MemoryRegionSections that leaf entries point into.
 */
typedef struct PhysPageMap {
    /* presumably lets the map be reclaimed through RCU -- TODO confirm */
    struct rcu_head rcu;

    /*
     * Bookkeeping for sections[]; presumably used / allocated counts that
     * mirror the nodes_nb / nodes_nb_alloc pair below -- TODO confirm.
     */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    /*
     * Number of entries of nodes[] handed out so far;
     * phys_map_node_alloc() returns the current value and increments it.
     */
    unsigned nodes_nb;
    /* Capacity of nodes[]; enlarged by phys_map_node_reserve(). */
    unsigned nodes_nb_alloc;
    /* Page-table nodes, each an array of P_L2_SIZE PhysPageEntry. */
    Node *nodes;
    /* Section table indexed by the ptr field of a leaf PhysPageEntry. */
    MemoryRegionSection *sections;
} PhysPageMap;
map->nodes_nb_alloc) { + map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes); + map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); + alloc_hint = map->nodes_nb_alloc; + } +} + +static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) +{ + unsigned i; + uint32_t ret; + PhysPageEntry e; + PhysPageEntry *p; + + ret = map->nodes_nb++; + p = map->nodes[ret]; + assert(ret != PHYS_MAP_NODE_NIL); + assert(ret != map->nodes_nb_alloc); + + e.skip = leaf ? 0 : 1; + e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL; + for (i = 0; i < P_L2_SIZE; ++i) { + memcpy(&p[i], &e, sizeof(e)); + } + return ret; +} + +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, + hwaddr *index, uint64_t *nb, uint16_t leaf, + int level) +{ + PhysPageEntry *p; + hwaddr step = (hwaddr)1 << (level * P_L2_BITS); + + if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) { + lp->ptr = phys_map_node_alloc(map, level == 0); + } + p = map->nodes[lp->ptr]; + lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)]; + + while (*nb && lp < &p[P_L2_SIZE]) { + if ((*index & (step - 1)) == 0 && *nb >= step) { + lp->skip = 0; + lp->ptr = leaf; + *index += step; + *nb -= step; + } else { + phys_page_set_level(map, lp, index, nb, leaf, level - 1); + } + ++lp; + } +} + +static void phys_page_set(AddressSpaceDispatch *d, + hwaddr index, uint64_t nb, + uint16_t leaf) +{ + /* Wildly overreserve - it doesn't matter much. */ + phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS); + + phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); +} + +/* Compact a non leaf page entry. Simply detect that the entry has a single child, + * and update our entry so we can skip it and go directly to the destination. 
/*
 * Compact a non-leaf page entry.
 *
 * @lp:    entry to compact; its ptr/skip fields are rewritten in place
 * @nodes: node array that lp->ptr (and the ptr of its children) index
 *
 * All children are compacted first.  If the node @lp points to then has
 * exactly one entry that is not PHYS_MAP_NODE_NIL, @lp is redirected to
 * that entry's target and its skip count is increased accordingly, so a
 * lookup jumps over the intermediate node.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
    /* P_L2_SIZE is out of range for an index: "no valid child seen yet" */
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    /* Nothing below this entry, so nothing to compact. */
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        /* Remember the (last) populated slot and count populated slots. */
        valid_ptr = i;
        valid++;
        /* Non-leaf child: compact it before looking at its skip below. */
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /*
     * Don't compress if it won't fit in the # of bits we have.
     * (skip is a 6-bit field; the first operand is a constant, so the
     * test is only live when there are at least 1 << 6 levels.)
     */
    if (P_L2_LEVELS >= (1 << 6) &&
        lp->skip + p[valid_ptr].skip >= (1 << 6)) {
        return;
    }

    /* Bypass the intermediate node: point straight at the child's target. */
    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        /* Account for the levels the child itself already skips. */
        lp->skip += p[valid_ptr].skip;
    }
}
+ */ + return int128_gethi(section->size) || + range_covers_byte(section->offset_within_address_space, + int128_getlo(section->size), addr); +} + +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) +{ + PhysPageEntry lp = d->phys_map, *p; + Node *nodes = d->map.nodes; + MemoryRegionSection *sections = d->map.sections; + hwaddr index = addr >> TARGET_PAGE_BITS; + int i; + + for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) { + if (lp.ptr == PHYS_MAP_NODE_NIL) { + return §ions[PHYS_SECTION_UNASSIGNED]; + } + p = nodes[lp.ptr]; + lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)]; + } + + if (section_covers_addr(§ions[lp.ptr], addr)) { + return §ions[lp.ptr]; + } else { + return §ions[PHYS_SECTION_UNASSIGNED]; + } +} + +/* Called from RCU critical section */ +static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, + hwaddr addr, + bool resolve_subpage) +{ + MemoryRegionSection *section = qatomic_read(&d->mru_section); + subpage_t *subpage; + + if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] || + !section_covers_addr(section, addr)) { + section = phys_page_find(d, addr); + qatomic_set(&d->mru_section, section); + } + if (resolve_subpage && section->mr->subpage) { + subpage = container_of(section->mr, subpage_t, iomem); + section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]]; + } + return section; +} + +/* Called from RCU critical section */ +static MemoryRegionSection * +address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat, + hwaddr *plen, bool resolve_subpage) +{ + MemoryRegionSection *section; + MemoryRegion *mr; + Int128 diff; + + section = address_space_lookup_region(d, addr, resolve_subpage); + /* Compute offset within MemoryRegionSection */ + addr -= section->offset_within_address_space; + + /* Compute offset within MemoryRegion */ + *xlat = addr + section->offset_within_region; + + mr = section->mr; + + /* MMIO registers can be expected 
to perform full-width accesses based only + * on their address, without considering adjacent registers that could + * decode to completely different MemoryRegions. When such registers + * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO + * regions overlap wildly. For this reason we cannot clamp the accesses + * here. + * + * If the length is small (as is the case for address_space_ldl/stl), + * everything works fine. If the incoming length is large, however, + * the caller really has to do the clamping through memory_access_size. + */ + if (memory_region_is_ram(mr)) { + diff = int128_sub(section->size, int128_make64(addr)); + *plen = int128_get64(int128_min(diff, int128_make64(*plen))); + } + return section; +} + +/** + * address_space_translate_iommu - translate an address through an IOMMU + * memory region and then through the target address space. + * + * @iommu_mr: the IOMMU memory region that we start the translation from + * @addr: the address to be translated through the MMU + * @xlat: the translated address offset within the destination memory region. + * It cannot be %NULL. + * @plen_out: valid read/write length of the translated address. It + * cannot be %NULL. + * @page_mask_out: page mask for the translated address. This + * should only be meaningful for IOMMU translated + * addresses, since there may be huge pages that this bit + * would tell. It can be %NULL if we don't care about it. + * @is_write: whether the translation operation is for write + * @is_mmio: whether this can be MMIO, set true if it can + * @target_as: the address space targeted by the IOMMU + * @attrs: transaction attributes + * + * This function is called from RCU critical section. It is the common + * part of flatview_do_translate and address_space_translate_cached. 
+ */ +static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr, + hwaddr *xlat, + hwaddr *plen_out, + hwaddr *page_mask_out, + bool is_write, + bool is_mmio, + AddressSpace **target_as, + MemTxAttrs attrs) +{ + MemoryRegionSection *section; + hwaddr page_mask = (hwaddr)-1; + + do { + hwaddr addr = *xlat; + IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr); + int iommu_idx = 0; + IOMMUTLBEntry iotlb; + + if (imrc->attrs_to_index) { + iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); + } + + iotlb = imrc->translate(iommu_mr, addr, is_write ? + IOMMU_WO : IOMMU_RO, iommu_idx); + + if (!(iotlb.perm & (1 << is_write))) { + goto unassigned; + } + + addr = ((iotlb.translated_addr & ~iotlb.addr_mask) + | (addr & iotlb.addr_mask)); + page_mask &= iotlb.addr_mask; + *plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1); + *target_as = iotlb.target_as; + + section = address_space_translate_internal( + address_space_to_dispatch(iotlb.target_as), addr, xlat, + plen_out, is_mmio); + + iommu_mr = memory_region_get_iommu(section->mr); + } while (unlikely(iommu_mr)); + + if (page_mask_out) { + *page_mask_out = page_mask; + } + return *section; + +unassigned: + return (MemoryRegionSection) { .mr = &io_mem_unassigned }; +} + +/** + * flatview_do_translate - translate an address in FlatView + * + * @fv: the flat view that we want to translate on + * @addr: the address to be translated in above address space + * @xlat: the translated address offset within memory region. It + * cannot be @NULL. + * @plen_out: valid read/write length of the translated address. It + * can be @NULL when we don't care about it. + * @page_mask_out: page mask for the translated address. This + * should only be meaningful for IOMMU translated + * addresses, since there may be huge pages that this bit + * would tell. It can be @NULL if we don't care about it. 
+ * @is_write: whether the translation operation is for write + * @is_mmio: whether this can be MMIO, set true if it can + * @target_as: the address space targeted by the IOMMU + * @attrs: memory transaction attributes + * + * This function is called from RCU critical section + */ +static MemoryRegionSection flatview_do_translate(FlatView *fv, + hwaddr addr, + hwaddr *xlat, + hwaddr *plen_out, + hwaddr *page_mask_out, + bool is_write, + bool is_mmio, + AddressSpace **target_as, + MemTxAttrs attrs) +{ + MemoryRegionSection *section; + IOMMUMemoryRegion *iommu_mr; + hwaddr plen = (hwaddr)(-1); + + if (!plen_out) { + plen_out = &plen; + } + + section = address_space_translate_internal( + flatview_to_dispatch(fv), addr, xlat, + plen_out, is_mmio); + + iommu_mr = memory_region_get_iommu(section->mr); + if (unlikely(iommu_mr)) { + return address_space_translate_iommu(iommu_mr, xlat, + plen_out, page_mask_out, + is_write, is_mmio, + target_as, attrs); + } + if (page_mask_out) { + /* Not behind an IOMMU, use default page size. */ + *page_mask_out = ~TARGET_PAGE_MASK; + } + + return *section; +} + +/* Called from RCU critical section */ +IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr, + bool is_write, MemTxAttrs attrs) +{ + MemoryRegionSection section; + hwaddr xlat, page_mask; + + /* + * This can never be MMIO, and we don't really care about plen, + * but page mask. + */ + section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat, + NULL, &page_mask, is_write, false, &as, + attrs); + + /* Illegal translation */ + if (section.mr == &io_mem_unassigned) { + goto iotlb_fail; + } + + /* Convert memory region offset into address space offset */ + xlat += section.offset_within_address_space - + section.offset_within_region; + + return (IOMMUTLBEntry) { + .target_as = as, + .iova = addr & ~page_mask, + .translated_addr = xlat & ~page_mask, + .addr_mask = page_mask, + /* IOTLBs are for DMAs, and DMA only allows on RAMs. 
*/ + .perm = IOMMU_RW, + }; + +iotlb_fail: + return (IOMMUTLBEntry) {0}; +} + +/* Called from RCU critical section */ +MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, + hwaddr *plen, bool is_write, + MemTxAttrs attrs) +{ + MemoryRegion *mr; + MemoryRegionSection section; + AddressSpace *as = NULL; + + /* This can be MMIO, so setup MMIO bit. */ + section = flatview_do_translate(fv, addr, xlat, plen, NULL, + is_write, true, &as, attrs); + mr = section.mr; + + if (xen_enabled() && memory_access_is_direct(mr, is_write)) { + hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr; + *plen = MIN(page, *plen); + } + + return mr; +} + +typedef struct TCGIOMMUNotifier { + IOMMUNotifier n; + MemoryRegion *mr; + CPUState *cpu; + int iommu_idx; + bool active; +} TCGIOMMUNotifier; + +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n); + + if (!notifier->active) { + return; + } + tlb_flush(notifier->cpu); + notifier->active = false; + /* We leave the notifier struct on the list to avoid reallocating it later. + * Generally the number of IOMMUs a CPU deals with will be small. + * In any case we can't unregister the iommu notifier from a notify + * callback. + */ +} + +static void tcg_register_iommu_notifier(CPUState *cpu, + IOMMUMemoryRegion *iommu_mr, + int iommu_idx) +{ + /* Make sure this CPU has an IOMMU notifier registered for this + * IOMMU/IOMMU index combination, so that we can flush its TLB + * when the IOMMU tells us the mappings we've cached have changed. 
+ */ + MemoryRegion *mr = MEMORY_REGION(iommu_mr); + TCGIOMMUNotifier *notifier = NULL; + int i; + + for (i = 0; i < cpu->iommu_notifiers->len; i++) { + notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); + if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) { + break; + } + } + if (i == cpu->iommu_notifiers->len) { + /* Not found, add a new entry at the end of the array */ + cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1); + notifier = g_new0(TCGIOMMUNotifier, 1); + g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier; + + notifier->mr = mr; + notifier->iommu_idx = iommu_idx; + notifier->cpu = cpu; + /* Rather than trying to register interest in the specific part + * of the iommu's address space that we've accessed and then + * expand it later as subsequent accesses touch more of it, we + * just register interest in the whole thing, on the assumption + * that iommu reconfiguration will be rare. + */ + iommu_notifier_init(&notifier->n, + tcg_iommu_unmap_notify, + IOMMU_NOTIFIER_UNMAP, + 0, + HWADDR_MAX, + iommu_idx); + memory_region_register_iommu_notifier(notifier->mr, &notifier->n, + &error_fatal); + } + + if (!notifier->active) { + notifier->active = true; + } +} + +void tcg_iommu_free_notifier_list(CPUState *cpu) +{ + /* Destroy the CPU's notifier list */ + int i; + TCGIOMMUNotifier *notifier; + + for (i = 0; i < cpu->iommu_notifiers->len; i++) { + notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); + memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n); + g_free(notifier); + } + g_array_free(cpu->iommu_notifiers, true); +} + +void tcg_iommu_init_notifier_list(CPUState *cpu) +{ + cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *)); +} + +/* Called from RCU critical section */ +MemoryRegionSection * +address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr, + hwaddr *xlat, hwaddr *plen, + MemTxAttrs attrs, int *prot) +{ +
MemoryRegionSection *section; + IOMMUMemoryRegion *iommu_mr; + IOMMUMemoryRegionClass *imrc; + IOMMUTLBEntry iotlb; + int iommu_idx; + hwaddr addr = orig_addr; + AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; + + for (;;) { + section = address_space_translate_internal(d, addr, &addr, plen, false); + + iommu_mr = memory_region_get_iommu(section->mr); + if (!iommu_mr) { + break; + } + + imrc = memory_region_get_iommu_class_nocheck(iommu_mr); + + iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); + tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx); + /* We need all the permissions, so pass IOMMU_NONE so the IOMMU + * doesn't short-cut its translation table walk. + */ + iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx); + addr = ((iotlb.translated_addr & ~iotlb.addr_mask) + | (addr & iotlb.addr_mask)); + /* Update the caller's prot bits to remove permissions the IOMMU + * is giving us a failure response for. If we get down to no + * permissions left at all we can give up now. + */ + if (!(iotlb.perm & IOMMU_RO)) { + *prot &= ~(PAGE_READ | PAGE_EXEC); + } + if (!(iotlb.perm & IOMMU_WO)) { + *prot &= ~PAGE_WRITE; + } + + if (!*prot) { + goto translate_fail; + } + + d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as)); + } + + assert(!memory_region_is_iommu(section->mr)); + *xlat = addr; + return section; + +translate_fail: + /* + * We should be given a page-aligned address -- certainly + * tlb_set_page_with_attrs() does so. The page offset of xlat + * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0. + * The page portion of xlat will be logged by memory_region_access_valid() + * when this memory access is rejected, so use the original untranslated + * physical address. 
+ */ + assert((orig_addr & ~TARGET_PAGE_MASK) == 0); + *xlat = orig_addr; + return &d->map.sections[PHYS_SECTION_UNASSIGNED]; +} + +void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr) +{ + CPUAddressSpace *newas; + AddressSpace *as = g_new0(AddressSpace, 1); + char *as_name; + + assert(mr); + as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index); + address_space_init(as, mr, as_name); + g_free(as_name); + + /* Target code should have set num_ases before calling us */ + assert(asidx < cpu->num_ases); + + if (asidx == 0) { + /* address space 0 gets the convenience alias */ + cpu->as = as; + } + + /* KVM cannot currently support multiple address spaces. */ + assert(asidx == 0 || !kvm_enabled()); + + if (!cpu->cpu_ases) { + cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); + } + + newas = &cpu->cpu_ases[asidx]; + newas->cpu = cpu; + newas->as = as; + if (tcg_enabled()) { + newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync; + newas->tcg_as_listener.commit = tcg_commit; + newas->tcg_as_listener.name = "tcg"; + memory_listener_register(&newas->tcg_as_listener, as); + } +} + +AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) +{ + /* Return the AddressSpace corresponding to the specified index */ + return cpu->cpu_ases[asidx].as; +} + +/* Called from RCU critical section */ +static RAMBlock *qemu_get_ram_block(ram_addr_t addr) +{ + RAMBlock *block; + + block = qatomic_rcu_read(&ram_list.mru_block); + if (block && addr - block->offset < block->max_length) { + return block; + } + RAMBLOCK_FOREACH(block) { + if (addr - block->offset < block->max_length) { + goto found; + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + +found: + /* It is safe to write mru_block outside the iothread lock. 
This + * is what happens: + * + * mru_block = xxx + * rcu_read_unlock() + * xxx removed from list + * rcu_read_lock() + * read mru_block + * mru_block = NULL; + * call_rcu(reclaim_ramblock, xxx); + * rcu_read_unlock() + * + * qatomic_rcu_set is not needed here. The block was already published + * when it was placed into the list. Here we're just making an extra + * copy of the pointer. + */ + ram_list.mru_block = block; + return block; +} + +static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) +{ + CPUState *cpu; + ram_addr_t start1; + RAMBlock *block; + ram_addr_t end; + + assert(tcg_enabled()); + end = TARGET_PAGE_ALIGN(start + length); + start &= TARGET_PAGE_MASK; + + RCU_READ_LOCK_GUARD(); + block = qemu_get_ram_block(start); + assert(block == qemu_get_ram_block(end - 1)); + start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); + CPU_FOREACH(cpu) { + tlb_reset_dirty(cpu, start1, length); + } +} + +/* Note: start and end must be within the same ram block. 
*/ +bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client) +{ + DirtyMemoryBlocks *blocks; + unsigned long end, page, start_page; + bool dirty = false; + RAMBlock *ramblock; + uint64_t mr_offset, mr_size; + + if (length == 0) { + return false; + } + + end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; + start_page = start >> TARGET_PAGE_BITS; + page = start_page; + + WITH_RCU_READ_LOCK_GUARD() { + blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); + ramblock = qemu_get_ram_block(start); + /* Range sanity check on the ramblock */ + assert(start >= ramblock->offset && + start + length <= ramblock->offset + ramblock->used_length); + + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - offset); + + dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], + offset, num); + page += num; + } + + mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset; + mr_size = (end - start_page) << TARGET_PAGE_BITS; + memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); + } + + if (dirty && tcg_enabled()) { + tlb_reset_dirty_range_all(start, length); + } + + return dirty; +} + +DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty + (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client) +{ + DirtyMemoryBlocks *blocks; + ram_addr_t start = memory_region_get_ram_addr(mr) + offset; + unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); + ram_addr_t first = QEMU_ALIGN_DOWN(start, align); + ram_addr_t last = QEMU_ALIGN_UP(start + length, align); + DirtyBitmapSnapshot *snap; + unsigned long page, end, dest; + + snap = g_malloc0(sizeof(*snap) + + ((last - first) >> (TARGET_PAGE_BITS + 3))); + snap->start = first; + snap->end = last; + + page = first >> TARGET_PAGE_BITS; + end = last >> TARGET_PAGE_BITS; + dest 
= 0; + + WITH_RCU_READ_LOCK_GUARD() { + blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]); + + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long ofs = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - ofs); + + assert(QEMU_IS_ALIGNED(ofs, (1 << BITS_PER_LEVEL))); + assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); + ofs >>= BITS_PER_LEVEL; + + bitmap_copy_and_clear_atomic(snap->dirty + dest, + blocks->blocks[idx] + ofs, + num); + page += num; + dest += num >> BITS_PER_LEVEL; + } + } + + if (tcg_enabled()) { + tlb_reset_dirty_range_all(start, length); + } + + memory_region_clear_dirty_bitmap(mr, offset, length); + + return snap; +} + +bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, + ram_addr_t start, + ram_addr_t length) +{ + unsigned long page, end; + + assert(start >= snap->start); + assert(start + length <= snap->end); + + end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS; + page = (start - snap->start) >> TARGET_PAGE_BITS; + + while (page < end) { + if (test_bit(page, snap->dirty)) { + return true; + } + page++; + } + return false; +} + +/* Called from RCU critical section */ +hwaddr memory_region_section_get_iotlb(CPUState *cpu, + MemoryRegionSection *section) +{ + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; +} + +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section); +static subpage_t *subpage_init(FlatView *fv, hwaddr base); + +static uint16_t phys_section_add(PhysPageMap *map, + MemoryRegionSection *section) +{ + /* The physical section number is ORed with a page-aligned + * pointer to produce the iotlb entries. Thus it should + * never overflow into the page-aligned value. 
+ */ + assert(map->sections_nb < TARGET_PAGE_SIZE); + + if (map->sections_nb == map->sections_nb_alloc) { + map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16); + map->sections = g_renew(MemoryRegionSection, map->sections, + map->sections_nb_alloc); + } + map->sections[map->sections_nb] = *section; + memory_region_ref(section->mr); + return map->sections_nb++; +} + +static void phys_section_destroy(MemoryRegion *mr) +{ + bool have_sub_page = mr->subpage; + + memory_region_unref(mr); + + if (have_sub_page) { + subpage_t *subpage = container_of(mr, subpage_t, iomem); + object_unref(OBJECT(&subpage->iomem)); + g_free(subpage); + } +} + +static void phys_sections_free(PhysPageMap *map) +{ + while (map->sections_nb > 0) { + MemoryRegionSection *section = &map->sections[--map->sections_nb]; + phys_section_destroy(section->mr); + } + g_free(map->sections); + g_free(map->nodes); +} + +static void register_subpage(FlatView *fv, MemoryRegionSection *section) +{ + AddressSpaceDispatch *d = flatview_to_dispatch(fv); + subpage_t *subpage; + hwaddr base = section->offset_within_address_space + & TARGET_PAGE_MASK; + MemoryRegionSection *existing = phys_page_find(d, base); + MemoryRegionSection subsection = { + .offset_within_address_space = base, + .size = int128_make64(TARGET_PAGE_SIZE), + }; + hwaddr start, end; + + assert(existing->mr->subpage || existing->mr == &io_mem_unassigned); + + if (!(existing->mr->subpage)) { + subpage = subpage_init(fv, base); + subsection.fv = fv; + subsection.mr = &subpage->iomem; + phys_page_set(d, base >> TARGET_PAGE_BITS, 1, + phys_section_add(&d->map, &subsection)); + } else { + subpage = container_of(existing->mr, subpage_t, iomem); + } + start = section->offset_within_address_space & ~TARGET_PAGE_MASK; + end = start + int128_get64(section->size) - 1; + subpage_register(subpage, start, end, + phys_section_add(&d->map, section)); +} + + +static void register_multipage(FlatView *fv, + MemoryRegionSection *section) +{ + 
AddressSpaceDispatch *d = flatview_to_dispatch(fv); + hwaddr start_addr = section->offset_within_address_space; + uint16_t section_index = phys_section_add(&d->map, section); + uint64_t num_pages = int128_get64(int128_rshift(section->size, + TARGET_PAGE_BITS)); + + assert(num_pages); + phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index); +} + +/* + * The range in *section* may look like this: + * + * |s|PPPPPPP|s| + * + * where s stands for subpage and P for page. + */ +void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section) +{ + MemoryRegionSection remain = *section; + Int128 page_size = int128_make64(TARGET_PAGE_SIZE); + + /* register first subpage */ + if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) { + uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space) + - remain.offset_within_address_space; + + MemoryRegionSection now = remain; + now.size = int128_min(int128_make64(left), now.size); + register_subpage(fv, &now); + if (int128_eq(remain.size, now.size)) { + return; + } + remain.size = int128_sub(remain.size, now.size); + remain.offset_within_address_space += int128_get64(now.size); + remain.offset_within_region += int128_get64(now.size); + } + + /* register whole pages */ + if (int128_ge(remain.size, page_size)) { + MemoryRegionSection now = remain; + now.size = int128_and(now.size, int128_neg(page_size)); + register_multipage(fv, &now); + if (int128_eq(remain.size, now.size)) { + return; + } + remain.size = int128_sub(remain.size, now.size); + remain.offset_within_address_space += int128_get64(now.size); + remain.offset_within_region += int128_get64(now.size); + } + + /* register last subpage */ + register_subpage(fv, &remain); +} + +void qemu_flush_coalesced_mmio_buffer(void) +{ + if (kvm_enabled()) + kvm_flush_coalesced_mmio_buffer(); +} + +void qemu_mutex_lock_ramlist(void) +{ + qemu_mutex_lock(&ram_list.mutex); +} + +void qemu_mutex_unlock_ramlist(void) +{ + 
qemu_mutex_unlock(&ram_list.mutex); +} + +GString *ram_block_format(void) +{ + RAMBlock *block; + char *psize; + GString *buf = g_string_new(""); + + RCU_READ_LOCK_GUARD(); + g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n", + "Block Name", "PSize", "Offset", "Used", "Total", + "HVA", "RO"); + + RAMBLOCK_FOREACH(block) { + psize = size_to_str(block->page_size); + g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64 + " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n", + block->idstr, psize, + (uint64_t)block->offset, + (uint64_t)block->used_length, + (uint64_t)block->max_length, + (uint64_t)(uintptr_t)block->host, + block->mr->readonly ? "ro" : "rw"); + + g_free(psize); + } + + return buf; +} + +static int find_min_backend_pagesize(Object *obj, void *opaque) +{ + long *hpsize_min = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + long hpsize = host_memory_backend_pagesize(backend); + + if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) { + *hpsize_min = hpsize; + } + } + + return 0; +} + +static int find_max_backend_pagesize(Object *obj, void *opaque) +{ + long *hpsize_max = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + long hpsize = host_memory_backend_pagesize(backend); + + if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) { + *hpsize_max = hpsize; + } + } + + return 0; +} + +/* + * TODO: We assume right now that all mapped host memory backends are + * used as RAM, however some might be used for different purposes. 
+ */ +long qemu_minrampagesize(void) +{ + long hpsize = LONG_MAX; + Object *memdev_root = object_resolve_path("/objects", NULL); + + object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize); + return hpsize; +} + +long qemu_maxrampagesize(void) +{ + long pagesize = 0; + Object *memdev_root = object_resolve_path("/objects", NULL); + + object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize); + return pagesize; +} + +#ifdef CONFIG_POSIX +static int64_t get_file_size(int fd) +{ + int64_t size; +#if defined(__linux__) + struct stat st; + + if (fstat(fd, &st) < 0) { + return -errno; + } + + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + g_autofree char *subsystem_path = NULL; + g_autofree char *subsystem = NULL; + + subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", + major(st.st_rdev), minor(st.st_rdev)); + subsystem = g_file_read_link(subsystem_path, NULL); + + if (subsystem && g_str_has_suffix(subsystem, "/dax")) { + g_autofree char *size_path = NULL; + g_autofree char *size_str = NULL; + + size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", + major(st.st_rdev), minor(st.st_rdev)); + + if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { + return g_ascii_strtoll(size_str, NULL, 0); + } + } + } +#endif /* defined(__linux__) */ + + /* st.st_size may be zero for special files yet lseek(2) works */ + size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} + +static int64_t get_file_align(int fd) +{ + int64_t align = -1; +#if defined(__linux__) && defined(CONFIG_LIBDAXCTL) + struct stat st; + + if (fstat(fd, &st) < 0) { + return -errno; + } + + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + g_autofree char *path = NULL; + g_autofree char *rpath = NULL; + struct daxctl_ctx *ctx; + struct daxctl_region *region; + int rc = 0; + + path = g_strdup_printf("/sys/dev/char/%d:%d", + major(st.st_rdev), minor(st.st_rdev)); + 
rpath = realpath(path, NULL); + if (!rpath) { + return -errno; + } + + rc = daxctl_new(&ctx); + if (rc) { + return -1; + } + + daxctl_region_foreach(ctx, region) { + if (strstr(rpath, daxctl_region_get_path(region))) { + align = daxctl_region_get_align(region); + break; + } + } + daxctl_unref(ctx); + } +#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */ + + return align; +} + +static int file_ram_open(const char *path, + const char *region_name, + bool readonly, + bool *created) +{ + char *filename; + char *sanitized_name; + char *c; + int fd = -1; + + *created = false; + for (;;) { + fd = open(path, readonly ? O_RDONLY : O_RDWR); + if (fd >= 0) { + /* + * open(O_RDONLY) won't fail with EISDIR. Check manually if we + * opened a directory and fail similarly to how we fail ENOENT + * in readonly mode. Note that mkstemp() would imply O_RDWR. + */ + if (readonly) { + struct stat file_stat; + + if (fstat(fd, &file_stat)) { + close(fd); + if (errno == EINTR) { + continue; + } + return -errno; + } else if (S_ISDIR(file_stat.st_mode)) { + close(fd); + return -EISDIR; + } + } + /* @path names an existing file, use it */ + break; + } + if (errno == ENOENT) { + if (readonly) { + /* Refuse to create new, readonly files. */ + return -ENOENT; + } + /* @path names a file that doesn't exist, create it */ + fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd >= 0) { + *created = true; + break; + } + } else if (errno == EISDIR) { + /* @path names a directory, create a file there */ + /* Make name safe to use with mkstemp by replacing '/' with '_'. 
*/ + sanitized_name = g_strdup(region_name); + for (c = sanitized_name; *c != '\0'; c++) { + if (*c == '/') { + *c = '_'; + } + } + + filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path, + sanitized_name); + g_free(sanitized_name); + + fd = mkstemp(filename); + if (fd >= 0) { + unlink(filename); + g_free(filename); + break; + } + g_free(filename); + } + if (errno != EEXIST && errno != EINTR) { + return -errno; + } + /* + * Try again on EINTR and EEXIST. The latter happens when + * something else creates the file between our two open(). + */ + } + + return fd; +} + +static void *file_ram_alloc(RAMBlock *block, + ram_addr_t memory, + int fd, + bool truncate, + off_t offset, + Error **errp) +{ + uint32_t qemu_map_flags; + void *area; + + block->page_size = qemu_fd_getpagesize(fd); + if (block->mr->align % block->page_size) { + error_setg(errp, "alignment 0x%" PRIx64 + " must be multiples of page size 0x%zx", + block->mr->align, block->page_size); + return NULL; + } else if (block->mr->align && !is_power_of_2(block->mr->align)) { + error_setg(errp, "alignment 0x%" PRIx64 + " must be a power of two", block->mr->align); + return NULL; + } else if (offset % block->page_size) { + error_setg(errp, "offset 0x%" PRIx64 + " must be multiples of page size 0x%zx", + offset, block->page_size); + return NULL; + } + block->mr->align = MAX(block->page_size, block->mr->align); +#if defined(__s390x__) + if (kvm_enabled()) { + block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN); + } +#endif + + if (memory < block->page_size) { + error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " + "or larger than page size 0x%zx", + memory, block->page_size); + return NULL; + } + + memory = ROUND_UP(memory, block->page_size); + + /* + * ftruncate is not supported by hugetlbfs in older + * hosts, so don't bother bailing out on errors. + * If anything goes wrong with it under other filesystems, + * mmap will fail. 
+ * + * Do not truncate the non-empty backend file to avoid corrupting + * the existing data in the file. Disabling shrinking is not + * enough. For example, the current vNVDIMM implementation stores + * the guest NVDIMM labels at the end of the backend file. If the + * backend file is later extended, QEMU will not be able to find + * those labels. Therefore, extending the non-empty backend file + * is disabled as well. + */ + if (truncate && ftruncate(fd, offset + memory)) { + perror("ftruncate"); + } + + qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0; + qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0; + qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0; + qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0; + area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset); + if (area == MAP_FAILED) { + error_setg_errno(errp, errno, + "unable to map backing store for guest RAM"); + return NULL; + } + + block->fd = fd; + block->fd_offset = offset; + return area; +} +#endif + +/* Allocate space within the ram_addr_t space that governs the + * dirty bitmaps. + * Called with the ramlist lock held. + */ +static ram_addr_t find_ram_offset(ram_addr_t size) +{ + RAMBlock *block, *next_block; + ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX; + + assert(size != 0); /* it would hand out same offset multiple times */ + + if (QLIST_EMPTY_RCU(&ram_list.blocks)) { + return 0; + } + + RAMBLOCK_FOREACH(block) { + ram_addr_t candidate, next = RAM_ADDR_MAX; + + /* Align blocks to start on a 'long' in the bitmap + * which makes the bitmap sync'ing take the fast path. + */ + candidate = block->offset + block->max_length; + candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS); + + /* Search for the closest following block + * and find the gap. 
+ */ + RAMBLOCK_FOREACH(next_block) { + if (next_block->offset >= candidate) { + next = MIN(next, next_block->offset); + } + } + + /* If it fits remember our place and remember the size + * of gap, but keep going so that we might find a smaller + * gap to fill so avoiding fragmentation. + */ + if (next - candidate >= size && next - candidate < mingap) { + offset = candidate; + mingap = next - candidate; + } + + trace_find_ram_offset_loop(size, candidate, offset, next, mingap); + } + + if (offset == RAM_ADDR_MAX) { + fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n", + (uint64_t)size); + abort(); + } + + trace_find_ram_offset(size, offset); + + return offset; +} + +static unsigned long last_ram_page(void) +{ + RAMBlock *block; + ram_addr_t last = 0; + + RCU_READ_LOCK_GUARD(); + RAMBLOCK_FOREACH(block) { + last = MAX(last, block->offset + block->max_length); + } + return last >> TARGET_PAGE_BITS; +} + +static void qemu_ram_setup_dump(void *addr, ram_addr_t size) +{ + int ret; + + /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */ + if (!machine_dump_guest_core(current_machine)) { + ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP); + if (ret) { + perror("qemu_madvise"); + fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, " + "but dump_guest_core=off specified\n"); + } + } +} + +const char *qemu_ram_get_idstr(RAMBlock *rb) +{ + return rb->idstr; +} + +void *qemu_ram_get_host_addr(RAMBlock *rb) +{ + return rb->host; +} + +ram_addr_t qemu_ram_get_offset(RAMBlock *rb) +{ + return rb->offset; +} + +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) +{ + return rb->used_length; +} + +ram_addr_t qemu_ram_get_max_length(RAMBlock *rb) +{ + return rb->max_length; +} + +bool qemu_ram_is_shared(RAMBlock *rb) +{ + return rb->flags & RAM_SHARED; +} + +bool qemu_ram_is_noreserve(RAMBlock *rb) +{ + return rb->flags & RAM_NORESERVE; +} + +/* Note: Only set at the start of postcopy */ +bool qemu_ram_is_uf_zeroable(RAMBlock *rb) +{ + 
return rb->flags & RAM_UF_ZEROPAGE; +} + +void qemu_ram_set_uf_zeroable(RAMBlock *rb) +{ + rb->flags |= RAM_UF_ZEROPAGE; +} + +bool qemu_ram_is_migratable(RAMBlock *rb) +{ + return rb->flags & RAM_MIGRATABLE; +} + +void qemu_ram_set_migratable(RAMBlock *rb) +{ + rb->flags |= RAM_MIGRATABLE; +} + +void qemu_ram_unset_migratable(RAMBlock *rb) +{ + rb->flags &= ~RAM_MIGRATABLE; +} + +bool qemu_ram_is_named_file(RAMBlock *rb) +{ + return rb->flags & RAM_NAMED_FILE; +} + +int qemu_ram_get_fd(RAMBlock *rb) +{ + return rb->fd; +} + +/* Called with iothread lock held. */ +void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) +{ + RAMBlock *block; + + assert(new_block); + assert(!new_block->idstr[0]); + + if (dev) { + char *id = qdev_get_dev_path(dev); + if (id) { + snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id); + g_free(id); + } + } + pstrcat(new_block->idstr, sizeof(new_block->idstr), name); + + RCU_READ_LOCK_GUARD(); + RAMBLOCK_FOREACH(block) { + if (block != new_block && + !strcmp(block->idstr, new_block->idstr)) { + fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n", + new_block->idstr); + abort(); + } + } +} + +/* Called with iothread lock held. */ +void qemu_ram_unset_idstr(RAMBlock *block) +{ + /* FIXME: arch_init.c assumes that this is not called throughout + * migration. Ignore the problem since hot-unplug during migration + * does not work anyway. 
+ */ + if (block) { + memset(block->idstr, 0, sizeof(block->idstr)); + } +} + +size_t qemu_ram_pagesize(RAMBlock *rb) +{ + return rb->page_size; +} + +/* Returns the largest size of page in use */ +size_t qemu_ram_pagesize_largest(void) +{ + RAMBlock *block; + size_t largest = 0; + + RAMBLOCK_FOREACH(block) { + largest = MAX(largest, qemu_ram_pagesize(block)); + } + + return largest; +} + +static int memory_try_enable_merging(void *addr, size_t len) +{ + if (!machine_mem_merge(current_machine)) { + /* disabled by the user */ + return 0; + } + + return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); +} + +/* + * Resizing RAM while migrating can result in the migration being canceled. + * Care has to be taken if the guest might have already detected the memory. + * + * As memory core doesn't know how is memory accessed, it is up to + * resize callback to update device state and/or add assertions to detect + * misuse, if necessary. + */ +int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) +{ + const ram_addr_t oldsize = block->used_length; + const ram_addr_t unaligned_size = newsize; + + assert(block); + + newsize = HOST_PAGE_ALIGN(newsize); + + if (block->used_length == newsize) { + /* + * We don't have to resize the ram block (which only knows aligned + * sizes), however, we have to notify if the unaligned size changed. 
+ */ + if (unaligned_size != memory_region_size(block->mr)) { + memory_region_set_size(block->mr, unaligned_size); + if (block->resized) { + block->resized(block->idstr, unaligned_size, block->host); + } + } + return 0; + } + + if (!(block->flags & RAM_RESIZEABLE)) { + error_setg_errno(errp, EINVAL, + "Size mismatch: %s: 0x" RAM_ADDR_FMT + " != 0x" RAM_ADDR_FMT, block->idstr, + newsize, block->used_length); + return -EINVAL; + } + + if (block->max_length < newsize) { + error_setg_errno(errp, EINVAL, + "Size too large: %s: 0x" RAM_ADDR_FMT + " > 0x" RAM_ADDR_FMT, block->idstr, + newsize, block->max_length); + return -EINVAL; + } + + /* Notify before modifying the ram block and touching the bitmaps. */ + if (block->host) { + ram_block_notify_resize(block->host, oldsize, newsize); + } + + cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); + block->used_length = newsize; + cpu_physical_memory_set_dirty_range(block->offset, block->used_length, + DIRTY_CLIENTS_ALL); + memory_region_set_size(block->mr, unaligned_size); + if (block->resized) { + block->resized(block->idstr, unaligned_size, block->host); + } + return 0; +} + +/* + * Trigger sync on the given ram block for range [start, start + length] + * with the backing store if one is available. + * Otherwise no-op. + * @Note: this is supposed to be a synchronous op. + */ +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) +{ + /* The requested range should fit in within the block range */ + g_assert((start + length) <= block->used_length); + +#ifdef CONFIG_LIBPMEM + /* The lack of support for pmem should not block the sync */ + if (ramblock_is_pmem(block)) { + void *addr = ramblock_ptr(block, start); + pmem_persist(addr, length); + return; + } +#endif + if (block->fd >= 0) { + /** + * Case there is no support for PMEM or the memory has not been + * specified as persistent (or is not one) - use the msync. 
+ * Less optimal but still achieves the same goal + */ + void *addr = ramblock_ptr(block, start); + if (qemu_msync(addr, length, block->fd)) { + warn_report("%s: failed to sync memory range: start: " + RAM_ADDR_FMT " length: " RAM_ADDR_FMT, + __func__, start, length); + } + } +} + +/* Called with ram_list.mutex held */ +static void dirty_memory_extend(ram_addr_t old_ram_size, + ram_addr_t new_ram_size) +{ + ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, + DIRTY_MEMORY_BLOCK_SIZE); + ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size, + DIRTY_MEMORY_BLOCK_SIZE); + int i; + + /* Only need to extend if block count increased */ + if (new_num_blocks <= old_num_blocks) { + return; + } + + for (i = 0; i < DIRTY_MEMORY_NUM; i++) { + DirtyMemoryBlocks *old_blocks; + DirtyMemoryBlocks *new_blocks; + int j; + + old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]); + new_blocks = g_malloc(sizeof(*new_blocks) + + sizeof(new_blocks->blocks[0]) * new_num_blocks); + + if (old_num_blocks) { + memcpy(new_blocks->blocks, old_blocks->blocks, + old_num_blocks * sizeof(old_blocks->blocks[0])); + } + + for (j = old_num_blocks; j < new_num_blocks; j++) { + new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE); + } + + qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks); + + if (old_blocks) { + g_free_rcu(old_blocks, rcu); + } + } +} + +static void ram_block_add(RAMBlock *new_block, Error **errp) +{ + const bool noreserve = qemu_ram_is_noreserve(new_block); + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = NULL; + ram_addr_t old_ram_size, new_ram_size; + Error *err = NULL; + + old_ram_size = last_ram_page(); + + qemu_mutex_lock_ramlist(); + new_block->offset = find_ram_offset(new_block->max_length); + + if (!new_block->host) { + if (xen_enabled()) { + xen_ram_alloc(new_block->offset, new_block->max_length, + new_block->mr, &err); + if (err) { + error_propagate(errp, err); + qemu_mutex_unlock_ramlist(); + return; + } + } 
else { + new_block->host = qemu_anon_ram_alloc(new_block->max_length, + &new_block->mr->align, + shared, noreserve); + if (!new_block->host) { + error_setg_errno(errp, errno, + "cannot set up guest memory '%s'", + memory_region_name(new_block->mr)); + qemu_mutex_unlock_ramlist(); + return; + } + memory_try_enable_merging(new_block->host, new_block->max_length); + } + } + + new_ram_size = MAX(old_ram_size, + (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); + if (new_ram_size > old_ram_size) { + dirty_memory_extend(old_ram_size, new_ram_size); + } + /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, + * QLIST (which has an RCU-friendly variant) does not have insertion at + * tail, so save the last element in last_block. + */ + RAMBLOCK_FOREACH(block) { + last_block = block; + if (block->max_length < new_block->max_length) { + break; + } + } + if (block) { + QLIST_INSERT_BEFORE_RCU(block, new_block, next); + } else if (last_block) { + QLIST_INSERT_AFTER_RCU(last_block, new_block, next); + } else { /* list is empty */ + QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next); + } + ram_list.mru_block = NULL; + + /* Write list before version */ + smp_wmb(); + ram_list.version++; + qemu_mutex_unlock_ramlist(); + + cpu_physical_memory_set_dirty_range(new_block->offset, + new_block->used_length, + DIRTY_CLIENTS_ALL); + + if (new_block->host) { + qemu_ram_setup_dump(new_block->host, new_block->max_length); + qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE); + /* + * MADV_DONTFORK is also needed by KVM in absence of synchronous MMU + * Configure it unless the machine is a qtest server, in which case + * KVM is not used and it may be forked (eg for fuzzing purposes). 
+ */ + if (!qtest_enabled()) { + qemu_madvise(new_block->host, new_block->max_length, + QEMU_MADV_DONTFORK); + } + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); + } +} + +#ifdef CONFIG_POSIX +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + uint32_t ram_flags, int fd, off_t offset, + Error **errp) +{ + RAMBlock *new_block; + Error *local_err = NULL; + int64_t file_size, file_align; + + /* Just support these ram flags by now. */ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | + RAM_READONLY_FD)) == 0); + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); + return NULL; + } + + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); + return NULL; + } + + size = HOST_PAGE_ALIGN(size); + file_size = get_file_size(fd); + if (file_size > offset && file_size < (offset + size)) { + error_setg(errp, "backing store size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + file_size, size); + return NULL; + } + + file_align = get_file_align(fd); + if (file_align > 0 && file_align > mr->align) { + error_setg(errp, "backing store align 0x%" PRIx64 + " is larger than 'align' option 0x%" PRIx64, + file_align, mr->align); + return NULL; + } + + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->used_length = size; + new_block->max_length = size; + new_block->flags = ram_flags; + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); + if (!new_block->host) { + g_free(new_block); + return NULL; + } + + ram_block_add(new_block, &local_err); + if (local_err) { + g_free(new_block); + error_propagate(errp, local_err); + return NULL; + } + return new_block; + +} + + +RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + uint32_t ram_flags, const char *mem_path, + off_t offset, Error 
**errp) +{ + int fd; + bool created; + RAMBlock *block; + + fd = file_ram_open(mem_path, memory_region_name(mr), + !!(ram_flags & RAM_READONLY_FD), &created); + if (fd < 0) { + error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM", + mem_path); + if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) && + fd == -EACCES) { + /* + * If we can open the file R/O (note: will never create a new file) + * and we are dealing with a private mapping, there are still ways + * to consume such files and get RAM instead of ROM. + */ + fd = file_ram_open(mem_path, memory_region_name(mr), true, + &created); + if (fd < 0) { + return NULL; + } + assert(!created); + close(fd); + error_append_hint(errp, "Consider opening the backing store" + " read-only but still creating writable RAM using" + " '-object memory-backend-file,readonly=on,rom=off...'" + " (see \"VM templating\" documentation)\n"); + } + return NULL; + } + + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp); + if (!block) { + if (created) { + unlink(mem_path); + } + close(fd); + return NULL; + } + + return block; +} +#endif + +static +RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + void *host, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) +{ + RAMBlock *new_block; + Error *local_err = NULL; + + assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | + RAM_NORESERVE)) == 0); + assert(!host ^ (ram_flags & RAM_PREALLOC)); + + size = HOST_PAGE_ALIGN(size); + max_size = HOST_PAGE_ALIGN(max_size); + new_block = g_malloc0(sizeof(*new_block)); + new_block->mr = mr; + new_block->resized = resized; + new_block->used_length = size; + new_block->max_length = max_size; + assert(max_size >= size); + new_block->fd = -1; + new_block->page_size = qemu_real_host_page_size(); + new_block->host = host; + new_block->flags = ram_flags; + ram_block_add(new_block, &local_err); + if 
(local_err) { + g_free(new_block); + error_propagate(errp, local_err); + return NULL; + } + return new_block; +} + +RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + MemoryRegion *mr, Error **errp) +{ + return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr, + errp); +} + +RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) +{ + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); + return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); +} + +RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz, + void (*resized)(const char*, + uint64_t length, + void *host), + MemoryRegion *mr, Error **errp) +{ + return qemu_ram_alloc_internal(size, maxsz, resized, NULL, + RAM_RESIZEABLE, mr, errp); +} + +static void reclaim_ramblock(RAMBlock *block) +{ + if (block->flags & RAM_PREALLOC) { + ; + } else if (xen_enabled()) { + xen_invalidate_map_cache_entry(block->host); +#ifndef _WIN32 + } else if (block->fd >= 0) { + qemu_ram_munmap(block->fd, block->host, block->max_length); + close(block->fd); +#endif + } else { + qemu_anon_ram_free(block->host, block->max_length); + } + g_free(block); +} + +void qemu_ram_free(RAMBlock *block) +{ + if (!block) { + return; + } + + if (block->host) { + ram_block_notify_remove(block->host, block->used_length, + block->max_length); + } + + qemu_mutex_lock_ramlist(); + QLIST_REMOVE_RCU(block, next); + ram_list.mru_block = NULL; + /* Write list before version */ + smp_wmb(); + ram_list.version++; + call_rcu(block, reclaim_ramblock, rcu); + qemu_mutex_unlock_ramlist(); +} + +#ifndef _WIN32 +void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +{ + RAMBlock *block; + ram_addr_t offset; + int flags; + void *area, *vaddr; + int prot; + + RAMBLOCK_FOREACH(block) { + offset = addr - block->offset; + if (offset < block->max_length) { + vaddr = ramblock_ptr(block, offset); + if (block->flags & RAM_PREALLOC) { + ; + } else if 
(xen_enabled()) { + abort(); + } else { + flags = MAP_FIXED; + flags |= block->flags & RAM_SHARED ? + MAP_SHARED : MAP_PRIVATE; + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; + prot = PROT_READ; + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; + if (block->fd >= 0) { + area = mmap(vaddr, length, prot, flags, block->fd, + offset + block->fd_offset); + } else { + flags |= MAP_ANONYMOUS; + area = mmap(vaddr, length, prot, flags, -1, 0); + } + if (area != vaddr) { + error_report("Could not remap addr: " + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", + length, addr); + exit(1); + } + memory_try_enable_merging(vaddr, length); + qemu_ram_setup_dump(vaddr, length); + } + } + } +} +#endif /* !_WIN32 */ + +/* Return a host pointer to ram allocated with qemu_ram_alloc. + * This should not be used for general purpose DMA. Use address_space_map + * or address_space_rw instead. For local memory (e.g. video ram) that the + * device owns, use memory_region_get_ram_ptr. + * + * Called within RCU critical section. + */ +void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr) +{ + RAMBlock *block = ram_block; + + if (block == NULL) { + block = qemu_get_ram_block(addr); + addr -= block->offset; + } + + if (xen_enabled() && block->host == NULL) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + * In that case just map until the end of the page. + */ + if (block->offset == 0) { + return xen_map_cache(addr, 0, 0, false); + } + + block->host = xen_map_cache(block->offset, block->max_length, 1, false); + } + return ramblock_ptr(block, addr); +} + +/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr + * but takes a size argument. + * + * Called within RCU critical section. 
+ */
+static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
+                                 hwaddr *size, bool lock)
+{
+    RAMBlock *rb;
+
+    /* An empty request maps nothing. */
+    if (!*size) {
+        return NULL;
+    }
+
+    /*
+     * Without a block from the caller, look it up and turn @addr into an
+     * offset relative to that block.
+     */
+    rb = ram_block;
+    if (!rb) {
+        rb = qemu_get_ram_block(addr);
+        addr -= rb->offset;
+    }
+
+    /* Hand back at most what is left of the block past @addr. */
+    *size = MIN(*size, rb->max_length - addr);
+
+    if (xen_enabled() && !rb->host) {
+        /*
+         * For the block at offset 0 only the requested area is mapped,
+         * because we don't want to map the entire memory in QEMU.  Any
+         * other block is mapped in full and the mapping is kept in
+         * rb->host.
+         */
+        if (!rb->offset) {
+            return xen_map_cache(addr, *size, lock, lock);
+        }
+        rb->host = xen_map_cache(rb->offset, rb->max_length, 1, lock);
+    }
+
+    return ramblock_ptr(rb, addr);
+}
+
+/*
+ * Return the offset of a host pointer within a ramblock.
+ *
+ * Asserts that @host is not below rb->host and that the resulting offset
+ * is smaller than rb->max_length.
+ */
+ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
+{
+    uint8_t *block_base = (uint8_t *)rb->host;
+    uint8_t *target = (uint8_t *)host;
+    ram_addr_t delta = target - block_base;
+
+    assert((uintptr_t)host >= (uintptr_t)rb->host);
+    assert(delta < rb->max_length);
+
+    return delta;
+}
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * ptr: Host pointer to look up
+ * round_offset: If true round the result offset down to a page boundary
+ * *offset: set to result offset within the RAMBlock
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
+ */
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+                                   ram_addr_t *offset)
+{
+    uint8_t *host = ptr;
+    RAMBlock *rb;
+
+    if (xen_enabled()) {
+        ram_addr_t xen_addr;
+
+        RCU_READ_LOCK_GUARD();
+        xen_addr = xen_ram_addr_from_mapcache(ptr);
+        rb = qemu_get_ram_block(xen_addr);
+        if (rb != NULL) {
+            *offset = xen_addr - rb->offset;
+        }
+        return rb;
+    }
+
+    RCU_READ_LOCK_GUARD();
+
+    /* Try the most recently used block first, then fall back to a scan. */
+    rb = qatomic_rcu_read(&ram_list.mru_block);
+    if (!rb || !rb->host || !(host - rb->host < rb->max_length)) {
+        RAMBLOCK_FOREACH(rb) {
+            /* A block without a host pointer is not mapped: skip it. */
+            if (rb->host != NULL && host - rb->host < rb->max_length) {
+                break;
+            }
+        }
+        /* The iteration variable is NULL once the list is exhausted. */
+        if (!rb) {
+            return NULL;
+        }
+    }
+
+    *offset = (host - rb->host);
+    if (round_offset) {
+        *offset &= TARGET_PAGE_MASK;
+    }
+    return rb;
+}
+
+/*
+ * Finds the named RAMBlock
+ *
+ * name: The name of RAMBlock to find (compared against RAMBlock::idstr)
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ */
+RAMBlock *qemu_ram_block_by_name(const char *name)
+{
+    RAMBlock *rb;
+
+    RAMBLOCK_FOREACH(rb) {
+        if (strcmp(name, rb->idstr) == 0) {
+            break;
+        }
+    }
+
+    /* NULL here means the whole list was walked without a match. */
+    return rb;
+}
+
+/*
+ * Some of the system routines need to translate from a host pointer
+ * (typically a TLB entry) back to a ram offset.
+ */ +ram_addr_t qemu_ram_addr_from_host(void *ptr) +{ + RAMBlock *block; + ram_addr_t offset; + + block = qemu_ram_block_from_host(ptr, false, &offset); + if (!block) { + return RAM_ADDR_INVALID; + } + + return block->offset + offset; +} + +ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) +{ + ram_addr_t ram_addr; + + ram_addr = qemu_ram_addr_from_host(ptr); + if (ram_addr == RAM_ADDR_INVALID) { + error_report("Bad ram pointer %p", ptr); + abort(); + } + return ram_addr; +} + +static MemTxResult flatview_read(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, void *buf, hwaddr len); +static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + const void *buf, hwaddr len); +static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, + bool is_write, MemTxAttrs attrs); + +static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data, + unsigned len, MemTxAttrs attrs) +{ + subpage_t *subpage = opaque; + uint8_t buf[8]; + MemTxResult res; + +#if defined(DEBUG_SUBPAGE) + printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__, + subpage, len, addr); +#endif + res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len); + if (res) { + return res; + } + *data = ldn_p(buf, len); + return MEMTX_OK; +} + +static MemTxResult subpage_write(void *opaque, hwaddr addr, + uint64_t value, unsigned len, MemTxAttrs attrs) +{ + subpage_t *subpage = opaque; + uint8_t buf[8]; + +#if defined(DEBUG_SUBPAGE) + printf("%s: subpage %p len %u addr " HWADDR_FMT_plx + " value %"PRIx64"\n", + __func__, subpage, len, addr, value); +#endif + stn_p(buf, len, value); + return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len); +} + +static bool subpage_accepts(void *opaque, hwaddr addr, + unsigned len, bool is_write, + MemTxAttrs attrs) +{ + subpage_t *subpage = opaque; +#if defined(DEBUG_SUBPAGE) + printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n", + __func__, subpage, is_write ? 
'w' : 'r', len, addr); +#endif + + return flatview_access_valid(subpage->fv, addr + subpage->base, + len, is_write, attrs); +} + +static const MemoryRegionOps subpage_ops = { + .read_with_attrs = subpage_read, + .write_with_attrs = subpage_write, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .valid.accepts = subpage_accepts, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section) +{ + int idx, eidx; + + if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) + return -1; + idx = SUBPAGE_IDX(start); + eidx = SUBPAGE_IDX(end); +#if defined(DEBUG_SUBPAGE) + printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", + __func__, mmio, start, end, idx, eidx, section); +#endif + for (; idx <= eidx; idx++) { + mmio->sub_section[idx] = section; + } + + return 0; +} + +static subpage_t *subpage_init(FlatView *fv, hwaddr base) +{ + subpage_t *mmio; + + /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */ + mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); + mmio->fv = fv; + mmio->base = base; + memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio, + NULL, TARGET_PAGE_SIZE); + mmio->iomem.subpage = true; +#if defined(DEBUG_SUBPAGE) + printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__, + mmio, base, TARGET_PAGE_SIZE); +#endif + + return mmio; +} + +static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) +{ + assert(fv); + MemoryRegionSection section = { + .fv = fv, + .mr = mr, + .offset_within_address_space = 0, + .offset_within_region = 0, + .size = int128_2_64(), + }; + + return phys_section_add(map, §ion); +} + +MemoryRegionSection *iotlb_to_section(CPUState *cpu, + hwaddr index, MemTxAttrs attrs) +{ + int asidx = cpu_asidx_from_attrs(cpu, attrs); + CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; + AddressSpaceDispatch *d 
= cpuas->memory_dispatch; + int section_index = index & ~TARGET_PAGE_MASK; + MemoryRegionSection *ret; + + assert(section_index < d->map.sections_nb); + ret = d->map.sections + section_index; + assert(ret->mr); + assert(ret->mr->ops); + + return ret; +} + +static void io_mem_init(void) +{ + memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, + NULL, UINT64_MAX); +} + +AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) +{ + AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); + uint16_t n; + + n = dummy_section(&d->map, fv, &io_mem_unassigned); + assert(n == PHYS_SECTION_UNASSIGNED); + + d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; + + return d; +} + +void address_space_dispatch_free(AddressSpaceDispatch *d) +{ + phys_sections_free(&d->map); + g_free(d); +} + +static void do_nothing(CPUState *cpu, run_on_cpu_data d) +{ +} + +static void tcg_log_global_after_sync(MemoryListener *listener) +{ + CPUAddressSpace *cpuas; + + /* Wait for the CPU to end the current TB. This avoids the following + * incorrect race: + * + * vCPU migration + * ---------------------- ------------------------- + * TLB check -> slow path + * notdirty_mem_write + * write to RAM + * mark dirty + * clear dirty flag + * TLB check -> fast path + * read memory + * write to RAM + * + * by pushing the migration thread's memory read after the vCPU thread has + * written the memory. + */ + if (replay_mode == REPLAY_MODE_NONE) { + /* + * VGA can make calls to this function while updating the screen. + * In record/replay mode this causes a deadlock, because + * run_on_cpu waits for rr mutex. Therefore no races are possible + * in this case and no need for making run_on_cpu when + * record/replay is enabled. 
+ */ + cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); + run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); + } +} + +static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data) +{ + CPUAddressSpace *cpuas = data.host_ptr; + + cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as); + tlb_flush(cpu); +} + +static void tcg_commit(MemoryListener *listener) +{ + CPUAddressSpace *cpuas; + CPUState *cpu; + + assert(tcg_enabled()); + /* since each CPU stores ram addresses in its TLB cache, we must + reset the modified entries */ + cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); + cpu = cpuas->cpu; + + /* + * Defer changes to as->memory_dispatch until the cpu is quiescent. + * Otherwise we race between (1) other cpu threads and (2) ongoing + * i/o for the current cpu thread, with data cached by mmu_lookup(). + * + * In addition, queueing the work function will kick the cpu back to + * the main loop, which will end the RCU critical section and reclaim + * the memory data structures. + * + * That said, the listener is also called during realize, before + * all of the tcg machinery for run-on is initialized: thus halt_cond. 
+ */ + if (cpu->halt_cond) { + async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas)); + } else { + tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas)); + } +} + +static void memory_map_init(void) +{ + system_memory = g_malloc(sizeof(*system_memory)); + + memory_region_init(system_memory, NULL, "system", UINT64_MAX); + address_space_init(&address_space_memory, system_memory, "memory"); + + system_io = g_malloc(sizeof(*system_io)); + memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io", + 65536); + address_space_init(&address_space_io, system_io, "I/O"); +} + +MemoryRegion *get_system_memory(void) +{ + return system_memory; +} + +MemoryRegion *get_system_io(void) +{ + return system_io; +} + +static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, + hwaddr length) +{ + uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); + addr += memory_region_get_ram_addr(mr); + + /* No early return if dirty_log_mask is or becomes 0, because + * cpu_physical_memory_set_dirty_range will still call + * xen_modified_memory. + */ + if (dirty_log_mask) { + dirty_log_mask = + cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask); + } + if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { + assert(tcg_enabled()); + tb_invalidate_phys_range(addr, addr + length - 1); + dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); + } + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); +} + +void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) +{ + /* + * In principle this function would work on other memory region types too, + * but the ROM device use case is the only one where this operation is + * necessary. Other memory regions should use the + * address_space_read/write() APIs. 
+ */ + assert(memory_region_is_romd(mr)); + + invalidate_and_set_dirty(mr, addr, size); +} + +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) +{ + unsigned access_size_max = mr->ops->valid.max_access_size; + + /* Regions are assumed to support 1-4 byte accesses unless + otherwise specified. */ + if (access_size_max == 0) { + access_size_max = 4; + } + + /* Bound the maximum access by the alignment of the address. */ + if (!mr->ops->impl.unaligned) { + unsigned align_size_max = addr & -addr; + if (align_size_max != 0 && align_size_max < access_size_max) { + access_size_max = align_size_max; + } + } + + /* Don't attempt accesses larger than the maximum. */ + if (l > access_size_max) { + l = access_size_max; + } + l = pow2floor(l); + + return l; +} + +bool prepare_mmio_access(MemoryRegion *mr) +{ + bool release_lock = false; + + if (!qemu_mutex_iothread_locked()) { + qemu_mutex_lock_iothread(); + release_lock = true; + } + if (mr->flush_coalesced_mmio) { + qemu_flush_coalesced_mmio_buffer(); + } + + return release_lock; +} + +/** + * flatview_access_allowed + * @mr: #MemoryRegion to be accessed + * @attrs: memory transaction attributes + * @addr: address within that memory region + * @len: the number of bytes to access + * + * Check if a memory transaction is allowed. + * + * Returns: true if transaction is allowed, false if denied. + */ +static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, + hwaddr addr, hwaddr len) +{ + if (likely(!attrs.memory)) { + return true; + } + if (memory_region_is_ram(mr)) { + return true; + } + qemu_log_mask(LOG_GUEST_ERROR, + "Invalid access to non-RAM device at " + "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " + "region '%s'\n", addr, len, memory_region_name(mr)); + return false; +} + +/* Called within RCU critical section. 
*/ +static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, + const void *ptr, + hwaddr len, hwaddr addr1, + hwaddr l, MemoryRegion *mr) +{ + uint8_t *ram_ptr; + uint64_t val; + MemTxResult result = MEMTX_OK; + bool release_lock = false; + const uint8_t *buf = ptr; + + for (;;) { + if (!flatview_access_allowed(mr, attrs, addr1, l)) { + result |= MEMTX_ACCESS_ERROR; + /* Keep going. */ + } else if (!memory_access_is_direct(mr, true)) { + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); + /* XXX: could force current_cpu to NULL to avoid + potential bugs */ + val = ldn_he_p(buf, l); + result |= memory_region_dispatch_write(mr, addr1, val, + size_memop(l), attrs); + } else { + /* RAM case */ + ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); + memmove(ram_ptr, buf, l); + invalidate_and_set_dirty(mr, addr1, l); + } + + if (release_lock) { + qemu_mutex_unlock_iothread(); + release_lock = false; + } + + len -= l; + buf += l; + addr += l; + + if (!len) { + break; + } + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); + } + + return result; +} + +/* Called from RCU critical section. */ +static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + const void *buf, hwaddr len) +{ + hwaddr l; + hwaddr addr1; + MemoryRegion *mr; + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); + if (!flatview_access_allowed(mr, attrs, addr, len)) { + return MEMTX_ACCESS_ERROR; + } + return flatview_write_continue(fv, addr, attrs, buf, len, + addr1, l, mr); +} + +/* Called within RCU critical section. 
*/
+/*
+ * Read @len bytes from @fv starting at @addr into @ptr.  The caller has
+ * already translated the first fragment: @mr is its region, @addr1 the
+ * offset inside it and @l the fragment length.  Every following fragment
+ * is translated here.  Errors are OR-ed into the result and the loop
+ * carries on with the next fragment.
+ */
+MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
+                                   MemTxAttrs attrs, void *ptr,
+                                   hwaddr len, hwaddr addr1, hwaddr l,
+                                   MemoryRegion *mr)
+{
+    uint8_t *dst = ptr;
+    MemTxResult status = MEMTX_OK;
+    uint64_t data;
+
+    fuzz_dma_read_cb(addr, len, mr);
+    for (;;) {
+        /* Set when prepare_mmio_access() took the iothread lock for us. */
+        bool unlock_needed = false;
+
+        if (!flatview_access_allowed(mr, attrs, addr1, l)) {
+            /* Record the failure but keep going. */
+            status |= MEMTX_ACCESS_ERROR;
+        } else if (memory_access_is_direct(mr, false)) {
+            /* RAM case; qemu_ram_ptr_length() may shrink l. */
+            const uint8_t *src = qemu_ram_ptr_length(mr->ram_block, addr1,
+                                                     &l, false);
+
+            memcpy(dst, src, l);
+        } else {
+            /* I/O case */
+            unlock_needed = prepare_mmio_access(mr);
+            l = memory_access_size(mr, l, addr1);
+            status |= memory_region_dispatch_read(mr, addr1, &data,
+                                                  size_memop(l), attrs);
+            stn_he_p(dst, l, data);
+        }
+
+        if (unlock_needed) {
+            qemu_mutex_unlock_iothread();
+        }
+
+        /* Step past the fragment that was just handled. */
+        len -= l;
+        dst += l;
+        addr += l;
+        if (len == 0) {
+            return status;
+        }
+
+        l = len;
+        mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+    }
+}
+
+/* Called from RCU critical section.
*/
+static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
+                                 MemTxAttrs attrs, void *buf, hwaddr len)
+{
+    hwaddr l;
+    hwaddr addr1;
+    MemoryRegion *mr;
+    /*
+     * Translate the start of the range once.  The request is refused as a
+     * whole if flatview_access_allowed() rejects it; otherwise the first
+     * translated chunk is passed on to flatview_read_continue().
+     */
+    l = len;
+    mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+    if (!flatview_access_allowed(mr, attrs, addr, len)) {
+        return MEMTX_ACCESS_ERROR;
+    }
+    return flatview_read_continue(fv, addr, attrs, buf, len,
+                                  addr1, l, mr);
+}
+/*
+ * Read len bytes at addr in as into buf.  A zero len returns MEMTX_OK
+ * without looking at the address space; otherwise the read is performed on
+ * the address space's current FlatView under RCU_READ_LOCK_GUARD().
+ */
+MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
+                                    MemTxAttrs attrs, void *buf, hwaddr len)
+{
+    MemTxResult result = MEMTX_OK;
+    FlatView *fv;
+
+    if (len > 0) {
+        RCU_READ_LOCK_GUARD();
+        fv = address_space_to_flatview(as);
+        result = flatview_read(fv, addr, attrs, buf, len);
+    }
+
+    return result;
+}
+/*
+ * Write len bytes from buf to addr in as.  A zero len returns MEMTX_OK
+ * without looking at the address space; otherwise the write is performed on
+ * the address space's current FlatView under RCU_READ_LOCK_GUARD().
+ */
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
+                                MemTxAttrs attrs,
+                                const void *buf, hwaddr len)
+{
+    MemTxResult result = MEMTX_OK;
+    FlatView *fv;
+
+    if (len > 0) {
+        RCU_READ_LOCK_GUARD();
+        fv = address_space_to_flatview(as);
+        result = flatview_write(fv, addr, attrs, buf, len);
+    }
+
+    return result;
+}
+/*
+ * Direction-selecting wrapper: address_space_write() when is_write is true,
+ * address_space_read_full() otherwise.
+ */
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                             void *buf, hwaddr len, bool is_write)
+{
+    if (is_write) {
+        return address_space_write(as, addr, attrs, buf, len);
+    } else {
+        return address_space_read_full(as, addr, attrs, buf, len);
+    }
+}
+/*
+ * Fill len bytes at addr in as with the byte c.  The fill goes through a
+ * FILLBUF_SIZE-byte stack buffer, one address_space_write() per chunk; the
+ * per-chunk results are OR-ed together and returned.
+ */
+MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
+                              uint8_t c, hwaddr len, MemTxAttrs attrs)
+{
+#define FILLBUF_SIZE 512
+    uint8_t fillbuf[FILLBUF_SIZE];
+    int l; /* size of the current chunk, never more than FILLBUF_SIZE */
+    MemTxResult error = MEMTX_OK;
+
+    memset(fillbuf, c, FILLBUF_SIZE);
+    while (len > 0) {
+        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
+        error |= address_space_write(as, addr, attrs, fillbuf, l);
+        len -= l;
+        addr += l;
+    }
+
+    return error;
+}
+/*
+ * Read or write address_space_memory with MEMTXATTRS_UNSPECIFIED.  The
+ * MemTxResult of address_space_rw() is discarded.
+ */
+void cpu_physical_memory_rw(hwaddr addr, void *buf,
+                            hwaddr len, bool is_write)
+{
+    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
+                     buf, len, is_write);
+}
+/* Operation selector for address_space_write_rom_internal(). */
+enum write_rom_type {
+    WRITE_DATA,  /* copy the caller's buffer into the region */
+    FLUSH_CACHE, /* call flush_idcache_range() on the region's host range */
+};
+/*
+ * Walk [addr, addr + len) in as and apply type to every chunk for which
+ * memory_region_is_ram() or memory_region_is_romd() holds.  Any other chunk
+ * is skipped, one memory_access_size() step at a time.  Always returns
+ * MEMTX_OK.
+ *
+ * NOTE(review): the FLUSH_CACHE caller in this file passes ptr == NULL, so
+ * buf is advanced from NULL in that mode; it is never dereferenced there.
+ */
+static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
+                                                           hwaddr addr,
+                                                           MemTxAttrs attrs,
+                                                           const void *ptr,
+                                                           hwaddr len,
+                                                           enum write_rom_type type)
+{
+    hwaddr l;
+    uint8_t *ram_ptr;
+    hwaddr addr1;
+    MemoryRegion *mr;
+    const uint8_t *buf = ptr;
+
+    RCU_READ_LOCK_GUARD();
+    while (len > 0) {
+        l = len;
+        mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
+
+        if (!(memory_region_is_ram(mr) ||
+              memory_region_is_romd(mr))) {
+            l = memory_access_size(mr, l, addr1);
+        } else {
+            /*
+             * ROM/RAM case: operate on the host pointer of the chunk,
+             * either copying l bytes from buf or flushing the range.
+             */
+            ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            switch (type) {
+            case WRITE_DATA:
+                memcpy(ram_ptr, buf, l);
+                invalidate_and_set_dirty(mr, addr1, l);
+                break;
+            case FLUSH_CACHE:
+                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
+                break;
+            }
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+    return MEMTX_OK;
+}
+
+/*
+ * used for ROM loading : can write in RAM and ROM
+ *
+ * Thin wrapper that runs address_space_write_rom_internal() in WRITE_DATA
+ * mode.
+ */
+MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
+                                    MemTxAttrs attrs,
+                                    const void *buf, hwaddr len)
+{
+    return address_space_write_rom_internal(as, addr, attrs,
+                                            buf, len, WRITE_DATA);
+}
+
+void cpu_flush_icache_range(hwaddr start, hwaddr len)
+{
+    /*
+     * This function should do the same thing as an icache flush that was
+     * triggered from within the guest. For TCG we are always cache coherent,
+     * so there is no need to flush anything. For KVM / Xen we need to flush
+     * the host's instruction cache at least.
+     */
+    if (tcg_enabled()) {
+        return;
+    }
+
+    address_space_write_rom_internal(&address_space_memory,
+                                     start, MEMTXATTRS_UNSPECIFIED,
+                                     NULL, len, FLUSH_CACHE);
+}
+/*
+ * State of the single bounce buffer that address_space_map() falls back to
+ * when the target region cannot be accessed directly.
+ */
+typedef struct {
+    MemoryRegion *mr; /* region kept referenced while the buffer is mapped */
+    void *buffer;     /* qemu_memalign() allocation handed to the caller */
+    hwaddr addr;      /* address the mapping was requested for */
+    hwaddr len;       /* size of buffer in bytes */
+    bool in_use;      /* claimed with qatomic_xchg() in address_space_map() */
+} BounceBuffer;
+
+static BounceBuffer bounce; /* the only instance; shared by every caller */
+/*
+ * A bottom half that cpu_notify_map_clients_locked() schedules, after which
+ * the entry is removed from map_client_list.
+ */
+typedef struct MapClient {
+    QEMUBH *bh;
+    QLIST_ENTRY(MapClient) link;
+} MapClient;
+
+QemuMutex map_client_list_lock; /* held around every map_client_list access */
+static QLIST_HEAD(, MapClient) map_client_list
+    = QLIST_HEAD_INITIALIZER(map_client_list);
+/*
+ * Unlink client from map_client_list and free it.  Every caller in this
+ * file holds map_client_list_lock.
+ */
+static void cpu_unregister_map_client_do(MapClient *client)
+{
+    QLIST_REMOVE(client, link);
+    g_free(client);
+}
+/*
+ * Schedule the bottom half of every registered client and drop each client
+ * from the list.  The callers in this file take map_client_list_lock first.
+ */
+static void cpu_notify_map_clients_locked(void)
+{
+    MapClient *client;
+
+    while (!QLIST_EMPTY(&map_client_list)) {
+        client = QLIST_FIRST(&map_client_list);
+        qemu_bh_schedule(client->bh);
+        cpu_unregister_map_client_do(client);
+    }
+}
+/*
+ * Register bh to be scheduled by the next map-client notification.  If the
+ * bounce buffer is not in use at registration time, all registered clients
+ * (including this one) are notified right away.
+ */
+void cpu_register_map_client(QEMUBH *bh)
+{
+    MapClient *client = g_malloc(sizeof(*client));
+
+    qemu_mutex_lock(&map_client_list_lock);
+    client->bh = bh;
+    QLIST_INSERT_HEAD(&map_client_list, client, link);
+    /* Write map_client_list before reading in_use. */
+    smp_mb();
+    if (!qatomic_read(&bounce.in_use)) {
+        cpu_notify_map_clients_locked();
+    }
+    qemu_mutex_unlock(&map_client_list_lock);
+}
+/*
+ * One-time setup: initialise ram_list.mutex, finalize the target page bits,
+ * run io_mem_init() and memory_map_init(), then initialise
+ * map_client_list_lock.
+ */
+void cpu_exec_init_all(void)
+{
+    qemu_mutex_init(&ram_list.mutex);
+    /* The data structures we set up here depend on knowing the page size,
+     * so no more changes can be made after this point.
+     * In an ideal world, nothing we did before we had finished the
+     * machine setup would care about the target page size, and we could
+     * do this much later, rather than requiring board models to state
+     * up front what their requirements are.
+     */
+    finalize_target_page_bits();
+    io_mem_init();
+    memory_map_init();
+    qemu_mutex_init(&map_client_list_lock);
+}
+/*
+ * Remove the first client registered with bh, if there is one.  The bottom
+ * half itself is not touched.
+ */
+void cpu_unregister_map_client(QEMUBH *bh)
+{
+    MapClient *client;
+
+    qemu_mutex_lock(&map_client_list_lock);
+    QLIST_FOREACH(client, &map_client_list, link) {
+        if (client->bh == bh) {
+            cpu_unregister_map_client_do(client);
+            break;
+        }
+    }
+    qemu_mutex_unlock(&map_client_list_lock);
+}
+/* Locked wrapper around cpu_notify_map_clients_locked(). */
+static void cpu_notify_map_clients(void)
+{
+    qemu_mutex_lock(&map_client_list_lock);
+    cpu_notify_map_clients_locked();
+    qemu_mutex_unlock(&map_client_list_lock);
+}
+/*
+ * Return true if every chunk of [addr, addr + len) is acceptable.  A chunk
+ * that is directly accessible is accepted as is; any other chunk has to
+ * pass memory_region_access_valid() for the size memory_access_size()
+ * returns.
+ */
+static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
+                                  bool is_write, MemTxAttrs attrs)
+{
+    MemoryRegion *mr;
+    hwaddr l, xlat;
+
+    while (len > 0) {
+        l = len;
+        mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
+        if (!memory_access_is_direct(mr, is_write)) {
+            l = memory_access_size(mr, l, addr);
+            if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
+                return false;
+            }
+        }
+
+        len -= l;
+        addr += l;
+    }
+    return true;
+}
+/*
+ * flatview_access_valid() on the current FlatView of as, evaluated under
+ * RCU_READ_LOCK_GUARD().
+ */
+bool address_space_access_valid(AddressSpace *as, hwaddr addr,
+                                hwaddr len, bool is_write,
+                                MemTxAttrs attrs)
+{
+    FlatView *fv;
+
+    RCU_READ_LOCK_GUARD();
+    fv = address_space_to_flatview(as);
+    return flatview_access_valid(fv, addr, len, is_write, attrs);
+}
+/*
+ * Given a first translated chunk (len bytes at addr, landing in mr at
+ * offset base), keep translating forward and return how many bytes, at
+ * most target_len, stay contiguous inside mr.  The walk stops as soon as
+ * a translation yields another region or an offset other than base + done.
+ */
+static hwaddr
+flatview_extend_translation(FlatView *fv, hwaddr addr,
+                            hwaddr target_len,
+                            MemoryRegion *mr, hwaddr base, hwaddr len,
+                            bool is_write, MemTxAttrs attrs)
+{
+    hwaddr done = 0;
+    hwaddr xlat;
+    MemoryRegion *this_mr;
+
+    for (;;) {
+        target_len -= len;
+        addr += len;
+        done += len;
+        if (target_len == 0) {
+            return done;
+        }
+
+        len = target_len;
+        this_mr = flatview_translate(fv, addr, &xlat,
+                                     &len, is_write, attrs);
+        if (this_mr != mr || xlat != base + done) {
+            return done;
+        }
+    }
+}
+
+/* Map a physical memory region into a host virtual address.
+ * May map a subset of the requested range, given by and returned in *plen.
+ * May return NULL if resources needed to perform the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * Use cpu_register_map_client() to know when retrying the map operation is + * likely to succeed. + */ +void *address_space_map(AddressSpace *as, + hwaddr addr, + hwaddr *plen, + bool is_write, + MemTxAttrs attrs) +{ + hwaddr len = *plen; + hwaddr l, xlat; + MemoryRegion *mr; + FlatView *fv; + + if (len == 0) { + return NULL; + } + + l = len; + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); + mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); + + if (!memory_access_is_direct(mr, is_write)) { + if (qatomic_xchg(&bounce.in_use, true)) { + *plen = 0; + return NULL; + } + /* Avoid unbounded allocations */ + l = MIN(l, TARGET_PAGE_SIZE); + bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); + bounce.addr = addr; + bounce.len = l; + + memory_region_ref(mr); + bounce.mr = mr; + if (!is_write) { + flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, + bounce.buffer, l); + } + + *plen = l; + return bounce.buffer; + } + + + memory_region_ref(mr); + *plen = flatview_extend_translation(fv, addr, len, mr, xlat, + l, is_write, attrs); + fuzz_dma_read_cb(addr, *plen, mr); + return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); +} + +/* Unmaps a memory region previously mapped by address_space_map(). + * Will also mark the memory as dirty if is_write is true. access_len gives + * the amount of memory that was actually read or written by the caller. 
+ */ +void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + bool is_write, hwaddr access_len) +{ + if (buffer != bounce.buffer) { + MemoryRegion *mr; + ram_addr_t addr1; + + mr = memory_region_from_host(buffer, &addr1); + assert(mr != NULL); + if (is_write) { + invalidate_and_set_dirty(mr, addr1, access_len); + } + if (xen_enabled()) { + xen_invalidate_map_cache_entry(buffer); + } + memory_region_unref(mr); + return; + } + if (is_write) { + address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED, + bounce.buffer, access_len); + } + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + memory_region_unref(bounce.mr); + /* Clear in_use before reading map_client_list. */ + qatomic_set_mb(&bounce.in_use, false); + cpu_notify_map_clients(); +} + +void *cpu_physical_memory_map(hwaddr addr, + hwaddr *plen, + bool is_write) +{ + return address_space_map(&address_space_memory, addr, plen, is_write, + MEMTXATTRS_UNSPECIFIED); +} + +void cpu_physical_memory_unmap(void *buffer, hwaddr len, + bool is_write, hwaddr access_len) +{ + return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len); +} + +#define ARG1_DECL AddressSpace *as +#define ARG1 as +#define SUFFIX +#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__) +#define RCU_READ_LOCK(...) rcu_read_lock() +#define RCU_READ_UNLOCK(...) rcu_read_unlock() +#include "memory_ldst.c.inc" + +int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpace *as, + hwaddr addr, + hwaddr len, + bool is_write) +{ + AddressSpaceDispatch *d; + hwaddr l; + MemoryRegion *mr; + Int128 diff; + + assert(len > 0); + + l = len; + cache->fv = address_space_get_flatview(as); + d = flatview_to_dispatch(cache->fv); + cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); + + /* + * cache->xlat is now relative to cache->mrs.mr, not to the section itself. 
+ * Take that into account to compute how many bytes are there between + * cache->xlat and the end of the section. + */ + diff = int128_sub(cache->mrs.size, + int128_make64(cache->xlat - cache->mrs.offset_within_region)); + l = int128_get64(int128_min(diff, int128_make64(l))); + + mr = cache->mrs.mr; + memory_region_ref(mr); + if (memory_access_is_direct(mr, is_write)) { + /* We don't care about the memory attributes here as we're only + * doing this if we found actual RAM, which behaves the same + * regardless of attributes; so UNSPECIFIED is fine. + */ + l = flatview_extend_translation(cache->fv, addr, len, mr, + cache->xlat, l, is_write, + MEMTXATTRS_UNSPECIFIED); + cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true); + } else { + cache->ptr = NULL; + } + + cache->len = l; + cache->is_write = is_write; + return l; +} + +void address_space_cache_invalidate(MemoryRegionCache *cache, + hwaddr addr, + hwaddr access_len) +{ + assert(cache->is_write); + if (likely(cache->ptr)) { + invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len); + } +} + +void address_space_cache_destroy(MemoryRegionCache *cache) +{ + if (!cache->mrs.mr) { + return; + } + + if (xen_enabled()) { + xen_invalidate_map_cache_entry(cache->ptr); + } + memory_region_unref(cache->mrs.mr); + flatview_unref(cache->fv); + cache->mrs.mr = NULL; + cache->fv = NULL; +} + +/* Called from RCU critical section. This function has the same + * semantics as address_space_translate, but it only works on a + * predefined range of a MemoryRegion that was mapped with + * address_space_cache_init. 
+ */ +static inline MemoryRegion *address_space_translate_cached( + MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat, + hwaddr *plen, bool is_write, MemTxAttrs attrs) +{ + MemoryRegionSection section; + MemoryRegion *mr; + IOMMUMemoryRegion *iommu_mr; + AddressSpace *target_as; + + assert(!cache->ptr); + *xlat = addr + cache->xlat; + + mr = cache->mrs.mr; + iommu_mr = memory_region_get_iommu(mr); + if (!iommu_mr) { + /* MMIO region. */ + return mr; + } + + section = address_space_translate_iommu(iommu_mr, xlat, plen, + NULL, is_write, true, + &target_as, attrs); + return section.mr; +} + +/* Called from RCU critical section. address_space_read_cached uses this + * out of line function when the target is an MMIO or IOMMU region. + */ +MemTxResult +address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, + void *buf, hwaddr len) +{ + hwaddr addr1, l; + MemoryRegion *mr; + + l = len; + mr = address_space_translate_cached(cache, addr, &addr1, &l, false, + MEMTXATTRS_UNSPECIFIED); + return flatview_read_continue(cache->fv, + addr, MEMTXATTRS_UNSPECIFIED, buf, len, + addr1, l, mr); +} + +/* Called from RCU critical section. address_space_write_cached uses this + * out of line function when the target is an MMIO or IOMMU region. + */ +MemTxResult +address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, + const void *buf, hwaddr len) +{ + hwaddr addr1, l; + MemoryRegion *mr; + + l = len; + mr = address_space_translate_cached(cache, addr, &addr1, &l, true, + MEMTXATTRS_UNSPECIFIED); + return flatview_write_continue(cache->fv, + addr, MEMTXATTRS_UNSPECIFIED, buf, len, + addr1, l, mr); +} + +#define ARG1_DECL MemoryRegionCache *cache +#define ARG1 cache +#define SUFFIX _cached_slow +#define TRANSLATE(...) 
address_space_translate_cached(cache, __VA_ARGS__) +#define RCU_READ_LOCK() ((void)0) +#define RCU_READ_UNLOCK() ((void)0) +#include "memory_ldst.c.inc" + +/* virtual memory access for debug (includes writing to ROM) */ +int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, + void *ptr, size_t len, bool is_write) +{ + hwaddr phys_addr; + vaddr l, page; + uint8_t *buf = ptr; + + cpu_synchronize_state(cpu); + while (len > 0) { + int asidx; + MemTxAttrs attrs; + MemTxResult res; + + page = addr & TARGET_PAGE_MASK; + phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs); + asidx = cpu_asidx_from_attrs(cpu, attrs); + /* if no physical page mapped, return an error */ + if (phys_addr == -1) + return -1; + l = (page + TARGET_PAGE_SIZE) - addr; + if (l > len) + l = len; + phys_addr += (addr & ~TARGET_PAGE_MASK); + if (is_write) { + res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + attrs, buf, l); + } else { + res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, + attrs, buf, l); + } + if (res != MEMTX_OK) { + return -1; + } + len -= l; + buf += l; + addr += l; + } + return 0; +} + +/* + * Allows code that needs to deal with migration bitmaps etc to still be built + * target independent. + */ +size_t qemu_target_page_size(void) +{ + return TARGET_PAGE_SIZE; +} + +int qemu_target_page_mask(void) +{ + return TARGET_PAGE_MASK; +} + +int qemu_target_page_bits(void) +{ + return TARGET_PAGE_BITS; +} + +int qemu_target_page_bits_min(void) +{ + return TARGET_PAGE_BITS_MIN; +} + +/* Convert target pages to MiB (2**20). */ +size_t qemu_target_pages_to_MiB(size_t pages) +{ + int page_bits = TARGET_PAGE_BITS; + + /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. 
*/ + g_assert(page_bits < 20); + + return pages >> (20 - page_bits); +} + +bool cpu_physical_memory_is_io(hwaddr phys_addr) +{ + MemoryRegion*mr; + hwaddr l = 1; + + RCU_READ_LOCK_GUARD(); + mr = address_space_translate(&address_space_memory, + phys_addr, &phys_addr, &l, false, + MEMTXATTRS_UNSPECIFIED); + + return !(memory_region_is_ram(mr) || memory_region_is_romd(mr)); +} + +int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) +{ + RAMBlock *block; + int ret = 0; + + RCU_READ_LOCK_GUARD(); + RAMBLOCK_FOREACH(block) { + ret = func(block, opaque); + if (ret) { + break; + } + } + return ret; +} + +/* + * Unmap pages of memory from start to start+length such that + * they a) read as 0, b) Trigger whatever fault mechanism + * the OS provides for postcopy. + * The pages must be unmapped by the end of the function. + * Returns: 0 on success, none-0 on failure + * + */ +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) +{ + int ret = -1; + + uint8_t *host_startaddr = rb->host + start; + + if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) { + error_report("ram_block_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->max_length) { + bool need_madvise, need_fallocate; + if (!QEMU_IS_ALIGNED(length, rb->page_size)) { + error_report("ram_block_discard_range: Unaligned length: %zx", + length); + goto err; + } + + errno = ENOTSUP; /* If we are missing MADVISE etc */ + + /* The logic here is messy; + * madvise DONTNEED fails for hugepages + * fallocate works on hugepages and shmem + * shared anonymous memory requires madvise REMOVE + */ + need_madvise = (rb->page_size == qemu_host_page_size); + need_fallocate = rb->fd != -1; + if (need_fallocate) { + /* For a file, this causes the area of the file to be zero'd + * if read, and for hugetlbfs also causes it to be unmapped + * so a userfault will trigger. 
+ */ +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + /* + * fallocate() will fail with readonly files. Let's print a + * proper error message. + */ + if (rb->flags & RAM_READONLY_FD) { + error_report("ram_block_discard_range: Discarding RAM" + " with readonly files is not supported"); + goto err; + + } + /* + * We'll discard data from the actual file, even though we only + * have a MAP_PRIVATE mapping, possibly messing with other + * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to + * change that behavior whithout violating the promised + * semantics of ram_block_discard_range(). + * + * Only warn, because it works as long as nobody else uses that + * file. + */ + if (!qemu_ram_is_shared(rb)) { + warn_report_once("ram_block_discard_range: Discarding RAM" + " in private file mappings is possibly" + " dangerous, because it will modify the" + " underlying file and will affect other" + " users of the file"); + } + + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); + if (ret) { + ret = -errno; + error_report("ram_block_discard_range: Failed to fallocate " + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + goto err; + } +#else + ret = -ENOSYS; + error_report("ram_block_discard_range: fallocate not available/file" + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + goto err; +#endif + } + if (need_madvise) { + /* For normal RAM this causes it to be unmapped, + * for shared memory it causes the local mapping to disappear + * and to fall back on the file contents (which we just + * fallocate'd away). 
+ */ +#if defined(CONFIG_MADVISE) + if (qemu_ram_is_shared(rb) && rb->fd < 0) { + ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE); + } else { + ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED); + } + if (ret) { + ret = -errno; + error_report("ram_block_discard_range: Failed to discard range " + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + goto err; + } +#else + ret = -ENOSYS; + error_report("ram_block_discard_range: MADVISE not available" + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + goto err; +#endif + } + trace_ram_block_discard_range(rb->idstr, host_startaddr, length, + need_madvise, need_fallocate, ret); + } else { + error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 + "/%zx/" RAM_ADDR_FMT")", + rb->idstr, start, length, rb->max_length); + } + +err: + return ret; +} + +bool ramblock_is_pmem(RAMBlock *rb) +{ + return rb->flags & RAM_PMEM; +} + +static void mtree_print_phys_entries(int start, int end, int skip, int ptr) +{ + if (start == end - 1) { + qemu_printf("\t%3d ", start); + } else { + qemu_printf("\t%3d..%-3d ", start, end - 1); + } + qemu_printf(" skip=%d ", skip); + if (ptr == PHYS_MAP_NODE_NIL) { + qemu_printf(" ptr=NIL"); + } else if (!skip) { + qemu_printf(" ptr=#%d", ptr); + } else { + qemu_printf(" ptr=[%d]", ptr); + } + qemu_printf("\n"); +} + +#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ + int128_sub((size), int128_one())) : 0) + +void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root) +{ + int i; + + qemu_printf(" Dispatch\n"); + qemu_printf(" Physical sections\n"); + + for (i = 0; i < d->map.sections_nb; ++i) { + MemoryRegionSection *s = d->map.sections + i; + const char *names[] = { " [unassigned]", " [not dirty]", + " [ROM]", " [watch]" }; + + qemu_printf(" #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx + " %s%s%s%s%s", + i, + s->offset_within_address_space, + s->offset_within_address_space + MR_SIZE(s->size), + s->mr->name ? 
s->mr->name : "(noname)", + i < ARRAY_SIZE(names) ? names[i] : "", + s->mr == root ? " [ROOT]" : "", + s == d->mru_section ? " [MRU]" : "", + s->mr->is_iommu ? " [iommu]" : ""); + + if (s->mr->alias) { + qemu_printf(" alias=%s", s->mr->alias->name ? + s->mr->alias->name : "noname"); + } + qemu_printf("\n"); + } + + qemu_printf(" Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n", + P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip); + for (i = 0; i < d->map.nodes_nb; ++i) { + int j, jprev; + PhysPageEntry prev; + Node *n = d->map.nodes + i; + + qemu_printf(" [%d]\n", i); + + for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) { + PhysPageEntry *pe = *n + j; + + if (pe->ptr == prev.ptr && pe->skip == prev.skip) { + continue; + } + + mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); + + jprev = j; + prev = *pe; + } + + if (jprev != ARRAY_SIZE(*n)) { + mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); + } + } +} + +/* Require any discards to work. */ +static unsigned int ram_block_discard_required_cnt; +/* Require only coordinated discards to work. */ +static unsigned int ram_block_coordinated_discard_required_cnt; +/* Disable any discards. */ +static unsigned int ram_block_discard_disabled_cnt; +/* Disable only uncoordinated discards. 
*/
+static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
+static QemuMutex ram_block_discard_disable_mutex; /* taken around every counter update */
+/*
+ * Lock ram_block_discard_disable_mutex.  The mutex is initialised on first
+ * use, guarded by g_once_init_enter()/g_once_init_leave().
+ */
+static void ram_block_discard_disable_mutex_lock(void)
+{
+    static gsize initialized;
+
+    if (g_once_init_enter(&initialized)) {
+        qemu_mutex_init(&ram_block_discard_disable_mutex);
+        g_once_init_leave(&initialized, 1);
+    }
+    qemu_mutex_lock(&ram_block_discard_disable_mutex);
+}
+/* Counterpart of ram_block_discard_disable_mutex_lock(). */
+static void ram_block_discard_disable_mutex_unlock(void)
+{
+    qemu_mutex_unlock(&ram_block_discard_disable_mutex);
+}
+/*
+ * state == true: take a "disable any discards" reference; fails with -EBUSY
+ * while any "required" counter (plain or coordinated) is non-zero.
+ * state == false: drop one such reference.  Returns 0 unless -EBUSY.
+ */
+int ram_block_discard_disable(bool state)
+{
+    int ret = 0;
+
+    ram_block_discard_disable_mutex_lock();
+    if (!state) {
+        ram_block_discard_disabled_cnt--;
+    } else if (ram_block_discard_required_cnt ||
+               ram_block_coordinated_discard_required_cnt) {
+        ret = -EBUSY;
+    } else {
+        ram_block_discard_disabled_cnt++;
+    }
+    ram_block_discard_disable_mutex_unlock();
+    return ret;
+}
+/*
+ * state == true: take a "disable uncoordinated discards" reference; fails
+ * with -EBUSY while ram_block_discard_required_cnt is non-zero.
+ * state == false: drop one such reference.  Returns 0 unless -EBUSY.
+ */
+int ram_block_uncoordinated_discard_disable(bool state)
+{
+    int ret = 0;
+
+    ram_block_discard_disable_mutex_lock();
+    if (!state) {
+        ram_block_uncoordinated_discard_disabled_cnt--;
+    } else if (ram_block_discard_required_cnt) {
+        ret = -EBUSY;
+    } else {
+        ram_block_uncoordinated_discard_disabled_cnt++;
+    }
+    ram_block_discard_disable_mutex_unlock();
+    return ret;
+}
+/*
+ * state == true: take a "discards required" reference; fails with -EBUSY
+ * while any "disabled" counter (plain or uncoordinated) is non-zero.
+ * state == false: drop one such reference.  Returns 0 unless -EBUSY.
+ */
+int ram_block_discard_require(bool state)
+{
+    int ret = 0;
+
+    ram_block_discard_disable_mutex_lock();
+    if (!state) {
+        ram_block_discard_required_cnt--;
+    } else if (ram_block_discard_disabled_cnt ||
+               ram_block_uncoordinated_discard_disabled_cnt) {
+        ret = -EBUSY;
+    } else {
+        ram_block_discard_required_cnt++;
+    }
+    ram_block_discard_disable_mutex_unlock();
+    return ret;
+}
+/*
+ * state == true: take a "coordinated discards required" reference; fails
+ * with -EBUSY while ram_block_discard_disabled_cnt is non-zero.
+ * state == false: drop one such reference.  Returns 0 unless -EBUSY.
+ */
+int ram_block_coordinated_discard_require(bool state)
+{
+    int ret = 0;
+
+    ram_block_discard_disable_mutex_lock();
+    if (!state) {
+        ram_block_coordinated_discard_required_cnt--;
+    } else if (ram_block_discard_disabled_cnt) {
+        ret = -EBUSY;
+    } else {
+        ram_block_coordinated_discard_required_cnt++;
+    }
+    ram_block_discard_disable_mutex_unlock();
+    return ret;
+}
+/*
+ * True while either "disabled" counter is non-zero.  The counters are read
+ * with qatomic_read() and the mutex is not taken.
+ */
+bool ram_block_discard_is_disabled(void)
+{
+    return qatomic_read(&ram_block_discard_disabled_cnt) ||
+           qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
+}
+/*
+ * True while either "required" counter is non-zero.  The counters are read
+ * with qatomic_read() and the mutex is not taken.
+ */
+bool ram_block_discard_is_required(void)
+{
+    return qatomic_read(&ram_block_discard_required_cnt) ||
+           qatomic_read(&ram_block_coordinated_discard_required_cnt);
+}
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
new file mode 100644
index 0000000..74f4e41
--- /dev/null
+++ b/system/qdev-monitor.c
@@ -0,0 +1,1148 @@
+/*
+ * Dynamic device configuration and creation.
+ *
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "monitor/qdev.h"
+#include "sysemu/arch_init.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-qdev.h"
+#include "qapi/qmp/dispatch.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/help_option.h"
+#include "qemu/option.h"
+#include "qemu/qemu-print.h"
+#include "qemu/option_int.h"
+#include "sysemu/block-backend.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "qemu/cutils.h"
+#include "hw/qdev-properties.h"
+#include "hw/clock.h"
+#include "hw/boards.h"
+
+/*
+ * Aliases were a bad idea from the start. Let's keep them
+ * from spreading further.
+ *
+ * One row of qdev_alias_table: a device type name, the alias accepted in
+ * its place, and the architectures the row applies to.
+ */
+typedef struct QDevAlias
+{
+    const char *typename; /* QOM type name the alias resolves to */
+    const char *alias;    /* alternative name looked up by find_typename_by_alias() */
+    uint32_t arch_mask;   /* QEMU_ARCH_* bits tested against arch_type; 0 = never filtered */
+} QDevAlias;
+
+/* default virtio transport per architecture */
+#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \
+                              QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \
+                              QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \
+                              QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \
+                              QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA | \
+                              QEMU_ARCH_LOONGARCH)
+#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X)
+#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K)
+
+/* Please keep this table sorted by typename.
*/ +static const QDevAlias qdev_alias_table[] = { + { "AC97", "ac97" }, /* -soundhw name */ + { "e1000", "e1000-82540em" }, + { "ES1370", "es1370" }, /* -soundhw name */ + { "ich9-ahci", "ahci" }, + { "lsi53c895a", "lsi" }, + { "virtio-9p-device", "virtio-9p", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-9p-pci", "virtio-9p", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-balloon-device", "virtio-balloon", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-balloon-ccw", "virtio-balloon", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-balloon-pci", "virtio-balloon", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-blk-device", "virtio-blk", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-blk-ccw", "virtio-blk", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-blk-pci", "virtio-blk", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-gpu-device", "virtio-gpu", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-gpu-ccw", "virtio-gpu", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-keyboard-device", "virtio-keyboard", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-keyboard-pci", "virtio-keyboard", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-mouse-device", "virtio-mouse", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-mouse-ccw", "virtio-mouse", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-mouse-pci", "virtio-mouse", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-net-device", "virtio-net", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-net-ccw", "virtio-net", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-net-pci", "virtio-net", QEMU_ARCH_VIRTIO_PCI }, + 
{ "virtio-rng-device", "virtio-rng", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-rng-ccw", "virtio-rng", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-rng-pci", "virtio-rng", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-scsi-device", "virtio-scsi", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-scsi-ccw", "virtio-scsi", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-scsi-pci", "virtio-scsi", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-serial-device", "virtio-serial", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-serial-ccw", "virtio-serial", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-serial-pci", "virtio-serial", QEMU_ARCH_VIRTIO_PCI}, + { "virtio-tablet-device", "virtio-tablet", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-tablet-ccw", "virtio-tablet", QEMU_ARCH_VIRTIO_CCW }, + { "virtio-tablet-pci", "virtio-tablet", QEMU_ARCH_VIRTIO_PCI }, + { } +}; + +static const char *qdev_class_get_alias(DeviceClass *dc) +{ + const char *typename = object_class_get_name(OBJECT_CLASS(dc)); + int i; + + for (i = 0; qdev_alias_table[i].typename; i++) { + if (qdev_alias_table[i].arch_mask && + !(qdev_alias_table[i].arch_mask & arch_type)) { + continue; + } + + if (strcmp(qdev_alias_table[i].typename, typename) == 0) { + return qdev_alias_table[i].alias; + } + } + + return NULL; +} + +static bool qdev_class_has_alias(DeviceClass *dc) +{ + return (qdev_class_get_alias(dc) != NULL); +} + +static void qdev_print_devinfo(DeviceClass *dc) +{ + qemu_printf("name \"%s\"", object_class_get_name(OBJECT_CLASS(dc))); + if (dc->bus_type) { + qemu_printf(", bus %s", dc->bus_type); + } + if (qdev_class_has_alias(dc)) { + qemu_printf(", alias \"%s\"", qdev_class_get_alias(dc)); + } + if (dc->desc) { + qemu_printf(", desc \"%s\"", dc->desc); + } + if (!dc->user_creatable) { + qemu_printf(", no-user"); + } + qemu_printf("\n"); +} + +static void qdev_print_devinfos(bool show_no_user) +{ + static const char *cat_name[DEVICE_CATEGORY_MAX + 1] = { + [DEVICE_CATEGORY_BRIDGE] = "Controller/Bridge/Hub", + [DEVICE_CATEGORY_USB] = "USB", + [DEVICE_CATEGORY_STORAGE] = "Storage", + 
[DEVICE_CATEGORY_NETWORK] = "Network", + [DEVICE_CATEGORY_INPUT] = "Input", + [DEVICE_CATEGORY_DISPLAY] = "Display", + [DEVICE_CATEGORY_SOUND] = "Sound", + [DEVICE_CATEGORY_MISC] = "Misc", + [DEVICE_CATEGORY_CPU] = "CPU", + [DEVICE_CATEGORY_WATCHDOG]= "Watchdog", + [DEVICE_CATEGORY_MAX] = "Uncategorized", + }; + GSList *list, *elt; + int i; + bool cat_printed; + + module_load_qom_all(); + list = object_class_get_list_sorted(TYPE_DEVICE, false); + + for (i = 0; i <= DEVICE_CATEGORY_MAX; i++) { + cat_printed = false; + for (elt = list; elt; elt = elt->next) { + DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data, + TYPE_DEVICE); + if ((i < DEVICE_CATEGORY_MAX + ? !test_bit(i, dc->categories) + : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX)) + || (!show_no_user + && !dc->user_creatable)) { + continue; + } + if (!cat_printed) { + qemu_printf("%s%s devices:\n", i ? "\n" : "", cat_name[i]); + cat_printed = true; + } + qdev_print_devinfo(dc); + } + } + + g_slist_free(list); +} + +static const char *find_typename_by_alias(const char *alias) +{ + int i; + + for (i = 0; qdev_alias_table[i].alias; i++) { + if (qdev_alias_table[i].arch_mask && + !(qdev_alias_table[i].arch_mask & arch_type)) { + continue; + } + + if (strcmp(qdev_alias_table[i].alias, alias) == 0) { + return qdev_alias_table[i].typename; + } + } + + return NULL; +} + +static DeviceClass *qdev_get_device_class(const char **driver, Error **errp) +{ + ObjectClass *oc; + DeviceClass *dc; + const char *original_name = *driver; + + oc = module_object_class_by_name(*driver); + if (!oc) { + const char *typename = find_typename_by_alias(*driver); + + if (typename) { + *driver = typename; + oc = module_object_class_by_name(*driver); + } + } + + if (!object_class_dynamic_cast(oc, TYPE_DEVICE)) { + if (*driver != original_name) { + error_setg(errp, "'%s' (alias '%s') is not a valid device model" + " name", original_name, *driver); + } else { + error_setg(errp, "'%s' is not a valid device model name", *driver); 
+ } + return NULL; + } + + if (object_class_is_abstract(oc)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", + "a non-abstract device type"); + return NULL; + } + + dc = DEVICE_CLASS(oc); + if (!dc->user_creatable || + (phase_check(PHASE_MACHINE_READY) && !dc->hotpluggable)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", + "a pluggable device type"); + return NULL; + } + + if (object_class_dynamic_cast(oc, TYPE_SYS_BUS_DEVICE)) { + /* sysbus devices need to be allowed by the machine */ + MachineClass *mc = MACHINE_CLASS(object_get_class(qdev_get_machine())); + if (!device_type_is_dynamic_sysbus(mc, *driver)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", + "a dynamic sysbus device type for the machine"); + return NULL; + } + } + + return dc; +} + + +int qdev_device_help(QemuOpts *opts) +{ + Error *local_err = NULL; + const char *driver; + ObjectPropertyInfoList *prop_list; + ObjectPropertyInfoList *prop; + GPtrArray *array; + int i; + + driver = qemu_opt_get(opts, "driver"); + if (driver && is_help_option(driver)) { + qdev_print_devinfos(false); + return 1; + } + + if (!driver || !qemu_opt_has_help_opt(opts)) { + return 0; + } + + if (!object_class_by_name(driver)) { + const char *typename = find_typename_by_alias(driver); + + if (typename) { + driver = typename; + } + } + + prop_list = qmp_device_list_properties(driver, &local_err); + if (local_err) { + goto error; + } + + if (prop_list) { + qemu_printf("%s options:\n", driver); + } else { + qemu_printf("There are no options for %s.\n", driver); + } + array = g_ptr_array_new(); + for (prop = prop_list; prop; prop = prop->next) { + g_ptr_array_add(array, + object_property_help(prop->value->name, + prop->value->type, + prop->value->default_value, + prop->value->description)); + } + g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0); + for (i = 0; i < array->len; i++) { + qemu_printf("%s\n", (char *)array->pdata[i]); + } + g_ptr_array_set_free_func(array, g_free); + 
g_ptr_array_free(array, true); + qapi_free_ObjectPropertyInfoList(prop_list); + return 1; + +error: + error_report_err(local_err); + return 1; +} + +static Object *qdev_get_peripheral(void) +{ + static Object *dev; + + if (dev == NULL) { + dev = container_get(qdev_get_machine(), "/peripheral"); + } + + return dev; +} + +static Object *qdev_get_peripheral_anon(void) +{ + static Object *dev; + + if (dev == NULL) { + dev = container_get(qdev_get_machine(), "/peripheral-anon"); + } + + return dev; +} + +static void qbus_error_append_bus_list_hint(DeviceState *dev, + Error *const *errp) +{ + BusState *child; + const char *sep = " "; + + error_append_hint(errp, "child buses at \"%s\":", + dev->id ? dev->id : object_get_typename(OBJECT(dev))); + QLIST_FOREACH(child, &dev->child_bus, sibling) { + error_append_hint(errp, "%s\"%s\"", sep, child->name); + sep = ", "; + } + error_append_hint(errp, "\n"); +} + +static void qbus_error_append_dev_list_hint(BusState *bus, + Error *const *errp) +{ + BusChild *kid; + const char *sep = " "; + + error_append_hint(errp, "devices at \"%s\":", bus->name); + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + error_append_hint(errp, "%s\"%s\"", sep, + object_get_typename(OBJECT(dev))); + if (dev->id) { + error_append_hint(errp, "/\"%s\"", dev->id); + } + sep = ", "; + } + error_append_hint(errp, "\n"); +} + +static BusState *qbus_find_bus(DeviceState *dev, char *elem) +{ + BusState *child; + + QLIST_FOREACH(child, &dev->child_bus, sibling) { + if (strcmp(child->name, elem) == 0) { + return child; + } + } + return NULL; +} + +static DeviceState *qbus_find_dev(BusState *bus, char *elem) +{ + BusChild *kid; + + /* + * try to match in order: + * (1) instance id, if present + * (2) driver name + * (3) driver alias, if present + */ + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + if (dev->id && strcmp(dev->id, elem) == 0) { + return dev; + } + } + QTAILQ_FOREACH(kid, 
&bus->children, sibling) { + DeviceState *dev = kid->child; + if (strcmp(object_get_typename(OBJECT(dev)), elem) == 0) { + return dev; + } + } + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + DeviceClass *dc = DEVICE_GET_CLASS(dev); + + if (qdev_class_has_alias(dc) && + strcmp(qdev_class_get_alias(dc), elem) == 0) { + return dev; + } + } + return NULL; +} + +static inline bool qbus_is_full(BusState *bus) +{ + BusClass *bus_class; + + if (bus->full) { + return true; + } + bus_class = BUS_GET_CLASS(bus); + return bus_class->max_dev && bus->num_children >= bus_class->max_dev; +} + +/* + * Search the tree rooted at @bus for a bus. + * If @name, search for a bus with that name. Note that bus names + * need not be unique. Yes, that's screwed up. + * Else search for a bus that is a subtype of @bus_typename. + * If more than one exists, prefer one that can take another device. + * Return the bus if found, else %NULL. + */ +static BusState *qbus_find_recursive(BusState *bus, const char *name, + const char *bus_typename) +{ + BusChild *kid; + BusState *pick, *child, *ret; + bool match; + + assert(name || bus_typename); + if (name) { + match = !strcmp(bus->name, name); + } else { + match = !!object_dynamic_cast(OBJECT(bus), bus_typename); + } + + if (match && !qbus_is_full(bus)) { + return bus; /* root matches and isn't full */ + } + + pick = match ? 
bus : NULL; + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + QLIST_FOREACH(child, &dev->child_bus, sibling) { + ret = qbus_find_recursive(child, name, bus_typename); + if (ret && !qbus_is_full(ret)) { + return ret; /* a descendant matches and isn't full */ + } + if (ret && !pick) { + pick = ret; + } + } + } + + /* root or a descendant matches, but is full */ + return pick; +} + +static BusState *qbus_find(const char *path, Error **errp) +{ + DeviceState *dev; + BusState *bus; + char elem[128]; + int pos, len; + + /* find start element */ + if (path[0] == '/') { + bus = sysbus_get_default(); + pos = 0; + } else { + if (sscanf(path, "%127[^/]%n", elem, &len) != 1) { + assert(!path[0]); + elem[0] = len = 0; + } + bus = qbus_find_recursive(sysbus_get_default(), elem, NULL); + if (!bus) { + error_setg(errp, "Bus '%s' not found", elem); + return NULL; + } + pos = len; + } + + for (;;) { + assert(path[pos] == '/' || !path[pos]); + while (path[pos] == '/') { + pos++; + } + if (path[pos] == '\0') { + break; + } + + /* find device */ + if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) { + g_assert_not_reached(); + elem[0] = len = 0; + } + pos += len; + dev = qbus_find_dev(bus, elem); + if (!dev) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", elem); + qbus_error_append_dev_list_hint(bus, errp); + return NULL; + } + + assert(path[pos] == '/' || !path[pos]); + while (path[pos] == '/') { + pos++; + } + if (path[pos] == '\0') { + /* last specified element is a device. 
If it has exactly + * one child bus accept it nevertheless */ + if (dev->num_child_bus == 1) { + bus = QLIST_FIRST(&dev->child_bus); + break; + } + if (dev->num_child_bus) { + error_setg(errp, "Device '%s' has multiple child buses", + elem); + qbus_error_append_bus_list_hint(dev, errp); + } else { + error_setg(errp, "Device '%s' has no child bus", elem); + } + return NULL; + } + + /* find bus */ + if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) { + g_assert_not_reached(); + elem[0] = len = 0; + } + pos += len; + bus = qbus_find_bus(dev, elem); + if (!bus) { + error_setg(errp, "Bus '%s' not found", elem); + qbus_error_append_bus_list_hint(dev, errp); + return NULL; + } + } + + if (qbus_is_full(bus)) { + error_setg(errp, "Bus '%s' is full", path); + return NULL; + } + return bus; +} + +/* Takes ownership of @id, will be freed when deleting the device */ +const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) +{ + ObjectProperty *prop; + + assert(!dev->id && !dev->realized); + + /* + * object_property_[try_]add_child() below will assert the device + * has no parent + */ + if (id) { + prop = object_property_try_add_child(qdev_get_peripheral(), id, + OBJECT(dev), NULL); + if (prop) { + dev->id = id; + } else { + error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); + return NULL; + } + } else { + static int anon_count; + gchar *name = g_strdup_printf("device[%d]", anon_count++); + prop = object_property_add_child(qdev_get_peripheral_anon(), name, + OBJECT(dev)); + g_free(name); + } + + return prop->name; +} + +DeviceState *qdev_device_add_from_qdict(const QDict *opts, + bool from_json, Error **errp) +{ + ERRP_GUARD(); + DeviceClass *dc; + const char *driver, *path; + char *id; + DeviceState *dev = NULL; + BusState *bus = NULL; + + driver = qdict_get_try_str(opts, "driver"); + if (!driver) { + error_setg(errp, QERR_MISSING_PARAMETER, "driver"); + return NULL; + } + + /* find driver */ + dc = qdev_get_device_class(&driver, errp); + if (!dc) { + 
return NULL; + } + + /* find bus */ + path = qdict_get_try_str(opts, "bus"); + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { + error_setg(errp, "Device '%s' can't go on %s bus", + driver, object_get_typename(OBJECT(bus))); + return NULL; + } + } else if (dc->bus_type != NULL) { + bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type); + if (!bus || qbus_is_full(bus)) { + error_setg(errp, "No '%s' bus found for device '%s'", + dc->bus_type, driver); + return NULL; + } + } + + if (qdev_should_hide_device(opts, from_json, errp)) { + if (bus && !qbus_is_hotpluggable(bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); + } + return NULL; + } else if (*errp) { + return NULL; + } + + if (phase_check(PHASE_MACHINE_READY) && bus && !qbus_is_hotpluggable(bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); + return NULL; + } + + if (!migration_is_idle()) { + error_setg(errp, "device_add not allowed while migrating"); + return NULL; + } + + /* create device */ + dev = qdev_new(driver); + + /* Check whether the hotplug is allowed by the machine */ + if (phase_check(PHASE_MACHINE_READY)) { + if (!qdev_hotplug_allowed(dev, errp)) { + goto err_del_dev; + } + + if (!bus && !qdev_get_machine_hotplug_handler(dev)) { + /* No bus, no machine hotplug handler --> device is not hotpluggable */ + error_setg(errp, "Device '%s' can not be hotplugged on this machine", + driver); + goto err_del_dev; + } + } + + /* + * set dev's parent and register its id. + * If it fails it means the id is already taken. 
+ */ + id = g_strdup(qdict_get_try_str(opts, "id")); + if (!qdev_set_id(dev, id, errp)) { + goto err_del_dev; + } + + /* set properties */ + dev->opts = qdict_clone_shallow(opts); + qdict_del(dev->opts, "driver"); + qdict_del(dev->opts, "bus"); + qdict_del(dev->opts, "id"); + + object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, + errp); + if (*errp) { + goto err_del_dev; + } + + if (!qdev_realize(dev, bus, errp)) { + goto err_del_dev; + } + return dev; + +err_del_dev: + if (dev) { + object_unparent(OBJECT(dev)); + object_unref(OBJECT(dev)); + } + return NULL; +} + +/* Takes ownership of @opts on success */ +DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) +{ + QDict *qdict = qemu_opts_to_qdict(opts, NULL); + DeviceState *ret; + + ret = qdev_device_add_from_qdict(qdict, false, errp); + if (ret) { + qemu_opts_del(opts); + } + qobject_unref(qdict); + return ret; +} + +#define qdev_printf(fmt, ...) monitor_printf(mon, "%*s" fmt, indent, "", ## __VA_ARGS__) +static void qbus_print(Monitor *mon, BusState *bus, int indent); + +static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props, + int indent) +{ + if (!props) + return; + for (; props->name; props++) { + char *value; + char *legacy_name = g_strdup_printf("legacy-%s", props->name); + + if (object_property_get_type(OBJECT(dev), legacy_name, NULL)) { + value = object_property_get_str(OBJECT(dev), legacy_name, NULL); + } else { + value = object_property_print(OBJECT(dev), props->name, true, + NULL); + } + g_free(legacy_name); + + if (!value) { + continue; + } + qdev_printf("%s = %s\n", props->name, + *value ? 
value : ""); + g_free(value); + } +} + +static void bus_print_dev(BusState *bus, Monitor *mon, DeviceState *dev, int indent) +{ + BusClass *bc = BUS_GET_CLASS(bus); + + if (bc->print_dev) { + bc->print_dev(mon, dev, indent); + } +} + +static void qdev_print(Monitor *mon, DeviceState *dev, int indent) +{ + ObjectClass *class; + BusState *child; + NamedGPIOList *ngl; + NamedClockList *ncl; + + qdev_printf("dev: %s, id \"%s\"\n", object_get_typename(OBJECT(dev)), + dev->id ? dev->id : ""); + indent += 2; + QLIST_FOREACH(ngl, &dev->gpios, node) { + if (ngl->num_in) { + qdev_printf("gpio-in \"%s\" %d\n", ngl->name ? ngl->name : "", + ngl->num_in); + } + if (ngl->num_out) { + qdev_printf("gpio-out \"%s\" %d\n", ngl->name ? ngl->name : "", + ngl->num_out); + } + } + QLIST_FOREACH(ncl, &dev->clocks, node) { + g_autofree char *freq_str = clock_display_freq(ncl->clock); + qdev_printf("clock-%s%s \"%s\" freq_hz=%s\n", + ncl->output ? "out" : "in", + ncl->alias ? " (alias)" : "", + ncl->name, freq_str); + } + class = object_get_class(OBJECT(dev)); + do { + qdev_print_props(mon, dev, DEVICE_CLASS(class)->props_, indent); + class = object_class_get_parent(class); + } while (class != object_class_by_name(TYPE_DEVICE)); + bus_print_dev(dev->parent_bus, mon, dev, indent); + QLIST_FOREACH(child, &dev->child_bus, sibling) { + qbus_print(mon, child, indent); + } +} + +static void qbus_print(Monitor *mon, BusState *bus, int indent) +{ + BusChild *kid; + + qdev_printf("bus: %s\n", bus->name); + indent += 2; + qdev_printf("type %s\n", object_get_typename(OBJECT(bus))); + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + qdev_print(mon, dev, indent); + } +} +#undef qdev_printf + +void hmp_info_qtree(Monitor *mon, const QDict *qdict) +{ + if (sysbus_get_default()) + qbus_print(mon, sysbus_get_default(), 0); +} + +void hmp_info_qdm(Monitor *mon, const QDict *qdict) +{ + qdev_print_devinfos(true); +} + +void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp) +{ + QemuOpts *opts; + DeviceState *dev; + + opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, errp); + if (!opts) { + return; + } + if (!monitor_cur_is_qmp() && qdev_device_help(opts)) { + qemu_opts_del(opts); + return; + } + dev = qdev_device_add(opts, errp); + + /* + * Drain all pending RCU callbacks. This is done because + * some bus related operations can delay a device removal + * (in this case this can happen if device is added and then + * removed due to a configuration error) + * to a RCU callback, but user might expect that this interface + * will finish its job completely once qmp command returns result + * to the user + */ + drain_call_rcu(); + + if (!dev) { + qemu_opts_del(opts); + return; + } + object_unref(OBJECT(dev)); +} + +static DeviceState *find_device_state(const char *id, Error **errp) +{ + Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); + DeviceState *dev; + + if (!obj) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", id); + return NULL; + } + + dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE); + if (!dev) { + error_setg(errp, "%s is not a hotpluggable device", id); + return NULL; + } + + return dev; +} + +void qdev_unplug(DeviceState *dev, Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + HotplugHandler *hotplug_ctrl; + HotplugHandlerClass *hdc; + Error *local_err = NULL; + + if (qdev_unplug_blocked(dev, errp)) { + return; + } + + if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); + return; + } + + if (!dc->hotpluggable) { + error_setg(errp, QERR_DEVICE_NO_HOTPLUG, + object_get_typename(OBJECT(dev))); + return; + } + + if (!migration_is_idle() && !dev->allow_unplug_during_migration) { + error_setg(errp, "device_del not allowed while migrating"); + return; + } + + qdev_hot_removed = true; + + hotplug_ctrl = qdev_get_hotplug_handler(dev); + /* hotpluggable device MUST have 
HotplugHandler, if it doesn't + * then something is very wrong with it */ + g_assert(hotplug_ctrl); + + /* If device supports async unplug just request it to be done, + * otherwise just remove it synchronously */ + hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); + if (hdc->unplug_request) { + hotplug_handler_unplug_request(hotplug_ctrl, dev, &local_err); + } else { + hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); + if (!local_err) { + object_unparent(OBJECT(dev)); + } + } + error_propagate(errp, local_err); +} + +void qmp_device_del(const char *id, Error **errp) +{ + DeviceState *dev = find_device_state(id, errp); + if (dev != NULL) { + if (dev->pending_deleted_event && + (dev->pending_deleted_expires_ms == 0 || + dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { + error_setg(errp, "Device %s is already in the " + "process of unplug", id); + return; + } + + qdev_unplug(dev, errp); + } +} + +void hmp_device_add(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + + qmp_device_add((QDict *)qdict, NULL, &err); + hmp_handle_error(mon, err); +} + +void hmp_device_del(Monitor *mon, const QDict *qdict) +{ + const char *id = qdict_get_str(qdict, "id"); + Error *err = NULL; + + qmp_device_del(id, &err); + hmp_handle_error(mon, err); +} + +void device_add_completion(ReadLineState *rs, int nb_args, const char *str) +{ + GSList *list, *elt; + size_t len; + + if (nb_args != 2) { + return; + } + + len = strlen(str); + readline_set_completion_index(rs, len); + list = elt = object_class_get_list(TYPE_DEVICE, false); + while (elt) { + DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data, + TYPE_DEVICE); + + if (dc->user_creatable) { + readline_add_completion_of(rs, str, + object_class_get_name(OBJECT_CLASS(dc))); + } + elt = elt->next; + } + g_slist_free(list); +} + +static int qdev_add_hotpluggable_device(Object *obj, void *opaque) +{ + GSList **list = opaque; + DeviceState *dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE); 
+ + if (dev == NULL) { + return 0; + } + + if (dev->realized && object_property_get_bool(obj, "hotpluggable", NULL)) { + *list = g_slist_append(*list, dev); + } + + return 0; +} + +static GSList *qdev_build_hotpluggable_device_list(Object *peripheral) +{ + GSList *list = NULL; + + object_child_foreach(peripheral, qdev_add_hotpluggable_device, &list); + + return list; +} + +static void peripheral_device_del_completion(ReadLineState *rs, + const char *str) +{ + Object *peripheral = container_get(qdev_get_machine(), "/peripheral"); + GSList *list, *item; + + list = qdev_build_hotpluggable_device_list(peripheral); + if (!list) { + return; + } + + for (item = list; item; item = g_slist_next(item)) { + DeviceState *dev = item->data; + + if (dev->id) { + readline_add_completion_of(rs, str, dev->id); + } + } + + g_slist_free(list); +} + +void device_del_completion(ReadLineState *rs, int nb_args, const char *str) +{ + if (nb_args != 2) { + return; + } + + readline_set_completion_index(rs, strlen(str)); + peripheral_device_del_completion(rs, str); +} + +BlockBackend *blk_by_qdev_id(const char *id, Error **errp) +{ + DeviceState *dev; + BlockBackend *blk; + + GLOBAL_STATE_CODE(); + + dev = find_device_state(id, errp); + if (dev == NULL) { + return NULL; + } + + blk = blk_by_dev(dev); + if (!blk) { + error_setg(errp, "Device does not have a block device backend"); + } + return blk; +} + +QemuOptsList qemu_device_opts = { + .name = "device", + .implied_opt_name = "driver", + .head = QTAILQ_HEAD_INITIALIZER(qemu_device_opts.head), + .desc = { + /* + * no elements => accept any + * sanity checking will happen later + * when setting device properties + */ + { /* end of list */ } + }, +}; + +QemuOptsList qemu_global_opts = { + .name = "global", + .head = QTAILQ_HEAD_INITIALIZER(qemu_global_opts.head), + .desc = { + { + .name = "driver", + .type = QEMU_OPT_STRING, + },{ + .name = "property", + .type = QEMU_OPT_STRING, + },{ + .name = "value", + .type = QEMU_OPT_STRING, + }, + { /* 
end of list */ } + }, +}; + +int qemu_global_option(const char *str) +{ + char driver[64], property[64]; + QemuOpts *opts; + int rc, offset; + + rc = sscanf(str, "%63[^.=].%63[^=]%n", driver, property, &offset); + if (rc == 2 && str[offset] == '=') { + opts = qemu_opts_create(&qemu_global_opts, NULL, 0, &error_abort); + qemu_opt_set(opts, "driver", driver, &error_abort); + qemu_opt_set(opts, "property", property, &error_abort); + qemu_opt_set(opts, "value", str + offset + 1, &error_abort); + return 0; + } + + opts = qemu_opts_parse_noisily(&qemu_global_opts, str, false); + if (!opts) { + return -1; + } + if (!qemu_opt_get(opts, "driver") + || !qemu_opt_get(opts, "property") + || !qemu_opt_get(opts, "value")) { + error_report("options 'driver', 'property', and 'value'" + " are required"); + return -1; + } + + return 0; +} + +bool qmp_command_available(const QmpCommand *cmd, Error **errp) +{ + if (!phase_check(PHASE_MACHINE_READY) && + !(cmd->options & QCO_ALLOW_PRECONFIG)) { + error_setg(errp, "The command '%s' is permitted only after machine initialization has completed", + cmd->name); + return false; + } + return true; +} diff --git a/system/qemu-seccomp.c b/system/qemu-seccomp.c new file mode 100644 index 0000000..4d7439e --- /dev/null +++ b/system/qemu-seccomp.c @@ -0,0 +1,486 @@ +/* + * QEMU seccomp mode 2 support with libseccomp + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Eduardo Otubo + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/config-file.h" +#include "qemu/option.h" +#include "qemu/module.h" +#include <sys/prctl.h> +#include <seccomp.h> +#include "sysemu/seccomp.h" +#include <linux/seccomp.h> + +/* For some architectures (notably ARM) cacheflush is not supported until + * libseccomp 2.2.3, but configure enforces that we are using a more recent + * version on those hosts, so it is OK for this check to be less strict. + */ +#if SCMP_VER_MAJOR >= 3 + #define HAVE_CACHEFLUSH +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2 + #define HAVE_CACHEFLUSH +#endif + +struct QemuSeccompSyscall { + int32_t num; + uint8_t set; + uint8_t narg; + const struct scmp_arg_cmp *arg_cmp; + uint32_t action; +}; + +const struct scmp_arg_cmp sched_setscheduler_arg[] = { + /* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */ + { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE } +}; + +/* + * See 'NOTES' in 'man 2 clone' - s390 & cris have 'flags' in + * different position to other architectures + */ +#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS) +#define CLONE_FLAGS_ARG 1 +#else +#define CLONE_FLAGS_ARG 0 +#endif + +#ifndef CLONE_PIDFD +# define CLONE_PIDFD 0x00001000 +#endif + +#define REQUIRE_CLONE_FLAG(flag) \ + const struct scmp_arg_cmp clone_arg ## flag[] = { \ + { .arg = CLONE_FLAGS_ARG, \ + .op = SCMP_CMP_MASKED_EQ, \ + .datum_a = flag, .datum_b = 0 } } + +#define FORBID_CLONE_FLAG(flag) \ + const struct scmp_arg_cmp clone_arg ## flag[] = { \ + { .arg = CLONE_FLAGS_ARG, \ + .op = SCMP_CMP_MASKED_EQ, \ + .datum_a = flag, .datum_b = flag } } + +#define RULE_CLONE_FLAG(flag) \ + { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, \ + ARRAY_SIZE(clone_arg ## flag), clone_arg ## flag, SCMP_ACT_TRAP } + +/* If no CLONE_* flags are set, except CSIGNAL, deny */ +const struct scmp_arg_cmp clone_arg_none[] = { + { .arg = CLONE_FLAGS_ARG, + .op = SCMP_CMP_MASKED_EQ, + .datum_a = ~(CSIGNAL), .datum_b = 0 } +}; + +/* + * pthread_create should always
set all of these. + */ +REQUIRE_CLONE_FLAG(CLONE_VM); +REQUIRE_CLONE_FLAG(CLONE_FS); +REQUIRE_CLONE_FLAG(CLONE_FILES); +REQUIRE_CLONE_FLAG(CLONE_SIGHAND); +REQUIRE_CLONE_FLAG(CLONE_THREAD); +REQUIRE_CLONE_FLAG(CLONE_SYSVSEM); +REQUIRE_CLONE_FLAG(CLONE_SETTLS); +REQUIRE_CLONE_FLAG(CLONE_PARENT_SETTID); +REQUIRE_CLONE_FLAG(CLONE_CHILD_CLEARTID); +/* + * Musl sets this in pthread_create too, but it is + * obsolete and harmless since its behaviour is + * subsumed under CLONE_THREAD + */ +/*REQUIRE_CLONE_FLAG(CLONE_DETACHED);*/ + + +/* + * These all indicate an attempt to spawn a process + * instead of a thread, or other undesirable scenarios + */ +FORBID_CLONE_FLAG(CLONE_PIDFD); +FORBID_CLONE_FLAG(CLONE_PTRACE); +FORBID_CLONE_FLAG(CLONE_VFORK); +FORBID_CLONE_FLAG(CLONE_PARENT); +FORBID_CLONE_FLAG(CLONE_NEWNS); +FORBID_CLONE_FLAG(CLONE_UNTRACED); +FORBID_CLONE_FLAG(CLONE_NEWCGROUP); +FORBID_CLONE_FLAG(CLONE_NEWUTS); +FORBID_CLONE_FLAG(CLONE_NEWIPC); +FORBID_CLONE_FLAG(CLONE_NEWUSER); +FORBID_CLONE_FLAG(CLONE_NEWPID); +FORBID_CLONE_FLAG(CLONE_NEWNET); +FORBID_CLONE_FLAG(CLONE_IO); + + +static const struct QemuSeccompSyscall denylist[] = { + /* default set of syscalls that should get blocked */ + { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(getpmsg), 
QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT, + 0, NULL, SCMP_ACT_TRAP }, + /* obsolete */ + { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE, + 0, NULL, SCMP_ACT_TRAP }, + /* privileged */ + { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setpgid), 
QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED, + 0, NULL, SCMP_ACT_TRAP }, + /* spawn */ + { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN, + 0, NULL, SCMP_ACT_TRAP }, + { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, + ARRAY_SIZE(clone_arg_none), clone_arg_none, SCMP_ACT_TRAP }, + RULE_CLONE_FLAG(CLONE_VM), + RULE_CLONE_FLAG(CLONE_FS), + RULE_CLONE_FLAG(CLONE_FILES), + RULE_CLONE_FLAG(CLONE_SIGHAND), + RULE_CLONE_FLAG(CLONE_THREAD), + RULE_CLONE_FLAG(CLONE_SYSVSEM), + RULE_CLONE_FLAG(CLONE_SETTLS), + RULE_CLONE_FLAG(CLONE_PARENT_SETTID), + RULE_CLONE_FLAG(CLONE_CHILD_CLEARTID), + /*RULE_CLONE_FLAG(CLONE_DETACHED),*/ + RULE_CLONE_FLAG(CLONE_PIDFD), + RULE_CLONE_FLAG(CLONE_PTRACE), + RULE_CLONE_FLAG(CLONE_VFORK), + RULE_CLONE_FLAG(CLONE_PARENT), + RULE_CLONE_FLAG(CLONE_NEWNS), + RULE_CLONE_FLAG(CLONE_UNTRACED), + RULE_CLONE_FLAG(CLONE_NEWCGROUP), + RULE_CLONE_FLAG(CLONE_NEWUTS), + RULE_CLONE_FLAG(CLONE_NEWIPC), + RULE_CLONE_FLAG(CLONE_NEWUSER), + RULE_CLONE_FLAG(CLONE_NEWPID), + RULE_CLONE_FLAG(CLONE_NEWNET), + RULE_CLONE_FLAG(CLONE_IO), +#ifdef __SNR_clone3 + { SCMP_SYS(clone3), QEMU_SECCOMP_SET_SPAWN, + 0, NULL, SCMP_ACT_ERRNO(ENOSYS) }, +#endif +#ifdef __SNR_execveat + { SCMP_SYS(execveat), QEMU_SECCOMP_SET_SPAWN }, +#endif + { SCMP_SYS(setns), QEMU_SECCOMP_SET_SPAWN }, + { SCMP_SYS(unshare), 
QEMU_SECCOMP_SET_SPAWN }, + /* resource control */ + { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL, + 0, NULL, SCMP_ACT_ERRNO(EPERM) }, + { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL, + 0, NULL, SCMP_ACT_ERRNO(EPERM) }, + { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL, + ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg, + SCMP_ACT_ERRNO(EPERM) }, + { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL, + 0, NULL, SCMP_ACT_ERRNO(EPERM) }, +}; + +static inline __attribute__((unused)) int +qemu_seccomp(unsigned int operation, unsigned int flags, void *args) +{ +#ifdef __NR_seccomp + return syscall(__NR_seccomp, operation, flags, args); +#else + errno = ENOSYS; + return -1; +#endif +} + +static uint32_t qemu_seccomp_update_action(uint32_t action) +{ +#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ + defined(SECCOMP_RET_KILL_PROCESS) + if (action == SCMP_ACT_TRAP) { + static int kill_process = -1; + if (kill_process == -1) { + uint32_t testaction = SECCOMP_RET_KILL_PROCESS; + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &testaction) == 0) { + kill_process = 1; + } else { + kill_process = 0; + } + } + if (kill_process == 1) { + return SCMP_ACT_KILL_PROCESS; + } + } +#endif + return action; +} + + +static int seccomp_start(uint32_t seccomp_opts, Error **errp) +{ + int rc = -1; + unsigned int i = 0; + scmp_filter_ctx ctx; + + ctx = seccomp_init(SCMP_ACT_ALLOW); + if (ctx == NULL) { + error_setg(errp, "failed to initialize seccomp context"); + goto seccomp_return; + } + +#if defined(CONFIG_SECCOMP_SYSRAWRC) + /* + * This must be the first seccomp_attr_set() call to have full + * error propagation from subsequent seccomp APIs. 
+ */ + rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1); + if (rc != 0) { + error_setg_errno(errp, -rc, + "failed to set seccomp rawrc attribute"); + goto seccomp_return; + } +#endif + + rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); + if (rc != 0) { + error_setg_errno(errp, -rc, + "failed to set seccomp thread synchronization"); + goto seccomp_return; + } + + for (i = 0; i < ARRAY_SIZE(denylist); i++) { + uint32_t action; + if (!(seccomp_opts & denylist[i].set)) { + continue; + } + + action = qemu_seccomp_update_action(denylist[i].action); + rc = seccomp_rule_add_array(ctx, action, denylist[i].num, + denylist[i].narg, denylist[i].arg_cmp); + if (rc < 0) { + error_setg_errno(errp, -rc, + "failed to add seccomp denylist rules"); + goto seccomp_return; + } + } + + rc = seccomp_load(ctx); + if (rc < 0) { + error_setg_errno(errp, -rc, + "failed to load seccomp syscall filter in kernel"); + } + + seccomp_return: + seccomp_release(ctx); + return rc < 0 ? -1 : 0; +} + +int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) +{ + if (qemu_opt_get_bool(opts, "enable", false)) { + uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT + | QEMU_SECCOMP_SET_OBSOLETE; + const char *value = NULL; + + value = qemu_opt_get(opts, "obsolete"); + if (value) { + if (g_str_equal(value, "allow")) { + seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; + } else if (g_str_equal(value, "deny")) { + /* this is the default option, this if is here + * to provide a little bit of consistency for + * the command line */ + } else { + error_setg(errp, "invalid argument for obsolete"); + return -1; + } + } + + value = qemu_opt_get(opts, "elevateprivileges"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + } else if (g_str_equal(value, "children")) { + seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; + + /* calling prctl directly because we're + * not sure if host has CAP_SYS_ADMIN set*/ + if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { + error_setg(errp, 
"failed to set no_new_privs aborting"); + return -1; + } + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_setg(errp, "invalid argument for elevateprivileges"); + return -1; + } + } + + value = qemu_opt_get(opts, "spawn"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_setg(errp, "invalid argument for spawn"); + return -1; + } + } + + value = qemu_opt_get(opts, "resourcecontrol"); + if (value) { + if (g_str_equal(value, "deny")) { + seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; + } else if (g_str_equal(value, "allow")) { + /* default value */ + } else { + error_setg(errp, "invalid argument for resourcecontrol"); + return -1; + } + } + + if (seccomp_start(seccomp_opts, errp) < 0) { + return -1; + } + } + + return 0; +} + +static QemuOptsList qemu_sandbox_opts = { + .name = "sandbox", + .implied_opt_name = "enable", + .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head), + .desc = { + { + .name = "enable", + .type = QEMU_OPT_BOOL, + }, + { + .name = "obsolete", + .type = QEMU_OPT_STRING, + }, + { + .name = "elevateprivileges", + .type = QEMU_OPT_STRING, + }, + { + .name = "spawn", + .type = QEMU_OPT_STRING, + }, + { + .name = "resourcecontrol", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static void seccomp_register(void) +{ + bool add = false; + + /* FIXME: use seccomp_api_get() >= 2 check when released */ + +#if defined(SECCOMP_FILTER_FLAG_TSYNC) + int check; + + /* check host TSYNC capability, it returns errno == ENOSYS if unavailable */ + check = qemu_seccomp(SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, NULL); + if (check < 0 && errno == EFAULT) { + add = true; + } +#endif + + if (add) { + qemu_add_opts(&qemu_sandbox_opts); + } +} +opts_init(seccomp_register); diff --git a/system/qtest.c b/system/qtest.c new file mode 100644 index 0000000..35b643a --- /dev/null +++ 
b/system/qtest.c @@ -0,0 +1,1070 @@ +/* + * Test Server + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/qtest.h" +#include "sysemu/runstate.h" +#include "chardev/char-fe.h" +#include "exec/ioport.h" +#include "exec/memory.h" +#include "exec/tswap.h" +#include "hw/qdev-core.h" +#include "hw/irq.h" +#include "qemu/accel.h" +#include "sysemu/cpu-timers.h" +#include "qemu/config-file.h" +#include "qemu/option.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qemu/cutils.h" +#include "qom/object_interfaces.h" + +#define MAX_IRQ 256 + +#define TYPE_QTEST "qtest" + +OBJECT_DECLARE_SIMPLE_TYPE(QTest, QTEST) + +struct QTest { + Object parent; + + bool has_machine_link; + char *chr_name; + Chardev *chr; + CharBackend qtest_chr; + char *log; +}; + +bool qtest_allowed; + +static DeviceState *irq_intercept_dev; +static FILE *qtest_log_fp; +static QTest *qtest; +static GString *inbuf; +static int irq_levels[MAX_IRQ]; +static GTimer *timer; +static bool qtest_opened; +static void (*qtest_server_send)(void*, const char*); +static void *qtest_server_send_opaque; + +#define FMT_timeval "%.06f" + +/** + * DOC: QTest Protocol + * + * Line based protocol, request/response based. Server can send async messages + * so clients should always handle many async messages before the response + * comes in. + * + * Valid requests + * ^^^^^^^^^^^^^^ + * + * Clock management: + * """"""""""""""""" + * + * The qtest client is completely in charge of the QEMU_CLOCK_VIRTUAL. qtest commands + * let you adjust the value of the clock (monotonically). All the commands + * return the current value of the clock in nanoseconds. + * + * .. code-block:: none + * + * > clock_step + * < OK VALUE + * + * Advance the clock to the next deadline. 
Useful when waiting for + * asynchronous events. + * + * .. code-block:: none + * + * > clock_step NS + * < OK VALUE + * + * Advance the clock by NS nanoseconds. + * + * .. code-block:: none + * + * > clock_set NS + * < OK VALUE + * + * Advance the clock to NS nanoseconds (do nothing if it's already past). + * + * PIO and memory access: + * """""""""""""""""""""" + * + * .. code-block:: none + * + * > outb ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > outw ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > outl ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > inb ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > inw ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > inl ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > writeb ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > writew ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > writel ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > writeq ADDR VALUE + * < OK + * + * .. code-block:: none + * + * > readb ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > readw ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > readl ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > readq ADDR + * < OK VALUE + * + * .. code-block:: none + * + * > read ADDR SIZE + * < OK DATA + * + * .. code-block:: none + * + * > write ADDR SIZE DATA + * < OK + * + * .. code-block:: none + * + * > b64read ADDR SIZE + * < OK B64_DATA + * + * .. code-block:: none + * + * > b64write ADDR SIZE B64_DATA + * < OK + * + * .. code-block:: none + * + * > memset ADDR SIZE VALUE + * < OK + * + * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0. + * For 'memset' a zero size is permitted and does nothing. + * + * DATA is an arbitrarily long hex number prefixed with '0x'. If it's smaller + * than the expected size, the value will be zero filled at the end of the data + * sequence. 
+ * + * B64_DATA is an arbitrarily long base64 encoded string. + * If the sizes do not match, the data will be truncated. + * + * IRQ management: + * """"""""""""""" + * + * .. code-block:: none + * + * > irq_intercept_in QOM-PATH + * < OK + * + * .. code-block:: none + * + * > irq_intercept_out QOM-PATH + * < OK + * + * Attach to the gpio-in (resp. gpio-out) pins exported by the device at + * QOM-PATH. When the pin is triggered, one of the following async messages + * will be printed to the qtest stream:: + * + * IRQ raise NUM + * IRQ lower NUM + * + * where NUM is an IRQ number. For the PC, interrupts can be intercepted + * simply with "irq_intercept_in ioapic" (note that IRQ0 comes out with + * NUM=0 even though it is remapped to GSI 2). + * + * Setting interrupt level: + * """""""""""""""""""""""" + * + * .. code-block:: none + * + * > set_irq_in QOM-PATH NAME NUM LEVEL + * < OK + * + * where NAME is the name of the irq/gpio list, NUM is an IRQ number and + * LEVEL is an signed integer IRQ level. + * + * Forcibly set the given interrupt pin to the given level. + * + */ + +static int hex2nib(char ch) +{ + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } else if (ch >= 'a' && ch <= 'f') { + return 10 + (ch - 'a'); + } else if (ch >= 'A' && ch <= 'F') { + return 10 + (ch - 'A'); + } else { + return -1; + } +} + +void qtest_send_prefix(CharBackend *chr) +{ + if (!qtest_log_fp || !qtest_opened) { + return; + } + + fprintf(qtest_log_fp, "[S +" FMT_timeval "] ", g_timer_elapsed(timer, NULL)); +} + +static void G_GNUC_PRINTF(1, 2) qtest_log_send(const char *fmt, ...) 
+{ + va_list ap; + + if (!qtest_log_fp || !qtest_opened) { + return; + } + + qtest_send_prefix(NULL); + + va_start(ap, fmt); + vfprintf(qtest_log_fp, fmt, ap); + va_end(ap); +} + +static void qtest_server_char_be_send(void *opaque, const char *str) +{ + size_t len = strlen(str); + CharBackend* chr = (CharBackend *)opaque; + qemu_chr_fe_write_all(chr, (uint8_t *)str, len); + if (qtest_log_fp && qtest_opened) { + fprintf(qtest_log_fp, "%s", str); + } +} + +static void qtest_send(CharBackend *chr, const char *str) +{ + qtest_server_send(qtest_server_send_opaque, str); +} + +void qtest_sendf(CharBackend *chr, const char *fmt, ...) +{ + va_list ap; + gchar *buffer; + + va_start(ap, fmt); + buffer = g_strdup_vprintf(fmt, ap); + qtest_send(chr, buffer); + g_free(buffer); + va_end(ap); +} + +static void qtest_irq_handler(void *opaque, int n, int level) +{ + qemu_irq old_irq = *(qemu_irq *)opaque; + qemu_set_irq(old_irq, level); + + if (irq_levels[n] != level) { + CharBackend *chr = &qtest->qtest_chr; + irq_levels[n] = level; + qtest_send_prefix(chr); + qtest_sendf(chr, "IRQ %s %d\n", + level ? 
"raise" : "lower", n); + } +} + +static int64_t qtest_clock_counter; + +int64_t qtest_get_virtual_clock(void) +{ + return qatomic_read_i64(&qtest_clock_counter); +} + +static void qtest_set_virtual_clock(int64_t count) +{ + qatomic_set_i64(&qtest_clock_counter, count); +} + +static void qtest_clock_warp(int64_t dest) +{ + int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + AioContext *aio_context; + assert(qtest_enabled()); + aio_context = qemu_get_aio_context(); + while (clock < dest) { + int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, + QEMU_TIMER_ATTR_ALL); + int64_t warp = qemu_soonest_timeout(dest - clock, deadline); + + qtest_set_virtual_clock(qtest_get_virtual_clock() + warp); + + qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); + timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); + clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); +} + +static bool (*process_command_cb)(CharBackend *chr, gchar **words); + +void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)) +{ + assert(!process_command_cb); /* Switch to a list if we need more than one */ + + process_command_cb = pc_cb; +} + +static void qtest_install_gpio_out_intercept(DeviceState *dev, const char *name, int n) +{ + qemu_irq *disconnected = g_new0(qemu_irq, 1); + qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler, + disconnected, n); + + *disconnected = qdev_intercept_gpio_out(dev, icpt, name, n); +} + +static void qtest_process_command(CharBackend *chr, gchar **words) +{ + const gchar *command; + + g_assert(words); + + command = words[0]; + + if (qtest_log_fp) { + int i; + + fprintf(qtest_log_fp, "[R +" FMT_timeval "]", g_timer_elapsed(timer, NULL)); + for (i = 0; words[i]; i++) { + fprintf(qtest_log_fp, " %s", words[i]); + } + fprintf(qtest_log_fp, "\n"); + } + + g_assert(command); + if (strcmp(words[0], "irq_intercept_out") == 0 + || strcmp(words[0], "irq_intercept_in") == 0) { + DeviceState *dev; + 
NamedGPIOList *ngl; + bool is_named; + bool is_outbound; + bool interception_succeeded = false; + + g_assert(words[1]); + is_named = words[2] != NULL; + is_outbound = words[0][14] == 'o'; + dev = DEVICE(object_resolve_path(words[1], NULL)); + if (!dev) { + qtest_send_prefix(chr); + qtest_send(chr, "FAIL Unknown device\n"); + return; + } + + if (is_named && !is_outbound) { + qtest_send_prefix(chr); + qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n"); + return; + } + + if (irq_intercept_dev) { + qtest_send_prefix(chr); + if (irq_intercept_dev != dev) { + qtest_send(chr, "FAIL IRQ intercept already enabled\n"); + } else { + qtest_send(chr, "OK\n"); + } + return; + } + + QLIST_FOREACH(ngl, &dev->gpios, node) { + /* We don't support inbound interception of named GPIOs yet */ + if (is_outbound) { + /* NULL is valid and matchable, for "unnamed GPIO" */ + if (g_strcmp0(ngl->name, words[2]) == 0) { + int i; + for (i = 0; i < ngl->num_out; ++i) { + qtest_install_gpio_out_intercept(dev, ngl->name, i); + } + interception_succeeded = true; + } + } else { + qemu_irq_intercept_in(ngl->in, qtest_irq_handler, + ngl->num_in); + interception_succeeded = true; + } + } + + qtest_send_prefix(chr); + if (interception_succeeded) { + irq_intercept_dev = dev; + qtest_send(chr, "OK\n"); + } else { + qtest_send(chr, "FAIL No intercepts installed\n"); + } + } else if (strcmp(words[0], "set_irq_in") == 0) { + DeviceState *dev; + qemu_irq irq; + char *name; + int ret; + int num; + int level; + + g_assert(words[1] && words[2] && words[3] && words[4]); + + dev = DEVICE(object_resolve_path(words[1], NULL)); + if (!dev) { + qtest_send_prefix(chr); + qtest_send(chr, "FAIL Unknown device\n"); + return; + } + + if (strcmp(words[2], "unnamed-gpio-in") == 0) { + name = NULL; + } else { + name = words[2]; + } + + ret = qemu_strtoi(words[3], NULL, 0, &num); + g_assert(!ret); + ret = qemu_strtoi(words[4], NULL, 0, &level); + g_assert(!ret); + + irq = qdev_get_gpio_in_named(dev, 
name, num); + + qemu_set_irq(irq, level); + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "outb") == 0 || + strcmp(words[0], "outw") == 0 || + strcmp(words[0], "outl") == 0) { + unsigned long addr; + unsigned long value; + int ret; + + g_assert(words[1] && words[2]); + ret = qemu_strtoul(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtoul(words[2], NULL, 0, &value); + g_assert(ret == 0); + g_assert(addr <= 0xffff); + + if (words[0][3] == 'b') { + cpu_outb(addr, value); + } else if (words[0][3] == 'w') { + cpu_outw(addr, value); + } else if (words[0][3] == 'l') { + cpu_outl(addr, value); + } + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "inb") == 0 || + strcmp(words[0], "inw") == 0 || + strcmp(words[0], "inl") == 0) { + unsigned long addr; + uint32_t value = -1U; + int ret; + + g_assert(words[1]); + ret = qemu_strtoul(words[1], NULL, 0, &addr); + g_assert(ret == 0); + g_assert(addr <= 0xffff); + + if (words[0][2] == 'b') { + value = cpu_inb(addr); + } else if (words[0][2] == 'w') { + value = cpu_inw(addr); + } else if (words[0][2] == 'l') { + value = cpu_inl(addr); + } + qtest_send_prefix(chr); + qtest_sendf(chr, "OK 0x%04x\n", value); + } else if (strcmp(words[0], "writeb") == 0 || + strcmp(words[0], "writew") == 0 || + strcmp(words[0], "writel") == 0 || + strcmp(words[0], "writeq") == 0) { + uint64_t addr; + uint64_t value; + int ret; + + g_assert(words[1] && words[2]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &value); + g_assert(ret == 0); + + if (words[0][5] == 'b') { + uint8_t data = value; + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 1); + } else if (words[0][5] == 'w') { + uint16_t data = value; + tswap16s(&data); + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 2); + } else if (words[0][5] == 'l') { + uint32_t data = value; + tswap32s(&data); 
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 4); + } else if (words[0][5] == 'q') { + uint64_t data = value; + tswap64s(&data); + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 8); + } + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "readb") == 0 || + strcmp(words[0], "readw") == 0 || + strcmp(words[0], "readl") == 0 || + strcmp(words[0], "readq") == 0) { + uint64_t addr; + uint64_t value = UINT64_C(-1); + int ret; + + g_assert(words[1]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + + if (words[0][4] == 'b') { + uint8_t data; + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 1); + value = data; + } else if (words[0][4] == 'w') { + uint16_t data; + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 2); + value = tswap16(data); + } else if (words[0][4] == 'l') { + uint32_t data; + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &data, 4); + value = tswap32(data); + } else if (words[0][4] == 'q') { + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + &value, 8); + tswap64s(&value); + } + qtest_send_prefix(chr); + qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value); + } else if (strcmp(words[0], "read") == 0) { + uint64_t addr, len, i; + uint8_t *data; + char *enc; + int ret; + + g_assert(words[1] && words[2]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &len); + g_assert(ret == 0); + /* We'd send garbage to libqtest if len is 0 */ + g_assert(len); + + data = g_malloc(len); + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, + len); + + enc = g_malloc(2 * len + 1); + for (i = 0; i < len; i++) { + sprintf(&enc[i * 2], "%02x", data[i]); + } + + qtest_send_prefix(chr); + qtest_sendf(chr, "OK 0x%s\n", enc); + + g_free(data); + g_free(enc); + } else if (strcmp(words[0], "b64read") == 0) { + 
uint64_t addr, len; + uint8_t *data; + gchar *b64_data; + int ret; + + g_assert(words[1] && words[2]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &len); + g_assert(ret == 0); + + data = g_malloc(len); + address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, + len); + b64_data = g_base64_encode(data, len); + qtest_send_prefix(chr); + qtest_sendf(chr, "OK %s\n", b64_data); + + g_free(data); + g_free(b64_data); + } else if (strcmp(words[0], "write") == 0) { + uint64_t addr, len, i; + uint8_t *data; + size_t data_len; + int ret; + + g_assert(words[1] && words[2] && words[3]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &len); + g_assert(ret == 0); + + data_len = strlen(words[3]); + if (data_len < 3) { + qtest_send(chr, "ERR invalid argument size\n"); + return; + } + + data = g_malloc(len); + for (i = 0; i < len; i++) { + if ((i * 2 + 4) <= data_len) { + data[i] = hex2nib(words[3][i * 2 + 2]) << 4; + data[i] |= hex2nib(words[3][i * 2 + 3]); + } else { + data[i] = 0; + } + } + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, + len); + g_free(data); + + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "memset") == 0) { + uint64_t addr, len; + uint8_t *data; + unsigned long pattern; + int ret; + + g_assert(words[1] && words[2] && words[3]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &len); + g_assert(ret == 0); + ret = qemu_strtoul(words[3], NULL, 0, &pattern); + g_assert(ret == 0); + + if (len) { + data = g_malloc(len); + memset(data, pattern, len); + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, + data, len); + g_free(data); + } + + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "b64write") == 0) { + uint64_t addr, len; + uint8_t *data; + size_t 
data_len; + gsize out_len; + int ret; + + g_assert(words[1] && words[2] && words[3]); + ret = qemu_strtou64(words[1], NULL, 0, &addr); + g_assert(ret == 0); + ret = qemu_strtou64(words[2], NULL, 0, &len); + g_assert(ret == 0); + + data_len = strlen(words[3]); + if (data_len < 3) { + qtest_send(chr, "ERR invalid argument size\n"); + return; + } + + data = g_base64_decode_inplace(words[3], &out_len); + if (out_len != len) { + qtest_log_send("b64write: data length mismatch (told %"PRIu64", " + "found %zu)\n", + len, out_len); + out_len = MIN(out_len, len); + } + + address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, + len); + + qtest_send_prefix(chr); + qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "endianness") == 0) { + qtest_send_prefix(chr); + if (target_words_bigendian()) { + qtest_sendf(chr, "OK big\n"); + } else { + qtest_sendf(chr, "OK little\n"); + } + } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) { + int64_t ns; + + if (words[1]) { + int ret = qemu_strtoi64(words[1], NULL, 0, &ns); + g_assert(ret == 0); + } else { + ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, + QEMU_TIMER_ATTR_ALL); + } + qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns); + qtest_send_prefix(chr); + qtest_sendf(chr, "OK %"PRIi64"\n", + (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + } else if (strcmp(words[0], "module_load") == 0) { + Error *local_err = NULL; + int rv; + g_assert(words[1] && words[2]); + + qtest_send_prefix(chr); + rv = module_load(words[1], words[2], &local_err); + if (rv > 0) { + qtest_sendf(chr, "OK\n"); + } else { + if (rv < 0) { + error_report_err(local_err); + } + qtest_sendf(chr, "FAIL\n"); + } + } else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) { + int64_t ns; + int ret; + + g_assert(words[1]); + ret = qemu_strtoi64(words[1], NULL, 0, &ns); + g_assert(ret == 0); + qtest_clock_warp(ns); + qtest_send_prefix(chr); + qtest_sendf(chr, "OK %"PRIi64"\n", + 
(int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + } else if (process_command_cb && process_command_cb(chr, words)) { + /* Command got consumed by the callback handler */ + } else { + qtest_send_prefix(chr); + qtest_sendf(chr, "FAIL Unknown command '%s'\n", words[0]); + } +} + +static void qtest_process_inbuf(CharBackend *chr, GString *inbuf) +{ + char *end; + + while ((end = strchr(inbuf->str, '\n')) != NULL) { + size_t offset; + GString *cmd; + gchar **words; + + offset = end - inbuf->str; + + cmd = g_string_new_len(inbuf->str, offset); + g_string_erase(inbuf, 0, offset + 1); + + words = g_strsplit(cmd->str, " ", 0); + qtest_process_command(chr, words); + g_strfreev(words); + + g_string_free(cmd, TRUE); + } +} + +static void qtest_read(void *opaque, const uint8_t *buf, int size) +{ + CharBackend *chr = opaque; + + g_string_append_len(inbuf, (const gchar *)buf, size); + qtest_process_inbuf(chr, inbuf); +} + +static int qtest_can_read(void *opaque) +{ + return 1024; +} + +static void qtest_event(void *opaque, QEMUChrEvent event) +{ + int i; + + switch (event) { + case CHR_EVENT_OPENED: + /* + * We used to call qemu_system_reset() here, hoping we could + * use the same process for multiple tests that way. Never + * used. Injects an extra reset even when it's not used, and + * that can mess up tests, e.g. -boot once. 
+ */ + for (i = 0; i < ARRAY_SIZE(irq_levels); i++) { + irq_levels[i] = 0; + } + + g_clear_pointer(&timer, g_timer_destroy); + timer = g_timer_new(); + qtest_opened = true; + if (qtest_log_fp) { + fprintf(qtest_log_fp, "[I " FMT_timeval "] OPENED\n", g_timer_elapsed(timer, NULL)); + } + break; + case CHR_EVENT_CLOSED: + qtest_opened = false; + if (qtest_log_fp) { + fprintf(qtest_log_fp, "[I +" FMT_timeval "] CLOSED\n", g_timer_elapsed(timer, NULL)); + } + g_clear_pointer(&timer, g_timer_destroy); + break; + default: + break; + } +} + +void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) +{ + ERRP_GUARD(); + Chardev *chr; + Object *qtest; + + chr = qemu_chr_new("qtest", qtest_chrdev, NULL); + if (chr == NULL) { + error_setg(errp, "Failed to initialize device for qtest: \"%s\"", + qtest_chrdev); + return; + } + + qtest = object_new(TYPE_QTEST); + object_property_set_str(qtest, "chardev", chr->label, &error_abort); + if (qtest_log) { + object_property_set_str(qtest, "log", qtest_log, &error_abort); + } + object_property_add_child(qdev_get_machine(), "qtest", qtest); + user_creatable_complete(USER_CREATABLE(qtest), errp); + if (*errp) { + object_unparent(qtest); + } + object_unref(OBJECT(chr)); + object_unref(qtest); +} + +static bool qtest_server_start(QTest *q, Error **errp) +{ + Chardev *chr = q->chr; + const char *qtest_log = q->log; + + if (qtest_log) { + if (strcmp(qtest_log, "none") != 0) { + qtest_log_fp = fopen(qtest_log, "w+"); + } + } else { + qtest_log_fp = stderr; + } + + if (!qemu_chr_fe_init(&q->qtest_chr, chr, errp)) { + return false; + } + qemu_chr_fe_set_handlers(&q->qtest_chr, qtest_can_read, qtest_read, + qtest_event, NULL, &q->qtest_chr, NULL, true); + qemu_chr_fe_set_echo(&q->qtest_chr, true); + + inbuf = g_string_new(""); + + if (!qtest_server_send) { + qtest_server_set_send_handler(qtest_server_char_be_send, &q->qtest_chr); + } + qtest = q; + return true; +} + +void qtest_server_set_send_handler(void 
(*send)(void*, const char*), + void *opaque) +{ + qtest_server_send = send; + qtest_server_send_opaque = opaque; +} + +bool qtest_driver(void) +{ + return qtest && qtest->qtest_chr.chr != NULL; +} + +void qtest_server_inproc_recv(void *dummy, const char *buf) +{ + static GString *gstr; + if (!gstr) { + gstr = g_string_new(NULL); + } + g_string_append(gstr, buf); + if (gstr->str[gstr->len - 1] == '\n') { + qtest_process_inbuf(NULL, gstr); + g_string_truncate(gstr, 0); + } +} + +static void qtest_complete(UserCreatable *uc, Error **errp) +{ + QTest *q = QTEST(uc); + if (qtest) { + error_setg(errp, "Only one instance of qtest can be created"); + return; + } + if (!q->chr_name) { + error_setg(errp, "No backend specified"); + return; + } + + if (OBJECT(uc)->parent != qdev_get_machine()) { + q->has_machine_link = true; + object_property_add_const_link(qdev_get_machine(), "qtest", OBJECT(uc)); + } else { + /* -qtest was used. */ + } + + qtest_server_start(q, errp); +} + +static void qtest_unparent(Object *obj) +{ + QTest *q = QTEST(obj); + + if (qtest == q) { + qemu_chr_fe_disconnect(&q->qtest_chr); + assert(!qtest_opened); + qemu_chr_fe_deinit(&q->qtest_chr, false); + if (qtest_log_fp) { + fclose(qtest_log_fp); + qtest_log_fp = NULL; + } + qtest = NULL; + } + + if (q->has_machine_link) { + object_property_del(qdev_get_machine(), "qtest"); + q->has_machine_link = false; + } +} + +static void qtest_set_log(Object *obj, const char *value, Error **errp) +{ + QTest *q = QTEST(obj); + + if (qtest == q) { + error_setg(errp, "Property 'log' can not be set now"); + } else { + g_free(q->log); + q->log = g_strdup(value); + } +} + +static char *qtest_get_log(Object *obj, Error **errp) +{ + QTest *q = QTEST(obj); + + return g_strdup(q->log); +} + +static void qtest_set_chardev(Object *obj, const char *value, Error **errp) +{ + QTest *q = QTEST(obj); + Chardev *chr; + + if (qtest == q) { + error_setg(errp, "Property 'chardev' can not be set now"); + return; + } + + chr = 
qemu_chr_find(value); + if (!chr) { + error_setg(errp, "Cannot find character device '%s'", value); + return; + } + + g_free(q->chr_name); + q->chr_name = g_strdup(value); + + if (q->chr) { + object_unref(q->chr); + } + q->chr = chr; + object_ref(chr); +} + +static char *qtest_get_chardev(Object *obj, Error **errp) +{ + QTest *q = QTEST(obj); + + return g_strdup(q->chr_name); +} + +static void qtest_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + oc->unparent = qtest_unparent; + ucc->complete = qtest_complete; + + object_class_property_add_str(oc, "chardev", + qtest_get_chardev, qtest_set_chardev); + object_class_property_add_str(oc, "log", + qtest_get_log, qtest_set_log); +} + +static const TypeInfo qtest_info = { + .name = TYPE_QTEST, + .parent = TYPE_OBJECT, + .class_init = qtest_class_init, + .instance_size = sizeof(QTest), + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&qtest_info); +} + +type_init(register_types); diff --git a/system/rtc.c b/system/rtc.c new file mode 100644 index 0000000..4904581 --- /dev/null +++ b/system/rtc.c @@ -0,0 +1,192 @@ +/* + * RTC configuration and clock read + * + * Copyright (c) 2003-2020 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/timer.h" +#include "qom/object.h" +#include "sysemu/replay.h" +#include "sysemu/sysemu.h" +#include "sysemu/rtc.h" +#include "hw/rtc/mc146818rtc.h" + +static enum { + RTC_BASE_UTC, + RTC_BASE_LOCALTIME, + RTC_BASE_DATETIME, +} rtc_base_type = RTC_BASE_UTC; +static time_t rtc_ref_start_datetime; +static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ +static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ +QEMUClockType rtc_clock; +/***********************************************************/ +/* RTC reference time/date access */ +static time_t qemu_ref_timedate(QEMUClockType clock) +{ + time_t value = qemu_clock_get_ms(clock) / 1000; + switch (clock) { + case QEMU_CLOCK_REALTIME: + value -= rtc_realtime_clock_offset; + /* fall through */ + case QEMU_CLOCK_VIRTUAL: + value += rtc_ref_start_datetime; + break; + case QEMU_CLOCK_HOST: + if (rtc_base_type == RTC_BASE_DATETIME) { + value -= rtc_host_datetime_offset; + } + break; + default: + assert(0); + } + return value; +} + +void qemu_get_timedate(struct tm *tm, time_t offset) +{ + time_t ti = qemu_ref_timedate(rtc_clock); + + ti += offset; + + switch (rtc_base_type) { + case RTC_BASE_DATETIME: + case RTC_BASE_UTC: + gmtime_r(&ti, tm); + break; + case RTC_BASE_LOCALTIME: + localtime_r(&ti, 
tm); + break; + } +} + +time_t qemu_timedate_diff(struct tm *tm) +{ + time_t seconds; + + switch (rtc_base_type) { + case RTC_BASE_DATETIME: + case RTC_BASE_UTC: + seconds = mktimegm(tm); + break; + case RTC_BASE_LOCALTIME: + { + struct tm tmp = *tm; + tmp.tm_isdst = -1; /* use timezone to figure it out */ + seconds = mktime(&tmp); + break; + } + default: + abort(); + } + + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); +} + +static void configure_rtc_base_datetime(const char *startdate) +{ + time_t rtc_start_datetime; + struct tm tm; + + if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon, + &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 6) { + /* OK */ + } else if (sscanf(startdate, "%d-%d-%d", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday) == 3) { + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + } else { + goto date_fail; + } + tm.tm_year -= 1900; + tm.tm_mon--; + rtc_start_datetime = mktimegm(&tm); + if (rtc_start_datetime == -1) { + date_fail: + error_report("invalid datetime format"); + error_printf("valid formats: " + "'2006-06-17T16:01:21' or '2006-06-17'\n"); + exit(1); + } + rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime; + rtc_ref_start_datetime = rtc_start_datetime; +} + +void configure_rtc(QemuOpts *opts) +{ + const char *value; + + /* Set defaults */ + rtc_clock = QEMU_CLOCK_HOST; + rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; + rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + + value = qemu_opt_get(opts, "base"); + if (value) { + if (!strcmp(value, "utc")) { + rtc_base_type = RTC_BASE_UTC; + } else if (!strcmp(value, "localtime")) { + rtc_base_type = RTC_BASE_LOCALTIME; + replay_add_blocker("-rtc base=localtime"); + } else { + rtc_base_type = RTC_BASE_DATETIME; + configure_rtc_base_datetime(value); + } + } + value = qemu_opt_get(opts, "clock"); + if (value) { + if (!strcmp(value, "host")) { + rtc_clock = QEMU_CLOCK_HOST; + } else if (!strcmp(value, 
"rt")) { + rtc_clock = QEMU_CLOCK_REALTIME; + } else if (!strcmp(value, "vm")) { + rtc_clock = QEMU_CLOCK_VIRTUAL; + } else { + error_report("invalid option value '%s'", value); + exit(1); + } + } + value = qemu_opt_get(opts, "driftfix"); + if (value) { + if (!strcmp(value, "slew")) { + object_register_sugar_prop(TYPE_MC146818_RTC, + "lost_tick_policy", + "slew", + false); + if (!object_class_by_name(TYPE_MC146818_RTC)) { + warn_report("driftfix 'slew' is not available with this machine"); + } + } else if (!strcmp(value, "none")) { + /* discard is default */ + } else { + error_report("invalid option value '%s'", value); + exit(1); + } + } +} diff --git a/system/runstate-action.c b/system/runstate-action.c new file mode 100644 index 0000000..ae0761a --- /dev/null +++ b/system/runstate-action.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "sysemu/runstate-action.h" +#include "sysemu/watchdog.h" +#include "qemu/config-file.h" +#include "qapi/error.h" +#include "qemu/option_int.h" + +RebootAction reboot_action = REBOOT_ACTION_RESET; +ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF; +PanicAction panic_action = PANIC_ACTION_SHUTDOWN; + +/* + * Receives actions to be applied for specific guest events + * and sets the internal state as requested. 
+ */ +void qmp_set_action(bool has_reboot, RebootAction reboot, + bool has_shutdown, ShutdownAction shutdown, + bool has_panic, PanicAction panic, + bool has_watchdog, WatchdogAction watchdog, + Error **errp) +{ + if (has_reboot) { + reboot_action = reboot; + } + + if (has_panic) { + panic_action = panic; + } + + if (has_watchdog) { + qmp_watchdog_set_action(watchdog, errp); + } + + /* Process shutdown last, in case the panic action needs to be altered */ + if (has_shutdown) { + shutdown_action = shutdown; + } +} diff --git a/system/runstate-hmp-cmds.c b/system/runstate-hmp-cmds.c new file mode 100644 index 0000000..2df670f --- /dev/null +++ b/system/runstate-hmp-cmds.c @@ -0,0 +1,95 @@ +/* + * HMP commands related to run state + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "exec/cpu-common.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-run-state.h" +#include "qapi/qmp/qdict.h" +#include "qemu/accel.h" + +void hmp_info_status(Monitor *mon, const QDict *qdict) +{ + StatusInfo *info; + + info = qmp_query_status(NULL); + + monitor_printf(mon, "VM status: %s", + info->running ? 
"running" : "paused"); + + if (!info->running && info->status != RUN_STATE_PAUSED) { + monitor_printf(mon, " (%s)", RunState_str(info->status)); + } + + monitor_printf(mon, "\n"); + + qapi_free_StatusInfo(info); +} + +void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict) +{ + const char *option = qdict_get_try_str(qdict, "option"); + AccelState *accel = current_accel(); + bool newval; + + if (!object_property_find(OBJECT(accel), "one-insn-per-tb")) { + monitor_printf(mon, + "This accelerator does not support setting one-insn-per-tb\n"); + return; + } + + if (!option || !strcmp(option, "on")) { + newval = true; + } else if (!strcmp(option, "off")) { + newval = false; + } else { + monitor_printf(mon, "unexpected option %s\n", option); + return; + } + /* If the property exists then setting it can never fail */ + object_property_set_bool(OBJECT(accel), "one-insn-per-tb", + newval, &error_abort); +} + +void hmp_watchdog_action(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + WatchdogAction action; + char *qapi_value; + + qapi_value = g_ascii_strdown(qdict_get_str(qdict, "action"), -1); + action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, &err); + g_free(qapi_value); + if (err) { + hmp_handle_error(mon, err); + return; + } + qmp_watchdog_set_action(action, &error_abort); +} + +void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str) +{ + int i; + + if (nb_args != 2) { + return; + } + readline_set_completion_index(rs, strlen(str)); + for (i = 0; i < WATCHDOG_ACTION__MAX; i++) { + readline_add_completion_of(rs, str, WatchdogAction_str(i)); + } +} diff --git a/system/runstate.c b/system/runstate.c new file mode 100644 index 0000000..1652ed0 --- /dev/null +++ b/system/runstate.c @@ -0,0 +1,871 @@ +/* + * QEMU main system emulation loop + * + * Copyright (c) 2003-2020 QEMU contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "audio/audio.h" +#include "block/block.h" +#include "block/export.h" +#include "chardev/char.h" +#include "crypto/cipher.h" +#include "crypto/init.h" +#include "exec/cpu-common.h" +#include "gdbstub/syscalls.h" +#include "hw/boards.h" +#include "migration/misc.h" +#include "migration/postcopy-ram.h" +#include "monitor/monitor.h" +#include "net/net.h" +#include "net/vhost_net.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-run-state.h" +#include "qapi/qapi-events-run-state.h" +#include "qemu/accel.h" +#include "qemu/error-report.h" +#include "qemu/job.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/plugin.h" +#include "qemu/sockets.h" +#include "qemu/timer.h" +#include "qemu/thread.h" +#include "qom/object.h" +#include "qom/object_interfaces.h" +#include "sysemu/cpus.h" +#include "sysemu/qtest.h" +#include "sysemu/replay.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/runstate-action.h" +#include "sysemu/sysemu.h" +#include 
"sysemu/tpm.h" +#include "trace.h" + +static NotifierList exit_notifiers = + NOTIFIER_LIST_INITIALIZER(exit_notifiers); + +static RunState current_run_state = RUN_STATE_PRELAUNCH; + +/* We use RUN_STATE__MAX but any invalid value will do */ +static RunState vmstop_requested = RUN_STATE__MAX; +static QemuMutex vmstop_lock; + +typedef struct { + RunState from; + RunState to; +} RunStateTransition; + +static const RunStateTransition runstate_transitions_def[] = { + { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, + + { RUN_STATE_DEBUG, RUN_STATE_RUNNING }, + { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH }, + + { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR }, + { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, + { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH }, + { RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE }, + { RUN_STATE_INMIGRATE, RUN_STATE_COLO }, + + { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, + { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH }, + + { RUN_STATE_IO_ERROR, RUN_STATE_RUNNING }, + { RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH }, + + { RUN_STATE_PAUSED, RUN_STATE_RUNNING }, + { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE }, + { RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH }, + { RUN_STATE_PAUSED, RUN_STATE_COLO}, + + { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH }, + + { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, + { RUN_STATE_PRELAUNCH, 
RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, + + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED }, + + { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, + { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, + + { RUN_STATE_COLO, RUN_STATE_RUNNING }, + { RUN_STATE_COLO, RUN_STATE_PRELAUNCH }, + { RUN_STATE_COLO, RUN_STATE_SHUTDOWN}, + + { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, + { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, + { RUN_STATE_RUNNING, RUN_STATE_IO_ERROR }, + { RUN_STATE_RUNNING, RUN_STATE_PAUSED }, + { RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM }, + { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM }, + { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN }, + { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG }, + { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_RUNNING, RUN_STATE_COLO}, + + { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, + + { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED }, + { RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH }, + { RUN_STATE_SHUTDOWN, RUN_STATE_COLO }, + + { RUN_STATE_DEBUG, RUN_STATE_SUSPENDED }, + { RUN_STATE_RUNNING, RUN_STATE_SUSPENDED }, + { RUN_STATE_SUSPENDED, RUN_STATE_RUNNING }, + { RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH }, + { RUN_STATE_SUSPENDED, RUN_STATE_COLO}, + + { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, + { RUN_STATE_WATCHDOG, 
RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH }, + { RUN_STATE_WATCHDOG, RUN_STATE_COLO}, + + { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING }, + { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH }, + + { RUN_STATE__MAX, RUN_STATE__MAX }, +}; + +static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX]; + +bool runstate_check(RunState state) +{ + return current_run_state == state; +} + +static void runstate_init(void) +{ + const RunStateTransition *p; + + memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions)); + for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) { + runstate_valid_transitions[p->from][p->to] = true; + } + + qemu_mutex_init(&vmstop_lock); +} + +/* This function will abort() on invalid state transitions */ +void runstate_set(RunState new_state) +{ + assert(new_state < RUN_STATE__MAX); + + trace_runstate_set(current_run_state, RunState_str(current_run_state), + new_state, RunState_str(new_state)); + + if (current_run_state == new_state) { + return; + } + + if (!runstate_valid_transitions[current_run_state][new_state]) { + error_report("invalid runstate transition: '%s' -> '%s'", + RunState_str(current_run_state), + RunState_str(new_state)); + abort(); + } + + current_run_state = new_state; +} + +RunState runstate_get(void) +{ + return current_run_state; +} + +bool runstate_is_running(void) +{ + return runstate_check(RUN_STATE_RUNNING); +} + +bool runstate_needs_reset(void) +{ + return runstate_check(RUN_STATE_INTERNAL_ERROR) || + runstate_check(RUN_STATE_SHUTDOWN); +} + +StatusInfo *qmp_query_status(Error **errp) +{ + StatusInfo *info = g_malloc0(sizeof(*info)); + AccelState *accel = current_accel(); + + /* + * We ignore errors, which will happen if the accelerator + * is not TCG. "singlestep" is meaningless for other accelerators, + * so we will set the StatusInfo field to false for those. 
+ */ + info->singlestep = object_property_get_bool(OBJECT(accel), + "one-insn-per-tb", NULL); + info->running = runstate_is_running(); + info->status = current_run_state; + + return info; +} + +bool qemu_vmstop_requested(RunState *r) +{ + qemu_mutex_lock(&vmstop_lock); + *r = vmstop_requested; + vmstop_requested = RUN_STATE__MAX; + qemu_mutex_unlock(&vmstop_lock); + return *r < RUN_STATE__MAX; +} + +void qemu_system_vmstop_request_prepare(void) +{ + qemu_mutex_lock(&vmstop_lock); +} + +void qemu_system_vmstop_request(RunState state) +{ + vmstop_requested = state; + qemu_mutex_unlock(&vmstop_lock); + qemu_notify_event(); +} +struct VMChangeStateEntry { + VMChangeStateHandler *cb; + VMChangeStateHandler *prepare_cb; + void *opaque; + QTAILQ_ENTRY(VMChangeStateEntry) entries; + int priority; +}; + +static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head = + QTAILQ_HEAD_INITIALIZER(vm_change_state_head); + +/** + * qemu_add_vm_change_state_handler_prio: + * @cb: the callback to invoke + * @opaque: user data passed to the callback + * @priority: low priorities execute first when the vm runs and the reverse is + * true when the vm stops + * + * Register a callback function that is invoked when the vm starts or stops + * running. + * + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() + */ +VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( + VMChangeStateHandler *cb, void *opaque, int priority) +{ + return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, + priority); +} + +/** + * qemu_add_vm_change_state_handler_prio_full: + * @cb: the main callback to invoke + * @prepare_cb: a callback to invoke before the main callback + * @opaque: user data passed to the callbacks + * @priority: low priorities execute first when the vm runs and the reverse is + * true when the vm stops + * + * Register a main callback function and an optional prepare callback function + * that are invoked when the vm starts or stops running. 
The main callback and + * the prepare callback are called in two separate phases: First all prepare + * callbacks are called and only then all main callbacks are called. As its + * name suggests, the prepare callback can be used to do some preparatory work + * before invoking the main callback. + * + * Returns: an entry to be freed using qemu_del_vm_change_state_handler() + */ +VMChangeStateEntry * +qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, + VMChangeStateHandler *prepare_cb, + void *opaque, int priority) +{ + VMChangeStateEntry *e; + VMChangeStateEntry *other; + + e = g_malloc0(sizeof(*e)); + e->cb = cb; + e->prepare_cb = prepare_cb; + e->opaque = opaque; + e->priority = priority; + + /* Keep list sorted in ascending priority order */ + QTAILQ_FOREACH(other, &vm_change_state_head, entries) { + if (priority < other->priority) { + QTAILQ_INSERT_BEFORE(other, e, entries); + return e; + } + } + + QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries); + return e; +} + +VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, + void *opaque) +{ + return qemu_add_vm_change_state_handler_prio(cb, opaque, 0); +} + +void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) +{ + QTAILQ_REMOVE(&vm_change_state_head, e, entries); + g_free(e); +} + +void vm_state_notify(bool running, RunState state) +{ + VMChangeStateEntry *e, *next; + + trace_vm_state_notify(running, state, RunState_str(state)); + + if (running) { + QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { + if (e->prepare_cb) { + e->prepare_cb(e->opaque, running, state); + } + } + + QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { + e->cb(e->opaque, running, state); + } + } else { + QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { + if (e->prepare_cb) { + e->prepare_cb(e->opaque, running, state); + } + } + + QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { + e->cb(e->opaque, running, state); + } 
+ } +} + +static ShutdownCause reset_requested; +static ShutdownCause shutdown_requested; +static int shutdown_signal; +static pid_t shutdown_pid; +static int powerdown_requested; +static int debug_requested; +static int suspend_requested; +static WakeupReason wakeup_reason; +static NotifierList powerdown_notifiers = + NOTIFIER_LIST_INITIALIZER(powerdown_notifiers); +static NotifierList suspend_notifiers = + NOTIFIER_LIST_INITIALIZER(suspend_notifiers); +static NotifierList wakeup_notifiers = + NOTIFIER_LIST_INITIALIZER(wakeup_notifiers); +static NotifierList shutdown_notifiers = + NOTIFIER_LIST_INITIALIZER(shutdown_notifiers); +static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE); + +ShutdownCause qemu_shutdown_requested_get(void) +{ + return shutdown_requested; +} + +ShutdownCause qemu_reset_requested_get(void) +{ + return reset_requested; +} + +static int qemu_shutdown_requested(void) +{ + return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE); +} + +static void qemu_kill_report(void) +{ + if (!qtest_driver() && shutdown_signal) { + if (shutdown_pid == 0) { + /* This happens for eg ^C at the terminal, so it's worth + * avoiding printing an odd message in that case. + */ + error_report("terminating on signal %d", shutdown_signal); + } else { + char *shutdown_cmd = qemu_get_pid_name(shutdown_pid); + + error_report("terminating on signal %d from pid " FMT_pid " (%s)", + shutdown_signal, shutdown_pid, + shutdown_cmd ? 
shutdown_cmd : ""); + g_free(shutdown_cmd); + } + shutdown_signal = 0; + } +} + +static ShutdownCause qemu_reset_requested(void) +{ + ShutdownCause r = reset_requested; + + if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) { + reset_requested = SHUTDOWN_CAUSE_NONE; + return r; + } + return SHUTDOWN_CAUSE_NONE; +} + +static int qemu_suspend_requested(void) +{ + int r = suspend_requested; + if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) { + suspend_requested = 0; + return r; + } + return false; +} + +static WakeupReason qemu_wakeup_requested(void) +{ + return wakeup_reason; +} + +static int qemu_powerdown_requested(void) +{ + int r = powerdown_requested; + powerdown_requested = 0; + return r; +} + +static int qemu_debug_requested(void) +{ + int r = debug_requested; + debug_requested = 0; + return r; +} + +/* + * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE. + */ +void qemu_system_reset(ShutdownCause reason) +{ + MachineClass *mc; + + mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; + + cpu_synchronize_all_states(); + + if (mc && mc->reset) { + mc->reset(current_machine, reason); + } else { + qemu_devices_reset(reason); + } + switch (reason) { + case SHUTDOWN_CAUSE_NONE: + case SHUTDOWN_CAUSE_SUBSYSTEM_RESET: + case SHUTDOWN_CAUSE_SNAPSHOT_LOAD: + break; + default: + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } + cpu_synchronize_all_post_reset(); +} + +/* + * Wake the VM after suspend. + */ +static void qemu_system_wakeup(void) +{ + MachineClass *mc; + + mc = current_machine ? 
MACHINE_GET_CLASS(current_machine) : NULL; + + if (mc && mc->wakeup) { + mc->wakeup(current_machine); + } +} + +void qemu_system_guest_panicked(GuestPanicInformation *info) +{ + qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); + + if (current_cpu) { + current_cpu->crash_occurred = true; + } + /* + * TODO: Currently the available panic actions are: none, pause, and + * shutdown, but in principle debug and reset could be supported as well. + * Investigate any potential use cases for the unimplemented actions. + */ + if (panic_action == PANIC_ACTION_PAUSE + || (panic_action == PANIC_ACTION_SHUTDOWN && shutdown_action == SHUTDOWN_ACTION_PAUSE)) { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info); + vm_stop(RUN_STATE_GUEST_PANICKED); + } else if (panic_action == PANIC_ACTION_SHUTDOWN || + panic_action == PANIC_ACTION_EXIT_FAILURE) { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info); + vm_stop(RUN_STATE_GUEST_PANICKED); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC); + } else { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info); + } + + if (info) { + if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) { + qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64 + " %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n", + info->u.hyper_v.arg1, + info->u.hyper_v.arg2, + info->u.hyper_v.arg3, + info->u.hyper_v.arg4, + info->u.hyper_v.arg5); + } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) { + qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n" + "PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n", + info->u.s390.core, + S390CrashReason_str(info->u.s390.reason), + info->u.s390.psw_mask, + info->u.s390.psw_addr); + } + qapi_free_GuestPanicInformation(info); + } +} + +void qemu_system_guest_crashloaded(GuestPanicInformation *info) +{ + qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded"); + qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info); + qapi_free_GuestPanicInformation(info); +} + +void 
qemu_system_reset_request(ShutdownCause reason) +{ + if (reboot_action == REBOOT_ACTION_SHUTDOWN && + reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) { + shutdown_requested = reason; + } else if (!cpus_are_resettable()) { + error_report("cpus are not resettable, terminating"); + shutdown_requested = reason; + } else { + reset_requested = reason; + } + cpu_stop_current(); + qemu_notify_event(); +} + +static void qemu_system_suspend(void) +{ + pause_all_vcpus(); + notifier_list_notify(&suspend_notifiers, NULL); + runstate_set(RUN_STATE_SUSPENDED); + qapi_event_send_suspend(); +} + +void qemu_system_suspend_request(void) +{ + if (runstate_check(RUN_STATE_SUSPENDED)) { + return; + } + suspend_requested = 1; + cpu_stop_current(); + qemu_notify_event(); +} + +void qemu_register_suspend_notifier(Notifier *notifier) +{ + notifier_list_add(&suspend_notifiers, notifier); +} + +void qemu_system_wakeup_request(WakeupReason reason, Error **errp) +{ + trace_system_wakeup_request(reason); + + if (!runstate_check(RUN_STATE_SUSPENDED)) { + error_setg(errp, + "Unable to wake up: guest is not in suspended state"); + return; + } + if (!(wakeup_reason_mask & (1 << reason))) { + return; + } + runstate_set(RUN_STATE_RUNNING); + wakeup_reason = reason; + qemu_notify_event(); +} + +void qemu_system_wakeup_enable(WakeupReason reason, bool enabled) +{ + if (enabled) { + wakeup_reason_mask |= (1 << reason); + } else { + wakeup_reason_mask &= ~(1 << reason); + } +} + +void qemu_register_wakeup_notifier(Notifier *notifier) +{ + notifier_list_add(&wakeup_notifiers, notifier); +} + +static bool wakeup_suspend_enabled; + +void qemu_register_wakeup_support(void) +{ + wakeup_suspend_enabled = true; +} + +bool qemu_wakeup_suspend_enabled(void) +{ + return wakeup_suspend_enabled; +} + +void qemu_system_killed(int signal, pid_t pid) +{ + shutdown_signal = signal; + shutdown_pid = pid; + shutdown_action = SHUTDOWN_ACTION_POWEROFF; + + /* Cannot call qemu_system_shutdown_request directly because + * we are in 
a signal handler. + */ + shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL; + qemu_notify_event(); +} + +void qemu_system_shutdown_request(ShutdownCause reason) +{ + trace_qemu_system_shutdown_request(reason); + replay_shutdown_request(reason); + shutdown_requested = reason; + qemu_notify_event(); +} + +static void qemu_system_powerdown(void) +{ + qapi_event_send_powerdown(); + notifier_list_notify(&powerdown_notifiers, NULL); +} + +static void qemu_system_shutdown(ShutdownCause cause) +{ + qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause); + notifier_list_notify(&shutdown_notifiers, &cause); +} + +void qemu_system_powerdown_request(void) +{ + trace_qemu_system_powerdown_request(); + powerdown_requested = 1; + qemu_notify_event(); +} + +void qemu_register_powerdown_notifier(Notifier *notifier) +{ + notifier_list_add(&powerdown_notifiers, notifier); +} + +void qemu_register_shutdown_notifier(Notifier *notifier) +{ + notifier_list_add(&shutdown_notifiers, notifier); +} + +void qemu_system_debug_request(void) +{ + debug_requested = 1; + qemu_notify_event(); +} + +static bool main_loop_should_exit(int *status) +{ + RunState r; + ShutdownCause request; + + if (qemu_debug_requested()) { + vm_stop(RUN_STATE_DEBUG); + } + if (qemu_suspend_requested()) { + qemu_system_suspend(); + } + request = qemu_shutdown_requested(); + if (request) { + qemu_kill_report(); + qemu_system_shutdown(request); + if (shutdown_action == SHUTDOWN_ACTION_PAUSE) { + vm_stop(RUN_STATE_SHUTDOWN); + } else { + if (request == SHUTDOWN_CAUSE_GUEST_PANIC && + panic_action == PANIC_ACTION_EXIT_FAILURE) { + *status = EXIT_FAILURE; + } + return true; + } + } + request = qemu_reset_requested(); + if (request) { + pause_all_vcpus(); + qemu_system_reset(request); + resume_all_vcpus(); + /* + * runstate can change in pause_all_vcpus() + * as iothread mutex is unlocked + */ + if (!runstate_check(RUN_STATE_RUNNING) && + !runstate_check(RUN_STATE_INMIGRATE) && + 
!runstate_check(RUN_STATE_FINISH_MIGRATE)) { + runstate_set(RUN_STATE_PRELAUNCH); + } + } + if (qemu_wakeup_requested()) { + pause_all_vcpus(); + qemu_system_wakeup(); + notifier_list_notify(&wakeup_notifiers, &wakeup_reason); + wakeup_reason = QEMU_WAKEUP_REASON_NONE; + resume_all_vcpus(); + qapi_event_send_wakeup(); + } + if (qemu_powerdown_requested()) { + qemu_system_powerdown(); + } + if (qemu_vmstop_requested(&r)) { + vm_stop(r); + } + return false; +} + +int qemu_main_loop(void) +{ + int status = EXIT_SUCCESS; + + while (!main_loop_should_exit(&status)) { + main_loop_wait(false); + } + + return status; +} + +void qemu_add_exit_notifier(Notifier *notify) +{ + notifier_list_add(&exit_notifiers, notify); +} + +void qemu_remove_exit_notifier(Notifier *notify) +{ + notifier_remove(notify); +} + +static void qemu_run_exit_notifiers(void) +{ + notifier_list_notify(&exit_notifiers, NULL); +} + +void qemu_init_subsystems(void) +{ + Error *err = NULL; + + os_set_line_buffering(); + + module_call_init(MODULE_INIT_TRACE); + + qemu_init_cpu_list(); + qemu_init_cpu_loop(); + qemu_mutex_lock_iothread(); + + atexit(qemu_run_exit_notifiers); + + module_call_init(MODULE_INIT_QOM); + module_call_init(MODULE_INIT_MIGRATION); + + runstate_init(); + precopy_infrastructure_init(); + postcopy_infrastructure_init(); + monitor_init_globals(); + + if (qcrypto_init(&err) < 0) { + error_reportf_err(err, "cannot initialize crypto: "); + exit(1); + } + + os_setup_early_signal_handling(); + + bdrv_init_with_whitelist(); + socket_init(); +} + + +void qemu_cleanup(void) +{ + gdb_exit(0); + + /* + * cleaning up the migration object cancels any existing migration + * try to do this early so that it also stops using devices. + */ + migration_shutdown(); + + /* + * Close the exports before draining the block layer. The export + * drivers may have coroutines yielding on it, so we need to clean + * them up before the drain, as otherwise they may be get stuck in + * blk_wait_while_drained(). 
+ */ + blk_exp_close_all(); + + + /* No more vcpu or device emulation activity beyond this point */ + vm_shutdown(); + replay_finish(); + + /* + * We must cancel all block jobs while the block layer is drained, + * or cancelling will be affected by throttling and thus may block + * for an extended period of time. + * Begin the drained section after vm_shutdown() to avoid requests being + * stuck in the BlockBackend's request queue. + * We do not need to end this section, because we do not want any + * requests happening from here on anyway. + */ + bdrv_drain_all_begin(); + job_cancel_sync_all(); + bdrv_close_all(); + + /* vhost-user must be cleaned up before chardevs. */ + tpm_cleanup(); + net_cleanup(); + audio_cleanup(); + monitor_cleanup(); + qemu_chr_cleanup(); + user_creatable_cleanup(); + /* TODO: unref root container, check all devices are ok */ +} diff --git a/system/tpm-hmp-cmds.c b/system/tpm-hmp-cmds.c new file mode 100644 index 0000000..9ed6ad6 --- /dev/null +++ b/system/tpm-hmp-cmds.c @@ -0,0 +1,65 @@ +/* + * HMP commands related to TPM + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/qapi-commands-tpm.h" +#include "monitor/monitor.h" +#include "monitor/hmp.h" +#include "qapi/error.h" + +void hmp_info_tpm(Monitor *mon, const QDict *qdict) +{ +#ifdef CONFIG_TPM + TPMInfoList *info_list, *info; + Error *err = NULL; + unsigned int c = 0; + TPMPassthroughOptions *tpo; + TPMEmulatorOptions *teo; + + info_list = qmp_query_tpm(&err); + if (err) { + monitor_printf(mon, "TPM device not supported\n"); + error_free(err); + return; + } + + if (info_list) { + monitor_printf(mon, "TPM device:\n"); + } + + for (info = info_list; info; info = info->next) { + TPMInfo *ti = info->value; + monitor_printf(mon, " tpm%d: model=%s\n", + c, TpmModel_str(ti->model)); + + monitor_printf(mon, " \\ %s: type=%s", + ti->id, TpmType_str(ti->options->type)); + + switch (ti->options->type) { + case TPM_TYPE_PASSTHROUGH: + tpo = ti->options->u.passthrough.data; + monitor_printf(mon, "%s%s%s%s", + tpo->path ? ",path=" : "", + tpo->path ?: "", + tpo->cancel_path ? ",cancel-path=" : "", + tpo->cancel_path ?: ""); + break; + case TPM_TYPE_EMULATOR: + teo = ti->options->u.emulator.data; + monitor_printf(mon, ",chardev=%s", teo->chardev); + break; + case TPM_TYPE__MAX: + break; + } + monitor_printf(mon, "\n"); + c++; + } + qapi_free_TPMInfoList(info_list); +#else + monitor_printf(mon, "TPM device not supported\n"); +#endif /* CONFIG_TPM */ +} diff --git a/system/tpm.c b/system/tpm.c new file mode 100644 index 0000000..578563f --- /dev/null +++ b/system/tpm.c @@ -0,0 +1,239 @@ +/* + * TPM configuration + * + * Copyright (C) 2011-2013 IBM Corporation + * + * Authors: + * Stefan Berger + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + * Based on net.c + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qapi/qapi-commands-tpm.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/tpm_backend.h" +#include "sysemu/tpm.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" + +static QLIST_HEAD(, TPMBackend) tpm_backends = + QLIST_HEAD_INITIALIZER(tpm_backends); + +static const TPMBackendClass * +tpm_be_find_by_type(enum TpmType type) +{ + ObjectClass *oc; + char *typename = g_strdup_printf("tpm-%s", TpmType_str(type)); + + oc = object_class_by_name(typename); + g_free(typename); + + if (!object_class_dynamic_cast(oc, TYPE_TPM_BACKEND)) { + return NULL; + } + + return TPM_BACKEND_CLASS(oc); +} + +/* + * Walk the list of available TPM backend drivers and display them on the + * screen. + */ +static void tpm_display_backend_drivers(void) +{ + bool got_one = false; + int i; + + for (i = 0; i < TPM_TYPE__MAX; i++) { + const TPMBackendClass *bc = tpm_be_find_by_type(i); + if (!bc) { + continue; + } + if (!got_one) { + error_printf("Supported TPM types (choose only one):\n"); + got_one = true; + } + error_printf("%12s %s\n", TpmType_str(i), bc->desc); + } + if (!got_one) { + error_printf("No TPM backend types are available\n"); + } +} + +/* + * Find the TPM with the given Id + */ +TPMBackend *qemu_find_tpm_be(const char *id) +{ + TPMBackend *drv; + + if (id) { + QLIST_FOREACH(drv, &tpm_backends, list) { + if (!strcmp(drv->id, id)) { + return drv; + } + } + } + + return NULL; +} + +static int tpm_init_tpmdev(void *dummy, QemuOpts *opts, Error **errp) +{ + /* + * Use of error_report() in a function with an Error ** parameter + * is suspicious. It is okay here. The parameter only exists to + * make the function usable with qemu_opts_foreach(). It is not + * actually used. 
+ */ + const char *value; + const char *id; + const TPMBackendClass *be; + TPMBackend *drv; + Error *local_err = NULL; + int i; + + if (!QLIST_EMPTY(&tpm_backends)) { + error_report("Only one TPM is allowed."); + return 1; + } + + id = qemu_opts_id(opts); + if (id == NULL) { + error_report(QERR_MISSING_PARAMETER, "id"); + return 1; + } + + value = qemu_opt_get(opts, "type"); + if (!value) { + error_report(QERR_MISSING_PARAMETER, "type"); + tpm_display_backend_drivers(); + return 1; + } + + i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL); + be = i >= 0 ? tpm_be_find_by_type(i) : NULL; + if (be == NULL) { + error_report(QERR_INVALID_PARAMETER_VALUE, + "type", "a TPM backend type"); + tpm_display_backend_drivers(); + return 1; + } + + /* validate backend specific opts */ + if (!qemu_opts_validate(opts, be->opts, &local_err)) { + error_report_err(local_err); + return 1; + } + + drv = be->create(opts); + if (!drv) { + return 1; + } + + drv->id = g_strdup(id); + QLIST_INSERT_HEAD(&tpm_backends, drv, list); + + return 0; +} + +/* + * Walk the list of TPM backend drivers that are in use and call their + * destroy function to have them cleaned up. + */ +void tpm_cleanup(void) +{ + TPMBackend *drv, *next; + + QLIST_FOREACH_SAFE(drv, &tpm_backends, list, next) { + QLIST_REMOVE(drv, list); + object_unref(OBJECT(drv)); + } +} + +/* + * Initialize the TPM. Process the tpmdev command line options describing the + * TPM backend. + */ +int tpm_init(void) +{ + if (qemu_opts_foreach(qemu_find_opts("tpmdev"), + tpm_init_tpmdev, NULL, NULL)) { + return -1; + } + + return 0; +} + +/* + * Parse the TPM configuration options. 
+ * To display all available TPM backends the user may use '-tpmdev help' + */ +int tpm_config_parse(QemuOptsList *opts_list, const char *optarg) +{ + QemuOpts *opts; + + if (!strcmp(optarg, "help")) { + tpm_display_backend_drivers(); + return -1; + } + opts = qemu_opts_parse_noisily(opts_list, optarg, true); + if (!opts) { + return -1; + } + return 0; +} + +/* + * Walk the list of active TPM backends and collect information about them. + */ +TPMInfoList *qmp_query_tpm(Error **errp) +{ + TPMBackend *drv; + TPMInfoList *head = NULL, **tail = &head; + + QLIST_FOREACH(drv, &tpm_backends, list) { + if (!drv->tpmif) { + continue; + } + + QAPI_LIST_APPEND(tail, tpm_backend_query_tpm(drv)); + } + + return head; +} + +TpmTypeList *qmp_query_tpm_types(Error **errp) +{ + unsigned int i = 0; + TpmTypeList *head = NULL, **tail = &head; + + for (i = 0; i < TPM_TYPE__MAX; i++) { + if (!tpm_be_find_by_type(i)) { + continue; + } + QAPI_LIST_APPEND(tail, i); + } + + return head; +} +TpmModelList *qmp_query_tpm_models(Error **errp) +{ + TpmModelList *head = NULL, **tail = &head; + GSList *e, *l = object_class_get_list(TYPE_TPM_IF, false); + + for (e = l; e; e = e->next) { + TPMIfClass *c = TPM_IF_CLASS(e->data); + + QAPI_LIST_APPEND(tail, c->model); + } + g_slist_free(l); + + return head; +} diff --git a/system/trace-events b/system/trace-events new file mode 100644 index 0000000..69c9044 --- /dev/null +++ b/system/trace-events @@ -0,0 +1,40 @@ +# See docs/devel/tracing.rst for syntax documentation. + +# balloon.c +# Since requests are raised via monitor, not many tracepoints are needed. 
+balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu" + +# ioport.c +cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" +cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" + +# memory.c +memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'" +memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'" +memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" +memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u" +memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" +memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u" +memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)" +flatview_new(void *view, void *root) "%p (root %p)" +flatview_destroy(void *view, void *root) "%p (root %p)" +flatview_destroy_rcu(void *view, void *root) "%p (root %p)" +global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32 + +# cpus.c +vm_stop_flush_all(int ret) "ret %d" + +# vl.c +vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)" +load_file(const char *name, const char *path) "name %s location %s" +runstate_set(int current_state, const char *current_state_str, int new_state, const char *new_state_str) "current_run_state %d (%s) new_state %d (%s)" 
+system_wakeup_request(int reason) "reason=%d" +qemu_system_shutdown_request(int reason) "reason=%d" +qemu_system_powerdown_request(void) "" + +#dirtylimit.c +dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d" +dirtylimit_state_finalize(void) +dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" +dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64 +dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us" diff --git a/system/trace.h b/system/trace.h new file mode 100644 index 0000000..cd0136d --- /dev/null +++ b/system/trace.h @@ -0,0 +1 @@ +#include "trace/trace-system.h" diff --git a/system/vl.c b/system/vl.c new file mode 100644 index 0000000..98e071e --- /dev/null +++ b/system/vl.c @@ -0,0 +1,3730 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/help-texts.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "exec/cpu-common.h" +#include "exec/page-vary.h" +#include "hw/qdev-properties.h" +#include "qapi/compat-policy.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qjson.h" +#include "qemu-version.h" +#include "qemu/cutils.h" +#include "qemu/help_option.h" +#include "qemu/hw-version.h" +#include "qemu/uuid.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/runstate-action.h" +#include "sysemu/seccomp.h" +#include "sysemu/tcg.h" +#include "sysemu/xen.h" + +#include "qemu/error-report.h" +#include "qemu/sockets.h" +#include "qemu/accel.h" +#include "qemu/async-teardown.h" +#include "hw/usb.h" +#include "hw/isa/isa.h" +#include "hw/scsi/scsi.h" +#include "hw/display/vga.h" +#include "hw/firmware/smbios.h" +#include "hw/acpi/acpi.h" +#include "hw/xen/xen.h" +#include "hw/loader.h" +#include "monitor/qdev.h" +#include "net/net.h" +#include "net/slirp.h" +#include "monitor/monitor.h" +#include "ui/console.h" +#include "ui/input.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "sysemu/hostmem.h" +#include "exec/gdbstub.h" +#include "qemu/timer.h" +#include "chardev/char.h" +#include "qemu/bitmap.h" +#include "qemu/log.h" +#include "sysemu/blockdev.h" +#include "hw/block/block.h" +#include "hw/i386/x86.h" +#include "hw/i386/pc.h" +#include "migration/misc.h" +#include "migration/snapshot.h" +#include "sysemu/tpm.h" +#include "sysemu/dma.h" +#include "hw/audio/soundhw.h" +#include "audio/audio.h" +#include "sysemu/cpus.h" +#include "sysemu/cpu-timers.h" +#include "migration/colo.h" 
+#include "migration/postcopy-ram.h" +#include "sysemu/kvm.h" +#include "qapi/qobject-input-visitor.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "qemu/main-loop.h" +#ifdef CONFIG_VIRTFS +#include "fsdev/qemu-fsdev.h" +#endif +#include "sysemu/qtest.h" +#ifdef CONFIG_TCG +#include "accel/tcg/perf.h" +#endif + +#include "disas/disas.h" + +#include "trace.h" +#include "trace/control.h" +#include "qemu/plugin.h" +#include "qemu/queue.h" +#include "sysemu/arch_init.h" +#include "exec/confidential-guest-support.h" + +#include "ui/qemu-spice.h" +#include "qapi/string-input-visitor.h" +#include "qapi/opts-visitor.h" +#include "qapi/clone-visitor.h" +#include "qom/object_interfaces.h" +#include "semihosting/semihost.h" +#include "crypto/init.h" +#include "sysemu/replay.h" +#include "qapi/qapi-events-run-state.h" +#include "qapi/qapi-types-audio.h" +#include "qapi/qapi-visit-audio.h" +#include "qapi/qapi-visit-block-core.h" +#include "qapi/qapi-visit-compat.h" +#include "qapi/qapi-visit-machine.h" +#include "qapi/qapi-visit-ui.h" +#include "qapi/qapi-commands-block-core.h" +#include "qapi/qapi-commands-migration.h" +#include "qapi/qapi-commands-misc.h" +#include "qapi/qapi-visit-qom.h" +#include "qapi/qapi-commands-ui.h" +#include "block/qdict.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/iothread.h" +#include "qemu/guest-random.h" +#include "qemu/keyval.h" + +#define MAX_VIRTIO_CONSOLES 1 + +typedef struct BlockdevOptionsQueueEntry { + BlockdevOptions *bdo; + Location loc; + QSIMPLEQ_ENTRY(BlockdevOptionsQueueEntry) entry; +} BlockdevOptionsQueueEntry; + +typedef QSIMPLEQ_HEAD(, BlockdevOptionsQueueEntry) BlockdevOptionsQueue; + +typedef struct ObjectOption { + ObjectOptions *opts; + QTAILQ_ENTRY(ObjectOption) next; +} ObjectOption; + +typedef struct DeviceOption { + QDict *opts; + Location loc; + QTAILQ_ENTRY(DeviceOption) next; +} DeviceOption; + +static const char *cpu_option; +static const char *mem_path; +static const char *incoming; 
+static const char *loadvm; +static const char *accelerators; +static bool have_custom_ram_size; +static const char *ram_memdev_id; +static QDict *machine_opts_dict; +static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts); +static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts); +static int display_remote; +static int snapshot; +static bool preconfig_requested; +static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list); +static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue); +static bool nographic = false; +static int mem_prealloc; /* force preallocation of physical target memory */ +static const char *vga_model = NULL; +static DisplayOptions dpy; +static int num_serial_hds; +static Chardev **serial_hds; +static const char *log_mask; +static const char *log_file; +static bool list_data_dirs; +static const char *qtest_chrdev; +static const char *qtest_log; +static bool opt_one_insn_per_tb; + +static int has_defaults = 1; +static int default_serial = 1; +static int default_parallel = 1; +static int default_monitor = 1; +static int default_floppy = 1; +static int default_cdrom = 1; +static int default_sdcard = 1; +static int default_vga = 1; +static int default_net = 1; + +static struct { + const char *driver; + int *flag; +} default_list[] = { + { .driver = "isa-serial", .flag = &default_serial }, + { .driver = "isa-parallel", .flag = &default_parallel }, + { .driver = "isa-fdc", .flag = &default_floppy }, + { .driver = "floppy", .flag = &default_floppy }, + { .driver = "ide-cd", .flag = &default_cdrom }, + { .driver = "ide-hd", .flag = &default_cdrom }, + { .driver = "scsi-cd", .flag = &default_cdrom }, + { .driver = "scsi-hd", .flag = &default_cdrom }, + { .driver = "VGA", .flag = &default_vga }, + { .driver = "isa-vga", .flag = &default_vga }, + { .driver = "cirrus-vga", .flag = &default_vga }, + { .driver = "isa-cirrus-vga", .flag = &default_vga }, + { .driver = 
"vmware-svga", .flag = &default_vga }, + { .driver = "qxl-vga", .flag = &default_vga }, + { .driver = "virtio-vga", .flag = &default_vga }, + { .driver = "ati-vga", .flag = &default_vga }, + { .driver = "vhost-user-vga", .flag = &default_vga }, + { .driver = "virtio-vga-gl", .flag = &default_vga }, +}; + +static QemuOptsList qemu_rtc_opts = { + .name = "rtc", + .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head), + .merge_lists = true, + .desc = { + { + .name = "base", + .type = QEMU_OPT_STRING, + },{ + .name = "clock", + .type = QEMU_OPT_STRING, + },{ + .name = "driftfix", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_option_rom_opts = { + .name = "option-rom", + .implied_opt_name = "romfile", + .head = QTAILQ_HEAD_INITIALIZER(qemu_option_rom_opts.head), + .desc = { + { + .name = "bootindex", + .type = QEMU_OPT_NUMBER, + }, { + .name = "romfile", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_accel_opts = { + .name = "accel", + .implied_opt_name = "accel", + .head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head), + .desc = { + /* + * no elements => accept any + * sanity checking will happen later + * when setting accelerator properties + */ + { } + }, +}; + +static QemuOptsList qemu_boot_opts = { + .name = "boot-opts", + .implied_opt_name = "order", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_boot_opts.head), + .desc = { + { + .name = "order", + .type = QEMU_OPT_STRING, + }, { + .name = "once", + .type = QEMU_OPT_STRING, + }, { + .name = "menu", + .type = QEMU_OPT_BOOL, + }, { + .name = "splash", + .type = QEMU_OPT_STRING, + }, { + .name = "splash-time", + .type = QEMU_OPT_NUMBER, + }, { + .name = "reboot-timeout", + .type = QEMU_OPT_NUMBER, + }, { + .name = "strict", + .type = QEMU_OPT_BOOL, + }, + { /*End of list */ } + }, +}; + +static QemuOptsList qemu_add_fd_opts = { + .name = "add-fd", + .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head), + .desc 
= { + { + .name = "fd", + .type = QEMU_OPT_NUMBER, + .help = "file descriptor of which a duplicate is added to fd set", + },{ + .name = "set", + .type = QEMU_OPT_NUMBER, + .help = "ID of the fd set to add fd to", + },{ + .name = "opaque", + .type = QEMU_OPT_STRING, + .help = "free-form string used to describe fd", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_object_opts = { + .name = "object", + .implied_opt_name = "qom-type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head), + .desc = { + { } + }, +}; + +static QemuOptsList qemu_tpmdev_opts = { + .name = "tpmdev", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_tpmdev_opts.head), + .desc = { + /* options are defined in the TPM backends */ + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_overcommit_opts = { + .name = "overcommit", + .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head), + .desc = { + { + .name = "mem-lock", + .type = QEMU_OPT_BOOL, + }, + { + .name = "cpu-pm", + .type = QEMU_OPT_BOOL, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_msg_opts = { + .name = "msg", + .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head), + .desc = { + { + .name = "timestamp", + .type = QEMU_OPT_BOOL, + }, + { + .name = "guest-name", + .type = QEMU_OPT_BOOL, + .help = "Prepends guest name for error messages but only if " + "-name guest is set otherwise option is ignored\n", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_name_opts = { + .name = "name", + .implied_opt_name = "guest", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_name_opts.head), + .desc = { + { + .name = "guest", + .type = QEMU_OPT_STRING, + .help = "Sets the name of the guest.\n" + "This name will be displayed in the SDL window caption.\n" + "The name will also be used for the VNC server", + }, { + .name = "process", + .type = QEMU_OPT_STRING, + .help = "Sets the name of the QEMU process, as shown in top etc", + }, { + .name = 
"debug-threads", + .type = QEMU_OPT_BOOL, + .help = "When enabled, name the individual threads; defaults off.\n" + "NOTE: The thread names are for debugging and not a\n" + "stable API.", + }, + { /* End of list */ } + }, +}; + +static QemuOptsList qemu_mem_opts = { + .name = "memory", + .implied_opt_name = "size", + .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_opts.head), + .merge_lists = true, + .desc = { + { + .name = "size", + .type = QEMU_OPT_SIZE, + }, + { + .name = "slots", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "maxmem", + .type = QEMU_OPT_SIZE, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_icount_opts = { + .name = "icount", + .implied_opt_name = "shift", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_icount_opts.head), + .desc = { + { + .name = "shift", + .type = QEMU_OPT_STRING, + }, { + .name = "align", + .type = QEMU_OPT_BOOL, + }, { + .name = "sleep", + .type = QEMU_OPT_BOOL, + }, { + .name = "rr", + .type = QEMU_OPT_STRING, + }, { + .name = "rrfile", + .type = QEMU_OPT_STRING, + }, { + .name = "rrsnapshot", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_fw_cfg_opts = { + .name = "fw_cfg", + .implied_opt_name = "name", + .head = QTAILQ_HEAD_INITIALIZER(qemu_fw_cfg_opts.head), + .desc = { + { + .name = "name", + .type = QEMU_OPT_STRING, + .help = "Sets the fw_cfg name of the blob to be inserted", + }, { + .name = "file", + .type = QEMU_OPT_STRING, + .help = "Sets the name of the file from which " + "the fw_cfg blob will be loaded", + }, { + .name = "string", + .type = QEMU_OPT_STRING, + .help = "Sets content of the blob to be inserted from a string", + }, { + .name = "gen_id", + .type = QEMU_OPT_STRING, + .help = "Sets id of the object generating the fw_cfg blob " + "to be inserted", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList qemu_action_opts = { + .name = "action", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_action_opts.head), + .desc 
= { + { + .name = "shutdown", + .type = QEMU_OPT_STRING, + },{ + .name = "reboot", + .type = QEMU_OPT_STRING, + },{ + .name = "panic", + .type = QEMU_OPT_STRING, + },{ + .name = "watchdog", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +const char *qemu_get_vm_name(void) +{ + return qemu_name; +} + +static void default_driver_disable(const char *driver) +{ + int i; + + if (!driver) { + return; + } + + for (i = 0; i < ARRAY_SIZE(default_list); i++) { + if (strcmp(default_list[i].driver, driver) != 0) + continue; + *(default_list[i].flag) = 0; + } +} + +static int default_driver_check(void *opaque, QemuOpts *opts, Error **errp) +{ + const char *driver = qemu_opt_get(opts, "driver"); + + default_driver_disable(driver); + return 0; +} + +static void default_driver_check_json(void) +{ + DeviceOption *opt; + + QTAILQ_FOREACH(opt, &device_opts, next) { + const char *driver = qdict_get_try_str(opt->opts, "driver"); + default_driver_disable(driver); + } +} + +static int parse_name(void *opaque, QemuOpts *opts, Error **errp) +{ + const char *proc_name; + + if (qemu_opt_get(opts, "debug-threads")) { + qemu_thread_naming(qemu_opt_get_bool(opts, "debug-threads", false)); + } + qemu_name = qemu_opt_get(opts, "guest"); + + proc_name = qemu_opt_get(opts, "process"); + if (proc_name) { + os_set_proc_name(proc_name); + } + + return 0; +} + +bool defaults_enabled(void) +{ + return has_defaults; +} + +#ifndef _WIN32 +static int parse_add_fd(void *opaque, QemuOpts *opts, Error **errp) +{ + int fd, dupfd, flags; + int64_t fdset_id; + const char *fd_opaque = NULL; + AddfdInfo *fdinfo; + + fd = qemu_opt_get_number(opts, "fd", -1); + fdset_id = qemu_opt_get_number(opts, "set", -1); + fd_opaque = qemu_opt_get(opts, "opaque"); + + if (fd < 0) { + error_setg(errp, "fd option is required and must be non-negative"); + return -1; + } + + if (fd <= STDERR_FILENO) { + error_setg(errp, "fd cannot be a standard I/O stream"); + return -1; + } + + /* + * All fds inherited across 
exec() necessarily have FD_CLOEXEC + * clear, while qemu sets FD_CLOEXEC on all other fds used internally. + */ + flags = fcntl(fd, F_GETFD); + if (flags == -1 || (flags & FD_CLOEXEC)) { + error_setg(errp, "fd is not valid or already in use"); + return -1; + } + + if (fdset_id < 0) { + error_setg(errp, "set option is required and must be non-negative"); + return -1; + } + +#ifdef F_DUPFD_CLOEXEC + dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else + dupfd = dup(fd); + if (dupfd != -1) { + qemu_set_cloexec(dupfd); + } +#endif + if (dupfd == -1) { + error_setg(errp, "error duplicating fd: %s", strerror(errno)); + return -1; + } + + /* add the duplicate fd, and optionally the opaque string, to the fd set */ + fdinfo = monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque, + &error_abort); + g_free(fdinfo); + + return 0; +} + +static int cleanup_add_fd(void *opaque, QemuOpts *opts, Error **errp) +{ + int fd; + + fd = qemu_opt_get_number(opts, "fd", -1); + close(fd); + + return 0; +} +#endif + +/***********************************************************/ +/* QEMU Block devices */ + +#define HD_OPTS "media=disk" +#define CDROM_OPTS "media=cdrom" +#define FD_OPTS "" +#define PFLASH_OPTS "" +#define MTD_OPTS "" +#define SD_OPTS "" + +static int drive_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + BlockInterfaceType *block_default_type = opaque; + + return drive_new(opts, *block_default_type, errp) == NULL; +} + +static int drive_enable_snapshot(void *opaque, QemuOpts *opts, Error **errp) +{ + if (qemu_opt_get(opts, "snapshot") == NULL) { + qemu_opt_set(opts, "snapshot", "on", &error_abort); + } + return 0; +} + +static void default_drive(int enable, int snapshot, BlockInterfaceType type, + int index, const char *optstr) +{ + QemuOpts *opts; + DriveInfo *dinfo; + + if (!enable || drive_get_by_index(type, index)) { + return; + } + + opts = drive_add(type, index, NULL, optstr); + if (snapshot) { + drive_enable_snapshot(NULL, opts, NULL); + } + + dinfo = 
drive_new(opts, type, &error_abort); + dinfo->is_default = true; + +} + +static void configure_blockdev(BlockdevOptionsQueue *bdo_queue, + MachineClass *machine_class, int snapshot) +{ + /* + * If the currently selected machine wishes to override the + * units-per-bus property of its default HBA interface type, do so + * now. + */ + if (machine_class->units_per_default_bus) { + override_max_devs(machine_class->block_default_type, + machine_class->units_per_default_bus); + } + + /* open the virtual block devices */ + while (!QSIMPLEQ_EMPTY(bdo_queue)) { + BlockdevOptionsQueueEntry *bdo = QSIMPLEQ_FIRST(bdo_queue); + + QSIMPLEQ_REMOVE_HEAD(bdo_queue, entry); + loc_push_restore(&bdo->loc); + qmp_blockdev_add(bdo->bdo, &error_fatal); + loc_pop(&bdo->loc); + qapi_free_BlockdevOptions(bdo->bdo); + g_free(bdo); + } + if (snapshot) { + qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, + NULL, NULL); + } + if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, + &machine_class->block_default_type, &error_fatal)) { + /* We printed help */ + exit(0); + } + + default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2, + CDROM_OPTS); + default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); + default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); + +} + +static QemuOptsList qemu_smp_opts = { + .name = "smp-opts", + .implied_opt_name = "cpus", + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(qemu_smp_opts.head), + .desc = { + { + .name = "cpus", + .type = QEMU_OPT_NUMBER, + }, { + .name = "sockets", + .type = QEMU_OPT_NUMBER, + }, { + .name = "dies", + .type = QEMU_OPT_NUMBER, + }, { + .name = "clusters", + .type = QEMU_OPT_NUMBER, + }, { + .name = "cores", + .type = QEMU_OPT_NUMBER, + }, { + .name = "threads", + .type = QEMU_OPT_NUMBER, + }, { + .name = "maxcpus", + .type = QEMU_OPT_NUMBER, + }, + { /*End of list */ } + }, +}; + +#if defined(CONFIG_POSIX) +static QemuOptsList qemu_run_with_opts = { + .name = 
"run-with", + .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), + .desc = { +#if defined(CONFIG_LINUX) + { + .name = "async-teardown", + .type = QEMU_OPT_BOOL, + }, +#endif + { + .name = "chroot", + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +#define qemu_add_run_with_opts() qemu_add_opts(&qemu_run_with_opts) + +#else + +#define qemu_add_run_with_opts() + +#endif /* CONFIG_POSIX */ + +static void realtime_init(void) +{ + if (enable_mlock) { + if (os_mlock() < 0) { + error_report("locking memory failed"); + exit(1); + } + } +} + + +static void configure_msg(QemuOpts *opts) +{ + message_with_timestamp = qemu_opt_get_bool(opts, "timestamp", false); + error_with_guestname = qemu_opt_get_bool(opts, "guest-name", false); +} + + +/***********************************************************/ +/* USB devices */ + +static int usb_device_add(const char *devname) +{ + USBDevice *dev = NULL; + + if (!machine_usb(current_machine)) { + return -1; + } + + dev = usbdevice_create(devname); + if (!dev) + return -1; + + return 0; +} + +static int usb_parse(const char *cmdline) +{ + int r; + r = usb_device_add(cmdline); + if (r < 0) { + error_report("could not add USB device '%s'", cmdline); + } + return r; +} + +/***********************************************************/ +/* machine registration */ + +static MachineClass *find_machine(const char *name, GSList *machines) +{ + GSList *el; + + for (el = machines; el; el = el->next) { + MachineClass *mc = el->data; + + if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) { + return mc; + } + } + + return NULL; +} + +static MachineClass *find_default_machine(GSList *machines) +{ + GSList *el; + MachineClass *default_machineclass = NULL; + + for (el = machines; el; el = el->next) { + MachineClass *mc = el->data; + + if (mc->is_default) { + assert(default_machineclass == NULL && "Multiple default machines"); + default_machineclass = mc; + } + } + + return default_machineclass; +} + +static void 
version(void) +{ + printf("QEMU emulator version " QEMU_FULL_VERSION "\n" + QEMU_COPYRIGHT "\n"); +} + +static void help(int exitcode) +{ + version(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + g_get_prgname()); + +#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ + if ((arch_mask) & arch_type) \ + fputs(opt_help, stdout); + +#define ARCHHEADING(text, arch_mask) \ + if ((arch_mask) & arch_type) \ + puts(stringify(text)); + +#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL) + +#include "qemu-options.def" + + printf("\nDuring emulation, the following keys are useful:\n" + "ctrl-alt-f toggle full screen\n" + "ctrl-alt-n switch to virtual console 'n'\n" + "ctrl-alt toggle mouse and keyboard grab\n" + "\n" + "When using -nographic, press 'ctrl-a h' to get some help.\n" + "\n" + QEMU_HELP_BOTTOM "\n"); + + exit(exitcode); +} + +enum { + +#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ + opt_enum, +#define DEFHEADING(text) +#define ARCHHEADING(text, arch_mask) + +#include "qemu-options.def" +}; + +#define HAS_ARG 0x0001 + +typedef struct QEMUOption { + const char *name; + int flags; + int index; + uint32_t arch_mask; +} QEMUOption; + +static const QEMUOption qemu_options[] = { + { "h", 0, QEMU_OPTION_h, QEMU_ARCH_ALL }, + +#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ + { option, opt_arg, opt_enum, arch_mask }, +#define DEFHEADING(text) +#define ARCHHEADING(text, arch_mask) + +#include "qemu-options.def" + { /* end of list */ } +}; + +typedef struct VGAInterfaceInfo { + const char *opt_name; /* option name */ + const char *name; /* human-readable name */ + /* Class names indicating that support is available. 
+ * If no class is specified, the interface is always available */ + const char *class_names[2]; +} VGAInterfaceInfo; + +static const VGAInterfaceInfo vga_interfaces[VGA_TYPE_MAX] = { + [VGA_NONE] = { + .opt_name = "none", + .name = "no graphic card", + }, + [VGA_STD] = { + .opt_name = "std", + .name = "standard VGA", + .class_names = { "VGA", "isa-vga" }, + }, + [VGA_CIRRUS] = { + .opt_name = "cirrus", + .name = "Cirrus VGA", + .class_names = { "cirrus-vga", "isa-cirrus-vga" }, + }, + [VGA_VMWARE] = { + .opt_name = "vmware", + .name = "VMWare SVGA", + .class_names = { "vmware-svga" }, + }, + [VGA_VIRTIO] = { + .opt_name = "virtio", + .name = "Virtio VGA", + .class_names = { "virtio-vga" }, + }, + [VGA_QXL] = { + .opt_name = "qxl", + .name = "QXL VGA", + .class_names = { "qxl-vga" }, + }, + [VGA_TCX] = { + .opt_name = "tcx", + .name = "TCX framebuffer", + .class_names = { "sun-tcx" }, + }, + [VGA_CG3] = { + .opt_name = "cg3", + .name = "CG3 framebuffer", + .class_names = { "cgthree" }, + }, +#ifdef CONFIG_XEN_BACKEND + [VGA_XENFB] = { + .opt_name = "xenfb", + .name = "Xen paravirtualized framebuffer", + }, +#endif +}; + +static bool vga_interface_available(VGAInterfaceType t) +{ + const VGAInterfaceInfo *ti = &vga_interfaces[t]; + + assert(t < VGA_TYPE_MAX); + return !ti->class_names[0] || + module_object_class_by_name(ti->class_names[0]) || + module_object_class_by_name(ti->class_names[1]); +} + +static const char * +get_default_vga_model(const MachineClass *machine_class) +{ + if (machine_class->default_display) { + for (int t = 0; t < VGA_TYPE_MAX; t++) { + const VGAInterfaceInfo *ti = &vga_interfaces[t]; + + if (ti->opt_name && vga_interface_available(t) && + g_str_equal(ti->opt_name, machine_class->default_display)) { + return machine_class->default_display; + } + } + + warn_report_once("Default display '%s' is not available in this binary", + machine_class->default_display); + return NULL; + } else if (vga_interface_available(VGA_CIRRUS)) { + return "cirrus"; 
+ } else if (vga_interface_available(VGA_STD)) { + return "std"; + } + + return NULL; +} + +static void select_vgahw(const MachineClass *machine_class, const char *p) +{ + const char *opts; + int t; + + if (g_str_equal(p, "help")) { + const char *def = get_default_vga_model(machine_class); + + for (t = 0; t < VGA_TYPE_MAX; t++) { + const VGAInterfaceInfo *ti = &vga_interfaces[t]; + + if (vga_interface_available(t) && ti->opt_name) { + printf("%-20s %s%s\n", ti->opt_name, ti->name ?: "", + (def && g_str_equal(ti->opt_name, def)) ? + " (default)" : ""); + } + } + exit(0); + } + + assert(vga_interface_type == VGA_NONE); + for (t = 0; t < VGA_TYPE_MAX; t++) { + const VGAInterfaceInfo *ti = &vga_interfaces[t]; + if (ti->opt_name && strstart(p, ti->opt_name, &opts)) { + if (!vga_interface_available(t)) { + error_report("%s not available", ti->name); + exit(1); + } + vga_interface_type = t; + break; + } + } + if (t == VGA_TYPE_MAX) { + invalid_vga: + error_report("unknown vga type: %s", p); + exit(1); + } + while (*opts) { + const char *nextopt; + + if (strstart(opts, ",retrace=", &nextopt)) { + opts = nextopt; + if (strstart(opts, "dumb", &nextopt)) + vga_retrace_method = VGA_RETRACE_DUMB; + else if (strstart(opts, "precise", &nextopt)) + vga_retrace_method = VGA_RETRACE_PRECISE; + else goto invalid_vga; + } else goto invalid_vga; + opts = nextopt; + } +} + +static void parse_display_qapi(const char *optarg) +{ + DisplayOptions *opts; + Visitor *v; + + v = qobject_input_visitor_new_str(optarg, "type", &error_fatal); + + visit_type_DisplayOptions(v, NULL, &opts, &error_fatal); + QAPI_CLONE_MEMBERS(DisplayOptions, &dpy, opts); + + qapi_free_DisplayOptions(opts); + visit_free(v); +} + +DisplayOptions *qmp_query_display_options(Error **errp) +{ + return QAPI_CLONE(DisplayOptions, &dpy); +} + +static void parse_display(const char *p) +{ + const char *opts; + + if (is_help_option(p)) { + qemu_display_help(); + exit(0); + } + + if (strstart(p, "vnc", &opts)) { + /* + * vnc 
isn't a (local) DisplayType but a protocol for remote + * display access. + */ + if (*opts == '=') { + vnc_parse(opts + 1); + } else { + error_report("VNC requires a display argument vnc="); + exit(1); + } + } else { + parse_display_qapi(p); + } +} + +static inline bool nonempty_str(const char *str) +{ + return str && *str; +} + +static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) +{ + gchar *buf; + size_t size; + const char *name, *file, *str, *gen_id; + FWCfgState *fw_cfg = (FWCfgState *) opaque; + + if (fw_cfg == NULL) { + error_setg(errp, "fw_cfg device not available"); + return -1; + } + name = qemu_opt_get(opts, "name"); + file = qemu_opt_get(opts, "file"); + str = qemu_opt_get(opts, "string"); + gen_id = qemu_opt_get(opts, "gen_id"); + + /* we need the name, and exactly one of: file, content string, gen_id */ + if (!nonempty_str(name) || + nonempty_str(file) + nonempty_str(str) + nonempty_str(gen_id) != 1) { + error_setg(errp, "name, plus exactly one of file," + " string and gen_id, are needed"); + return -1; + } + if (strlen(name) > FW_CFG_MAX_FILE_PATH - 1) { + error_setg(errp, "name too long (max. %d char)", + FW_CFG_MAX_FILE_PATH - 1); + return -1; + } + if (nonempty_str(gen_id)) { + /* + * In this particular case where the content is populated + * internally, the "etc/" namespace protection is relaxed, + * so do not emit a warning. 
+ */ + } else if (strncmp(name, "opt/", 4) != 0) { + warn_report("externally provided fw_cfg item names " + "should be prefixed with \"opt/\""); + } + if (nonempty_str(str)) { + size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */ + buf = g_memdup(str, size); + } else if (nonempty_str(gen_id)) { + if (!fw_cfg_add_from_generator(fw_cfg, name, gen_id, errp)) { + return -1; + } + return 0; + } else { + GError *err = NULL; + if (!g_file_get_contents(file, &buf, &size, &err)) { + error_setg(errp, "can't load %s: %s", file, err->message); + g_error_free(err); + return -1; + } + } + /* For legacy, keep user files in a specific global order. */ + fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER); + fw_cfg_add_file(fw_cfg, name, buf, size); + fw_cfg_reset_order_override(fw_cfg); + return 0; +} + +static int device_help_func(void *opaque, QemuOpts *opts, Error **errp) +{ + return qdev_device_help(opts); +} + +static int device_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + DeviceState *dev; + + dev = qdev_device_add(opts, errp); + if (!dev && *errp) { + error_report_err(*errp); + return -1; + } else if (dev) { + object_unref(OBJECT(dev)); + } + return 0; +} + +static int chardev_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + Error *local_err = NULL; + + if (!qemu_chr_new_from_opts(opts, NULL, &local_err)) { + if (local_err) { + error_propagate(errp, local_err); + return -1; + } + exit(0); + } + return 0; +} + +#ifdef CONFIG_VIRTFS +static int fsdev_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + return qemu_fsdev_add(opts, errp); +} +#endif + +static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + return monitor_init_opts(opts, errp); +} + +static void monitor_parse(const char *optarg, const char *mode, bool pretty) +{ + static int monitor_device_index = 0; + QemuOpts *opts; + const char *p; + char label[32]; + + if (strstart(optarg, "chardev:", &p)) { + snprintf(label, sizeof(label), "%s", p); 
+ } else { + snprintf(label, sizeof(label), "compat_monitor%d", + monitor_device_index); + opts = qemu_chr_parse_compat(label, optarg, true); + if (!opts) { + error_report("parse error: %s", optarg); + exit(1); + } + } + + opts = qemu_opts_create(qemu_find_opts("mon"), label, 1, &error_fatal); + qemu_opt_set(opts, "mode", mode, &error_abort); + qemu_opt_set(opts, "chardev", label, &error_abort); + if (!strcmp(mode, "control")) { + qemu_opt_set_bool(opts, "pretty", pretty, &error_abort); + } else { + assert(pretty == false); + } + monitor_device_index++; +} + +struct device_config { + enum { + DEV_USB, /* -usbdevice */ + DEV_SERIAL, /* -serial */ + DEV_PARALLEL, /* -parallel */ + DEV_DEBUGCON, /* -debugcon */ + DEV_GDB, /* -gdb, -s */ + DEV_SCLP, /* s390 sclp */ + } type; + const char *cmdline; + Location loc; + QTAILQ_ENTRY(device_config) next; +}; + +static QTAILQ_HEAD(, device_config) device_configs = + QTAILQ_HEAD_INITIALIZER(device_configs); + +static void add_device_config(int type, const char *cmdline) +{ + struct device_config *conf; + + conf = g_malloc0(sizeof(*conf)); + conf->type = type; + conf->cmdline = cmdline; + loc_save(&conf->loc); + QTAILQ_INSERT_TAIL(&device_configs, conf, next); +} + +static int foreach_device_config(int type, int (*func)(const char *cmdline)) +{ + struct device_config *conf; + int rc; + + QTAILQ_FOREACH(conf, &device_configs, next) { + if (conf->type != type) + continue; + loc_push_restore(&conf->loc); + rc = func(conf->cmdline); + loc_pop(&conf->loc); + if (rc) { + return rc; + } + } + return 0; +} + +static void qemu_disable_default_devices(void) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); + + default_driver_check_json(); + qemu_opts_foreach(qemu_find_opts("device"), + default_driver_check, NULL, NULL); + qemu_opts_foreach(qemu_find_opts("global"), + default_driver_check, NULL, NULL); + + if (!vga_model && !default_vga) { + vga_interface_type = VGA_DEVICE; + vga_interface_created = true; + } + if 
(!has_defaults || machine_class->no_serial) { + default_serial = 0; + } + if (!has_defaults || machine_class->no_parallel) { + default_parallel = 0; + } + if (!has_defaults || machine_class->no_floppy) { + default_floppy = 0; + } + if (!has_defaults || machine_class->no_cdrom) { + default_cdrom = 0; + } + if (!has_defaults || machine_class->no_sdcard) { + default_sdcard = 0; + } + if (!has_defaults) { + default_monitor = 0; + default_net = 0; + default_vga = 0; + } else { + if (default_net && machine_class->default_nic && + !module_object_class_by_name(machine_class->default_nic)) { + warn_report("Default NIC '%s' is not available in this binary", + machine_class->default_nic); + default_net = 0; + } + } +} + +static void qemu_create_default_devices(void) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); + + if (is_daemonized()) { + /* According to documentation and historically, -nographic redirects + * serial port, parallel port and monitor to stdio, which does not work + * with -daemonize. We can redirect these to null instead, but since + * -nographic is legacy, let's just error out. + * We disallow -nographic only if all other ports are not redirected + * explicitly, to not break existing legacy setups which uses + * -nographic _and_ redirects all ports explicitly - this is valid + * usage, -nographic is just a no-op in this case. 
+ */ + if (nographic + && (default_parallel || default_serial || default_monitor)) { + error_report("-nographic cannot be used with -daemonize"); + exit(1); + } + } + + if (nographic) { + if (default_parallel) + add_device_config(DEV_PARALLEL, "null"); + if (default_serial && default_monitor) { + add_device_config(DEV_SERIAL, "mon:stdio"); + } else { + if (default_serial) + add_device_config(DEV_SERIAL, "stdio"); + if (default_monitor) + monitor_parse("stdio", "readline", false); + } + } else { + if (default_serial) + add_device_config(DEV_SERIAL, "vc:80Cx24C"); + if (default_parallel) + add_device_config(DEV_PARALLEL, "vc:80Cx24C"); + if (default_monitor) + monitor_parse("vc:80Cx24C", "readline", false); + } + + if (default_net) { + QemuOptsList *net = qemu_find_opts("net"); + qemu_opts_parse(net, "nic", true, &error_abort); +#ifdef CONFIG_SLIRP + qemu_opts_parse(net, "user", true, &error_abort); +#endif + } + +#if defined(CONFIG_VNC) + if (!QTAILQ_EMPTY(&(qemu_find_opts("vnc")->head))) { + display_remote++; + } +#endif + if (dpy.type == DISPLAY_TYPE_DEFAULT && !display_remote) { + if (!qemu_display_find_default(&dpy)) { + dpy.type = DISPLAY_TYPE_NONE; +#if defined(CONFIG_VNC) + vnc_parse("localhost:0,to=99,id=default"); +#endif + } + } + if (dpy.type == DISPLAY_TYPE_DEFAULT) { + dpy.type = DISPLAY_TYPE_NONE; + } + + /* If no default VGA is requested, the default is "none". 
*/ + if (default_vga) { + vga_model = get_default_vga_model(machine_class); + } + if (vga_model) { + select_vgahw(machine_class, vga_model); + } +} + +static int serial_parse(const char *devname) +{ + int index = num_serial_hds; + char label[32]; + + if (strcmp(devname, "none") == 0) + return 0; + snprintf(label, sizeof(label), "serial%d", index); + serial_hds = g_renew(Chardev *, serial_hds, index + 1); + + serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); + if (!serial_hds[index]) { + error_report("could not connect serial device" + " to character backend '%s'", devname); + return -1; + } + num_serial_hds++; + return 0; +} + +Chardev *serial_hd(int i) +{ + assert(i >= 0); + if (i < num_serial_hds) { + return serial_hds[i]; + } + return NULL; +} + +static int parallel_parse(const char *devname) +{ + static int index = 0; + char label[32]; + + if (strcmp(devname, "none") == 0) + return 0; + if (index == MAX_PARALLEL_PORTS) { + error_report("too many parallel ports"); + exit(1); + } + snprintf(label, sizeof(label), "parallel%d", index); + parallel_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); + if (!parallel_hds[index]) { + error_report("could not connect parallel device" + " to character backend '%s'", devname); + return -1; + } + index++; + return 0; +} + +static int debugcon_parse(const char *devname) +{ + QemuOpts *opts; + + if (!qemu_chr_new_mux_mon("debugcon", devname, NULL)) { + error_report("invalid character backend '%s'", devname); + exit(1); + } + opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL); + if (!opts) { + error_report("already have a debugcon device"); + exit(1); + } + qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort); + qemu_opt_set(opts, "chardev", "debugcon", &error_abort); + return 0; +} + +static gint machine_class_cmp(gconstpointer a, gconstpointer b) +{ + const MachineClass *mc1 = a, *mc2 = b; + int res; + + if (mc1->family == NULL) { + if (mc2->family == NULL) { + /* Compare 
standalone machine types against each other; they sort + * in increasing order. + */ + return strcmp(object_class_get_name(OBJECT_CLASS(mc1)), + object_class_get_name(OBJECT_CLASS(mc2))); + } + + /* Standalone machine types sort after families. */ + return 1; + } + + if (mc2->family == NULL) { + /* Families sort before standalone machine types. */ + return -1; + } + + /* Families sort between each other alphabetically increasingly. */ + res = strcmp(mc1->family, mc2->family); + if (res != 0) { + return res; + } + + /* Within the same family, machine types sort in decreasing order. */ + return strcmp(object_class_get_name(OBJECT_CLASS(mc2)), + object_class_get_name(OBJECT_CLASS(mc1))); +} + +static void machine_help_func(const QDict *qdict) +{ + GSList *machines, *el; + const char *type = qdict_get_try_str(qdict, "type"); + + machines = object_class_get_list(TYPE_MACHINE, false); + if (type) { + ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, machines)); + if (machine_class) { + type_print_class_properties(object_class_get_name(machine_class)); + return; + } + } + + printf("Supported machines are:\n"); + machines = g_slist_sort(machines, machine_class_cmp); + for (el = machines; el; el = el->next) { + MachineClass *mc = el->data; + if (mc->alias) { + printf("%-20s %s (alias of %s)\n", mc->alias, mc->desc, mc->name); + } + printf("%-20s %s%s%s\n", mc->name, mc->desc, + mc->is_default ? " (default)" : "", + mc->deprecation_reason ? " (deprecated)" : ""); + } +} + +static void +machine_merge_property(const char *propname, QDict *prop, Error **errp) +{ + QDict *opts; + + opts = qdict_new(); + /* Preserve the caller's reference to prop. 
*/ + qobject_ref(prop); + qdict_put(opts, propname, prop); + keyval_merge(machine_opts_dict, opts, errp); + qobject_unref(opts); +} + +static void +machine_parse_property_opt(QemuOptsList *opts_list, const char *propname, + const char *arg) +{ + QDict *prop = NULL; + bool help = false; + + prop = keyval_parse(arg, opts_list->implied_opt_name, &help, &error_fatal); + if (help) { + qemu_opts_print_help(opts_list, true); + exit(0); + } + machine_merge_property(propname, prop, &error_fatal); + qobject_unref(prop); +} + +static const char *pid_file; +struct UnlinkPidfileNotifier { + Notifier notifier; + char *pid_file_realpath; +}; +static struct UnlinkPidfileNotifier qemu_unlink_pidfile_notifier; + +static void qemu_unlink_pidfile(Notifier *n, void *data) +{ + struct UnlinkPidfileNotifier *upn; + + upn = DO_UPCAST(struct UnlinkPidfileNotifier, notifier, n); + unlink(upn->pid_file_realpath); +} + +static const QEMUOption *lookup_opt(int argc, char **argv, + const char **poptarg, int *poptind) +{ + const QEMUOption *popt; + int optind = *poptind; + char *r = argv[optind]; + const char *optarg; + + loc_set_cmdline(argv, optind, 1); + optind++; + /* Treat --foo the same as -foo. 
*/ + if (r[1] == '-') + r++; + popt = qemu_options; + for(;;) { + if (!popt->name) { + error_report("invalid option"); + exit(1); + } + if (!strcmp(popt->name, r + 1)) + break; + popt++; + } + if (popt->flags & HAS_ARG) { + if (optind >= argc) { + error_report("requires an argument"); + exit(1); + } + optarg = argv[optind++]; + loc_set_cmdline(argv, optind - 2, 2); + } else { + optarg = NULL; + } + + *poptarg = optarg; + *poptind = optind; + + return popt; +} + +static MachineClass *select_machine(QDict *qdict, Error **errp) +{ + const char *optarg = qdict_get_try_str(qdict, "type"); + GSList *machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *machine_class; + Error *local_err = NULL; + + if (optarg) { + machine_class = find_machine(optarg, machines); + qdict_del(qdict, "type"); + if (!machine_class) { + error_setg(&local_err, "unsupported machine type"); + } + } else { + machine_class = find_default_machine(machines); + if (!machine_class) { + error_setg(&local_err, "No machine specified, and there is no default"); + } + } + + g_slist_free(machines); + if (local_err) { + error_append_hint(&local_err, "Use -machine help to list supported machines\n"); + error_propagate(errp, local_err); + } + return machine_class; +} + +static int object_parse_property_opt(Object *obj, + const char *name, const char *value, + const char *skip, Error **errp) +{ + if (g_str_equal(name, skip)) { + return 0; + } + + if (!object_property_parse(obj, name, value, errp)) { + return -1; + } + + return 0; +} + +/* *Non*recursively replace underscores with dashes in QDict keys. 
*/ +static void keyval_dashify(QDict *qdict, Error **errp) +{ + const QDictEntry *ent, *next; + char *p; + + for (ent = qdict_first(qdict); ent; ent = next) { + g_autofree char *new_key = NULL; + + next = qdict_next(qdict, ent); + if (!strchr(ent->key, '_')) { + continue; + } + new_key = g_strdup(ent->key); + for (p = new_key; *p; p++) { + if (*p == '_') { + *p = '-'; + } + } + if (qdict_haskey(qdict, new_key)) { + error_setg(errp, "Conflict between '%s' and '%s'", ent->key, new_key); + return; + } + qobject_ref(ent->value); + qdict_put_obj(qdict, new_key, ent->value); + qdict_del(qdict, ent->key); + } +} + +static void qemu_apply_legacy_machine_options(QDict *qdict) +{ + const char *value; + QObject *prop; + + keyval_dashify(qdict, &error_fatal); + + /* Legacy options do not correspond to MachineState properties. */ + value = qdict_get_try_str(qdict, "accel"); + if (value) { + accelerators = g_strdup(value); + qdict_del(qdict, "accel"); + } + + value = qdict_get_try_str(qdict, "igd-passthru"); + if (value) { + object_register_sugar_prop(ACCEL_CLASS_NAME("xen"), "igd-passthru", value, + false); + qdict_del(qdict, "igd-passthru"); + } + + value = qdict_get_try_str(qdict, "kvm-shadow-mem"); + if (value) { + object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kvm-shadow-mem", value, + false); + qdict_del(qdict, "kvm-shadow-mem"); + } + + value = qdict_get_try_str(qdict, "kernel-irqchip"); + if (value) { + object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kernel-irqchip", value, + false); + object_register_sugar_prop(ACCEL_CLASS_NAME("whpx"), "kernel-irqchip", value, + false); + qdict_del(qdict, "kernel-irqchip"); + } + + value = qdict_get_try_str(qdict, "memory-backend"); + if (value) { + if (mem_path) { + error_report("'-mem-path' can't be used together with" + "'-machine memory-backend'"); + exit(EXIT_FAILURE); + } + + /* Resolved later. 
*/ + ram_memdev_id = g_strdup(value); + qdict_del(qdict, "memory-backend"); + } + + prop = qdict_get(qdict, "memory"); + if (prop) { + have_custom_ram_size = + qobject_type(prop) == QTYPE_QDICT && + qdict_haskey(qobject_to(QDict, prop), "size"); + } +} + +static void object_option_foreach_add(bool (*type_opt_predicate)(const char *)) +{ + ObjectOption *opt, *next; + + QTAILQ_FOREACH_SAFE(opt, &object_opts, next, next) { + const char *type = ObjectType_str(opt->opts->qom_type); + if (type_opt_predicate(type)) { + user_creatable_add_qapi(opt->opts, &error_fatal); + qapi_free_ObjectOptions(opt->opts); + QTAILQ_REMOVE(&object_opts, opt, next); + g_free(opt); + } + } +} + +static void object_option_add_visitor(Visitor *v) +{ + ObjectOption *opt = g_new0(ObjectOption, 1); + visit_type_ObjectOptions(v, NULL, &opt->opts, &error_fatal); + QTAILQ_INSERT_TAIL(&object_opts, opt, next); +} + +static void object_option_parse(const char *optarg) +{ + QemuOpts *opts; + const char *type; + Visitor *v; + + if (optarg[0] == '{') { + QObject *obj = qobject_from_json(optarg, &error_fatal); + + v = qobject_input_visitor_new(obj); + qobject_unref(obj); + } else { + opts = qemu_opts_parse_noisily(qemu_find_opts("object"), + optarg, true); + if (!opts) { + exit(1); + } + + type = qemu_opt_get(opts, "qom-type"); + if (!type) { + error_setg(&error_fatal, QERR_MISSING_PARAMETER, "qom-type"); + } + if (user_creatable_print_help(type, opts)) { + exit(0); + } + + v = opts_visitor_new(opts); + } + + object_option_add_visitor(v); + visit_free(v); +} + +/* + * Very early object creation, before the sandbox options have been activated. + */ +static bool object_create_pre_sandbox(const char *type) +{ + /* + * Objects should in general not get initialized "too early" without + * a reason. If you add one, state the reason in a comment! + */ + + /* + * Reason: -sandbox on,resourcecontrol=deny disallows setting CPU + * affinity of threads. 
+ */ + if (g_str_equal(type, "thread-context")) { + return true; + } + + return false; +} + +/* + * Initial object creation happens before all other + * QEMU data types are created. The majority of objects + * can be created at this point. The rng-egd object + * cannot be created here, as it depends on the chardev + * already existing. + */ +static bool object_create_early(const char *type) +{ + /* + * Objects should not be made "delayed" without a reason. If you + * add one, state the reason in a comment! + */ + + /* Reason: already created. */ + if (object_create_pre_sandbox(type)) { + return false; + } + + /* Reason: property "chardev" */ + if (g_str_equal(type, "rng-egd") || + g_str_equal(type, "qtest")) { + return false; + } + +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) + /* Reason: cryptodev-vhost-user property "chardev" */ + if (g_str_equal(type, "cryptodev-vhost-user")) { + return false; + } +#endif + + /* Reason: vhost-user-blk-server property "node-name" */ + if (g_str_equal(type, "vhost-user-blk-server")) { + return false; + } + /* + * Reason: filter-* property "netdev" etc. + */ + if (g_str_equal(type, "filter-buffer") || + g_str_equal(type, "filter-dump") || + g_str_equal(type, "filter-mirror") || + g_str_equal(type, "filter-redirector") || + g_str_equal(type, "colo-compare") || + g_str_equal(type, "filter-rewriter") || + g_str_equal(type, "filter-replay")) { + return false; + } + + /* + * Allocation of large amounts of memory may delay + * chardev initialization for too long, and trigger timeouts + * on software that waits for a monitor socket to be created + * (e.g. libvirt). 
+ */ + if (g_str_has_prefix(type, "memory-backend-")) { + return false; + } + + return true; +} + +static void qemu_apply_machine_options(QDict *qdict) +{ + object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal); + + if (semihosting_enabled(false) && !semihosting_get_argc()) { + /* fall back to the -kernel/-append */ + semihosting_arg_fallback(current_machine->kernel_filename, current_machine->kernel_cmdline); + } + + if (current_machine->smp.cpus > 1) { + replay_add_blocker("smp"); + } +} + +static void qemu_create_early_backends(void) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); +#if defined(CONFIG_SDL) + const bool use_sdl = (dpy.type == DISPLAY_TYPE_SDL); +#else + const bool use_sdl = false; +#endif +#if defined(CONFIG_GTK) + const bool use_gtk = (dpy.type == DISPLAY_TYPE_GTK); +#else + const bool use_gtk = false; +#endif + + if (dpy.has_window_close && !use_gtk && !use_sdl) { + error_report("window-close is only valid for GTK and SDL, " + "ignoring option"); + } + + qemu_display_early_init(&dpy); + qemu_console_early_init(); + + if (dpy.has_gl && dpy.gl != DISPLAYGL_MODE_OFF && display_opengl == 0) { +#if defined(CONFIG_OPENGL) + error_report("OpenGL is not supported by the display"); +#else + error_report("OpenGL support is disabled"); +#endif + exit(1); + } + + object_option_foreach_add(object_create_early); + + /* spice needs the timers to be initialized by this point */ + /* spice must initialize before audio as it changes the default audiodev */ + /* spice must initialize before chardevs (for spicevmc and spiceport) */ + qemu_spice.init(); + + qemu_opts_foreach(qemu_find_opts("chardev"), + chardev_init_func, NULL, &error_fatal); + +#ifdef CONFIG_VIRTFS + qemu_opts_foreach(qemu_find_opts("fsdev"), + fsdev_init_func, NULL, &error_fatal); +#endif + + /* + * Note: we need to create audio and block backends before + * setting machine properties, so they can be referred to. 
+ */ + configure_blockdev(&bdo_queue, machine_class, snapshot); + audio_init_audiodevs(); +} + + +/* + * The remainder of object creation happens after the + * creation of chardev, fsdev, net clients and device data types. + */ +static bool object_create_late(const char *type) +{ + return !object_create_early(type) && !object_create_pre_sandbox(type); +} + +static void qemu_create_late_backends(void) +{ + if (qtest_chrdev) { + qtest_server_init(qtest_chrdev, qtest_log, &error_fatal); + } + + net_init_clients(); + + object_option_foreach_add(object_create_late); + + if (tpm_init() < 0) { + exit(1); + } + + qemu_opts_foreach(qemu_find_opts("mon"), + mon_init_func, NULL, &error_fatal); + + if (foreach_device_config(DEV_SERIAL, serial_parse) < 0) + exit(1); + if (foreach_device_config(DEV_PARALLEL, parallel_parse) < 0) + exit(1); + if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) + exit(1); + + /* now chardevs have been created we may have semihosting to connect */ + qemu_semihosting_chardev_init(); +} + +static void qemu_resolve_machine_memdev(void) +{ + if (ram_memdev_id) { + Object *backend; + ram_addr_t backend_size; + + backend = object_resolve_path_type(ram_memdev_id, + TYPE_MEMORY_BACKEND, NULL); + if (!backend) { + error_report("Memory backend '%s' not found", ram_memdev_id); + exit(EXIT_FAILURE); + } + if (!have_custom_ram_size) { + backend_size = object_property_get_uint(backend, "size", &error_abort); + current_machine->ram_size = backend_size; + } + object_property_set_link(OBJECT(current_machine), + "memory-backend", backend, &error_fatal); + } +} + +static void parse_memory_options(void) +{ + QemuOpts *opts = qemu_find_opts_singleton("memory"); + QDict *dict, *prop; + const char *mem_str; + Location loc; + + loc_push_none(&loc); + qemu_opts_loc_restore(opts); + + prop = qdict_new(); + + if (qemu_opt_get_size(opts, "size", 0) != 0) { + /* Fix up legacy suffix-less format */ + mem_str = qemu_opt_get(opts, "size"); + if 
(g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) { + g_autofree char *mib_str = g_strdup_printf("%sM", mem_str); + qdict_put_str(prop, "size", mib_str); + } else { + qdict_put_str(prop, "size", mem_str); + } + } + + if (qemu_opt_get(opts, "maxmem")) { + qdict_put_str(prop, "max-size", qemu_opt_get(opts, "maxmem")); + } + if (qemu_opt_get(opts, "slots")) { + qdict_put_str(prop, "slots", qemu_opt_get(opts, "slots")); + } + + dict = qdict_new(); + qdict_put(dict, "memory", prop); + keyval_merge(machine_opts_dict, dict, &error_fatal); + qobject_unref(dict); + loc_pop(&loc); +} + +static void qemu_create_machine(QDict *qdict) +{ + MachineClass *machine_class = select_machine(qdict, &error_fatal); + object_set_machine_compat_props(machine_class->compat_props); + + current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class))); + object_property_add_child(object_get_root(), "machine", + OBJECT(current_machine)); + object_property_add_child(container_get(OBJECT(current_machine), + "/unattached"), + "sysbus", OBJECT(sysbus_get_default())); + + if (machine_class->minimum_page_bits) { + if (!set_preferred_target_page_bits(machine_class->minimum_page_bits)) { + /* This would be a board error: specifying a minimum smaller than + * a target's compile-time fixed setting. + */ + g_assert_not_reached(); + } + } + + cpu_exec_init_all(); + page_size_init(); + + if (machine_class->hw_version) { + qemu_set_hw_version(machine_class->hw_version); + } + + /* + * Get the default machine options from the machine if it is not already + * specified either by the configuration file or by the command line. 
+ */ + if (machine_class->default_machine_opts) { + QDict *default_opts = + keyval_parse(machine_class->default_machine_opts, NULL, NULL, + &error_abort); + qemu_apply_legacy_machine_options(default_opts); + object_set_properties_from_keyval(OBJECT(current_machine), default_opts, + false, &error_abort); + qobject_unref(default_opts); + } +} + +static int global_init_func(void *opaque, QemuOpts *opts, Error **errp) +{ + GlobalProperty *g; + + g = g_malloc0(sizeof(*g)); + g->driver = qemu_opt_get(opts, "driver"); + g->property = qemu_opt_get(opts, "property"); + g->value = qemu_opt_get(opts, "value"); + qdev_prop_register_global(g); + return 0; +} + +/* + * Return whether configuration group @group is stored in QemuOpts, or + * recorded as one or more QDicts by qemu_record_config_group. + */ +static bool is_qemuopts_group(const char *group) +{ + if (g_str_equal(group, "object") || + g_str_equal(group, "audiodev") || + g_str_equal(group, "machine") || + g_str_equal(group, "smp-opts") || + g_str_equal(group, "boot-opts")) { + return false; + } + return true; +} + +static void qemu_record_config_group(const char *group, QDict *dict, + bool from_json, Error **errp) +{ + if (g_str_equal(group, "object")) { + Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict)); + object_option_add_visitor(v); + visit_free(v); + + } else if (g_str_equal(group, "audiodev")) { + Audiodev *dev = NULL; + Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict)); + if (visit_type_Audiodev(v, NULL, &dev, errp)) { + audio_define(dev); + } + visit_free(v); + + } else if (g_str_equal(group, "machine")) { + /* + * Cannot merge string-valued and type-safe dictionaries, so JSON + * is not accepted yet for -M. 
+ */ + assert(!from_json); + keyval_merge(machine_opts_dict, dict, errp); + } else if (g_str_equal(group, "smp-opts")) { + machine_merge_property("smp", dict, &error_fatal); + } else if (g_str_equal(group, "boot-opts")) { + machine_merge_property("boot", dict, &error_fatal); + } else { + abort(); + } +} + +/* + * Parse non-QemuOpts config file groups, pass the rest to + * qemu_config_do_parse. + */ +static void qemu_parse_config_group(const char *group, QDict *qdict, + void *opaque, Error **errp) +{ + QObject *crumpled; + if (is_qemuopts_group(group)) { + qemu_config_do_parse(group, qdict, opaque, errp); + return; + } + + crumpled = qdict_crumple(qdict, errp); + if (!crumpled) { + return; + } + switch (qobject_type(crumpled)) { + case QTYPE_QDICT: + qemu_record_config_group(group, qobject_to(QDict, crumpled), false, errp); + break; + case QTYPE_QLIST: + error_setg(errp, "Lists cannot be at top level of a configuration section"); + break; + default: + g_assert_not_reached(); + } + qobject_unref(crumpled); +} + +static void qemu_read_default_config_file(Error **errp) +{ + ERRP_GUARD(); + int ret; + g_autofree char *file = get_relocated_path(CONFIG_QEMU_CONFDIR "/qemu.conf"); + + ret = qemu_read_config_file(file, qemu_parse_config_group, errp); + if (ret < 0) { + if (ret == -ENOENT) { + error_free(*errp); + *errp = NULL; + } + } +} + +static void qemu_set_option(const char *str, Error **errp) +{ + char group[64], id[64], arg[64]; + QemuOptsList *list; + QemuOpts *opts; + int rc, offset; + + rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); + if (rc < 3 || str[offset] != '=') { + error_setg(errp, "can't parse: \"%s\"", str); + return; + } + + if (!is_qemuopts_group(group)) { + error_setg(errp, "-set is not supported with %s", group); + } else { + list = qemu_find_opts_err(group, errp); + if (list) { + opts = qemu_opts_find(list, id); + if (!opts) { + error_setg(errp, "there is no %s \"%s\" defined", group, id); + return; + } + qemu_opt_set(opts, 
arg, str + offset + 1, errp); + } + } +} + +static void user_register_global_props(void) +{ + qemu_opts_foreach(qemu_find_opts("global"), + global_init_func, NULL, NULL); +} + +static int do_configure_icount(void *opaque, QemuOpts *opts, Error **errp) +{ + icount_configure(opts, errp); + return 0; +} + +static int accelerator_set_property(void *opaque, + const char *name, const char *value, + Error **errp) +{ + return object_parse_property_opt(opaque, name, value, "accel", errp); +} + +static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp) +{ + bool *p_init_failed = opaque; + const char *acc = qemu_opt_get(opts, "accel"); + AccelClass *ac = accel_find(acc); + AccelState *accel; + int ret; + bool qtest_with_kvm; + + if (!acc) { + error_setg(errp, QERR_MISSING_PARAMETER, "accel"); + goto bad; + } + + qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL; + + if (!ac) { + if (!qtest_with_kvm) { + error_report("invalid accelerator %s", acc); + } + goto bad; + } + accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac))); + object_apply_compat_props(OBJECT(accel)); + qemu_opt_foreach(opts, accelerator_set_property, + accel, + &error_fatal); + /* + * If legacy -singlestep option is set, honour it for TCG and + * silently ignore for any other accelerator (which is how this + * option has always behaved). + */ + if (opt_one_insn_per_tb) { + /* + * This will always succeed for TCG, and we want to ignore + * the error from trying to set a nonexistent property + * on any other accelerator. 
+ */ + object_property_set_bool(OBJECT(accel), "one-insn-per-tb", true, NULL); + } + ret = accel_init_machine(accel, current_machine); + if (ret < 0) { + if (!qtest_with_kvm || ret != -ENOENT) { + error_report("failed to initialize %s: %s", acc, strerror(-ret)); + } + goto bad; + } + + return 1; + +bad: + *p_init_failed = true; + return 0; +} + +static void configure_accelerators(const char *progname) +{ + bool init_failed = false; + + qemu_opts_foreach(qemu_find_opts("icount"), + do_configure_icount, NULL, &error_fatal); + + if (QTAILQ_EMPTY(&qemu_accel_opts.head)) { + char **accel_list, **tmp; + + if (accelerators == NULL) { + /* Select the default accelerator */ + bool have_tcg = accel_find("tcg"); + bool have_kvm = accel_find("kvm"); + + if (have_tcg && have_kvm) { + if (g_str_has_suffix(progname, "kvm")) { + /* If the program name ends with "kvm", we prefer KVM */ + accelerators = "kvm:tcg"; + } else { + accelerators = "tcg:kvm"; + } + } else if (have_kvm) { + accelerators = "kvm"; + } else if (have_tcg) { + accelerators = "tcg"; + } else { + error_report("No accelerator selected and" + " no default accelerator available"); + exit(1); + } + } + accel_list = g_strsplit(accelerators, ":", 0); + + for (tmp = accel_list; *tmp; tmp++) { + /* + * Filter invalid accelerators here, to prevent obscenities + * such as "-machine accel=tcg,,thread=single". 
+ */ + if (accel_find(*tmp)) { + qemu_opts_parse_noisily(qemu_find_opts("accel"), *tmp, true); + } else { + init_failed = true; + error_report("invalid accelerator %s", *tmp); + } + } + g_strfreev(accel_list); + } else { + if (accelerators != NULL) { + error_report("The -accel and \"-machine accel=\" options are incompatible"); + exit(1); + } + } + + if (!qemu_opts_foreach(qemu_find_opts("accel"), + do_configure_accelerator, &init_failed, &error_fatal)) { + if (!init_failed) { + error_report("no accelerator found"); + } + exit(1); + } + + if (init_failed && !qtest_chrdev) { + error_report("falling back to %s", current_accel_name()); + } + + if (icount_enabled() && !tcg_enabled()) { + error_report("-icount is not allowed with hardware virtualization"); + exit(1); + } +} + +static void qemu_validate_options(const QDict *machine_opts) +{ + const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel"); + const char *initrd_filename = qdict_get_try_str(machine_opts, "initrd"); + const char *kernel_cmdline = qdict_get_try_str(machine_opts, "append"); + + if (kernel_filename == NULL) { + if (kernel_cmdline != NULL) { + error_report("-append only allowed with -kernel option"); + exit(1); + } + + if (initrd_filename != NULL) { + error_report("-initrd only allowed with -kernel option"); + exit(1); + } + } + + if (loadvm && preconfig_requested) { + error_report("'preconfig' and 'loadvm' options are " + "mutually exclusive"); + exit(EXIT_FAILURE); + } + if (incoming && preconfig_requested && strcmp(incoming, "defer") != 0) { + error_report("'preconfig' supports '-incoming defer' only"); + exit(EXIT_FAILURE); + } + +#ifdef CONFIG_CURSES + if (is_daemonized() && dpy.type == DISPLAY_TYPE_CURSES) { + error_report("curses display cannot be used with -daemonize"); + exit(1); + } +#endif +} + +static void qemu_process_sugar_options(void) +{ + if (mem_prealloc) { + QObject *smp = qdict_get(machine_opts_dict, "smp"); + if (smp && qobject_type(smp) == QTYPE_QDICT) { + QObject 
*cpus = qdict_get(qobject_to(QDict, smp), "cpus"); + if (cpus && qobject_type(cpus) == QTYPE_QSTRING) { + const char *val = qstring_get_str(qobject_to(QString, cpus)); + object_register_sugar_prop("memory-backend", "prealloc-threads", + val, false); + } + } + object_register_sugar_prop("memory-backend", "prealloc", "on", false); + } +} + +/* -action processing */ + +/* + * Process all the -action parameters parsed from cmdline. + */ +static int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp) +{ + Error *local_err = NULL; + QDict *qdict = qemu_opts_to_qdict(opts, NULL); + QObject *ret = NULL; + qmp_marshal_set_action(qdict, &ret, &local_err); + qobject_unref(ret); + qobject_unref(qdict); + if (local_err) { + error_propagate(errp, local_err); + return 1; + } + return 0; +} + +static void qemu_process_early_options(void) +{ + qemu_opts_foreach(qemu_find_opts("name"), + parse_name, NULL, &error_fatal); + + object_option_foreach_add(object_create_pre_sandbox); + +#ifdef CONFIG_SECCOMP + QemuOptsList *olist = qemu_find_opts_err("sandbox", NULL); + if (olist) { + qemu_opts_foreach(olist, parse_sandbox, NULL, &error_fatal); + } +#endif + + if (qemu_opts_foreach(qemu_find_opts("action"), + process_runstate_actions, NULL, &error_fatal)) { + exit(1); + } + +#ifndef _WIN32 + qemu_opts_foreach(qemu_find_opts("add-fd"), + parse_add_fd, NULL, &error_fatal); + + qemu_opts_foreach(qemu_find_opts("add-fd"), + cleanup_add_fd, NULL, &error_fatal); +#endif + + /* Open the logfile at this point and set the log mask if necessary. 
*/ + { + int mask = 0; + if (log_mask) { + mask = qemu_str_to_log_mask(log_mask); + if (!mask) { + qemu_print_log_usage(stdout); + exit(1); + } + } + qemu_set_log_filename_flags(log_file, mask, &error_fatal); + } + + qemu_add_default_firmwarepath(); +} + +static void qemu_process_help_options(void) +{ + /* + * Check for -cpu help and -device help before we call select_machine(), + * which will return an error if the architecture has no default machine + * type and the user did not specify one, so that the user doesn't need + * to say '-cpu help -machine something'. + */ + if (cpu_option && is_help_option(cpu_option)) { + list_cpus(); + exit(0); + } + + if (qemu_opts_foreach(qemu_find_opts("device"), + device_help_func, NULL, NULL)) { + exit(0); + } + + /* -L help lists the data directories and exits. */ + if (list_data_dirs) { + qemu_list_data_dirs(); + exit(0); + } +} + +static void qemu_maybe_daemonize(const char *pid_file) +{ + Error *err = NULL; + + os_daemonize(); + rcu_disable_atfork(); + + if (pid_file) { + char *pid_file_realpath = NULL; + + if (!qemu_write_pidfile(pid_file, &err)) { + error_reportf_err(err, "cannot create PID file: "); + exit(1); + } + + pid_file_realpath = g_malloc0(PATH_MAX); + if (!realpath(pid_file, pid_file_realpath)) { + if (errno != ENOENT) { + warn_report("not removing PID file on exit: cannot resolve PID " + "file path: %s: %s", pid_file, strerror(errno)); + } + return; + } + + qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) { + .notifier = { + .notify = qemu_unlink_pidfile, + }, + .pid_file_realpath = pid_file_realpath, + }; + qemu_add_exit_notifier(&qemu_unlink_pidfile_notifier.notifier); + } +} + +static void qemu_init_displays(void) +{ + DisplayState *ds; + + /* init local displays */ + ds = init_displaystate(); + qemu_display_init(ds, &dpy); + + /* must be after terminal init, SDL library changes signal handlers */ + os_setup_signal_handling(); + + /* init remote displays */ +#ifdef CONFIG_VNC + 
qemu_opts_foreach(qemu_find_opts("vnc"), + vnc_init_func, NULL, &error_fatal); +#endif + + if (using_spice) { + qemu_spice.display_init(); + } +} + +static void qemu_init_board(void) +{ + /* process plugin before CPUs are created, but once -smp has been parsed */ + qemu_plugin_load_list(&plugin_list, &error_fatal); + + /* From here on we enter MACHINE_PHASE_INITIALIZED. */ + machine_run_board_init(current_machine, mem_path, &error_fatal); + + drive_check_orphaned(); + + realtime_init(); +} + +static void qemu_create_cli_devices(void) +{ + DeviceOption *opt; + + soundhw_init(); + + qemu_opts_foreach(qemu_find_opts("fw_cfg"), + parse_fw_cfg, fw_cfg_find(), &error_fatal); + + /* init USB devices */ + if (machine_usb(current_machine)) { + if (foreach_device_config(DEV_USB, usb_parse) < 0) + exit(1); + } + + /* init generic devices */ + rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { + DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it + * behaves the same, but QMP still has to accept incorrectly typed + * options until libvirt is fixed and we want to be strict on the CLI + * from the start, so call qdev_device_add_from_qdict() directly for + * now. + */ + dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); + object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +} + +static void qemu_machine_creation_done(void) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + + /* Did we create any drives that we failed to create a device for? */ + drive_check_orphaned(); + + /* Don't warn about the default network setup that you get if + * no command line -net or -netdev options are specified. 
There + * are two cases that we would otherwise complain about: + * (1) board doesn't support a NIC but the implicit "-net nic" + * requested one + * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic" + * sets up a nic that isn't connected to anything. + */ + if (!default_net && (!qtest_enabled() || has_defaults)) { + net_check_clients(); + } + + qdev_prop_check_globals(); + + qdev_machine_creation_done(); + + if (machine->cgs) { + /* + * Verify that Confidential Guest Support has actually been initialized + */ + assert(machine->cgs->ready); + } + + if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) { + exit(1); + } + if (!vga_interface_created && !default_vga && + vga_interface_type != VGA_NONE) { + warn_report("A -vga option was passed but this machine " + "type does not use that option; " + "No VGA device has been created"); + } +} + +void qmp_x_exit_preconfig(Error **errp) +{ + if (phase_check(PHASE_MACHINE_INITIALIZED)) { + error_setg(errp, "The command is permitted only before machine initialization"); + return; + } + + qemu_init_board(); + qemu_create_cli_devices(); + qemu_machine_creation_done(); + + if (loadvm) { + load_snapshot(loadvm, NULL, false, NULL, &error_fatal); + } + if (replay_mode != REPLAY_MODE_NONE) { + replay_vmstate_init(); + } + + if (incoming) { + Error *local_err = NULL; + if (strcmp(incoming, "defer") != 0) { + qmp_migrate_incoming(incoming, &local_err); + if (local_err) { + error_reportf_err(local_err, "-incoming %s: ", incoming); + exit(1); + } + } + } else if (autostart) { + qmp_cont(NULL); + } +} + +void qemu_init(int argc, char **argv) +{ + QemuOpts *opts; + QemuOpts *icount_opts = NULL, *accel_opts = NULL; + QemuOptsList *olist; + int optind; + const char *optarg; + MachineClass *machine_class; + bool userconfig = true; + FILE *vmstate_dump_file = NULL; + + qemu_add_opts(&qemu_drive_opts); + qemu_add_drive_opts(&qemu_legacy_drive_opts); + qemu_add_drive_opts(&qemu_common_drive_opts); + 
qemu_add_drive_opts(&qemu_drive_opts); + qemu_add_drive_opts(&bdrv_runtime_opts); + qemu_add_opts(&qemu_chardev_opts); + qemu_add_opts(&qemu_device_opts); + qemu_add_opts(&qemu_netdev_opts); + qemu_add_opts(&qemu_nic_opts); + qemu_add_opts(&qemu_net_opts); + qemu_add_opts(&qemu_rtc_opts); + qemu_add_opts(&qemu_global_opts); + qemu_add_opts(&qemu_mon_opts); + qemu_add_opts(&qemu_trace_opts); + qemu_plugin_add_opts(); + qemu_add_opts(&qemu_option_rom_opts); + qemu_add_opts(&qemu_accel_opts); + qemu_add_opts(&qemu_mem_opts); + qemu_add_opts(&qemu_smp_opts); + qemu_add_opts(&qemu_boot_opts); + qemu_add_opts(&qemu_add_fd_opts); + qemu_add_opts(&qemu_object_opts); + qemu_add_opts(&qemu_tpmdev_opts); + qemu_add_opts(&qemu_overcommit_opts); + qemu_add_opts(&qemu_msg_opts); + qemu_add_opts(&qemu_name_opts); + qemu_add_opts(&qemu_numa_opts); + qemu_add_opts(&qemu_icount_opts); + qemu_add_opts(&qemu_semihosting_config_opts); + qemu_add_opts(&qemu_fw_cfg_opts); + qemu_add_opts(&qemu_action_opts); + qemu_add_run_with_opts(); + module_call_init(MODULE_INIT_OPTS); + + error_init(argv[0]); + qemu_init_exec_dir(argv[0]); + + qemu_init_arch_modules(); + + qemu_init_subsystems(); + + /* first pass of option parsing */ + optind = 1; + while (optind < argc) { + if (argv[optind][0] != '-') { + /* disk image */ + optind++; + } else { + const QEMUOption *popt; + + popt = lookup_opt(argc, argv, &optarg, &optind); + switch (popt->index) { + case QEMU_OPTION_nouserconfig: + userconfig = false; + break; + } + } + } + + machine_opts_dict = qdict_new(); + if (userconfig) { + qemu_read_default_config_file(&error_fatal); + } + + /* second pass of option parsing */ + optind = 1; + for(;;) { + if (optind >= argc) + break; + if (argv[optind][0] != '-') { + loc_set_cmdline(argv, optind, 1); + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); + } else { + const QEMUOption *popt; + + popt = lookup_opt(argc, argv, &optarg, &optind); + if (!(popt->arch_mask & arch_type)) { + error_report("Option not 
supported for this target"); + exit(1); + } + switch(popt->index) { + case QEMU_OPTION_cpu: + /* hw initialization will check this */ + cpu_option = optarg; + break; + case QEMU_OPTION_hda: + case QEMU_OPTION_hdb: + case QEMU_OPTION_hdc: + case QEMU_OPTION_hdd: + drive_add(IF_DEFAULT, popt->index - QEMU_OPTION_hda, optarg, + HD_OPTS); + break; + case QEMU_OPTION_blockdev: + { + Visitor *v; + BlockdevOptionsQueueEntry *bdo; + + v = qobject_input_visitor_new_str(optarg, "driver", + &error_fatal); + + bdo = g_new(BlockdevOptionsQueueEntry, 1); + visit_type_BlockdevOptions(v, NULL, &bdo->bdo, + &error_fatal); + visit_free(v); + loc_save(&bdo->loc); + QSIMPLEQ_INSERT_TAIL(&bdo_queue, bdo, entry); + break; + } + case QEMU_OPTION_drive: + opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), + optarg, false); + if (opts == NULL) { + exit(1); + } + break; + case QEMU_OPTION_set: + qemu_set_option(optarg, &error_fatal); + break; + case QEMU_OPTION_global: + if (qemu_global_option(optarg) != 0) + exit(1); + break; + case QEMU_OPTION_mtdblock: + drive_add(IF_MTD, -1, optarg, MTD_OPTS); + break; + case QEMU_OPTION_sd: + drive_add(IF_SD, -1, optarg, SD_OPTS); + break; + case QEMU_OPTION_pflash: + drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS); + break; + case QEMU_OPTION_snapshot: + snapshot = 1; + replay_add_blocker("-snapshot"); + break; + case QEMU_OPTION_numa: + opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), + optarg, true); + if (!opts) { + exit(1); + } + break; + case QEMU_OPTION_display: + parse_display(optarg); + break; + case QEMU_OPTION_nographic: + qdict_put_str(machine_opts_dict, "graphics", "off"); + nographic = true; + dpy.type = DISPLAY_TYPE_NONE; + break; + case QEMU_OPTION_portrait: + graphic_rotate = 90; + break; + case QEMU_OPTION_rotate: + graphic_rotate = strtol(optarg, (char **) &optarg, 10); + if (graphic_rotate != 0 && graphic_rotate != 90 && + graphic_rotate != 180 && graphic_rotate != 270) { + error_report("only 90, 180, 270 deg rotation is 
available"); + exit(1); + } + break; + case QEMU_OPTION_kernel: + qdict_put_str(machine_opts_dict, "kernel", optarg); + break; + case QEMU_OPTION_initrd: + qdict_put_str(machine_opts_dict, "initrd", optarg); + break; + case QEMU_OPTION_append: + qdict_put_str(machine_opts_dict, "append", optarg); + break; + case QEMU_OPTION_dtb: + qdict_put_str(machine_opts_dict, "dtb", optarg); + break; + case QEMU_OPTION_cdrom: + drive_add(IF_DEFAULT, 2, optarg, CDROM_OPTS); + break; + case QEMU_OPTION_boot: + machine_parse_property_opt(qemu_find_opts("boot-opts"), "boot", optarg); + break; + case QEMU_OPTION_fda: + case QEMU_OPTION_fdb: + drive_add(IF_FLOPPY, popt->index - QEMU_OPTION_fda, + optarg, FD_OPTS); + break; + case QEMU_OPTION_no_fd_bootchk: + fd_bootchk = 0; + break; + case QEMU_OPTION_netdev: + default_net = 0; + if (netdev_is_modern(optarg)) { + netdev_parse_modern(optarg); + } else { + net_client_parse(qemu_find_opts("netdev"), optarg); + } + break; + case QEMU_OPTION_nic: + default_net = 0; + net_client_parse(qemu_find_opts("nic"), optarg); + break; + case QEMU_OPTION_net: + default_net = 0; + net_client_parse(qemu_find_opts("net"), optarg); + break; +#ifdef CONFIG_LIBISCSI + case QEMU_OPTION_iscsi: + opts = qemu_opts_parse_noisily(qemu_find_opts("iscsi"), + optarg, false); + if (!opts) { + exit(1); + } + break; +#endif + case QEMU_OPTION_audiodev: + audio_parse_option(optarg); + break; + case QEMU_OPTION_audio: { + bool help; + char *model; + Audiodev *dev = NULL; + Visitor *v; + QDict *dict = keyval_parse(optarg, "driver", &help, &error_fatal); + if (help || (qdict_haskey(dict, "driver") && + is_help_option(qdict_get_str(dict, "driver")))) { + audio_help(); + exit(EXIT_SUCCESS); + } + if (!qdict_haskey(dict, "id")) { + qdict_put_str(dict, "id", "audiodev0"); + } + if (!qdict_haskey(dict, "model")) { + error_setg(&error_fatal, "Parameter 'model' is missing"); + } + model = g_strdup(qdict_get_str(dict, "model")); + qdict_del(dict, "model"); + if 
(is_help_option(model)) { + show_valid_soundhw(); + exit(0); + } + v = qobject_input_visitor_new_keyval(QOBJECT(dict)); + qobject_unref(dict); + visit_type_Audiodev(v, NULL, &dev, &error_fatal); + visit_free(v); + audio_define(dev); + select_soundhw(model, dev->id); + g_free(model); + break; + } + case QEMU_OPTION_h: + help(0); + break; + case QEMU_OPTION_version: + version(); + exit(0); + break; + case QEMU_OPTION_m: + opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true); + if (opts == NULL) { + exit(1); + } + break; +#ifdef CONFIG_TPM + case QEMU_OPTION_tpmdev: + if (tpm_config_parse(qemu_find_opts("tpmdev"), optarg) < 0) { + exit(1); + } + break; +#endif + case QEMU_OPTION_mempath: + mem_path = optarg; + break; + case QEMU_OPTION_mem_prealloc: + mem_prealloc = 1; + break; + case QEMU_OPTION_d: + log_mask = optarg; + break; + case QEMU_OPTION_D: + log_file = optarg; + break; + case QEMU_OPTION_DFILTER: + qemu_set_dfilter_ranges(optarg, &error_fatal); + break; +#if defined(CONFIG_TCG) && defined(CONFIG_LINUX) + case QEMU_OPTION_perfmap: + perf_enable_perfmap(); + break; + case QEMU_OPTION_jitdump: + perf_enable_jitdump(); + break; +#endif + case QEMU_OPTION_seed: + qemu_guest_random_seed_main(optarg, &error_fatal); + break; + case QEMU_OPTION_s: + add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT); + break; + case QEMU_OPTION_gdb: + add_device_config(DEV_GDB, optarg); + break; + case QEMU_OPTION_L: + if (is_help_option(optarg)) { + list_data_dirs = true; + } else { + qemu_add_data_dir(g_strdup(optarg)); + } + break; + case QEMU_OPTION_bios: + qdict_put_str(machine_opts_dict, "firmware", optarg); + break; + case QEMU_OPTION_singlestep: + opt_one_insn_per_tb = true; + break; + case QEMU_OPTION_S: + autostart = 0; + break; + case QEMU_OPTION_k: + keyboard_layout = optarg; + break; + case QEMU_OPTION_vga: + vga_model = optarg; + default_vga = 0; + break; + case QEMU_OPTION_g: + { + const char *p; + int w, h, depth; + p = optarg; + w = strtol(p, 
(char **)&p, 10); + if (w <= 0) { + graphic_error: + error_report("invalid resolution or depth"); + exit(1); + } + if (*p != 'x') + goto graphic_error; + p++; + h = strtol(p, (char **)&p, 10); + if (h <= 0) + goto graphic_error; + if (*p == 'x') { + p++; + depth = strtol(p, (char **)&p, 10); + if (depth != 1 && depth != 2 && depth != 4 && + depth != 8 && depth != 15 && depth != 16 && + depth != 24 && depth != 32) + goto graphic_error; + } else if (*p == '\0') { + depth = graphic_depth; + } else { + goto graphic_error; + } + + graphic_width = w; + graphic_height = h; + graphic_depth = depth; + } + break; + case QEMU_OPTION_echr: + { + char *r; + term_escape_char = strtol(optarg, &r, 0); + if (r == optarg) + printf("Bad argument to echr\n"); + break; + } + case QEMU_OPTION_monitor: + default_monitor = 0; + if (strncmp(optarg, "none", 4)) { + monitor_parse(optarg, "readline", false); + } + break; + case QEMU_OPTION_qmp: + monitor_parse(optarg, "control", false); + default_monitor = 0; + break; + case QEMU_OPTION_qmp_pretty: + monitor_parse(optarg, "control", true); + default_monitor = 0; + break; + case QEMU_OPTION_mon: + opts = qemu_opts_parse_noisily(qemu_find_opts("mon"), optarg, + true); + if (!opts) { + exit(1); + } + default_monitor = 0; + break; + case QEMU_OPTION_chardev: + opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), + optarg, true); + if (!opts) { + exit(1); + } + break; + case QEMU_OPTION_fsdev: + olist = qemu_find_opts("fsdev"); + if (!olist) { + error_report("fsdev support is disabled"); + exit(1); + } + opts = qemu_opts_parse_noisily(olist, optarg, true); + if (!opts) { + exit(1); + } + break; + case QEMU_OPTION_virtfs: { + QemuOpts *fsdev; + QemuOpts *device; + const char *writeout, *sock_fd, *socket, *path, *security_model, + *multidevs; + + olist = qemu_find_opts("virtfs"); + if (!olist) { + error_report("virtfs support is disabled"); + exit(1); + } + opts = qemu_opts_parse_noisily(olist, optarg, true); + if (!opts) { + exit(1); + } + + 
if (qemu_opt_get(opts, "fsdriver") == NULL || + qemu_opt_get(opts, "mount_tag") == NULL) { + error_report("Usage: -virtfs fsdriver,mount_tag=tag"); + exit(1); + } + fsdev = qemu_opts_create(qemu_find_opts("fsdev"), + qemu_opts_id(opts) ?: + qemu_opt_get(opts, "mount_tag"), + 1, NULL); + if (!fsdev) { + error_report("duplicate or invalid fsdev id: %s", + qemu_opt_get(opts, "mount_tag")); + exit(1); + } + + writeout = qemu_opt_get(opts, "writeout"); + if (writeout) { +#ifdef CONFIG_SYNC_FILE_RANGE + qemu_opt_set(fsdev, "writeout", writeout, &error_abort); +#else + error_report("writeout=immediate not supported " + "on this platform"); + exit(1); +#endif + } + qemu_opt_set(fsdev, "fsdriver", + qemu_opt_get(opts, "fsdriver"), &error_abort); + path = qemu_opt_get(opts, "path"); + if (path) { + qemu_opt_set(fsdev, "path", path, &error_abort); + } + security_model = qemu_opt_get(opts, "security_model"); + if (security_model) { + qemu_opt_set(fsdev, "security_model", security_model, + &error_abort); + } + socket = qemu_opt_get(opts, "socket"); + if (socket) { + qemu_opt_set(fsdev, "socket", socket, &error_abort); + } + sock_fd = qemu_opt_get(opts, "sock_fd"); + if (sock_fd) { + qemu_opt_set(fsdev, "sock_fd", sock_fd, &error_abort); + } + + qemu_opt_set_bool(fsdev, "readonly", + qemu_opt_get_bool(opts, "readonly", 0), + &error_abort); + multidevs = qemu_opt_get(opts, "multidevs"); + if (multidevs) { + qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); + } + device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, + &error_abort); + qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort); + qemu_opt_set(device, "fsdev", + qemu_opts_id(fsdev), &error_abort); + qemu_opt_set(device, "mount_tag", + qemu_opt_get(opts, "mount_tag"), &error_abort); + break; + } + case QEMU_OPTION_serial: + add_device_config(DEV_SERIAL, optarg); + default_serial = 0; + if (strncmp(optarg, "mon:", 4) == 0) { + default_monitor = 0; + } + break; + case QEMU_OPTION_action: + olist = 
qemu_find_opts("action"); + if (!qemu_opts_parse_noisily(olist, optarg, false)) { + exit(1); + } + break; + case QEMU_OPTION_watchdog_action: { + opts = qemu_opts_create(qemu_find_opts("action"), NULL, 0, &error_abort); + qemu_opt_set(opts, "watchdog", optarg, &error_abort); + break; + } + case QEMU_OPTION_parallel: + add_device_config(DEV_PARALLEL, optarg); + default_parallel = 0; + if (strncmp(optarg, "mon:", 4) == 0) { + default_monitor = 0; + } + break; + case QEMU_OPTION_debugcon: + add_device_config(DEV_DEBUGCON, optarg); + break; + case QEMU_OPTION_loadvm: + loadvm = optarg; + break; + case QEMU_OPTION_full_screen: + dpy.has_full_screen = true; + dpy.full_screen = true; + break; + case QEMU_OPTION_pidfile: + pid_file = optarg; + break; + case QEMU_OPTION_win2k_hack: + win2k_install_hack = 1; + break; + case QEMU_OPTION_acpitable: + opts = qemu_opts_parse_noisily(qemu_find_opts("acpi"), + optarg, true); + if (!opts) { + exit(1); + } + acpi_table_add(opts, &error_fatal); + break; + case QEMU_OPTION_smbios: + opts = qemu_opts_parse_noisily(qemu_find_opts("smbios"), + optarg, false); + if (!opts) { + exit(1); + } + smbios_entry_add(opts, &error_fatal); + break; + case QEMU_OPTION_fwcfg: + opts = qemu_opts_parse_noisily(qemu_find_opts("fw_cfg"), + optarg, true); + if (opts == NULL) { + exit(1); + } + break; + case QEMU_OPTION_preconfig: + preconfig_requested = true; + break; + case QEMU_OPTION_enable_kvm: + qdict_put_str(machine_opts_dict, "accel", "kvm"); + break; + case QEMU_OPTION_M: + case QEMU_OPTION_machine: + { + bool help; + + keyval_parse_into(machine_opts_dict, optarg, "type", &help, &error_fatal); + if (help) { + machine_help_func(machine_opts_dict); + exit(EXIT_SUCCESS); + } + break; + } + case QEMU_OPTION_accel: + accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"), + optarg, true); + optarg = qemu_opt_get(accel_opts, "accel"); + if (!optarg || is_help_option(optarg)) { + printf("Accelerators supported in QEMU binary:\n"); + GSList *el, 
*accel_list = object_class_get_list(TYPE_ACCEL, + false); + for (el = accel_list; el; el = el->next) { + gchar *typename = g_strdup(object_class_get_name( + OBJECT_CLASS(el->data))); + /* omit qtest which is used for tests only */ + if (g_strcmp0(typename, ACCEL_CLASS_NAME("qtest")) && + g_str_has_suffix(typename, ACCEL_CLASS_SUFFIX)) { + gchar **optname = g_strsplit(typename, + ACCEL_CLASS_SUFFIX, 0); + printf("%s\n", optname[0]); + g_strfreev(optname); + } + g_free(typename); + } + g_slist_free(accel_list); + exit(0); + } + break; + case QEMU_OPTION_usb: + qdict_put_str(machine_opts_dict, "usb", "on"); + break; + case QEMU_OPTION_usbdevice: + qdict_put_str(machine_opts_dict, "usb", "on"); + add_device_config(DEV_USB, optarg); + break; + case QEMU_OPTION_device: + if (optarg[0] == '{') { + QObject *obj = qobject_from_json(optarg, &error_fatal); + DeviceOption *opt = g_new0(DeviceOption, 1); + opt->opts = qobject_to(QDict, obj); + loc_save(&opt->loc); + assert(opt->opts != NULL); + QTAILQ_INSERT_TAIL(&device_opts, opt, next); + } else { + if (!qemu_opts_parse_noisily(qemu_find_opts("device"), + optarg, true)) { + exit(1); + } + } + break; + case QEMU_OPTION_smp: + machine_parse_property_opt(qemu_find_opts("smp-opts"), + "smp", optarg); + break; + case QEMU_OPTION_vnc: + vnc_parse(optarg); + break; + case QEMU_OPTION_no_acpi: + warn_report("-no-acpi is deprecated, use '-machine acpi=off' instead"); + qdict_put_str(machine_opts_dict, "acpi", "off"); + break; + case QEMU_OPTION_no_hpet: + warn_report("-no-hpet is deprecated, use '-machine hpet=off' instead"); + qdict_put_str(machine_opts_dict, "hpet", "off"); + break; + case QEMU_OPTION_no_reboot: + olist = qemu_find_opts("action"); + qemu_opts_parse_noisily(olist, "reboot=shutdown", false); + break; + case QEMU_OPTION_no_shutdown: + olist = qemu_find_opts("action"); + qemu_opts_parse_noisily(olist, "shutdown=pause", false); + break; + case QEMU_OPTION_uuid: + if (qemu_uuid_parse(optarg, &qemu_uuid) < 0) { + 
error_report("failed to parse UUID string: wrong format"); + exit(1); + } + qemu_uuid_set = true; + break; + case QEMU_OPTION_option_rom: + if (nb_option_roms >= MAX_OPTION_ROMS) { + error_report("too many option ROMs"); + exit(1); + } + opts = qemu_opts_parse_noisily(qemu_find_opts("option-rom"), + optarg, true); + if (!opts) { + exit(1); + } + option_rom[nb_option_roms].name = qemu_opt_get(opts, "romfile"); + option_rom[nb_option_roms].bootindex = + qemu_opt_get_number(opts, "bootindex", -1); + if (!option_rom[nb_option_roms].name) { + error_report("Option ROM file is not specified"); + exit(1); + } + nb_option_roms++; + break; + case QEMU_OPTION_semihosting: + qemu_semihosting_enable(); + break; + case QEMU_OPTION_semihosting_config: + if (qemu_semihosting_config_options(optarg) != 0) { + exit(1); + } + break; + case QEMU_OPTION_name: + opts = qemu_opts_parse_noisily(qemu_find_opts("name"), + optarg, true); + if (!opts) { + exit(1); + } + /* Capture guest name if -msg guest-name is used later */ + error_guest_name = qemu_opt_get(opts, "guest"); + break; + case QEMU_OPTION_prom_env: + if (nb_prom_envs >= MAX_PROM_ENVS) { + error_report("too many prom variables"); + exit(1); + } + prom_envs[nb_prom_envs] = optarg; + nb_prom_envs++; + break; + case QEMU_OPTION_old_param: + old_param = 1; + break; + case QEMU_OPTION_rtc: + opts = qemu_opts_parse_noisily(qemu_find_opts("rtc"), optarg, + false); + if (!opts) { + exit(1); + } + break; + case QEMU_OPTION_icount: + icount_opts = qemu_opts_parse_noisily(qemu_find_opts("icount"), + optarg, true); + if (!icount_opts) { + exit(1); + } + break; + case QEMU_OPTION_incoming: + if (!incoming) { + runstate_set(RUN_STATE_INMIGRATE); + } + incoming = optarg; + break; + case QEMU_OPTION_only_migratable: + only_migratable = 1; + break; + case QEMU_OPTION_nodefaults: + has_defaults = 0; + break; + case QEMU_OPTION_xen_domid: + if (!(accel_find("xen")) && !(accel_find("kvm"))) { + error_report("Option not supported for this target"); + 
exit(1); + } + xen_domid = atoi(optarg); + break; + case QEMU_OPTION_xen_attach: + if (!(accel_find("xen"))) { + error_report("Option not supported for this target"); + exit(1); + } + xen_mode = XEN_ATTACH; + break; + case QEMU_OPTION_xen_domid_restrict: + if (!(accel_find("xen"))) { + error_report("Option not supported for this target"); + exit(1); + } + xen_domid_restrict = true; + break; + case QEMU_OPTION_trace: + trace_opt_parse(optarg); + break; + case QEMU_OPTION_plugin: + qemu_plugin_opt_parse(optarg, &plugin_list); + break; + case QEMU_OPTION_readconfig: + qemu_read_config_file(optarg, qemu_parse_config_group, &error_fatal); + break; +#ifdef CONFIG_SPICE + case QEMU_OPTION_spice: + olist = qemu_find_opts_err("spice", NULL); + if (!olist) { + error_report("spice support is disabled"); + exit(1); + } + opts = qemu_opts_parse_noisily(olist, optarg, false); + if (!opts) { + exit(1); + } + display_remote++; + break; +#endif + case QEMU_OPTION_qtest: + qtest_chrdev = optarg; + break; + case QEMU_OPTION_qtest_log: + qtest_log = optarg; + break; + case QEMU_OPTION_sandbox: + olist = qemu_find_opts("sandbox"); + if (!olist) { +#ifndef CONFIG_SECCOMP + error_report("-sandbox support is not enabled " + "in this QEMU binary"); +#endif + exit(1); + } + + opts = qemu_opts_parse_noisily(olist, optarg, true); + if (!opts) { + exit(1); + } + break; + case QEMU_OPTION_add_fd: +#ifndef _WIN32 + opts = qemu_opts_parse_noisily(qemu_find_opts("add-fd"), + optarg, false); + if (!opts) { + exit(1); + } +#else + error_report("File descriptor passing is disabled on this " + "platform"); + exit(1); +#endif + break; + case QEMU_OPTION_object: + object_option_parse(optarg); + break; + case QEMU_OPTION_overcommit: + opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"), + optarg, false); + if (!opts) { + exit(1); + } + enable_mlock = qemu_opt_get_bool(opts, "mem-lock", false); + enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false); + break; + case QEMU_OPTION_compat: + { + 
CompatPolicy *opts_policy; + Visitor *v; + + v = qobject_input_visitor_new_str(optarg, NULL, + &error_fatal); + + visit_type_CompatPolicy(v, NULL, &opts_policy, &error_fatal); + QAPI_CLONE_MEMBERS(CompatPolicy, &compat_policy, opts_policy); + + qapi_free_CompatPolicy(opts_policy); + visit_free(v); + break; + } + case QEMU_OPTION_msg: + opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg, + false); + if (!opts) { + exit(1); + } + configure_msg(opts); + break; + case QEMU_OPTION_dump_vmstate: + if (vmstate_dump_file) { + error_report("only one '-dump-vmstate' " + "option may be given"); + exit(1); + } + vmstate_dump_file = fopen(optarg, "w"); + if (vmstate_dump_file == NULL) { + error_report("open %s: %s", optarg, strerror(errno)); + exit(1); + } + break; + case QEMU_OPTION_enable_sync_profile: + qsp_enable(); + break; + case QEMU_OPTION_nouserconfig: + /* Nothing to be parsed here. Especially, do not error out below. */ + break; +#if defined(CONFIG_POSIX) + case QEMU_OPTION_runas: + if (!os_set_runas(optarg)) { + error_report("User \"%s\" doesn't exist" + " (and is not :)", + optarg); + exit(1); + } + break; + case QEMU_OPTION_chroot: + warn_report("option is deprecated," + " use '-run-with chroot=...' 
instead"); + os_set_chroot(optarg); + break; + case QEMU_OPTION_daemonize: + os_set_daemonize(true); + break; +#if defined(CONFIG_LINUX) + /* deprecated */ + case QEMU_OPTION_asyncteardown: + init_async_teardown(); + break; +#endif + case QEMU_OPTION_run_with: { + const char *str; + opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), + optarg, false); + if (!opts) { + exit(1); + } +#if defined(CONFIG_LINUX) + if (qemu_opt_get_bool(opts, "async-teardown", false)) { + init_async_teardown(); + } +#endif + str = qemu_opt_get(opts, "chroot"); + if (str) { + os_set_chroot(str); + } + break; + } +#endif /* CONFIG_POSIX */ + + default: + error_report("Option not supported in this build"); + exit(1); + } + } + } + /* + * Clear error location left behind by the loop. + * Best done right after the loop. Do not insert code here! + */ + loc_set_none(); + + qemu_validate_options(machine_opts_dict); + qemu_process_sugar_options(); + + /* + * These options affect everything else and should be processed + * before daemonizing. + */ + qemu_process_early_options(); + + qemu_process_help_options(); + qemu_maybe_daemonize(pid_file); + + /* + * The trace backend must be initialized after daemonizing. + * trace_init_backends() will call st_init(), which will create the + * trace thread in the parent, and also register st_flush_trace_buffer() + * in atexit(). This function will force the parent to wait for the + * writeout thread to finish, which will not occur, and the parent + * process will be left in the host. 
+ */ + if (!trace_init_backends()) { + exit(1); + } + trace_init_file(); + + qemu_init_main_loop(&error_fatal); + cpu_timers_init(); + + user_register_global_props(); + replay_configure(icount_opts); + + configure_rtc(qemu_find_opts_singleton("rtc")); + + /* Transfer QemuOpts options into machine options */ + parse_memory_options(); + + qemu_create_machine(machine_opts_dict); + + suspend_mux_open(); + + qemu_disable_default_devices(); + qemu_create_default_devices(); + qemu_create_early_backends(); + + qemu_apply_legacy_machine_options(machine_opts_dict); + qemu_apply_machine_options(machine_opts_dict); + qobject_unref(machine_opts_dict); + phase_advance(PHASE_MACHINE_CREATED); + + /* + * Note: uses machine properties such as kernel-irqchip, must run + * after qemu_apply_machine_options. + */ + configure_accelerators(argv[0]); + phase_advance(PHASE_ACCEL_CREATED); + + /* + * Beware, QOM objects created before this point miss global and + * compat properties. + * + * Global properties get set up by qdev_prop_register_global(), + * called from user_register_global_props(), and certain option + * desugaring. Also in CPU feature desugaring (buried in + * parse_cpu_option()), which happens below this point, but may + * only target the CPU type, which can only be created after + * parse_cpu_option() returned the type. + * + * Machine compat properties: object_set_machine_compat_props(). + * Accelerator compat props: object_set_accelerator_compat_props(), + * called from do_configure_accelerator(). + */ + + machine_class = MACHINE_GET_CLASS(current_machine); + if (!qtest_enabled() && machine_class->deprecation_reason) { + warn_report("Machine type '%s' is deprecated: %s", + machine_class->name, machine_class->deprecation_reason); + } + + /* + * Create backends before creating migration objects, so that it can + * check against compatibilities on the backend memories (e.g. postcopy + * over memory-backend-file objects). 
+ */ + qemu_create_late_backends(); + + /* + * Note: creates a QOM object, must run only after global and + * compat properties have been set up. + */ + migration_object_init(); + + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { + current_machine->cpu_type = parse_cpu_option(cpu_option); + } + /* NB: for machine none cpu_type could STILL be NULL here! */ + + qemu_resolve_machine_memdev(); + parse_numa_opts(current_machine); + + if (vmstate_dump_file) { + /* dump and exit */ + module_load_qom_all(); + dump_vmstate_json_to_file(vmstate_dump_file); + exit(0); + } + + if (!preconfig_requested) { + qmp_x_exit_preconfig(&error_fatal); + } + qemu_init_displays(); + accel_setup_post(current_machine); + os_setup_post(); + resume_mux_open(); +} diff --git a/system/watchpoint.c b/system/watchpoint.c new file mode 100644 index 0000000..45d1f12 --- /dev/null +++ b/system/watchpoint.c @@ -0,0 +1,226 @@ +/* + * CPU watchpoints + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "exec/exec-all.h" +#include "exec/translate-all.h" +#include "sysemu/tcg.h" +#include "sysemu/replay.h" +#include "hw/core/tcg-cpu-ops.h" +#include "hw/core/cpu.h" + +/* Add a watchpoint. */ +int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, + int flags, CPUWatchpoint **watchpoint) +{ + CPUWatchpoint *wp; + vaddr in_page; + + /* forbid ranges which are empty or run off the end of the address space */ + if (len == 0 || (addr + len - 1) < addr) { + error_report("tried to set invalid watchpoint at %" + VADDR_PRIx ", len=%" VADDR_PRIu, addr, len); + return -EINVAL; + } + wp = g_malloc(sizeof(*wp)); + + wp->vaddr = addr; + wp->len = len; + wp->flags = flags; + + /* keep all GDB-injected watchpoints in front */ + if (flags & BP_GDB) { + QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry); + } else { + QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry); + } + + in_page = -(addr | TARGET_PAGE_MASK); + if (len <= in_page) { + tlb_flush_page(cpu, addr); + } else { + tlb_flush(cpu); + } + + if (watchpoint) { + *watchpoint = wp; + } + return 0; +} + +/* Remove a specific watchpoint. */ +int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, + int flags) +{ + CPUWatchpoint *wp; + + QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { + if (addr == wp->vaddr && len == wp->len + && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) { + cpu_watchpoint_remove_by_ref(cpu, wp); + return 0; + } + } + return -ENOENT; +} + +/* Remove a specific watchpoint by reference. */ +void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint) +{ + QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry); + + tlb_flush_page(cpu, watchpoint->vaddr); + + g_free(watchpoint); +} + +/* Remove all matching watchpoints. 
+ */ +void cpu_watchpoint_remove_all(CPUState *cpu, int mask) +{ + CPUWatchpoint *wp, *next; + + QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) { + if (wp->flags & mask) { + cpu_watchpoint_remove_by_ref(cpu, wp); + } + } +} + +#ifdef CONFIG_TCG + +/* + * Return true if this watchpoint address matches the specified + * access (i.e. the address range covered by the watchpoint overlaps + * partially or completely with the address range covered by the + * access). + */ +static inline bool watchpoint_address_matches(CPUWatchpoint *wp, + vaddr addr, vaddr len) +{ + /* + * We know the lengths are non-zero, but a little caution is + * required to avoid errors in the case where the range ends + * exactly at the top of the address space and so addr + len + * wraps round to zero. + */ + vaddr wpend = wp->vaddr + wp->len - 1; + vaddr addrend = addr + len - 1; + + return !(addr > wpend || wp->vaddr > addrend); +} + +/* Return flags for watchpoints that overlap [addr, addr + len). */ +int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len) +{ + CPUWatchpoint *wp; + int ret = 0; + + QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { + if (watchpoint_address_matches(wp, addr, len)) { + ret |= wp->flags; + } + } + return ret; +} + +/* Generate a debug exception if a watchpoint has been hit. */ +void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, + MemTxAttrs attrs, int flags, uintptr_t ra) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + CPUWatchpoint *wp; + + assert(tcg_enabled()); + if (cpu->watchpoint_hit) { + /* + * We re-entered the check after replacing the TB. + * Now raise the debug interrupt so that it will + * trigger after the current instruction.
+ */ + qemu_mutex_lock_iothread(); + cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG); + qemu_mutex_unlock_iothread(); + return; + } + + if (cc->tcg_ops->adjust_watchpoint_address) { + /* this is currently used only by ARM BE32 */ + addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len); + } + + assert((flags & ~BP_MEM_ACCESS) == 0); + QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { + int hit_flags = wp->flags & flags; + + if (hit_flags && watchpoint_address_matches(wp, addr, len)) { + if (replay_running_debug()) { + /* + * replay_breakpoint reads icount. + * Force recompile to succeed, because icount may + * be read only at the end of the block. + */ + if (!cpu->neg.can_do_io) { + /* Force execution of one insn next time. */ + cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ + | curr_cflags(cpu); + cpu_loop_exit_restore(cpu, ra); + } + /* + * Don't process the watchpoints when we are + * in a reverse debugging operation. + */ + replay_breakpoint(); + return; + } + + wp->flags |= hit_flags << BP_HIT_SHIFT; + wp->hitaddr = MAX(addr, wp->vaddr); + wp->hitattrs = attrs; + + if (wp->flags & BP_CPU + && cc->tcg_ops->debug_check_watchpoint + && !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) { + wp->flags &= ~BP_WATCHPOINT_HIT; + continue; + } + cpu->watchpoint_hit = wp; + + mmap_lock(); + /* This call also restores vCPU state */ + tb_check_watchpoint(cpu, ra); + if (wp->flags & BP_STOP_BEFORE_ACCESS) { + cpu->exception_index = EXCP_DEBUG; + mmap_unlock(); + cpu_loop_exit(cpu); + } else { + /* Force execution of one insn next time. 
*/ + cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ + | curr_cflags(cpu); + mmap_unlock(); + cpu_loop_exit_noexc(cpu); + } + } else { + wp->flags &= ~BP_WATCHPOINT_HIT; + } + } +} + +#endif /* CONFIG_TCG */ diff --git a/tests/unit/meson.build b/tests/unit/meson.build index 1977b30..f33ae64 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -59,7 +59,7 @@ if have_system or have_tools } if seccomp.found() - tests += {'test-seccomp': ['../../softmmu/qemu-seccomp.c', seccomp]} + tests += {'test-seccomp': ['../../system/qemu-seccomp.c', seccomp]} endif endif -- cgit v1.1