diff options
Diffstat (limited to 'system')
34 files changed, 1920 insertions, 898 deletions
diff --git a/system/arch_init.c b/system/arch_init.c index 79716f9..e857368 100644 --- a/system/arch_init.c +++ b/system/arch_init.c @@ -22,29 +22,9 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" -#include "qemu/module.h" -#include "sysemu/arch_init.h" +#include "system/arch_init.h" -#ifdef TARGET_SPARC -int graphic_width = 1024; -int graphic_height = 768; -int graphic_depth = 8; -#elif defined(TARGET_M68K) -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 8; -#else -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 32; -#endif - -const uint32_t arch_type = QEMU_ARCH; - -void qemu_init_arch_modules(void) +bool qemu_arch_available(unsigned qemu_arch_mask) { -#ifdef CONFIG_MODULES - module_init_info(qemu_modinfo); - module_allow_arch(TARGET_NAME); -#endif + return qemu_arch_mask & QEMU_ARCH; } diff --git a/system/async-teardown.c b/system/async-teardown.c index 396963c..9148ee8 100644 --- a/system/async-teardown.c +++ b/system/async-teardown.c @@ -26,40 +26,6 @@ static pid_t the_ppid; -/* - * Close all open file descriptors. - */ -static void close_all_open_fd(void) -{ - struct dirent *de; - int fd, dfd; - DIR *dir; - -#ifdef CONFIG_CLOSE_RANGE - int r = close_range(0, ~0U, 0); - if (!r) { - /* Success, no need to try other ways. */ - return; - } -#endif - - dir = opendir("/proc/self/fd"); - if (!dir) { - /* If /proc is not mounted, there is nothing that can be done. */ - return; - } - /* Avoid closing the directory. */ - dfd = dirfd(dir); - - for (de = readdir(dir); de; de = readdir(dir)) { - fd = atoi(de->d_name); - if (fd != dfd) { - close(fd); - } - } - closedir(dir); -} - static void hup_handler(int signal) { /* Check every second if this process has been reparented. */ @@ -85,9 +51,8 @@ static int async_teardown_fn(void *arg) /* * Close all file descriptors that might have been inherited from the * main qemu process when doing clone, needed to make libvirt happy. - * Not using close_range for increased compatibility with older kernels. */ - close_all_open_fd(); + qemu_close_all_open_fd(NULL, 0); /* Set up a handler for SIGHUP and unblock SIGHUP. */ sigaction(SIGHUP, &sa, NULL); diff --git a/system/balloon.c b/system/balloon.c index fda7af8..311fa50 100644 --- a/system/balloon.c +++ b/system/balloon.c @@ -26,8 +26,8 @@ #include "qemu/osdep.h" #include "qemu/atomic.h" -#include "sysemu/kvm.h" -#include "sysemu/balloon.h" +#include "system/kvm.h" +#include "system/balloon.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" #include "qapi/qmp/qerror.h" diff --git a/system/bootdevice.c b/system/bootdevice.c index 2579b26..1845be4 100644 --- a/system/bootdevice.c +++ b/system/bootdevice.c @@ -24,10 +24,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "sysemu/sysemu.h" +#include "system/system.h" #include "qapi/visitor.h" #include "qemu/error-report.h" -#include "sysemu/reset.h" +#include "system/reset.h" #include "hw/qdev-core.h" #include "hw/boards.h" diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c deleted file mode 100644 index c951a6c..0000000 --- a/system/cpu-throttle.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/thread.h" -#include "hw/core/cpu.h" -#include "qemu/main-loop.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-throttle.h" - -/* vcpu throttling controls */ -static QEMUTimer *throttle_timer; -static unsigned int throttle_percentage; - -#define CPU_THROTTLE_PCT_MIN 1 -#define CPU_THROTTLE_PCT_MAX 99 -#define CPU_THROTTLE_TIMESLICE_NS 10000000 - -static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) -{ - double pct; - double throttle_ratio; - int64_t sleeptime_ns, endtime_ns; - - if (!cpu_throttle_get_percentage()) { - return; - } - - pct = (double)cpu_throttle_get_percentage() / 100; - throttle_ratio = pct / (1 - pct); - /* Add 1ns to fix double's rounding error (like 0.9999999...) */ - sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); - endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; - while (sleeptime_ns > 0 && !cpu->stop) { - if (sleeptime_ns > SCALE_MS) { - qemu_cond_timedwait_bql(cpu->halt_cond, - sleeptime_ns / SCALE_MS); - } else { - bql_unlock(); - g_usleep(sleeptime_ns / SCALE_US); - bql_lock(); - } - sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - } - qatomic_set(&cpu->throttle_thread_scheduled, 0); -} - -static void cpu_throttle_timer_tick(void *opaque) -{ - CPUState *cpu; - double pct; - - /* Stop the timer if needed */ - if (!cpu_throttle_get_percentage()) { - return; - } - CPU_FOREACH(cpu) { - if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) { - async_run_on_cpu(cpu, cpu_throttle_thread, - RUN_ON_CPU_NULL); - } - } - - pct = (double)cpu_throttle_get_percentage() / 100; - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + - CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); -} - -void cpu_throttle_set(int new_throttle_pct) -{ - /* - * boolean to store whether throttle is already active or not, - * before modifying throttle_percentage - */ - bool throttle_active = cpu_throttle_active(); - - /* Ensure throttle percentage is within valid range */ - new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); - new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); - - qatomic_set(&throttle_percentage, new_throttle_pct); - - if (!throttle_active) { - cpu_throttle_timer_tick(NULL); - } -} - -void cpu_throttle_stop(void) -{ - qatomic_set(&throttle_percentage, 0); -} - -bool cpu_throttle_active(void) -{ - return (cpu_throttle_get_percentage() != 0); -} - -int cpu_throttle_get_percentage(void) -{ - return qatomic_read(&throttle_percentage); -} - -void cpu_throttle_init(void) -{ - throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - cpu_throttle_timer_tick, NULL); -} diff --git a/system/cpu-timers.c b/system/cpu-timers.c index 0b31c9a..cb35fa6 100644 --- a/system/cpu-timers.c +++ b/system/cpu-timers.c @@ -27,16 +27,16 @@ #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "sysemu/cpus.h" +#include "system/cpus.h" #include "qemu/main-loop.h" #include "qemu/option.h" #include "qemu/seqlock.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" +#include "system/replay.h" +#include "system/runstate.h" #include "hw/core/cpu.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/cpu-throttle.h" -#include "sysemu/cpu-timers-internal.h" +#include "system/cpu-timers.h" +#include "system/cpu-timers-internal.h" +#include "exec/icount.h" /* clock and ticks */ @@ -272,6 +272,4 @@ void cpu_timers_init(void) seqlock_init(&timers_state.vm_clock_seqlock); qemu_spin_init(&timers_state.vm_clock_lock); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); - - cpu_throttle_init(); } diff --git a/system/cpus.c b/system/cpus.c index d3640c9..d16b0df 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -31,18 +31,19 @@ #include "qapi/qapi-events-run-state.h" #include "qapi/qmp/qerror.h" #include "exec/gdbstub.h" -#include "sysemu/hw_accel.h" +#include "system/accel-ops.h" +#include "system/hw_accel.h" #include "exec/cpu-common.h" #include "qemu/thread.h" #include "qemu/main-loop.h" #include "qemu/plugin.h" -#include "sysemu/cpus.h" +#include "system/cpus.h" #include "qemu/guest-random.h" #include "hw/nmi.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/whpx.h" +#include "system/replay.h" +#include "system/runstate.h" +#include "system/cpu-timers.h" +#include "system/whpx.h" #include "hw/boards.h" #include "hw/hw.h" #include "trace.h" @@ -298,14 +299,18 @@ static int do_vm_stop(RunState state, bool send_stop) if (oldstate == RUN_STATE_RUNNING) { pause_all_vcpus(); } - vm_state_notify(0, state); + ret = vm_state_notify(0, state); if (send_stop) { qapi_event_send_stop(); } } bdrv_drain_all(); - ret = bdrv_flush_all(); + /* + * Even if vm_state_notify() return failure, + * it would be better to flush as before. + */ + ret |= bdrv_flush_all(); trace_vm_stop_flush_all(ret); return ret; @@ -514,6 +519,20 @@ bool qemu_in_vcpu_thread(void) QEMU_DEFINE_STATIC_CO_TLS(bool, bql_locked) +static uint32_t bql_unlock_blocked; + +void bql_block_unlock(bool increase) +{ + uint32_t new_value; + + assert(bql_locked()); + + /* check for overflow! */ + new_value = bql_unlock_blocked + increase - !increase; + assert((new_value > bql_unlock_blocked) == increase); + bql_unlock_blocked = new_value; +} + bool bql_locked(void) { return get_bql_locked(); @@ -524,6 +543,12 @@ bool qemu_in_main_thread(void) return bql_locked(); } +void rust_bql_mock_lock(void) +{ + error_report("This function should be used only from tests"); + abort(); +} + /* * The BQL is taken from so many places that it is worth profiling the * callers directly, instead of funneling them all through a single function. @@ -540,6 +565,7 @@ void bql_lock_impl(const char *file, int line) void bql_unlock(void) { g_assert(bql_locked()); + g_assert(!bql_unlock_blocked); set_bql_locked(false); qemu_mutex_unlock(&bql); } @@ -568,6 +594,22 @@ void cpu_thread_signal_destroyed(CPUState *cpu) qemu_cond_signal(&qemu_cpu_cond); } +void cpu_pause(CPUState *cpu) +{ + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { + cpu->stop = true; + qemu_cpu_kick(cpu); + } +} + +void cpu_resume(CPUState *cpu) +{ + cpu->stop = false; + cpu->stopped = false; + qemu_cpu_kick(cpu); +} static bool all_vcpus_paused(void) { @@ -588,12 +630,7 @@ void pause_all_vcpus(void) qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); CPU_FOREACH(cpu) { - if (qemu_cpu_is_self(cpu)) { - qemu_cpu_stop(cpu, true); - } else { - cpu->stop = true; - qemu_cpu_kick(cpu); - } + cpu_pause(cpu); } /* We need to drop the replay_lock so any vCPU threads woken up @@ -613,13 +650,6 @@ void pause_all_vcpus(void) bql_lock(); } -void cpu_resume(CPUState *cpu) -{ - cpu->stop = false; - cpu->stopped = false; - qemu_cpu_kick(cpu); -} - void resume_all_vcpus(void) { CPUState *cpu; @@ -662,7 +692,6 @@ void qemu_init_vcpu(CPUState *cpu) { MachineState *ms = MACHINE(qdev_get_machine()); - cpu->nr_cores = machine_topo_get_cores_per_socket(ms); cpu->nr_threads = ms->smp.threads; cpu->stopped = true; cpu->random_seed = qemu_guest_random_seed_thread_part1(); @@ -788,14 +817,14 @@ int vm_stop_force_state(RunState state) } } -void qmp_memsave(int64_t addr, int64_t size, const char *filename, +void qmp_memsave(uint64_t addr, uint64_t size, const char *filename, bool has_cpu, int64_t cpu_index, Error **errp) { FILE *f; - uint32_t l; + uint64_t l; CPUState *cpu; uint8_t buf[1024]; - int64_t orig_addr = addr, orig_size = size; + uint64_t orig_addr = addr, orig_size = size; if (!has_cpu) { cpu_index = 0; @@ -819,7 +848,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename, if (l > size) l = size; if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { - error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 + error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRIu64 " specified", orig_addr, orig_size); goto exit; } @@ -836,11 +865,11 @@ exit: fclose(f); } -void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, +void qmp_pmemsave(uint64_t addr, uint64_t size, const char *filename, Error **errp) { FILE *f; - uint32_t l; + uint64_t l; uint8_t buf[1024]; f = fopen(filename, "wb"); diff --git a/system/datadir.c b/system/datadir.c index c9237cb..f96f8fc 100644 --- a/system/datadir.c +++ b/system/datadir.c @@ -30,7 +30,7 @@ static const char *data_dir[16]; static int data_dir_idx; -char *qemu_find_file(int type, const char *name) +char *qemu_find_file(QemuFileType type, const char *name) { int i; const char *subdir; @@ -46,6 +46,9 @@ char *qemu_find_file(int type, const char *name) case QEMU_FILE_TYPE_BIOS: subdir = ""; break; + case QEMU_FILE_TYPE_DTB: + subdir = "dtb/"; + break; case QEMU_FILE_TYPE_KEYMAP: subdir = "keymaps/"; break; diff --git a/system/device_tree-stub.c b/system/device_tree-stub.c index bddda6f..428330b 100644 --- a/system/device_tree-stub.c +++ b/system/device_tree-stub.c @@ -5,6 +5,9 @@ #ifdef CONFIG_FDT void qmp_dumpdtb(const char *filename, Error **errp) { - error_setg(errp, "This machine doesn't have a FDT"); + ERRP_GUARD(); + + error_setg(errp, "This machine doesn't have an FDT"); + error_append_hint(errp, "(this machine type definitely doesn't use FDT)\n"); } #endif diff --git a/system/device_tree.c b/system/device_tree.c index 2e38259..aa3fe95 100644 --- a/system/device_tree.c +++ b/system/device_tree.c @@ -23,12 +23,12 @@ #include "qemu/bswap.h" #include "qemu/cutils.h" #include "qemu/guest-random.h" -#include "sysemu/device_tree.h" +#include "system/device_tree.h" #include "hw/loader.h" #include "hw/boards.h" #include "qemu/config-file.h" #include "qapi/qapi-commands-machine.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" #include "monitor/hmp.h" #include <libfdt.h> @@ -594,21 +594,6 @@ int qemu_fdt_add_path(void *fdt, const char *path) return retval; } -void qemu_fdt_dumpdtb(void *fdt, int size) -{ - const char *dumpdtb = current_machine->dumpdtb; - - if (dumpdtb) { - /* Dump the dtb to a file and quit */ - if (g_file_set_contents(dumpdtb, fdt, size, NULL)) { - info_report("dtb dumped to %s. Exiting.", dumpdtb); - exit(0); - } - error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb); - exit(1); - } -} - int qemu_fdt_setprop_sized_cells_from_array(void *fdt, const char *node_path, const char *property, @@ -650,11 +635,16 @@ out: void qmp_dumpdtb(const char *filename, Error **errp) { + ERRP_GUARD(); + g_autoptr(GError) err = NULL; uint32_t size; if (!current_machine->fdt) { - error_setg(errp, "This machine doesn't have a FDT"); + error_setg(errp, "This machine doesn't have an FDT"); + error_append_hint(errp, + "(Perhaps it doesn't support FDT at all, or perhaps " + "you need to provide an FDT with the -fdt option?)\n"); return; } diff --git a/system/dirtylimit.c b/system/dirtylimit.c index ab20da3..b48c0d4 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -13,16 +13,16 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qapi/qapi-commands-migration.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" #include "qapi/error.h" -#include "sysemu/dirtyrate.h" -#include "sysemu/dirtylimit.h" +#include "system/dirtyrate.h" +#include "system/dirtylimit.h" #include "monitor/hmp.h" #include "monitor/monitor.h" -#include "exec/memory.h" +#include "system/memory.h" #include "exec/target_page.h" #include "hw/boards.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "trace.h" #include "migration/misc.h" @@ -80,8 +80,7 @@ static void vcpu_dirty_rate_stat_collect(void) int i = 0; int64_t period = DIRTYLIMIT_CALC_TIME_MS; - if (migrate_dirty_limit() && - migration_is_active()) { + if (migrate_dirty_limit() && migration_is_running()) { period = migrate_vcpu_dirty_limit_period(); } @@ -338,8 +337,6 @@ static void dirtylimit_adjust_throttle(CPUState *cpu) if (!dirtylimit_done(quota, current)) { dirtylimit_set_throttle(cpu, quota, current); } - - return; } void dirtylimit_process(void) diff --git a/system/dma-helpers.c b/system/dma-helpers.c index 7401330..0d592f6 100644 --- a/system/dma-helpers.c +++ b/system/dma-helpers.c @@ -8,12 +8,12 @@ */ #include "qemu/osdep.h" -#include "sysemu/block-backend.h" -#include "sysemu/dma.h" -#include "trace/trace-root.h" +#include "system/block-backend.h" +#include "system/dma.h" +#include "trace.h" #include "qemu/thread.h" #include "qemu/main-loop.h" -#include "sysemu/cpu-timers.h" +#include "exec/icount.h" #include "qemu/range.h" /* #define DEBUG_IOMMU */ @@ -211,7 +211,7 @@ static const AIOCBInfo dma_aiocb_info = { .cancel_async = dma_aio_cancel, }; -BlockAIOCB *dma_blk_io(AioContext *ctx, +BlockAIOCB *dma_blk_io( QEMUSGList *sg, uint64_t offset, uint32_t align, DMAIOFunc *io_func, void *io_func_opaque, BlockCompletionFunc *cb, @@ -223,7 +223,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx, dbs->acb = NULL; dbs->sg = sg; - dbs->ctx = ctx; + dbs->ctx = qemu_get_current_aio_context(); dbs->offset = offset; dbs->align = align; dbs->sg_cur_index = 0; @@ -251,7 +251,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk, QEMUSGList *sg, uint64_t offset, uint32_t align, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + return dma_blk_io(sg, offset, align, dma_blk_read_io_func, blk, cb, opaque, DMA_DIRECTION_FROM_DEVICE); } @@ -269,7 +269,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk, QEMUSGList *sg, uint64_t offset, uint32_t align, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + return dma_blk_io(sg, offset, align, dma_blk_write_io_func, blk, cb, opaque, DMA_DIRECTION_TO_DEVICE); } diff --git a/system/globals-target.c b/system/globals-target.c new file mode 100644 index 0000000..9897205 --- /dev/null +++ b/system/globals-target.c @@ -0,0 +1,24 @@ +/* + * Global variables that should not exist (target specific) + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ + +#include "qemu/osdep.h" +#include "system/system.h" + +#ifdef TARGET_SPARC +int graphic_width = 1024; +int graphic_height = 768; +int graphic_depth = 8; +#elif defined(TARGET_M68K) +int graphic_width = 800; +int graphic_height = 600; +int graphic_depth = 8; +#else +int graphic_width = 800; +int graphic_height = 600; +int graphic_depth = 32; +#endif diff --git a/system/globals.c b/system/globals.c index e353584..9640c95 100644 --- a/system/globals.c +++ b/system/globals.c @@ -28,19 +28,28 @@ #include "hw/loader.h" #include "hw/xen/xen.h" #include "net/net.h" -#include "sysemu/cpus.h" -#include "sysemu/sysemu.h" +#include "system/cpus.h" +#include "system/system.h" + +bool should_mlock(MlockState state) +{ + return state == MLOCK_ON || state == MLOCK_ON_FAULT; +} + +bool is_mlock_on_fault(MlockState state) +{ + return state == MLOCK_ON_FAULT; +} enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB; int display_opengl; const char* keyboard_layout; -bool enable_mlock; +MlockState mlock_state; bool enable_cpu_pm; int autostart = 1; int vga_interface_type = VGA_NONE; bool vga_interface_created; Chardev *parallel_hds[MAX_PARALLEL_PORTS]; -int graphic_rotate; QEMUOptionRom option_rom[MAX_OPTION_ROMS]; int nb_option_roms; int old_param; @@ -49,7 +58,6 @@ unsigned int nb_prom_envs; const char *prom_envs[MAX_PROM_ENVS]; uint8_t *boot_splash_filedata; int only_migratable; /* turn it off unless user states otherwise */ -int icount_align_option; /* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the * little-endian "wire format" described in the SMBIOS 2.6 specification. @@ -60,6 +68,7 @@ bool qemu_uuid_set; uint32_t xen_domid; enum xen_mode xen_mode = XEN_DISABLED; bool xen_domid_restrict; +bool xen_is_stubdomain; struct evtchn_backend_ops *xen_evtchn_ops; struct gnttab_backend_ops *xen_gnttab_ops; struct foreignmem_backend_ops *xen_foreignmem_ops; diff --git a/system/ioport.c b/system/ioport.c index fd551d0..4f96e91 100644 --- a/system/ioport.c +++ b/system/ioport.c @@ -26,10 +26,9 @@ */ #include "qemu/osdep.h" -#include "cpu.h" -#include "exec/ioport.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" +#include "system/ioport.h" +#include "system/memory.h" +#include "system/address-spaces.h" #include "trace.h" struct MemoryRegionPortioList { @@ -258,7 +257,7 @@ static void portio_list_add_1(PortioList *piolist, object_ref(&mrpio->mr); object_unparent(OBJECT(&mrpio->mr)); if (!piolist->owner) { - owner = container_get(qdev_get_machine(), "/unattached"); + owner = machine_get_container("unattached"); } else { owner = piolist->owner; } diff --git a/system/main.c b/system/main.c index 9b91d21..b8f7157 100644 --- a/system/main.c +++ b/system/main.c @@ -24,26 +24,73 @@ #include "qemu/osdep.h" #include "qemu-main.h" -#include "sysemu/sysemu.h" +#include "qemu/main-loop.h" +#include "system/replay.h" +#include "system/system.h" #ifdef CONFIG_SDL +/* + * SDL insists on wrapping the main() function with its own implementation on + * some platforms; it does so via a macro that renames our main function, so + * <SDL.h> must be #included here even with no SDL code called from this file. + */ #include <SDL.h> #endif -int qemu_default_main(void) +#ifdef CONFIG_DARWIN +#include <CoreFoundation/CoreFoundation.h> +#endif + +static void *qemu_default_main(void *opaque) { int status; + replay_mutex_lock(); + bql_lock(); status = qemu_main_loop(); qemu_cleanup(status); + bql_unlock(); + replay_mutex_unlock(); - return status; + exit(status); } -int (*qemu_main)(void) = qemu_default_main; +int (*qemu_main)(void); + +#ifdef CONFIG_DARWIN +static int os_darwin_cfrunloop_main(void) +{ + CFRunLoopRun(); + g_assert_not_reached(); +} +int (*qemu_main)(void) = os_darwin_cfrunloop_main; +#endif int main(int argc, char **argv) { qemu_init(argc, argv); - return qemu_main(); + + /* + * qemu_init acquires the BQL and replay mutex lock. BQL is acquired when + * initializing cpus, to block associated threads until initialization is + * complete. Replay_mutex lock is acquired on initialization, because it + * must be held when configuring icount_mode. + * + * On MacOS, qemu main event loop runs in a background thread, as main + * thread must be reserved for UI. Thus, we need to transfer lock ownership, + * and the simplest way to do that is to release them, and reacquire them + * from qemu_default_main. + */ + bql_unlock(); + replay_mutex_unlock(); + + if (qemu_main) { + QemuThread main_loop_thread; + qemu_thread_create(&main_loop_thread, "qemu_main", + qemu_default_main, NULL, QEMU_THREAD_DETACHED); + return qemu_main(); + } else { + qemu_default_main(NULL); + g_assert_not_reached(); + } } diff --git a/system/memory-internal.h b/system/memory-internal.h new file mode 100644 index 0000000..46f758f --- /dev/null +++ b/system/memory-internal.h @@ -0,0 +1,57 @@ +/* + * Declarations for functions which are internal to the memory subsystem. + * + * Copyright 2011 Red Hat, Inc. and/or its affiliates + * + * Authors: + * Avi Kivity <avi@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_INTERNAL_H +#define MEMORY_INTERNAL_H + +#ifndef CONFIG_USER_ONLY +static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv) +{ + return fv->dispatch; +} + +static inline AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as) +{ + return flatview_to_dispatch(address_space_to_flatview(as)); +} + +FlatView *address_space_get_flatview(AddressSpace *as); +void flatview_unref(FlatView *view); + +extern const MemoryRegionOps unassigned_mem_ops; + +void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section); +AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv); +void address_space_dispatch_compact(AddressSpaceDispatch *d); +void address_space_dispatch_free(AddressSpaceDispatch *d); + +void mtree_print_dispatch(struct AddressSpaceDispatch *d, + MemoryRegion *root); + +/* returns true if end is big endian. */ +static inline bool devend_big_endian(enum device_endian end) +{ + if (end == DEVICE_NATIVE_ENDIAN) { + return target_big_endian(); + } + return end == DEVICE_BIG_ENDIAN; +} + +/* enum device_endian to MemOp. */ +static inline MemOp devend_memop(enum device_endian end) +{ + return devend_big_endian(end) ? MO_BE : MO_LE; +} + +#endif +#endif diff --git a/system/memory.c b/system/memory.c index 47c600d..76b44b8 100644 --- a/system/memory.c +++ b/system/memory.c @@ -16,7 +16,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qapi/error.h" -#include "exec/memory.h" +#include "system/memory.h" #include "qapi/visitor.h" #include "qemu/bitops.h" #include "qemu/error-report.h" @@ -24,16 +24,16 @@ #include "qemu/qemu-print.h" #include "qom/object.h" #include "trace.h" - -#include "exec/memory-internal.h" -#include "exec/ram_addr.h" -#include "sysemu/kvm.h" -#include "sysemu/runstate.h" -#include "sysemu/tcg.h" +#include "system/ram_addr.h" +#include "system/kvm.h" +#include "system/runstate.h" +#include "system/tcg.h" #include "qemu/accel.h" #include "hw/boards.h" #include "migration/vmstate.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" + +#include "memory-internal.h" //#define DEBUG_UNASSIGNED @@ -353,15 +353,6 @@ static void flatview_simplify(FlatView *view) } } -static bool memory_region_big_endian(MemoryRegion *mr) -{ -#if TARGET_BIG_ENDIAN - return mr->ops->endianness != DEVICE_LITTLE_ENDIAN; -#else - return mr->ops->endianness == DEVICE_BIG_ENDIAN; -#endif -} - static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op) { if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) { @@ -563,7 +554,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, /* FIXME: support unaligned access? */ access_size = MAX(MIN(size, access_size_max), access_size_min); access_mask = MAKE_64BIT_MASK(0, access_size * 8); - if (memory_region_big_endian(mr)) { + if (devend_big_endian(mr->ops->endianness)) { for (i = 0; i < size; i += access_size) { r |= access_fn(mr, addr + i, value, access_size, (size - access_size - i) * 8, access_mask, attrs); @@ -941,6 +932,38 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) } } +static void +flat_range_coalesced_io_notify_listener_add_del(FlatRange *fr, + MemoryRegionSection *mrs, + MemoryListener *listener, + AddressSpace *as, bool add) +{ + CoalescedMemoryRange *cmr; + MemoryRegion *mr = fr->mr; + AddrRange tmp; + + QTAILQ_FOREACH(cmr, &mr->coalesced, link) { + tmp = addrrange_shift(cmr->addr, + int128_sub(fr->addr.start, + int128_make64(fr->offset_in_region))); + + if (!addrrange_intersects(tmp, fr->addr)) { + return; + } + tmp = addrrange_intersection(tmp, fr->addr); + + if (add && listener->coalesced_io_add) { + listener->coalesced_io_add(listener, mrs, + int128_get64(tmp.start), + int128_get64(tmp.size)); + } else if (!add && listener->coalesced_io_del) { + listener->coalesced_io_del(listener, mrs, + int128_get64(tmp.start), + int128_get64(tmp.size)); + } + } +} + static void address_space_update_topology_pass(AddressSpace *as, const FlatView *old_view, const FlatView *new_view, @@ -1206,7 +1229,7 @@ static void memory_region_do_init(MemoryRegion *mr, char *name_array = g_strdup_printf("%s[*]", escaped_name); if (!owner) { - owner = container_get(qdev_get_machine(), "/unattached"); + owner = machine_get_container("unattached"); } object_property_add_child(owner, name_array, OBJECT(mr)); @@ -1359,7 +1382,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, static const MemoryRegionOps ram_device_mem_ops = { .read = memory_region_ram_device_read, .write = memory_region_ram_device_write, - .endianness = DEVICE_HOST_ENDIAN, + .endianness = HOST_BIG_ENDIAN ? DEVICE_BIG_ENDIAN : DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 1, .max_access_size = 8, @@ -1380,7 +1403,7 @@ bool memory_region_access_valid(MemoryRegion *mr, { if (mr->ops->valid.accepts && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX ", size %u, region '%s', reason: rejected\n", is_write ? "write" : "read", addr, size, memory_region_name(mr)); @@ -1388,7 +1411,7 @@ bool memory_region_access_valid(MemoryRegion *mr, } if (!mr->ops->valid.unaligned && (addr & (size - 1))) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX ", size %u, region '%s', reason: unaligned\n", is_write ? "write" : "read", addr, size, memory_region_name(mr)); @@ -1402,7 +1425,7 @@ bool memory_region_access_valid(MemoryRegion *mr, if (size > mr->ops->valid.max_access_size || size < mr->ops->valid.min_access_size) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX + qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX ", size %u, region '%s', reason: invalid size " "(min:%u max:%u)\n", is_write ? "write" : "read", @@ -1604,7 +1627,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, return true; } -#ifdef CONFIG_POSIX +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) bool memory_region_init_ram_from_file(MemoryRegion *mr, Object *owner, const char *name, @@ -1648,8 +1671,8 @@ bool memory_region_init_ram_from_fd(MemoryRegion *mr, mr->readonly = !!(ram_flags & RAM_READONLY); mr->terminates = true; mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, - &err); + mr->ram_block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd, + offset, false, &err); if (err) { mr->size = int128_zero(); object_unparent(OBJECT(mr)); @@ -1901,32 +1924,6 @@ static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr, return ret; } -int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, - uint64_t page_size_mask, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_page_size_mask) { - ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp); - } - return ret; -} - -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_iova_ranges) { - ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp); - } - return ret; -} - int memory_region_register_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n, Error **errp) { @@ -2109,12 +2106,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) return mr->rdm; } -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm) +int memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm) { g_assert(memory_region_is_ram(mr)); - g_assert(!rdm || !mr->rdm); + if (mr->rdm && rdm) { + return -EBUSY; + } + mr->rdm = rdm; + return 0; } uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, @@ -2137,7 +2138,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *section, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); @@ -2146,15 +2147,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, return rdmc->replay_populated(rdm, section, replay_fn, opaque); } -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque) +int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) { RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); g_assert(rdmc->replay_discarded); - rdmc->replay_discarded(rdm, section, replay_fn, opaque); + return rdmc->replay_discarded(rdm, section, replay_fn, opaque); } void ram_discard_manager_register_listener(RamDiscardManager *rdm, @@ -2177,18 +2178,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, } /* Called with rcu_read_lock held. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp) +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { MemoryRegion *mr; hwaddr xlat; hwaddr len = iotlb->addr_mask + 1; bool writable = iotlb->perm & IOMMU_WO; - if (mr_has_discard_manager) { - *mr_has_discard_manager = false; - } /* * The IOMMU TLB entry we have just covers translation through * this IOMMU to its immediate target. We need to translate @@ -2198,7 +2195,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED); if (!memory_region_is_ram(mr)) { error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat); - return false; + return NULL; } else if (memory_region_has_ram_discard_manager(mr)) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); MemoryRegionSection tmp = { @@ -2206,9 +2203,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, .offset_within_region = xlat, .size = int128_make64(len), }; - if (mr_has_discard_manager) { - *mr_has_discard_manager = true; - } /* * Malicious VMs can map memory into the IOMMU, which is expected * to remain discarded. vfio will pin all pages, populating memory. @@ -2219,7 +2213,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_setg(errp, "iommu map to discarded memory (e.g., unplugged" " via virtio-mem): %" HWADDR_PRIx "", iotlb->translated_addr); - return false; + return NULL; } } @@ -2229,22 +2223,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, */ if (len & iotlb->addr_mask) { error_setg(errp, "iommu has granularity incompatible with target AS"); - return false; - } - - if (vaddr) { - *vaddr = memory_region_get_ram_ptr(mr) + xlat; - } - - if (ram_addr) { - *ram_addr = memory_region_get_ram_addr(mr) + xlat; - } - - if (read_only) { - *read_only = !writable || mr->readonly; + return NULL; } - return true; + *xlat_p = xlat; + return mr; } void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) @@ -2578,7 +2561,8 @@ void memory_region_add_eventfd(MemoryRegion *mr, unsigned i; if (size) { - adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); + MemOp mop = (target_big_endian() ? MO_BE : MO_LE) | size_memop(size); + adjust_endianness(mr, &mrfd.data, mop); } memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { @@ -2613,7 +2597,8 @@ void memory_region_del_eventfd(MemoryRegion *mr, unsigned i; if (size) { - adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE); + MemOp mop = (target_big_endian() ? MO_BE : MO_LE) | size_memop(size); + adjust_endianness(mr, &mrfd.data, mop); } memory_region_transaction_begin(); for (i = 0; i < mr->ioeventfd_nb; ++i) { @@ -3041,8 +3026,10 @@ void memory_global_dirty_log_stop(unsigned int flags) static void listener_add_address_space(MemoryListener *listener, AddressSpace *as) { + unsigned i; FlatView *view; FlatRange *fr; + MemoryRegionIoeventfd *fd; if (listener->begin) { listener->begin(listener); @@ -3067,10 +3054,34 @@ static void listener_add_address_space(MemoryListener *listener, if (listener->region_add) { listener->region_add(listener, §ion); } + + /* send coalesced io add notifications */ + flat_range_coalesced_io_notify_listener_add_del(fr, §ion, + listener, as, true); + if (fr->dirty_log_mask && listener->log_start) { listener->log_start(listener, §ion, 0, fr->dirty_log_mask); } } + + /* + * register all eventfds for this address space for the newly registered + * listener. + */ + for (i = 0; i < as->ioeventfd_nb; i++) { + fd = &as->ioeventfds[i]; + MemoryRegionSection section = (MemoryRegionSection) { + .fv = view, + .offset_within_address_space = int128_get64(fd->addr.start), + .size = fd->addr.size, + }; + + if (listener->eventfd_add) { + listener->eventfd_add(listener, §ion, + fd->match_data, fd->data, fd->e); + } + } + if (listener->commit) { listener->commit(listener); } @@ -3080,8 +3091,10 @@ static void listener_add_address_space(MemoryListener *listener, static void listener_del_address_space(MemoryListener *listener, AddressSpace *as) { + unsigned i; FlatView *view; FlatRange *fr; + MemoryRegionIoeventfd *fd; if (listener->begin) { listener->begin(listener); @@ -3093,10 +3106,33 @@ static void listener_del_address_space(MemoryListener *listener, if (fr->dirty_log_mask && listener->log_stop) { listener->log_stop(listener, §ion, fr->dirty_log_mask, 0); } + + /* send coalesced io del notifications */ + flat_range_coalesced_io_notify_listener_add_del(fr, §ion, + listener, as, false); if (listener->region_del) { listener->region_del(listener, §ion); } } + + /* + * de-register all eventfds for this address space for the current + * listener. + */ + for (i = 0; i < as->ioeventfd_nb; i++) { + fd = &as->ioeventfds[i]; + MemoryRegionSection section = (MemoryRegionSection) { + .fv = view, + .offset_within_address_space = int128_get64(fd->addr.start), + .size = fd->addr.size, + }; + + if (listener->eventfd_del) { + listener->eventfd_del(listener, §ion, + fd->match_data, fd->data, fd->e); + } + } + if (listener->commit) { listener->commit(listener); } @@ -3174,7 +3210,8 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) as->ioeventfds = NULL; QTAILQ_INIT(&as->listeners); QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); - as->bounce.in_use = false; + as->max_bounce_buffer_size = DEFAULT_MAX_BOUNCE_BUFFER_SIZE; + as->bounce_buffer_size = 0; qemu_mutex_init(&as->map_client_list_lock); QLIST_INIT(&as->map_client_list); as->name = g_strdup(name ? name : "anonymous"); @@ -3184,7 +3221,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) static void do_address_space_destroy(AddressSpace *as) { - assert(!qatomic_read(&as->bounce.in_use)); + assert(qatomic_read(&as->bounce_buffer_size) == 0); assert(QLIST_EMPTY(&as->map_client_list)); qemu_mutex_destroy(&as->map_client_list_lock); diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc index 0e6f394..7f32d3d 100644 --- a/system/memory_ldst.c.inc +++ b/system/memory_ldst.c.inc @@ -34,7 +34,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, false, attrs); - if (l < 4 || !memory_access_is_direct(mr, false)) { + if (l < 4 || !memory_access_is_direct(mr, false, attrs)) { release_lock |= prepare_mmio_access(mr); /* I/O case */ @@ -103,7 +103,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, false, attrs); - if (l < 8 || !memory_access_is_direct(mr, false)) { + if (l < 8 || !memory_access_is_direct(mr, false, attrs)) { release_lock |= prepare_mmio_access(mr); /* I/O case */ @@ -170,7 +170,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, false, attrs); - if (!memory_access_is_direct(mr, false)) { + if (!memory_access_is_direct(mr, false, attrs)) { release_lock |= prepare_mmio_access(mr); /* I/O case */ @@ -207,7 +207,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, false, attrs); - if (l < 2 || !memory_access_is_direct(mr, false)) { + if (l < 2 || !memory_access_is_direct(mr, false, attrs)) { release_lock |= prepare_mmio_access(mr); /* I/O case */ @@ -277,7 +277,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, true, attrs); - if (l < 4 || !memory_access_is_direct(mr, true)) { + if (l < 4 || !memory_access_is_direct(mr, true, attrs)) { release_lock |= prepare_mmio_access(mr); r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs); @@ -314,7 +314,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, true, attrs); - if (l < 4 || !memory_access_is_direct(mr, true)) { + if (l < 4 || !memory_access_is_direct(mr, true, attrs)) { release_lock |= prepare_mmio_access(mr); r = memory_region_dispatch_write(mr, addr1, val, MO_32 | devend_memop(endian), attrs); @@ -377,7 +377,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, true, attrs); - if (!memory_access_is_direct(mr, true)) { + if (!memory_access_is_direct(mr, true, attrs)) { release_lock |= prepare_mmio_access(mr); r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs); } else { @@ -410,7 +410,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, true, attrs); - if (l < 2 || !memory_access_is_direct(mr, true)) { + if (l < 2 || !memory_access_is_direct(mr, true, attrs)) { release_lock |= prepare_mmio_access(mr); r = memory_region_dispatch_write(mr, addr1, val, MO_16 | devend_memop(endian), attrs); @@ -474,7 +474,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL, RCU_READ_LOCK(); mr = TRANSLATE(addr, &addr1, &l, true, attrs); - if (l < 8 || !memory_access_is_direct(mr, true)) { + if (l < 8 || !memory_access_is_direct(mr, true, attrs)) { release_lock |= prepare_mmio_access(mr); r = memory_region_dispatch_write(mr, addr1, val, MO_64 | devend_memop(endian), attrs); diff --git a/system/memory_mapping.c b/system/memory_mapping.c index 6f884c5..da708a0 100644 --- a/system/memory_mapping.c +++ b/system/memory_mapping.c @@ -12,11 +12,12 @@ */ #include "qemu/osdep.h" +#include "qemu/range.h" #include "qapi/error.h" -#include "sysemu/memory_mapping.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" +#include "system/memory_mapping.h" +#include "system/memory.h" +#include "system/address-spaces.h" #include "hw/core/cpu.h" //#define DEBUG_GUEST_PHYS_REGION_ADD @@ -353,8 +354,7 @@ void memory_mapping_filter(MemoryMappingList *list, int64_t begin, MemoryMapping *cur, *next; QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) { - if (cur->phys_addr >= begin + length || - cur->phys_addr + cur->length <= begin) { + if (!ranges_overlap(cur->phys_addr, cur->length, begin, length)) { QTAILQ_REMOVE(&list->head, cur, next); g_free(cur); list->num--; diff --git a/system/meson.build b/system/meson.build index a296270..6d21ff9 100644 --- a/system/meson.build +++ b/system/meson.build @@ -1,22 +1,26 @@ specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files( 'arch_init.c', - 'ioport.c', - 'memory.c', - 'physmem.c', - 'watchpoint.c', + 'globals-target.c', )]) system_ss.add(files( + 'vl.c', +), sdl, libpmem, libdaxctl) + +system_ss.add(files( 'balloon.c', 'bootdevice.c', 'cpus.c', - 'cpu-throttle.c', 'cpu-timers.c', 'datadir.c', 'dirtylimit.c', 'dma-helpers.c', 'globals.c', + 'ioport.c', + 'ram-block-attributes.c', 'memory_mapping.c', + 'memory.c', + 'physmem.c', 'qdev-monitor.c', 'qtest.c', 'rtc.c', @@ -24,8 +28,8 @@ system_ss.add(files( 'runstate-hmp-cmds.c', 'runstate.c', 'tpm-hmp-cmds.c', - 'vl.c', -), sdl, libpmem, libdaxctl) + 'watchpoint.c', +)) if have_tpm system_ss.add(files('tpm.c')) diff --git a/system/physmem.c b/system/physmem.c index 33d09f7..ff0ca40 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -28,32 +28,35 @@ #include "qemu/lockable.h" #ifdef CONFIG_TCG -#include "hw/core/tcg-cpu-ops.h" +#include "accel/tcg/cpu-ops.h" +#include "accel/tcg/iommu.h" #endif /* CONFIG_TCG */ -#include "exec/exec-all.h" +#include "exec/cputlb.h" #include "exec/page-protection.h" #include "exec/target_page.h" +#include "exec/translation-block.h" #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #include "hw/boards.h" -#include "sysemu/xen.h" -#include "sysemu/kvm.h" -#include "sysemu/tcg.h" -#include "sysemu/qtest.h" +#include "system/xen.h" +#include "system/kvm.h" +#include "system/tcg.h" +#include "system/qtest.h" #include "qemu/timer.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "qemu/log.h" #include "qemu/memalign.h" -#include "exec/memory.h" -#include "exec/ioport.h" -#include "sysemu/dma.h" -#include "sysemu/hostmem.h" -#include "sysemu/hw_accel.h" -#include "sysemu/xen-mapcache.h" -#include "trace/trace-root.h" +#include "qemu/memfd.h" +#include "system/memory.h" +#include "system/ioport.h" +#include "system/dma.h" +#include "system/hostmem.h" +#include "system/hw_accel.h" +#include "system/xen-mapcache.h" +#include "trace.h" #ifdef CONFIG_FALLOCATE_PUNCH_HOLE #include <linux/falloc.h> @@ -61,14 +64,16 @@ #include "qemu/rcu_queue.h" #include "qemu/main-loop.h" -#include "exec/translate-all.h" -#include "sysemu/replay.h" +#include "system/replay.h" -#include "exec/memory-internal.h" -#include "exec/ram_addr.h" +#include "system/ram_addr.h" #include "qemu/pmem.h" +#include "qapi/qapi-types-migration.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/options.h" #include "migration/vmstate.h" #include "qemu/range.h" @@ -82,6 +87,8 @@ #include <daxctl/libdaxctl.h> #endif +#include "memory-internal.h" + //#define DEBUG_SUBPAGE /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes @@ -152,6 +159,7 @@ static void io_mem_init(void); static void memory_map_init(void); static void tcg_log_global_after_sync(MemoryListener *listener); static void tcg_commit(MemoryListener *listener); +static bool ram_is_cpr_compatible(RAMBlock *rb); /** * CPUAddressSpace: all the information a CPU needs about an AddressSpace @@ -571,7 +579,7 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, is_write, true, &as, attrs); mr = section.mr; - if (xen_enabled() && memory_access_is_direct(mr, is_write)) { + if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) { hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr; *plen = MIN(page, *plen); } @@ -579,6 +587,8 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, return mr; } +#ifdef CONFIG_TCG + typedef struct TCGIOMMUNotifier { IOMMUNotifier n; MemoryRegion *mr; @@ -738,6 +748,33 @@ translate_fail: return &d->map.sections[PHYS_SECTION_UNASSIGNED]; } +MemoryRegionSection *iotlb_to_section(CPUState *cpu, + hwaddr index, MemTxAttrs attrs) +{ + int asidx = cpu_asidx_from_attrs(cpu, attrs); + CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; + AddressSpaceDispatch *d = cpuas->memory_dispatch; + int section_index = index & ~TARGET_PAGE_MASK; + MemoryRegionSection *ret; + + assert(section_index < d->map.sections_nb); + ret = d->map.sections + section_index; + assert(ret->mr); + assert(ret->mr->ops); + + return ret; +} + +/* Called from RCU critical section */ +hwaddr memory_region_section_get_iotlb(CPUState *cpu, + MemoryRegionSection *section) +{ + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; +} + +#endif /* CONFIG_TCG */ + void cpu_address_space_init(CPUState *cpu, int asidx, const char *prefix, MemoryRegion *mr) { @@ -763,6 +800,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, if (!cpu->cpu_ases) { cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); + cpu->cpu_ases_count = cpu->num_ases; } newas = &cpu->cpu_ases[asidx]; @@ -776,6 +814,34 @@ void cpu_address_space_init(CPUState *cpu, int asidx, } } +void cpu_address_space_destroy(CPUState *cpu, int asidx) +{ + CPUAddressSpace *cpuas; + + assert(cpu->cpu_ases); + assert(asidx >= 0 && asidx < cpu->num_ases); + /* KVM cannot currently support multiple address spaces. */ + assert(asidx == 0 || !kvm_enabled()); + + cpuas = &cpu->cpu_ases[asidx]; + if (tcg_enabled()) { + memory_listener_unregister(&cpuas->tcg_as_listener); + } + + address_space_destroy(cpuas->as); + g_free_rcu(cpuas->as, rcu); + + if (asidx == 0) { + /* reset the convenience alias for address space 0 */ + cpu->as = NULL; + } + + if (--cpu->cpu_ases_count == 0) { + g_free(cpu->cpu_ases); + cpu->cpu_ases = NULL; + } +} + AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) { /* Return the AddressSpace corresponding to the specified index */ @@ -894,13 +960,19 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client) { DirtyMemoryBlocks *blocks; - ram_addr_t start = memory_region_get_ram_addr(mr) + offset; + ram_addr_t start, first, last; unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); - ram_addr_t first = QEMU_ALIGN_DOWN(start, align); - ram_addr_t last = QEMU_ALIGN_UP(start + length, align); DirtyBitmapSnapshot *snap; unsigned long page, end, dest; + start = memory_region_get_ram_addr(mr); + /* We know we're only called for RAM MemoryRegions */ + assert(start != RAM_ADDR_INVALID); + start += offset; + + first = QEMU_ALIGN_DOWN(start, align); + last = QEMU_ALIGN_UP(start + length, align); + snap = g_malloc0(sizeof(*snap) + ((last - first) >> (TARGET_PAGE_BITS + 3))); snap->start = first; @@ -959,14 +1031,6 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, return false; } -/* Called from RCU critical section */ -hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section) -{ - AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); - return section - d->map.sections; -} - static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, uint16_t section); static subpage_t *subpage_init(FlatView *fv, hwaddr base); @@ -1200,7 +1264,7 @@ long qemu_maxrampagesize(void) return pagesize; } -#ifdef CONFIG_POSIX +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) static int64_t get_file_size(int fd) { int64_t size; @@ -1499,18 +1563,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size) return offset; } -static unsigned long last_ram_page(void) -{ - RAMBlock *block; - ram_addr_t last = 0; - - RCU_READ_LOCK_GUARD(); - RAMBLOCK_FOREACH(block) { - last = MAX(last, block->offset + block->max_length); - } - return last >> TARGET_PAGE_BITS; -} - static void qemu_ram_setup_dump(void *addr, ram_addr_t size) { int ret; @@ -1521,7 +1573,7 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size) if (ret) { perror("qemu_madvise"); fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, " - "but dump_guest_core=off specified\n"); + "but dump-guest-core=off specified\n"); } } } @@ -1637,6 +1689,18 @@ void qemu_ram_unset_idstr(RAMBlock *block) } } +static char *cpr_name(MemoryRegion *mr) +{ + const char *mr_name = memory_region_name(mr); + g_autofree char *id = mr->dev ? qdev_get_dev_path(mr->dev) : NULL; + + if (id) { + return g_strdup_printf("%s/%s", id, mr_name); + } else { + return g_strdup(mr_name); + } +} + size_t qemu_ram_pagesize(RAMBlock *rb) { return rb->page_size; @@ -1764,13 +1828,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) } /* Called with ram_list.mutex held */ -static void dirty_memory_extend(ram_addr_t old_ram_size, - ram_addr_t new_ram_size) +static void dirty_memory_extend(ram_addr_t new_ram_size) { - ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, - DIRTY_MEMORY_BLOCK_SIZE); - ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size, - DIRTY_MEMORY_BLOCK_SIZE); + unsigned int old_num_blocks = ram_list.num_dirty_blocks; + unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size, + DIRTY_MEMORY_BLOCK_SIZE); int i; /* Only need to extend if block count increased */ @@ -1802,6 +1864,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size, g_free_rcu(old_blocks, rcu); } } + + ram_list.num_dirty_blocks = new_num_blocks; } static void ram_block_add(RAMBlock *new_block, Error **errp) @@ -1811,11 +1875,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) RAMBlock *block; RAMBlock *last_block = NULL; bool free_on_error = false; - ram_addr_t old_ram_size, new_ram_size; + ram_addr_t ram_size; Error *err = NULL; - old_ram_size = last_ram_page(); - qemu_mutex_lock_ramlist(); new_block->offset = find_ram_offset(new_block->max_length); @@ -1845,11 +1907,18 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } if (new_block->flags & RAM_GUEST_MEMFD) { - assert(kvm_enabled()); + int ret; + + if (!kvm_enabled()) { + error_setg(errp, "cannot set up private guest memory for %s: KVM required", + object_get_typename(OBJECT(current_machine->cgs))); + goto out_free; + } assert(new_block->guest_memfd < 0); - if (ram_block_discard_require(true) < 0) { - error_setg_errno(errp, errno, + ret = ram_block_coordinated_discard_require(true); + if (ret < 0) { + error_setg_errno(errp, -ret, "cannot set up private guest memory: discard currently blocked"); error_append_hint(errp, "Are you using assigned devices?\n"); goto out_free; @@ -1861,13 +1930,41 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) qemu_mutex_unlock_ramlist(); goto out_free; } - } - new_ram_size = MAX(old_ram_size, - (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); - if (new_ram_size > old_ram_size) { - dirty_memory_extend(old_ram_size, new_ram_size); + /* + * The attribute bitmap of the RamBlockAttributes is default to + * discarded, which mimics the behavior of kvm_set_phys_mem() when it + * calls kvm_set_memory_attributes_private(). This leads to a brief + * period of inconsistency between the creation of the RAMBlock and its + * mapping into the physical address space. However, this is not + * problematic, as no users rely on the attribute status to perform + * any actions during this interval. + */ + new_block->attributes = ram_block_attributes_create(new_block); + if (!new_block->attributes) { + error_setg(errp, "Failed to create ram block attribute"); + close(new_block->guest_memfd); + ram_block_coordinated_discard_require(false); + qemu_mutex_unlock_ramlist(); + goto out_free; + } + + /* + * Add a specific guest_memfd blocker if a generic one would not be + * added by ram_block_add_cpr_blocker. + */ + if (ram_is_cpr_compatible(new_block)) { + error_setg(&new_block->cpr_blocker, + "Memory region %s uses guest_memfd, " + "which is not supported with CPR.", + memory_region_name(new_block->mr)); + migrate_add_blocker_modes(&new_block->cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1); + } } + + ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS; + dirty_memory_extend(ram_size); /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, * QLIST (which has an RCU-friendly variant) does not have insertion at * tail, so save the last element in last_block. @@ -1920,19 +2017,28 @@ out_free: } } -#ifdef CONFIG_POSIX -RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size, + qemu_ram_resize_cb resized, MemoryRegion *mr, uint32_t ram_flags, int fd, off_t offset, + bool grow, Error **errp) { + ERRP_GUARD(); RAMBlock *new_block; Error *local_err = NULL; - int64_t file_size, file_align; + int64_t file_size, file_align, share_flags; + + share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED); + assert(share_flags != (RAM_SHARED | RAM_PRIVATE)); + ram_flags &= ~RAM_PRIVATE; /* Just support these ram flags by now. */ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | - RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); + RAM_READONLY_FD | RAM_GUEST_MEMFD | + RAM_RESIZEABLE)) == 0); + assert(max_size >= size); if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1947,12 +2053,16 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, size = TARGET_PAGE_ALIGN(size); size = REAL_HOST_PAGE_ALIGN(size); + max_size = TARGET_PAGE_ALIGN(max_size); + max_size = REAL_HOST_PAGE_ALIGN(max_size); file_size = get_file_size(fd); - if (file_size > offset && file_size < (offset + size)) { - error_setg(errp, "backing store size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - file_size, size); + if (file_size && file_size < offset + max_size && !grow) { + error_setg(errp, "%s backing store size 0x%" PRIx64 + " is too small for 'size' option 0x" RAM_ADDR_FMT + " plus 'offset' option 0x%" PRIx64, + memory_region_name(mr), file_size, max_size, + (uint64_t)offset); return NULL; } @@ -1967,11 +2077,13 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->used_length = size; - new_block->max_length = size; + new_block->max_length = max_size; + new_block->resized = resized; new_block->flags = ram_flags; new_block->guest_memfd = -1; - new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, - errp); + new_block->host = file_ram_alloc(new_block, max_size, fd, + file_size < offset + max_size, + offset, errp); if (!new_block->host) { g_free(new_block); return NULL; @@ -2023,7 +2135,8 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp); + block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd, offset, + false, errp); if (!block) { if (created) { unlink(mem_path); @@ -2036,21 +2149,98 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, } #endif +#ifdef CONFIG_POSIX +/* + * Create MAP_SHARED RAMBlocks by mmap'ing a file descriptor, so it can be + * shared with another process if CPR is being used. Use memfd if available + * because it has no size limits, else use POSIX shm. + */ +static int qemu_ram_get_shared_fd(const char *name, bool *reused, Error **errp) +{ + int fd = cpr_find_fd(name, 0); + + if (fd >= 0) { + *reused = true; + return fd; + } + + if (qemu_memfd_check(0)) { + fd = qemu_memfd_create(name, 0, 0, 0, 0, errp); + } else { + fd = qemu_shm_alloc(0, errp); + } + + if (fd >= 0) { + cpr_save_fd(name, 0, fd); + } + *reused = false; + return fd; +} +#endif + static RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, - void (*resized)(const char*, - uint64_t length, - void *host), + qemu_ram_resize_cb resized, void *host, uint32_t ram_flags, MemoryRegion *mr, Error **errp) { RAMBlock *new_block; Error *local_err = NULL; - int align; + int align, share_flags; + + share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED); + assert(share_flags != (RAM_SHARED | RAM_PRIVATE)); + ram_flags &= ~RAM_PRIVATE; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); + assert(max_size >= size); + + /* ignore RAM_SHARED for Windows and emscripten*/ +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) + if (!host) { + if (!share_flags && current_machine->aux_ram_share) { + ram_flags |= RAM_SHARED; + } + if (ram_flags & RAM_SHARED) { + bool reused; + g_autofree char *name = cpr_name(mr); + int fd = qemu_ram_get_shared_fd(name, &reused, errp); + + if (fd < 0) { + return NULL; + } + + /* Use same alignment as qemu_anon_ram_alloc */ + mr->align = QEMU_VMALLOC_ALIGN; + + /* + * This can fail if the shm mount size is too small, or alloc from + * fd is not supported, but previous QEMU versions that called + * qemu_anon_ram_alloc for anonymous shared memory could have + * succeeded. Quietly fail and fall back. + * + * After cpr-transfer, new QEMU could create a memory region + * with a larger max size than old, so pass reused to grow the + * region if necessary. The extra space will be usable after a + * guest reset. + */ + new_block = qemu_ram_alloc_from_fd(size, max_size, resized, mr, + ram_flags, fd, 0, reused, NULL); + if (new_block) { + trace_qemu_ram_alloc_shared(name, new_block->used_length, + new_block->max_length, fd, + new_block->host); + return new_block; + } + + cpr_delete_fd(name, 0); + close(fd); + /* fall back to anon allocation */ + } + } +#endif align = qemu_real_host_page_size(); align = MAX(align, TARGET_PAGE_SIZE); @@ -2062,7 +2252,6 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->resized = resized; new_block->used_length = size; new_block->max_length = max_size; - assert(max_size >= size); new_block->fd = -1; new_block->guest_memfd = -1; new_block->page_size = qemu_real_host_page_size(); @@ -2087,15 +2276,14 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp) { - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD | + RAM_PRIVATE)) == 0); return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); } RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz, - void (*resized)(const char*, - uint64_t length, - void *host), - MemoryRegion *mr, Error **errp) + qemu_ram_resize_cb resized, + MemoryRegion *mr, Error **errp) { return qemu_ram_alloc_internal(size, maxsz, resized, NULL, RAM_RESIZEABLE, mr, errp); @@ -2107,7 +2295,7 @@ static void reclaim_ramblock(RAMBlock *block) ; } else if (xen_enabled()) { xen_invalidate_map_cache_entry(block->host); -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(EMSCRIPTEN) } else if (block->fd >= 0) { qemu_ram_munmap(block->fd, block->host, block->max_length); close(block->fd); @@ -2117,8 +2305,9 @@ static void reclaim_ramblock(RAMBlock *block) } if (block->guest_memfd >= 0) { + ram_block_attributes_destroy(block->attributes); close(block->guest_memfd); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); } g_free(block); @@ -2126,6 +2315,8 @@ static void reclaim_ramblock(RAMBlock *block) void qemu_ram_free(RAMBlock *block) { + g_autofree char *name = NULL; + if (!block) { return; } @@ -2136,6 +2327,8 @@ void qemu_ram_free(RAMBlock *block) } qemu_mutex_lock_ramlist(); + name = cpr_name(block->mr); + cpr_delete_fd(name, 0); QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; /* Write list before version */ @@ -2146,45 +2339,80 @@ void qemu_ram_free(RAMBlock *block) } #ifndef _WIN32 -void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) +/* Simply remap the given VM memory location from start to start+length */ +static int qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length) +{ + int flags, prot; + void *area; + void *host_startaddr = block->host + start; + + assert(block->fd < 0); + flags = MAP_FIXED | MAP_ANONYMOUS; + flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE; + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; + prot = PROT_READ; + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; + area = mmap(host_startaddr, length, prot, flags, -1, 0); + return area != host_startaddr ? -errno : 0; +} + +/* + * qemu_ram_remap - remap a single RAM page + * + * @addr: address in ram_addr_t address space. + * + * This function will try remapping a single page of guest RAM identified by + * @addr, essentially discarding memory to recover from previously poisoned + * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr + * does not have to point at the start of the page. + * + * This function is only to be used during system resets; it will kill the + * VM if remapping failed. + */ +void qemu_ram_remap(ram_addr_t addr) { RAMBlock *block; - ram_addr_t offset; - int flags; - void *area, *vaddr; - int prot; + uint64_t offset; + void *vaddr; + size_t page_size; RAMBLOCK_FOREACH(block) { offset = addr - block->offset; if (offset < block->max_length) { + /* Respect the pagesize of our RAMBlock */ + page_size = qemu_ram_pagesize(block); + offset = QEMU_ALIGN_DOWN(offset, page_size); + vaddr = ramblock_ptr(block, offset); if (block->flags & RAM_PREALLOC) { ; } else if (xen_enabled()) { abort(); } else { - flags = MAP_FIXED; - flags |= block->flags & RAM_SHARED ? - MAP_SHARED : MAP_PRIVATE; - flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; - prot = PROT_READ; - prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; - if (block->fd >= 0) { - area = mmap(vaddr, length, prot, flags, block->fd, - offset + block->fd_offset); - } else { - flags |= MAP_ANONYMOUS; - area = mmap(vaddr, length, prot, flags, -1, 0); - } - if (area != vaddr) { - error_report("Could not remap addr: " - RAM_ADDR_FMT "@" RAM_ADDR_FMT "", - length, addr); - exit(1); + if (ram_block_discard_range(block, offset, page_size) != 0) { + /* + * Fall back to using mmap() only for anonymous mapping, + * as if a backing file is associated we may not be able + * to recover the memory in all cases. + * So don't take the risk of using only mmap and fail now. + */ + if (block->fd >= 0) { + error_report("Could not remap RAM %s:%" PRIx64 "+%" + PRIx64 " +%zx", block->idstr, offset, + block->fd_offset, page_size); + exit(1); + } + if (qemu_ram_remap_mmap(block, offset, page_size) != 0) { + error_report("Could not remap RAM %s:%" PRIx64 " +%zx", + block->idstr, offset, page_size); + exit(1); + } } - memory_try_enable_merging(vaddr, length); - qemu_ram_setup_dump(vaddr, length); + memory_try_enable_merging(vaddr, page_size); + qemu_ram_setup_dump(vaddr, page_size); } + + break; } } } @@ -2277,6 +2505,10 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, ram_addr_t ram_addr; RCU_READ_LOCK_GUARD(); ram_addr = xen_ram_addr_from_mapcache(ptr); + if (ram_addr == RAM_ADDR_INVALID) { + return NULL; + } + block = qemu_get_ram_block(ram_addr); if (block) { *offset = ram_addr - block->offset; @@ -2478,23 +2710,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } -MemoryRegionSection *iotlb_to_section(CPUState *cpu, - hwaddr index, MemTxAttrs attrs) -{ - int asidx = cpu_asidx_from_attrs(cpu, attrs); - CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; - AddressSpaceDispatch *d = cpuas->memory_dispatch; - int section_index = index & ~TARGET_PAGE_MASK; - MemoryRegionSection *ret; - - assert(section_index < d->map.sections_nb); - ret = d->map.sections + section_index; - assert(ret->mr); - assert(ret->mr->ops); - - return ret; -} - static void io_mem_init(void) { memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, @@ -2623,7 +2838,11 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr length) { uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); - addr += memory_region_get_ram_addr(mr); + ram_addr_t ramaddr = memory_region_get_ram_addr(mr); + + /* We know we're only called for RAM MemoryRegions */ + assert(ramaddr != RAM_ADDR_INVALID); + addr += ramaddr; /* No early return if dirty_log_mask is or becomes 0, because * cpu_physical_memory_set_dirty_range will still call @@ -2635,7 +2854,7 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, } if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { assert(tcg_enabled()); - tb_invalidate_phys_range(addr, addr + length - 1); + tb_invalidate_phys_range(NULL, addr, addr + length - 1); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); @@ -2716,7 +2935,7 @@ static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, if (memory_region_is_ram(mr)) { return true; } - qemu_log_mask(LOG_GUEST_ERROR, + qemu_log_mask(LOG_INVALID_MEM, "Invalid access to non-RAM device at " "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " "region '%s'\n", addr, len, memory_region_name(mr)); @@ -2732,7 +2951,7 @@ static MemTxResult flatview_write_continue_step(MemTxAttrs attrs, return MEMTX_ACCESS_ERROR; } - if (!memory_access_is_direct(mr, true)) { + if (!memory_access_is_direct(mr, true, attrs)) { uint64_t val; MemTxResult result; bool release_lock = prepare_mmio_access(mr); @@ -2828,7 +3047,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf, return MEMTX_ACCESS_ERROR; } - if (!memory_access_is_direct(mr, false)) { + if (!memory_access_is_direct(mr, false, attrs)) { /* I/O case */ uint64_t val; MemTxResult result; @@ -3000,8 +3219,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, l = len; mr = address_space_translate(as, addr, &addr1, &l, true, attrs); - if (!(memory_region_is_ram(mr) || - memory_region_is_romd(mr))) { + if (!memory_region_supports_direct_access(mr)) { l = memory_access_size(mr, l, addr1); } else { /* ROM/RAM case */ @@ -3049,6 +3267,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len) NULL, len, FLUSH_CACHE); } +/* + * A magic value stored in the first 8 bytes of the bounce buffer struct. Used + * to detect illegal pointers passed to address_space_unmap. + */ +#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed + +typedef struct { + uint64_t magic; + MemoryRegion *mr; + hwaddr addr; + size_t len; + uint8_t buffer[]; +} BounceBuffer; + static void address_space_unregister_map_client_do(AddressSpaceMapClient *client) { @@ -3074,9 +3306,9 @@ void address_space_register_map_client(AddressSpace *as, QEMUBH *bh) QEMU_LOCK_GUARD(&as->map_client_list_lock); client->bh = bh; QLIST_INSERT_HEAD(&as->map_client_list, client, link); - /* Write map_client_list before reading in_use. */ + /* Write map_client_list before reading bounce_buffer_size. */ smp_mb(); - if (!qatomic_read(&as->bounce.in_use)) { + if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) { address_space_notify_map_clients_locked(as); } } @@ -3124,7 +3356,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, while (len > 0) { l = len; mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); - if (!memory_access_is_direct(mr, is_write)) { + if (!memory_access_is_direct(mr, is_write, attrs)) { l = memory_access_size(mr, l, addr); if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) { return false; @@ -3193,6 +3425,8 @@ void *address_space_map(AddressSpace *as, MemoryRegion *mr; FlatView *fv; + trace_address_space_map(as, addr, len, is_write, *(uint32_t *) &attrs); + if (len == 0) { return NULL; } @@ -3202,29 +3436,41 @@ void *address_space_map(AddressSpace *as, fv = address_space_to_flatview(as); mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); - if (!memory_access_is_direct(mr, is_write)) { - if (qatomic_xchg(&as->bounce.in_use, true)) { + if (!memory_access_is_direct(mr, is_write, attrs)) { + size_t used = qatomic_read(&as->bounce_buffer_size); + for (;;) { + hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l); + size_t new_size = used + alloc; + size_t actual = + qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size); + if (actual == used) { + l = alloc; + break; + } + used = actual; + } + + if (l == 0) { *plen = 0; return NULL; } - /* Avoid unbounded allocations */ - l = MIN(l, TARGET_PAGE_SIZE); - as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); - as->bounce.addr = addr; - as->bounce.len = l; + BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer)); + bounce->magic = BOUNCE_BUFFER_MAGIC; memory_region_ref(mr); - as->bounce.mr = mr; + bounce->mr = mr; + bounce->addr = addr; + bounce->len = l; + if (!is_write) { - flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED, - as->bounce.buffer, l); + flatview_read(fv, addr, attrs, + bounce->buffer, l); } *plen = l; - return as->bounce.buffer; + return bounce->buffer; } - memory_region_ref(mr); *plen = flatview_extend_translation(fv, addr, len, mr, xlat, l, is_write, attrs); @@ -3239,12 +3485,11 @@ void *address_space_map(AddressSpace *as, void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, bool is_write, hwaddr access_len) { - if (buffer != as->bounce.buffer) { - MemoryRegion *mr; - ram_addr_t addr1; + MemoryRegion *mr; + ram_addr_t addr1; - mr = memory_region_from_host(buffer, &addr1); - assert(mr != NULL); + mr = memory_region_from_host(buffer, &addr1); + if (mr != NULL) { if (is_write) { invalidate_and_set_dirty(mr, addr1, access_len); } @@ -3254,15 +3499,22 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, memory_region_unref(mr); return; } + + + BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer); + assert(bounce->magic == BOUNCE_BUFFER_MAGIC); + if (is_write) { - address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED, - as->bounce.buffer, access_len); - } - qemu_vfree(as->bounce.buffer); - as->bounce.buffer = NULL; - memory_region_unref(as->bounce.mr); - /* Clear in_use before reading map_client_list. */ - qatomic_set_mb(&as->bounce.in_use, false); + address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED, + bounce->buffer, access_len); + } + + qatomic_sub(&as->bounce_buffer_size, bounce->len); + bounce->magic = ~BOUNCE_BUFFER_MAGIC; + memory_region_unref(bounce->mr); + g_free(bounce); + /* Write bounce_buffer_size before reading map_client_list. */ + smp_mb(); address_space_notify_map_clients(as); } @@ -3317,7 +3569,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, mr = cache->mrs.mr; memory_region_ref(mr); - if (memory_access_is_direct(mr, is_write)) { + if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) { /* We don't care about the memory attributes here as we're only * doing this if we found actual RAM, which behaves the same * regardless of attributes; so UNSPECIFIED is fine. @@ -3510,13 +3762,8 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, if (l > len) l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); - if (is_write) { - res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, - attrs, buf, l); - } else { - res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr, - attrs, buf, l); - } + res = address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf, + l, is_write); if (res != MEMTX_OK) { return -1; } @@ -3626,18 +3873,19 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) } ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, length); + start + rb->fd_offset, length); if (ret) { ret = -errno; - error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", - __func__, rb->idstr, start, length, ret); + error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64 + " +%zx (%d)", __func__, rb->idstr, start, + rb->fd_offset, length, ret); goto err; } #else ret = -ENOSYS; error_report("%s: fallocate not available/file" - "%s:%" PRIx64 " +%zx (%d)", - __func__, rb->idstr, start, length, ret); + "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__, + rb->idstr, start, rb->fd_offset, length, ret); goto err; #endif } @@ -3684,6 +3932,7 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, int ret = -1; #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + /* ignore fd_offset with guest_memfd */ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, start, length); @@ -3888,3 +4137,58 @@ bool ram_block_discard_is_required(void) return qatomic_read(&ram_block_discard_required_cnt) || qatomic_read(&ram_block_coordinated_discard_required_cnt); } + +/* + * Return true if ram is compatible with CPR. Do not exclude rom, + * because the rom file could change in new QEMU. + */ +static bool ram_is_cpr_compatible(RAMBlock *rb) +{ + MemoryRegion *mr = rb->mr; + + if (!mr || !memory_region_is_ram(mr)) { + return true; + } + + /* Ram device is remapped in new QEMU */ + if (memory_region_is_ram_device(mr)) { + return true; + } + + /* + * A file descriptor is passed to new QEMU and remapped, or its backing + * file is reopened and mapped. It must be shared to avoid COW. + */ + if (rb->fd >= 0 && qemu_ram_is_shared(rb)) { + return true; + } + + return false; +} + +/* + * Add a blocker for each volatile ram block. This function should only be + * called after we know that the block is migratable. Non-migratable blocks + * are either re-created in new QEMU, or are handled specially, or are covered + * by a device-level CPR blocker. + */ +void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp) +{ + assert(qemu_ram_is_migratable(rb)); + + if (ram_is_cpr_compatible(rb)) { + return; + } + + error_setg(&rb->cpr_blocker, + "Memory region %s is not compatible with CPR. share=on is " + "required for memory-backend objects, and aux-ram-share=on is " + "required.", memory_region_name(rb->mr)); + migrate_add_blocker_modes(&rb->cpr_blocker, errp, MIG_MODE_CPR_TRANSFER, + -1); +} + +void ram_block_del_cpr_blocker(RAMBlock *rb) +{ + migrate_del_blocker(&rb->cpr_blocker); +} diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c index 6af6ef7..5588ed2 100644 --- a/system/qdev-monitor.c +++ b/system/qdev-monitor.c @@ -22,13 +22,14 @@ #include "monitor/hmp.h" #include "monitor/monitor.h" #include "monitor/qdev.h" -#include "sysemu/arch_init.h" +#include "system/arch_init.h" +#include "system/runstate.h" #include "qapi/error.h" #include "qapi/qapi-commands-qdev.h" -#include "qapi/qmp/dispatch.h" -#include "qapi/qmp/qdict.h" +#include "qapi/qmp-registry.h" +#include "qobject/qdict.h" #include "qapi/qmp/qerror.h" -#include "qapi/qmp/qstring.h" +#include "qobject/qstring.h" #include "qapi/qobject-input-visitor.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -36,7 +37,7 @@ #include "qemu/option.h" #include "qemu/qemu-print.h" #include "qemu/option_int.h" -#include "sysemu/block-backend.h" +#include "system/block-backend.h" #include "migration/misc.h" #include "qemu/cutils.h" #include "hw/qdev-properties.h" @@ -55,12 +56,18 @@ typedef struct QDevAlias } QDevAlias; /* default virtio transport per architecture */ -#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \ - QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \ - QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \ - QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \ - QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA | \ - QEMU_ARCH_LOONGARCH) +#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | \ + QEMU_ARCH_ARM | \ + QEMU_ARCH_HPPA | \ + QEMU_ARCH_I386 | \ + QEMU_ARCH_LOONGARCH | \ + QEMU_ARCH_MIPS | \ + QEMU_ARCH_OPENRISC | \ + QEMU_ARCH_PPC | \ + QEMU_ARCH_RISCV | \ + QEMU_ARCH_SH4 | \ + QEMU_ARCH_SPARC | \ + QEMU_ARCH_XTENSA) #define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X) #define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K) @@ -125,7 +132,7 @@ static const char *qdev_class_get_alias(DeviceClass *dc) for (i = 0; qdev_alias_table[i].typename; i++) { if (qdev_alias_table[i].arch_mask && - !(qdev_alias_table[i].arch_mask & arch_type)) { + !qemu_arch_available(qdev_alias_table[i].arch_mask)) { continue; } @@ -211,7 +218,7 @@ static const char *find_typename_by_alias(const char *alias) for (i = 0; qdev_alias_table[i].alias; i++) { if (qdev_alias_table[i].arch_mask && - !(qdev_alias_table[i].arch_mask & arch_type)) { + !qemu_arch_available(qdev_alias_table[i].arch_mask)) { continue; } @@ -256,8 +263,7 @@ static DeviceClass *qdev_get_device_class(const char **driver, Error **errp) } dc = DEVICE_CLASS(oc); - if (!dc->user_creatable || - (phase_check(PHASE_MACHINE_READY) && !dc->hotpluggable)) { + if (!dc->user_creatable) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver", "a pluggable device type"); return NULL; @@ -341,7 +347,7 @@ static Object *qdev_get_peripheral(void) static Object *dev; if (dev == NULL) { - dev = container_get(qdev_get_machine(), "/peripheral"); + dev = machine_get_container("peripheral"); } return dev; @@ -352,7 +358,7 @@ static Object *qdev_get_peripheral_anon(void) static Object *dev; if (dev == NULL) { - dev = container_get(qdev_get_machine(), "/peripheral-anon"); + dev = machine_get_container("peripheral-anon"); } return dev; @@ -624,6 +630,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, char *id; DeviceState *dev = NULL; BusState *bus = NULL; + QDict *properties; driver = qdict_get_try_str(opts, "driver"); if (!driver) { @@ -668,12 +675,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, return NULL; } - if (phase_check(PHASE_MACHINE_READY) && bus && !qbus_is_hotpluggable(bus)) { - error_setg(errp, "Bus '%s' does not support hotplugging", bus->name); - return NULL; - } - - if (!migration_is_idle()) { + if (migration_is_running()) { error_setg(errp, "device_add not allowed while migrating"); return NULL; } @@ -682,17 +684,9 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, dev = qdev_new(driver); /* Check whether the hotplug is allowed by the machine */ - if (phase_check(PHASE_MACHINE_READY)) { - if (!qdev_hotplug_allowed(dev, errp)) { - goto err_del_dev; - } - - if (!bus && !qdev_get_machine_hotplug_handler(dev)) { - /* No bus, no machine hotplug handler --> device is not hotpluggable */ - error_setg(errp, "Device '%s' can not be hotplugged on this machine", - driver); - goto err_del_dev; - } + if (phase_check(PHASE_MACHINE_READY) && + !qdev_hotplug_allowed(dev, bus, errp)) { + goto err_del_dev; } /* @@ -705,13 +699,14 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, } /* set properties */ - dev->opts = qdict_clone_shallow(opts); - qdict_del(dev->opts, "driver"); - qdict_del(dev->opts, "bus"); - qdict_del(dev->opts, "id"); + properties = qdict_clone_shallow(opts); + qdict_del(properties, "driver"); + qdict_del(properties, "bus"); + qdict_del(properties, "id"); - object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, + object_set_properties_from_keyval(&dev->parent_obj, properties, from_json, errp); + qobject_unref(properties); if (*errp) { goto err_del_dev; } @@ -745,19 +740,18 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) #define qdev_printf(fmt, ...) monitor_printf(mon, "%*s" fmt, indent, "", ## __VA_ARGS__) -static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props, +static void qdev_print_props(Monitor *mon, DeviceState *dev, DeviceClass *dc, int indent) { - if (!props) - return; - for (; props->name; props++) { + for (int i = 0, n = dc->props_count_; i < n; ++i) { + const Property *prop = &dc->props_[i]; char *value; - char *legacy_name = g_strdup_printf("legacy-%s", props->name); + char *legacy_name = g_strdup_printf("legacy-%s", prop->name); if (object_property_get_type(OBJECT(dev), legacy_name, NULL)) { value = object_property_get_str(OBJECT(dev), legacy_name, NULL); } else { - value = object_property_print(OBJECT(dev), props->name, true, + value = object_property_print(OBJECT(dev), prop->name, true, NULL); } g_free(legacy_name); @@ -765,7 +759,7 @@ static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props, if (!value) { continue; } - qdev_printf("%s = %s\n", props->name, + qdev_printf("%s = %s\n", prop->name, *value ? value : "<null>"); g_free(value); } @@ -805,7 +799,7 @@ static void qdev_print(Monitor *mon, DeviceState *dev, int indent) } class = object_get_class(OBJECT(dev)); do { - qdev_print_props(mon, dev, DEVICE_CLASS(class)->props_, indent); + qdev_print_props(mon, dev, DEVICE_CLASS(class), indent); class = object_class_get_parent(class); } while (class != object_class_by_name(TYPE_DEVICE)); bus_print_dev(dev->parent_bus, mon, dev, indent); @@ -849,18 +843,9 @@ void hmp_info_qdm(Monitor *mon, const QDict *qdict) void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) { - QemuOpts *opts; DeviceState *dev; - opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, errp); - if (!opts) { - return; - } - if (!monitor_cur_is_qmp() && qdev_device_help(opts)) { - qemu_opts_del(opts); - return; - } - dev = qdev_device_add(opts, errp); + dev = qdev_device_add_from_qdict(qdict, true, errp); if (!dev) { /* * Drain all pending RCU callbacks. This is done because @@ -872,20 +857,24 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) * to the user */ drain_call_rcu(); - - qemu_opts_del(opts); - return; } object_unref(OBJECT(dev)); } -static DeviceState *find_device_state(const char *id, Error **errp) +/* + * Note that creating new APIs using error classes other than GenericError is + * not recommended. Set use_generic_error=true for new interfaces. + */ +static DeviceState *find_device_state(const char *id, bool use_generic_error, + Error **errp) { Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); DeviceState *dev; if (!obj) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + error_set(errp, + (use_generic_error ? + ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND), "Device '%s' not found", id); return NULL; } @@ -901,28 +890,15 @@ static DeviceState *find_device_state(const char *id, Error **errp) void qdev_unplug(DeviceState *dev, Error **errp) { - DeviceClass *dc = DEVICE_GET_CLASS(dev); HotplugHandler *hotplug_ctrl; HotplugHandlerClass *hdc; Error *local_err = NULL; - if (qdev_unplug_blocked(dev, errp)) { - return; - } - - if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { - error_setg(errp, "Bus '%s' does not support hotplugging", - dev->parent_bus->name); - return; - } - - if (!dc->hotpluggable) { - error_setg(errp, "Device '%s' does not support hotplugging", - object_get_typename(OBJECT(dev))); + if (!qdev_hotunplug_allowed(dev, errp)) { return; } - if (!migration_is_idle() && !dev->allow_unplug_during_migration) { + if (migration_is_running() && !dev->allow_unplug_during_migration) { error_setg(errp, "device_del not allowed while migrating"); return; } @@ -950,7 +926,7 @@ void qdev_unplug(DeviceState *dev, Error **errp) void qmp_device_del(const char *id, Error **errp) { - DeviceState *dev = find_device_state(id, errp); + DeviceState *dev = find_device_state(id, false, errp); if (dev != NULL) { if (dev->pending_deleted_event && (dev->pending_deleted_expires_ms == 0 || @@ -964,11 +940,74 @@ void qmp_device_del(const char *id, Error **errp) } } +int qdev_sync_config(DeviceState *dev, Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + + if (!dc->sync_config) { + error_setg(errp, "device-sync-config is not supported for '%s'", + object_get_typename(OBJECT(dev))); + return -ENOTSUP; + } + + return dc->sync_config(dev, errp); +} + +void qmp_device_sync_config(const char *id, Error **errp) +{ + DeviceState *dev; + + /* + * During migration there is a race between syncing`configuration + * and migrating it (if migrate first, that target would get + * outdated version), so let's just not allow it. + */ + + if (migration_is_running()) { + error_setg(errp, "Config synchronization is not allowed " + "during migration"); + return; + } + + dev = find_device_state(id, true, errp); + if (!dev) { + return; + } + + qdev_sync_config(dev, errp); +} + void hmp_device_add(Monitor *mon, const QDict *qdict) { Error *err = NULL; + QemuOpts *opts; + DeviceState *dev; + + opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, &err); + if (!opts) { + goto out; + } + if (qdev_device_help(opts)) { + qemu_opts_del(opts); + return; + } + dev = qdev_device_add(opts, &err); + if (!dev) { + /* + * Drain all pending RCU callbacks. This is done because + * some bus related operations can delay a device removal + * (in this case this can happen if device is added and then + * removed due to a configuration error) + * to a RCU callback, but user might expect that this interface + * will finish its job completely once qmp command returns result + * to the user + */ + drain_call_rcu(); - qmp_device_add((QDict *)qdict, NULL, &err); + qemu_opts_del(opts); + } + object_unref(dev); +out: hmp_handle_error(mon, err); } @@ -1034,7 +1073,7 @@ static GSList *qdev_build_hotpluggable_device_list(Object *peripheral) static void peripheral_device_del_completion(ReadLineState *rs, const char *str) { - Object *peripheral = container_get(qdev_get_machine(), "/peripheral"); + Object *peripheral = machine_get_container("peripheral"); GSList *list, *item; list = qdev_build_hotpluggable_device_list(peripheral); @@ -1070,7 +1109,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) GLOBAL_STATE_CODE(); - dev = find_device_state(id, errp); + dev = find_device_state(id, false, errp); if (dev == NULL) { return NULL; } diff --git a/system/qemu-seccomp.c b/system/qemu-seccomp.c index 98ffce0..f8e1238 100644 --- a/system/qemu-seccomp.c +++ b/system/qemu-seccomp.c @@ -20,7 +20,7 @@ #include "qemu/module.h" #include <sys/prctl.h> #include <seccomp.h> -#include "sysemu/seccomp.h" +#include "system/seccomp.h" #include <linux/seccomp.h> /* For some architectures (notably ARM) cacheflush is not supported until @@ -47,10 +47,10 @@ const struct scmp_arg_cmp sched_setscheduler_arg[] = { }; /* - * See 'NOTES' in 'man 2 clone' - s390 & cross have 'flags' in + * See 'NOTES' in 'man 2 clone' - s390 has 'flags' in * different position to other architectures */ -#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS) +#if defined(HOST_S390X) || defined(HOST_S390) #define CLONE_FLAGS_ARG 1 #else #define CLONE_FLAGS_ARG 0 diff --git a/system/qtest.c b/system/qtest.c index 12703a2..301b03b 100644 --- a/system/qtest.c +++ b/system/qtest.c @@ -13,17 +13,17 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "sysemu/qtest.h" -#include "sysemu/runstate.h" +#include "system/qtest.h" +#include "system/runstate.h" #include "chardev/char-fe.h" -#include "exec/ioport.h" -#include "exec/memory.h" +#include "system/ioport.h" +#include "system/memory.h" #include "exec/tswap.h" #include "hw/qdev-core.h" #include "hw/irq.h" #include "hw/core/cpu.h" #include "qemu/accel.h" -#include "sysemu/cpu-timers.h" +#include "system/cpu-timers.h" #include "qemu/config-file.h" #include "qemu/option.h" #include "qemu/error-report.h" @@ -78,6 +78,11 @@ static void *qtest_server_send_opaque; * let you adjust the value of the clock (monotonically). All the commands * return the current value of the clock in nanoseconds. * + * If the commands FAIL then time wasn't advanced which is likely + * because the machine was in a paused state or no timer events exist + * in the future. This will cause qtest to abort and the test will + * need to check its assumptions. + * * .. code-block:: none * * > clock_step @@ -260,7 +265,7 @@ static int hex2nib(char ch) } } -void qtest_send_prefix(CharBackend *chr) +static void qtest_log_timestamp(void) { if (!qtest_log_fp || !qtest_opened) { return; @@ -277,7 +282,7 @@ static void G_GNUC_PRINTF(1, 2) qtest_log_send(const char *fmt, ...) return; } - qtest_send_prefix(NULL); + qtest_log_timestamp(); va_start(ap, fmt); vfprintf(qtest_log_fp, fmt, ap); @@ -296,6 +301,7 @@ static void qtest_server_char_be_send(void *opaque, const char *str) static void qtest_send(CharBackend *chr, const char *str) { + qtest_log_timestamp(); qtest_server_send(qtest_server_send_opaque, str); } @@ -319,7 +325,6 @@ static void qtest_irq_handler(void *opaque, int n, int level) if (irq_levels[n] != level) { CharBackend *chr = &qtest->qtest_chr; irq_levels[n] = level; - qtest_send_prefix(chr); qtest_sendf(chr, "IRQ %s %d\n", level ? "raise" : "lower", n); } @@ -375,19 +380,16 @@ static void qtest_process_command(CharBackend *chr, gchar **words) is_outbound = words[0][14] == 'o'; dev = DEVICE(object_resolve_path(words[1], NULL)); if (!dev) { - qtest_send_prefix(chr); qtest_send(chr, "FAIL Unknown device\n"); return; } if (is_named && !is_outbound) { - qtest_send_prefix(chr); qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n"); return; } if (irq_intercept_dev) { - qtest_send_prefix(chr); if (irq_intercept_dev != dev) { qtest_send(chr, "FAIL IRQ intercept already enabled\n"); } else { @@ -414,7 +416,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) } } - qtest_send_prefix(chr); if (interception_succeeded) { irq_intercept_dev = dev; qtest_send(chr, "OK\n"); @@ -433,7 +434,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) dev = DEVICE(object_resolve_path(words[1], NULL)); if (!dev) { - qtest_send_prefix(chr); qtest_send(chr, "FAIL Unknown device\n"); return; } @@ -452,7 +452,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) irq = qdev_get_gpio_in_named(dev, name, num); qemu_set_irq(irq, level); - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "outb") == 0 || strcmp(words[0], "outw") == 0 || @@ -475,7 +474,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) } else if (words[0][3] == 'l') { cpu_outl(addr, value); } - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "inb") == 0 || strcmp(words[0], "inw") == 0 || @@ -496,7 +494,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) } else if (words[0][2] == 'l') { value = cpu_inl(addr); } - qtest_send_prefix(chr); qtest_sendf(chr, "OK 0x%04x\n", value); } else if (strcmp(words[0], "writeb") == 0 || strcmp(words[0], "writew") == 0 || @@ -532,7 +529,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, &data, 8); } - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "readb") == 0 || strcmp(words[0], "readw") == 0 || @@ -566,7 +562,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) &value, 8); tswap64s(&value); } - qtest_send_prefix(chr); qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value); } else if (strcmp(words[0], "read") == 0) { g_autoptr(GString) enc = NULL; @@ -588,7 +583,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) enc = qemu_hexdump_line(NULL, data, len, 0, 0); - qtest_send_prefix(chr); qtest_sendf(chr, "OK 0x%s\n", enc->str); g_free(data); @@ -608,7 +602,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, len); b64_data = g_base64_encode(data, len); - qtest_send_prefix(chr); qtest_sendf(chr, "OK %s\n", b64_data); g_free(data); @@ -644,7 +637,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) len); g_free(data); - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "memset") == 0) { uint64_t addr, len; @@ -668,7 +660,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words) g_free(data); } - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "b64write") == 0) { uint64_t addr, len; @@ -700,17 +691,16 @@ static void qtest_process_command(CharBackend *chr, gchar **words) address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data, len); - qtest_send_prefix(chr); qtest_send(chr, "OK\n"); } else if (strcmp(words[0], "endianness") == 0) { - qtest_send_prefix(chr); - if (target_words_bigendian()) { + if (target_big_endian()) { qtest_sendf(chr, "OK big\n"); } else { qtest_sendf(chr, "OK little\n"); } } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) { - int64_t ns; + int64_t old_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + int64_t ns, new_ns; if (words[1]) { int ret = qemu_strtoi64(words[1], NULL, 0, &ns); @@ -718,18 +708,24 @@ static void qtest_process_command(CharBackend *chr, gchar **words) } else { ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, QEMU_TIMER_ATTR_ALL); + if (ns < 0) { + qtest_send(chr, "FAIL " + "cannot advance clock to the next deadline " + "because there is no pending deadline\n"); + return; + } + } + new_ns = qemu_clock_advance_virtual_time(old_ns + ns); + if (new_ns > old_ns) { + qtest_sendf(chr, "OK %"PRIi64"\n", new_ns); + } else { + qtest_sendf(chr, "FAIL could not advance time\n"); } - qemu_clock_advance_virtual_time( - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns); - qtest_send_prefix(chr); - qtest_sendf(chr, "OK %"PRIi64"\n", - (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); } else if (strcmp(words[0], "module_load") == 0) { Error *local_err = NULL; int rv; g_assert(words[1] && words[2]); - qtest_send_prefix(chr); rv = module_load(words[1], words[2], &local_err); if (rv > 0) { qtest_sendf(chr, "OK\n"); @@ -740,43 +736,37 @@ static void qtest_process_command(CharBackend *chr, gchar **words) qtest_sendf(chr, "FAIL\n"); } } else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) { - int64_t ns; + int64_t ns, new_ns; int ret; g_assert(words[1]); ret = qemu_strtoi64(words[1], NULL, 0, &ns); g_assert(ret == 0); - qemu_clock_advance_virtual_time(ns); - qtest_send_prefix(chr); - qtest_sendf(chr, "OK %"PRIi64"\n", - (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + new_ns = qemu_clock_advance_virtual_time(ns); + qtest_sendf(chr, "%s %"PRIi64"\n", + new_ns == ns ? "OK" : "FAIL", new_ns); } else if (process_command_cb && process_command_cb(chr, words)) { /* Command got consumed by the callback handler */ } else { - qtest_send_prefix(chr); qtest_sendf(chr, "FAIL Unknown command '%s'\n", words[0]); } } +/* + * Process as much of @inbuf as we can in newline terminated chunks. + * Remove the processed commands from @inbuf as we go. + */ static void qtest_process_inbuf(CharBackend *chr, GString *inbuf) { char *end; while ((end = strchr(inbuf->str, '\n')) != NULL) { - size_t offset; - GString *cmd; - gchar **words; - - offset = end - inbuf->str; + size_t len = end - inbuf->str; + g_autofree char *cmd = g_strndup(inbuf->str, len); + g_auto(GStrv) words = g_strsplit(cmd, " ", 0); - cmd = g_string_new_len(inbuf->str, offset); - g_string_erase(inbuf, 0, offset + 1); - - words = g_strsplit(cmd->str, " ", 0); + g_string_erase(inbuf, 0, len + 1); qtest_process_command(chr, words); - g_strfreev(words); - - g_string_free(cmd, TRUE); } } @@ -1004,7 +994,7 @@ static char *qtest_get_chardev(Object *obj, Error **errp) return g_strdup(q->chr_name); } -static void qtest_class_init(ObjectClass *oc, void *data) +static void qtest_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); @@ -1022,7 +1012,7 @@ static const TypeInfo qtest_info = { .parent = TYPE_OBJECT, .class_init = qtest_class_init, .instance_size = sizeof(QTest), - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_USER_CREATABLE }, { } } diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c new file mode 100644 index 0000000..68e8a02 --- /dev/null +++ b/system/ram-block-attributes.c @@ -0,0 +1,444 @@ +/* + * QEMU ram block attributes + * + * Copyright Intel + * + * Author: + * Chenyi Qiang <chenyi.qiang@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "system/ramblock.h" +#include "trace.h" + +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes, + ram_block_attributes, + RAM_BLOCK_ATTRIBUTES, + OBJECT, + { TYPE_RAM_DISCARD_MANAGER }, + { }) + +static size_t +ram_block_attributes_get_block_size(const RamBlockAttributes *attr) +{ + /* + * Because page conversion could be manipulated in the size of at least 4K + * or 4K aligned, Use the host page size as the granularity to track the + * memory attribute. + */ + g_assert(attr && attr->ram_block); + g_assert(attr->ram_block->page_size == qemu_real_host_page_size()); + return attr->ram_block->page_size; +} + + +static bool +ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *section) +{ + const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + const size_t block_size = ram_block_attributes_get_block_size(attr); + const uint64_t first_bit = section->offset_within_region / block_size; + const uint64_t last_bit = + first_bit + int128_get64(section->size) / block_size - 1; + unsigned long first_discarded_bit; + + first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1, + first_bit); + return first_discarded_bit > last_bit; +} + +typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s, + void *arg); + +static int +ram_block_attributes_notify_populate_cb(MemoryRegionSection *section, + void *arg) +{ + RamDiscardListener *rdl = arg; + + return rdl->notify_populate(rdl, section); +} + +static int +ram_block_attributes_notify_discard_cb(MemoryRegionSection *section, + void *arg) +{ + RamDiscardListener *rdl = arg; + + rdl->notify_discard(rdl, section); + return 0; +} + +static int +ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr, + MemoryRegionSection *section, + void *arg, + ram_block_attributes_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + const size_t block_size = ram_block_attributes_get_block_size(attr); + int ret = 0; + + first_bit = section->offset_within_region / block_size; + first_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit); + + while (first_bit < attr->bitmap_size) { + MemoryRegionSection tmp = *section; + + offset = first_bit * block_size; + last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * block_size; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + + ret = cb(&tmp, arg); + if (ret) { + error_report("%s: Failed to notify RAM discard listener: %s", + __func__, strerror(-ret)); + break; + } + + first_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + last_bit + 2); + } + + return ret; +} + +static int +ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr, + MemoryRegionSection *section, + void *arg, + ram_block_attributes_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + const size_t block_size = ram_block_attributes_get_block_size(attr); + int ret = 0; + + first_bit = section->offset_within_region / block_size; + first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size, + first_bit); + + while (first_bit < attr->bitmap_size) { + MemoryRegionSection tmp = *section; + + offset = first_bit * block_size; + last_bit = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * block_size; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + break; + } + + ret = cb(&tmp, arg); + if (ret) { + error_report("%s: Failed to notify RAM discard listener: %s", + __func__, strerror(-ret)); + break; + } + + first_bit = find_next_zero_bit(attr->bitmap, + attr->bitmap_size, + last_bit + 2); + } + + return ret; +} + +static uint64_t +ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr) +{ + const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + + g_assert(mr == attr->ram_block->mr); + return ram_block_attributes_get_block_size(attr); +} + +static void +ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + int ret; + + g_assert(section->mr == attr->ram_block->mr); + rdl->section = memory_region_section_new_copy(section); + + QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next); + + ret = ram_block_attributes_for_each_populated_section(attr, section, rdl, + ram_block_attributes_notify_populate_cb); + if (ret) { + error_report("%s: Failed to register RAM discard listener: %s", + __func__, strerror(-ret)); + exit(1); + } +} + +static void +ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + int ret; + + g_assert(rdl->section); + g_assert(rdl->section->mr == attr->ram_block->mr); + + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + ret = ram_block_attributes_for_each_populated_section(attr, + rdl->section, rdl, ram_block_attributes_notify_discard_cb); + if (ret) { + error_report("%s: Failed to unregister RAM discard listener: %s", + __func__, strerror(-ret)); + exit(1); + } + } + + memory_region_section_free_copy(rdl->section); + rdl->section = NULL; + QLIST_REMOVE(rdl, next); +} + +typedef struct RamBlockAttributesReplayData { + ReplayRamDiscardState fn; + void *opaque; +} RamBlockAttributesReplayData; + +static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section, + void *arg) +{ + RamBlockAttributesReplayData *data = arg; + + return data->fn(section, data->opaque); +} + +static int +ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque }; + + g_assert(section->mr == attr->ram_block->mr); + return ram_block_attributes_for_each_populated_section(attr, section, &data, + ram_block_attributes_rdm_replay_cb); +} + +static int +ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscardState replay_fn, + void *opaque) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm); + RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque }; + + g_assert(section->mr == attr->ram_block->mr); + return ram_block_attributes_for_each_discarded_section(attr, section, &data, + ram_block_attributes_rdm_replay_cb); +} + +static bool +ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset, + uint64_t size) +{ + MemoryRegion *mr = attr->ram_block->mr; + + g_assert(mr); + + uint64_t region_size = memory_region_size(mr); + const size_t block_size = ram_block_attributes_get_block_size(attr); + + if (!QEMU_IS_ALIGNED(offset, block_size) || + !QEMU_IS_ALIGNED(size, block_size)) { + return false; + } + if (offset + size <= offset) { + return false; + } + if (offset + size > region_size) { + return false; + } + return true; +} + +static void ram_block_attributes_notify_discard(RamBlockAttributes *attr, + uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl; + + QLIST_FOREACH(rdl, &attr->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + rdl->notify_discard(rdl, &tmp); + } +} + +static int +ram_block_attributes_notify_populate(RamBlockAttributes *attr, + uint64_t offset, uint64_t size) +{ + RamDiscardListener *rdl; + int ret = 0; + + QLIST_FOREACH(rdl, &attr->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!memory_region_section_intersect_range(&tmp, offset, size)) { + continue; + } + ret = rdl->notify_populate(rdl, &tmp); + if (ret) { + break; + } + } + + return ret; +} + +int ram_block_attributes_state_change(RamBlockAttributes *attr, + uint64_t offset, uint64_t size, + bool to_discard) +{ + const size_t block_size = ram_block_attributes_get_block_size(attr); + const unsigned long first_bit = offset / block_size; + const unsigned long nbits = size / block_size; + const unsigned long last_bit = first_bit + nbits - 1; + const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size, + first_bit) > last_bit; + const bool is_populated = find_next_zero_bit(attr->bitmap, + attr->bitmap_size, first_bit) > last_bit; + unsigned long bit; + int ret = 0; + + if (!ram_block_attributes_is_valid_range(attr, offset, size)) { + error_report("%s, invalid range: offset 0x%" PRIx64 ", size " + "0x%" PRIx64, __func__, offset, size); + return -EINVAL; + } + + trace_ram_block_attributes_state_change(offset, size, + is_discarded ? "discarded" : + is_populated ? "populated" : + "mixture", + to_discard ? "discarded" : + "populated"); + if (to_discard) { + if (is_discarded) { + /* Already private */ + } else if (is_populated) { + /* Completely shared */ + bitmap_clear(attr->bitmap, first_bit, nbits); + ram_block_attributes_notify_discard(attr, offset, size); + } else { + /* Unexpected mixture: process individual blocks */ + for (bit = first_bit; bit < first_bit + nbits; bit++) { + if (!test_bit(bit, attr->bitmap)) { + continue; + } + clear_bit(bit, attr->bitmap); + ram_block_attributes_notify_discard(attr, bit * block_size, + block_size); + } + } + } else { + if (is_populated) { + /* Already shared */ + } else if (is_discarded) { + /* Completely private */ + bitmap_set(attr->bitmap, first_bit, nbits); + ret = ram_block_attributes_notify_populate(attr, offset, size); + } else { + /* Unexpected mixture: process individual blocks */ + for (bit = first_bit; bit < first_bit + nbits; bit++) { + if (test_bit(bit, attr->bitmap)) { + continue; + } + set_bit(bit, attr->bitmap); + ret = ram_block_attributes_notify_populate(attr, + bit * block_size, + block_size); + if (ret) { + break; + } + } + } + } + + return ret; +} + +RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block) +{ + const int block_size = qemu_real_host_page_size(); + RamBlockAttributes *attr; + MemoryRegion *mr = ram_block->mr; + + attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES)); + + attr->ram_block = ram_block; + if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) { + object_unref(OBJECT(attr)); + return NULL; + } + attr->bitmap_size = + ROUND_UP(int128_get64(mr->size), block_size) / block_size; + attr->bitmap = bitmap_new(attr->bitmap_size); + + return attr; +} + +void ram_block_attributes_destroy(RamBlockAttributes *attr) +{ + g_assert(attr); + + g_free(attr->bitmap); + memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL); + object_unref(OBJECT(attr)); +} + +static void ram_block_attributes_init(Object *obj) +{ + RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj); + + QLIST_INIT(&attr->rdl_list); +} + +static void ram_block_attributes_finalize(Object *obj) +{ +} + +static void ram_block_attributes_class_init(ObjectClass *klass, + const void *data) +{ + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); + + rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity; + rdmc->register_listener = ram_block_attributes_rdm_register_listener; + rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener; + rdmc->is_populated = ram_block_attributes_rdm_is_populated; + rdmc->replay_populated = ram_block_attributes_rdm_replay_populated; + rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded; +} diff --git a/system/rtc.c b/system/rtc.c index dc44576..5695128 100644 --- a/system/rtc.c +++ b/system/rtc.c @@ -29,9 +29,9 @@ #include "qemu/option.h" #include "qemu/timer.h" #include "qom/object.h" -#include "sysemu/replay.h" -#include "sysemu/sysemu.h" -#include "sysemu/rtc.h" +#include "system/replay.h" +#include "system/system.h" +#include "system/rtc.h" #include "hw/rtc/mc146818rtc.h" static enum { @@ -62,7 +62,7 @@ static time_t qemu_ref_timedate(QEMUClockType clock) } break; default: - assert(0); + g_assert_not_reached(); } return value; } diff --git a/system/runstate-action.c b/system/runstate-action.c index ae0761a..f912bc8 100644 --- a/system/runstate-action.c +++ b/system/runstate-action.c @@ -7,8 +7,8 @@ */ #include "qemu/osdep.h" -#include "sysemu/runstate-action.h" -#include "sysemu/watchdog.h" +#include "system/runstate-action.h" +#include "system/watchdog.h" #include "qemu/config-file.h" #include "qapi/error.h" #include "qemu/option_int.h" diff --git a/system/runstate-hmp-cmds.c b/system/runstate-hmp-cmds.c index 2df670f..be1d676 100644 --- a/system/runstate-hmp-cmds.c +++ b/system/runstate-hmp-cmds.c @@ -19,7 +19,7 @@ #include "monitor/monitor.h" #include "qapi/error.h" #include "qapi/qapi-commands-run-state.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" #include "qemu/accel.h" void hmp_info_status(Monitor *mon, const QDict *qdict) diff --git a/system/runstate.c b/system/runstate.c index ec32e27..38900c9 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -32,6 +32,7 @@ #include "exec/cpu-common.h" #include "gdbstub/syscalls.h" #include "hw/boards.h" +#include "hw/resettable.h" #include "migration/misc.h" #include "migration/postcopy-ram.h" #include "monitor/monitor.h" @@ -50,14 +51,14 @@ #include "qemu/thread.h" #include "qom/object.h" #include "qom/object_interfaces.h" -#include "sysemu/cpus.h" -#include "sysemu/qtest.h" -#include "sysemu/replay.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" -#include "sysemu/runstate-action.h" -#include "sysemu/sysemu.h" -#include "sysemu/tpm.h" +#include "system/cpus.h" +#include "system/qtest.h" +#include "system/replay.h" +#include "system/reset.h" +#include "system/runstate.h" +#include "system/runstate-action.h" +#include "system/system.h" +#include "system/tpm.h" #include "trace.h" static NotifierList exit_notifiers = @@ -181,6 +182,12 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE__MAX, RUN_STATE__MAX }, }; +static const RunStateTransition replay_play_runstate_transitions_def[] = { + { RUN_STATE_SHUTDOWN, RUN_STATE_RUNNING}, + + { RUN_STATE__MAX, RUN_STATE__MAX }, +}; + static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX]; bool runstate_check(RunState state) @@ -188,14 +195,33 @@ bool runstate_check(RunState state) return current_run_state == state; } -static void runstate_init(void) +static void transitions_set_valid(const RunStateTransition *rst) { const RunStateTransition *p; - memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions)); - for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) { + for (p = rst; p->from != RUN_STATE__MAX; p++) { runstate_valid_transitions[p->from][p->to] = true; } +} + +void runstate_replay_enable(void) +{ + assert(replay_mode != REPLAY_MODE_NONE); + + if (replay_mode == REPLAY_MODE_PLAY) { + /* + * When reverse-debugging, it is possible to move state from + * shutdown to running. + */ + transitions_set_valid(&replay_play_runstate_transitions_def[0]); + } +} + +static void runstate_init(void) +{ + memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions)); + + transitions_set_valid(&runstate_transitions_def[0]); qemu_mutex_init(&vmstop_lock); } @@ -271,6 +297,7 @@ void qemu_system_vmstop_request(RunState state) struct VMChangeStateEntry { VMChangeStateHandler *cb; VMChangeStateHandler *prepare_cb; + VMChangeStateHandlerWithRet *cb_ret; void *opaque; QTAILQ_ENTRY(VMChangeStateEntry) entries; int priority; @@ -294,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head = VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateHandler *cb, void *opaque, int priority) { - return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque, - priority); + return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL, + opaque, priority); } /** * qemu_add_vm_change_state_handler_prio_full: * @cb: the main callback to invoke * @prepare_cb: a callback to invoke before the main callback + * @cb_ret: the main callback to invoke with return value * @opaque: user data passed to the callbacks * @priority: low priorities execute first when the vm runs and the reverse is * true when the vm stops @@ -318,6 +346,7 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio( VMChangeStateEntry * qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque, int priority) { VMChangeStateEntry *e; @@ -326,6 +355,7 @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb, e = g_malloc0(sizeof(*e)); e->cb = cb; e->prepare_cb = prepare_cb; + e->cb_ret = cb_ret; e->opaque = opaque; e->priority = priority; @@ -353,9 +383,10 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e) g_free(e); } -void vm_state_notify(bool running, RunState state) +int vm_state_notify(bool running, RunState state) { VMChangeStateEntry *e, *next; + int ret = 0; trace_vm_state_notify(running, state, RunState_str(state)); @@ -367,7 +398,17 @@ void vm_state_notify(bool running, RunState state) } QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); + if (e->cb) { + e->cb(e->opaque, running, state); + } else if (e->cb_ret) { + /* + * Here ignore the return value of cb_ret because + * we only care about the stopping the device during + * the VM live migration to indicate whether the + * connection between qemu and backend is normal. + */ + e->cb_ret(e->opaque, running, state); + } } } else { QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { @@ -377,9 +418,19 @@ void vm_state_notify(bool running, RunState state) } QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) { - e->cb(e->opaque, running, state); + if (e->cb) { + e->cb(e->opaque, running, state); + } else if (e->cb_ret) { + /* + * We should execute all registered callbacks even if + * one of them returns failure, otherwise, some cleanup + * work of the device will be skipped. + */ + ret |= e->cb_ret(e->opaque, running, state); + } } } + return ret; } static ShutdownCause reset_requested; @@ -482,15 +533,23 @@ static int qemu_debug_requested(void) void qemu_system_reset(ShutdownCause reason) { MachineClass *mc; + ResetType type; mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL; cpu_synchronize_all_states(); + switch (reason) { + case SHUTDOWN_CAUSE_SNAPSHOT_LOAD: + type = RESET_TYPE_SNAPSHOT_LOAD; + break; + default: + type = RESET_TYPE_COLD; + } if (mc && mc->reset) { - mc->reset(current_machine, reason); + mc->reset(current_machine, type); } else { - qemu_devices_reset(reason); + qemu_devices_reset(type); } switch (reason) { case SHUTDOWN_CAUSE_NONE: @@ -531,6 +590,58 @@ static void qemu_system_wakeup(void) } } +static char *tdx_parse_panic_message(char *message) +{ + bool printable = false; + char *buf = NULL; + int len = 0, i; + + /* + * Although message is defined as a json string, we shouldn't + * unconditionally treat it as is because the guest generated it and + * it's not necessarily trustable. + */ + if (message) { + /* The caller guarantees the NULL-terminated string. */ + len = strlen(message); + + printable = len > 0; + for (i = 0; i < len; i++) { + if (!(0x20 <= message[i] && message[i] <= 0x7e)) { + printable = false; + break; + } + } + } + + if (len == 0) { + buf = g_malloc(1); + buf[0] = '\0'; + } else { + if (!printable) { + /* 3 = length of "%02x " */ + buf = g_malloc(len * 3); + for (i = 0; i < len; i++) { + if (message[i] == '\0') { + break; + } else { + sprintf(buf + 3 * i, "%02x ", message[i]); + } + } + if (i > 0) { + /* replace the last ' '(space) to NULL */ + buf[i * 3 - 1] = '\0'; + } else { + buf[0] = '\0'; + } + } else { + buf = g_strdup(message); + } + } + + return buf; +} + void qemu_system_guest_panicked(GuestPanicInformation *info) { qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); @@ -572,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) S390CrashReason_str(info->u.s390.reason), info->u.s390.psw_mask, info->u.s390.psw_addr); + } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) { + char *message = tdx_parse_panic_message(info->u.tdx.message); + qemu_log_mask(LOG_GUEST_ERROR, + "\nTDX guest reports fatal error." + " error code: 0x%" PRIx32 " error message:\"%s\"\n", + info->u.tdx.error_code, message); + g_free(message); + if (info->u.tdx.gpa != -1ull) { + qemu_log_mask(LOG_GUEST_ERROR, "Additional error information " + "can be found at gpa page: 0x%" PRIx64 "\n", + info->u.tdx.gpa); + } } + qapi_free_GuestPanicInformation(info); } } @@ -584,6 +708,12 @@ void qemu_system_guest_crashloaded(GuestPanicInformation *info) qapi_free_GuestPanicInformation(info); } +void qemu_system_guest_pvshutdown(void) +{ + qapi_event_send_guest_pvshutdown(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); +} + void qemu_system_reset_request(ShutdownCause reason) { if (reboot_action == REBOOT_ACTION_SHUTDOWN && @@ -810,6 +940,7 @@ void qemu_remove_exit_notifier(Notifier *notify) static void qemu_run_exit_notifiers(void) { + BQL_LOCK_GUARD(); notifier_list_notify(&exit_notifiers, NULL); } diff --git a/system/tpm.c b/system/tpm.c index 7164ea7..8df0f6e 100644 --- a/system/tpm.c +++ b/system/tpm.c @@ -17,8 +17,8 @@ #include "qapi/error.h" #include "qapi/qapi-commands-tpm.h" #include "qapi/qmp/qerror.h" -#include "sysemu/tpm_backend.h" -#include "sysemu/tpm.h" +#include "system/tpm_backend.h" +#include "system/tpm.h" #include "qemu/config-file.h" #include "qemu/error-report.h" diff --git a/system/trace-events b/system/trace-events index 69c9044..82856e4 100644 --- a/system/trace-events +++ b/system/trace-events @@ -4,6 +4,13 @@ # Since requests are raised via monitor, not many tracepoints are needed. balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu" +# dma-helpers.c +dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d" +dma_aio_cancel(void *dbs) "dbs=%p" +dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p" +dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d" +dma_map_wait(void *dbs) "dbs=%p" + # ioport.c cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u" @@ -21,6 +28,13 @@ flatview_destroy(void *view, void *root) "%p (root %p)" flatview_destroy_rcu(void *view, void *root) "%p (root %p)" global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32 +# physmem.c +address_space_map(void *as, uint64_t addr, uint64_t len, bool is_write, uint32_t attrs) "as:%p addr 0x%"PRIx64":%"PRIx64" write:%d attrs:0x%x" +find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64 +find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64 +ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d" +qemu_ram_alloc_shared(const char *name, size_t size, size_t max_size, int fd, void *host) "%s size %zu max_size %zu fd %d host %p" + # cpus.c vm_stop_flush_all(int ret) "ret %d" @@ -38,3 +52,6 @@ dirtylimit_state_finalize(void) dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us" dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64 dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us" + +# ram-block-attributes.c +ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'" diff --git a/system/vl.c b/system/vl.c index cfcb674..3b7057e 100644 --- a/system/vl.c +++ b/system/vl.c @@ -26,25 +26,28 @@ #include "qemu/help-texts.h" #include "qemu/datadir.h" #include "qemu/units.h" +#include "qemu/module.h" +#include "qemu/target-info.h" #include "exec/cpu-common.h" #include "exec/page-vary.h" #include "hw/qdev-properties.h" #include "qapi/compat-policy.h" #include "qapi/error.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qmp/qstring.h" -#include "qapi/qmp/qjson.h" +#include "qobject/qdict.h" +#include "qobject/qstring.h" +#include "qobject/qjson.h" #include "qemu-version.h" #include "qemu/cutils.h" #include "qemu/help_option.h" #include "qemu/hw-version.h" #include "qemu/uuid.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" -#include "sysemu/runstate-action.h" -#include "sysemu/seccomp.h" -#include "sysemu/tcg.h" -#include "sysemu/xen.h" +#include "qemu/target-info.h" +#include "system/reset.h" +#include "system/runstate.h" +#include "system/runstate-action.h" +#include "system/seccomp.h" +#include "system/tcg.h" +#include "system/xen.h" #include "qemu/error-report.h" #include "qemu/sockets.h" @@ -53,6 +56,7 @@ #include "hw/usb.h" #include "hw/isa/isa.h" #include "hw/scsi/scsi.h" +#include "hw/sd/sd.h" #include "hw/display/vga.h" #include "hw/firmware/smbios.h" #include "hw/acpi/acpi.h" @@ -64,30 +68,32 @@ #include "monitor/monitor.h" #include "ui/console.h" #include "ui/input.h" -#include "sysemu/sysemu.h" -#include "sysemu/numa.h" -#include "sysemu/hostmem.h" +#include "system/system.h" +#include "system/numa.h" +#include "system/hostmem.h" #include "exec/gdbstub.h" #include "gdbstub/enums.h" #include "qemu/timer.h" #include "chardev/char.h" #include "qemu/bitmap.h" #include "qemu/log.h" -#include "sysemu/blockdev.h" +#include "system/blockdev.h" #include "hw/block/block.h" #include "hw/i386/x86.h" #include "hw/i386/pc.h" +#include "migration/cpr.h" #include "migration/misc.h" #include "migration/snapshot.h" -#include "sysemu/tpm.h" -#include "sysemu/dma.h" +#include "system/tpm.h" +#include "system/dma.h" #include "hw/audio/soundhw.h" #include "audio/audio.h" -#include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" +#include "system/cpus.h" +#include "system/cpu-timers.h" +#include "exec/icount.h" #include "migration/colo.h" #include "migration/postcopy-ram.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "qapi/qobject-input-visitor.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -95,7 +101,7 @@ #ifdef CONFIG_VIRTFS #include "fsdev/qemu-fsdev.h" #endif -#include "sysemu/qtest.h" +#include "system/qtest.h" #ifdef CONFIG_TCG #include "tcg/perf.h" #endif @@ -106,8 +112,8 @@ #include "trace/control.h" #include "qemu/plugin.h" #include "qemu/queue.h" -#include "sysemu/arch_init.h" -#include "exec/confidential-guest-support.h" +#include "system/arch_init.h" +#include "system/confidential-guest-support.h" #include "ui/qemu-spice.h" #include "qapi/string-input-visitor.h" @@ -116,13 +122,14 @@ #include "qom/object_interfaces.h" #include "semihosting/semihost.h" #include "crypto/init.h" -#include "sysemu/replay.h" +#include "system/replay.h" #include "qapi/qapi-events-run-state.h" #include "qapi/qapi-types-audio.h" #include "qapi/qapi-visit-audio.h" #include "qapi/qapi-visit-block-core.h" #include "qapi/qapi-visit-compat.h" #include "qapi/qapi-visit-machine.h" +#include "qapi/qapi-visit-migration.h" #include "qapi/qapi-visit-ui.h" #include "qapi/qapi-commands-block-core.h" #include "qapi/qapi-commands-migration.h" @@ -131,7 +138,7 @@ #include "qapi/qapi-commands-ui.h" #include "block/qdict.h" #include "qapi/qmp/qerror.h" -#include "sysemu/iothread.h" +#include "system/iothread.h" #include "qemu/guest-random.h" #include "qemu/keyval.h" @@ -159,6 +166,8 @@ typedef struct DeviceOption { static const char *cpu_option; static const char *mem_path; static const char *incoming; +static const char *incoming_str[MIGRATION_CHANNEL_TYPE__MAX]; +static MigrationChannel *incoming_channels[MIGRATION_CHANNEL_TYPE__MAX]; static const char *loadvm; static const char *accelerators; static bool have_custom_ram_size; @@ -190,7 +199,7 @@ static int default_parallel = 1; static int default_monitor = 1; static int default_floppy = 1; static int default_cdrom = 1; -static int default_sdcard = 1; +static bool auto_create_sdcard = true; static int default_vga = 1; static int default_net = 1; @@ -347,7 +356,7 @@ static QemuOptsList qemu_overcommit_opts = { .desc = { { .name = "mem-lock", - .type = QEMU_OPT_BOOL, + .type = QEMU_OPT_STRING, }, { .name = "cpu-pm", @@ -714,7 +723,7 @@ static void configure_blockdev(BlockdevOptionsQueue *bdo_queue, default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2, CDROM_OPTS); default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); - default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); + default_drive(auto_create_sdcard, snapshot, IF_SD, 0, SD_OPTS); } @@ -759,7 +768,7 @@ static QemuOptsList qemu_smp_opts = { }, }; -#if defined(CONFIG_POSIX) +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) static QemuOptsList qemu_run_with_opts = { .name = "run-with", .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), @@ -792,8 +801,8 @@ static QemuOptsList qemu_run_with_opts = { static void realtime_init(void) { - if (enable_mlock) { - if (os_mlock() < 0) { + if (should_mlock(mlock_state)) { + if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) { error_report("locking memory failed"); exit(1); } @@ -811,29 +820,15 @@ static void configure_msg(QemuOpts *opts) /***********************************************************/ /* USB devices */ -static int usb_device_add(const char *devname) +static bool usb_parse(const char *cmdline, Error **errp) { - USBDevice *dev = NULL; - - if (!machine_usb(current_machine)) { - return -1; - } - - dev = usbdevice_create(devname); - if (!dev) - return -1; - - return 0; -} + g_assert(machine_usb(current_machine)); -static int usb_parse(const char *cmdline) -{ - int r; - r = usb_device_add(cmdline); - if (r < 0) { - error_report("could not add USB device '%s'", cmdline); + if (!usbdevice_create(cmdline)) { + error_setg(errp, "could not add USB device '%s'", cmdline); + return false; } - return r; + return true; } /***********************************************************/ @@ -885,11 +880,11 @@ static void help(int exitcode) g_get_prgname()); #define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ - if ((arch_mask) & arch_type) \ + if (qemu_arch_available(arch_mask)) \ fputs(opt_help, stdout); #define ARCHHEADING(text, arch_mask) \ - if ((arch_mask) & arch_type) \ + if (qemu_arch_available(arch_mask)) \ puts(stringify(text)); #define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL) @@ -1000,9 +995,16 @@ static bool vga_interface_available(VGAInterfaceType t) const VGAInterfaceInfo *ti = &vga_interfaces[t]; assert(t < VGA_TYPE_MAX); - return !ti->class_names[0] || - module_object_class_by_name(ti->class_names[0]) || - module_object_class_by_name(ti->class_names[1]); + + if (!ti->class_names[0] || module_object_class_by_name(ti->class_names[0])) { + return true; + } + + if (ti->class_names[1] && module_object_class_by_name(ti->class_names[1])) { + return true; + } + + return false; } static const char * @@ -1177,7 +1179,8 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */ buf = g_memdup(str, size); } else if (nonempty_str(gen_id)) { - if (!fw_cfg_add_from_generator(fw_cfg, name, gen_id, errp)) { + if (!fw_cfg_add_file_from_generator(fw_cfg, object_get_objects_root(), + gen_id, name, errp)) { return -1; } return 0; @@ -1189,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) return -1; } } - /* For legacy, keep user files in a specific global order. */ - fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER); fw_cfg_add_file(fw_cfg, name, buf, size); - fw_cfg_reset_order_override(fw_cfg); return 0; } @@ -1299,22 +1299,27 @@ static void add_device_config(int type, const char *cmdline) QTAILQ_INSERT_TAIL(&device_configs, conf, next); } -static int foreach_device_config(int type, int (*func)(const char *cmdline)) +/** + * foreach_device_config_or_exit(): process per-device configs + * @type: device_config type + * @func: device specific config function, returning pass/fail + * + * @func is called with the &error_fatal handler so device specific + * error messages can be reported on failure. + */ +static void foreach_device_config_or_exit(int type, + bool (*func)(const char *cmdline, + Error **errp)) { struct device_config *conf; - int rc; QTAILQ_FOREACH(conf, &device_configs, next) { if (conf->type != type) continue; loc_push_restore(&conf->loc); - rc = func(conf->cmdline); + func(conf->cmdline, &error_fatal); loc_pop(&conf->loc); - if (rc) { - return rc; - } } - return 0; } static void qemu_disable_default_devices(void) @@ -1343,8 +1348,8 @@ static void qemu_disable_default_devices(void) if (!has_defaults || machine_class->no_cdrom) { default_cdrom = 0; } - if (!has_defaults || machine_class->no_sdcard) { - default_sdcard = 0; + if (!has_defaults || !machine_class->auto_create_sdcard) { + auto_create_sdcard = false; } if (!has_defaults) { default_audio = 0; @@ -1444,7 +1449,7 @@ static void qemu_create_default_devices(void) } } -static int serial_parse(const char *devname) +static bool serial_parse(const char *devname, Error **errp) { int index = num_serial_hds; @@ -1459,13 +1464,13 @@ static int serial_parse(const char *devname) serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); if (!serial_hds[index]) { - error_report("could not connect serial device" - " to character backend '%s'", devname); - return -1; + error_setg(errp, "could not connect serial device" + " to character backend '%s'", devname); + return false; } } num_serial_hds++; - return 0; + return true; } Chardev *serial_hd(int i) @@ -1477,47 +1482,47 @@ Chardev *serial_hd(int i) return NULL; } -static int parallel_parse(const char *devname) +static bool parallel_parse(const char *devname, Error **errp) { static int index = 0; char label[32]; if (strcmp(devname, "none") == 0) - return 0; + return true; if (index == MAX_PARALLEL_PORTS) { - error_report("too many parallel ports"); - exit(1); + error_setg(errp, "too many parallel ports"); + return false; } snprintf(label, sizeof(label), "parallel%d", index); parallel_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL); if (!parallel_hds[index]) { - error_report("could not connect parallel device" - " to character backend '%s'", devname); - return -1; + error_setg(errp, "could not connect parallel device" + " to character backend '%s'", devname); + return false; } index++; - return 0; + return true; } -static int debugcon_parse(const char *devname) +static bool debugcon_parse(const char *devname, Error **errp) { QemuOpts *opts; if (!qemu_chr_new_mux_mon("debugcon", devname, NULL)) { - error_report("invalid character backend '%s'", devname); - exit(1); + error_setg(errp, "invalid character backend '%s'", devname); + return false; } opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL); if (!opts) { - error_report("already have a debugcon device"); - exit(1); + error_setg(errp, "already have a debugcon device"); + return false; } qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort); qemu_opt_set(opts, "chardev", "debugcon", &error_abort); - return 0; + return true; } -static gint machine_class_cmp(gconstpointer a, gconstpointer b) +static gint machine_class_cmp(gconstpointer a, gconstpointer b, gpointer d) { const MachineClass *mc1 = a, *mc2 = b; int res; @@ -1557,7 +1562,7 @@ static void machine_help_func(const QDict *qdict) GSList *el; const char *type = qdict_get_try_str(qdict, "type"); - machines = object_class_get_list(TYPE_MACHINE, false); + machines = object_class_get_list(target_machine_typename(), false); if (type) { ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, machines)); if (machine_class) { @@ -1567,7 +1572,7 @@ static void machine_help_func(const QDict *qdict) } printf("Supported machines are:\n"); - machines = g_slist_sort(machines, machine_class_cmp); + machines = g_slist_sort_with_data(machines, machine_class_cmp, NULL); for (el = machines; el; el = el->next) { MachineClass *mc = el->data; if (mc->alias) { @@ -1665,28 +1670,27 @@ static const QEMUOption *lookup_opt(int argc, char **argv, static MachineClass *select_machine(QDict *qdict, Error **errp) { + ERRP_GUARD(); const char *machine_type = qdict_get_try_str(qdict, "type"); - GSList *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *machine_class; - Error *local_err = NULL; + g_autoptr(GSList) machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *machine_class = NULL; if (machine_type) { machine_class = find_machine(machine_type, machines); - qdict_del(qdict, "type"); if (!machine_class) { - error_setg(&local_err, "unsupported machine type"); + error_setg(errp, "unsupported machine type: \"%s\"", machine_type); } + qdict_del(qdict, "type"); } else { machine_class = find_default_machine(machines); if (!machine_class) { - error_setg(&local_err, "No machine specified, and there is no default"); + error_setg(errp, "No machine specified, and there is no default"); } } - g_slist_free(machines); - if (local_err) { - error_append_hint(&local_err, "Use -machine help to list supported machines\n"); - error_propagate(errp, local_err); + if (!machine_class) { + error_append_hint(errp, + "Use -machine help to list supported machines\n"); } return machine_class; } @@ -1815,6 +1819,30 @@ static void object_option_add_visitor(Visitor *v) QTAILQ_INSERT_TAIL(&object_opts, opt, next); } +static void incoming_option_parse(const char *str) +{ + MigrationChannelType type = MIGRATION_CHANNEL_TYPE_MAIN; + MigrationChannel *channel; + Visitor *v; + + if (!strcmp(str, "defer")) { + channel = NULL; + } else if (migrate_is_uri(str)) { + migrate_uri_parse(str, &channel, &error_fatal); + } else { + v = qobject_input_visitor_new_str(str, "channel-type", &error_fatal); + visit_type_MigrationChannel(v, NULL, &channel, &error_fatal); + visit_free(v); + type = channel->channel_type; + } + + /* New incoming spec replaces the previous */ + qapi_free_MigrationChannel(incoming_channels[type]); + incoming_channels[type] = channel; + incoming_str[type] = str; + incoming = incoming_str[MIGRATION_CHANNEL_TYPE_MAIN]; +} + static void object_option_parse(const char *str) { QemuOpts *opts; @@ -1835,7 +1863,8 @@ static void object_option_parse(const char *str) type = qemu_opt_get(opts, "qom-type"); if (!type) { - error_setg(&error_fatal, QERR_MISSING_PARAMETER, "qom-type"); + error_report(QERR_MISSING_PARAMETER, "qom-type"); + exit(1); } if (user_creatable_print_help(type, opts)) { exit(0); @@ -1848,6 +1877,44 @@ static void object_option_parse(const char *str) visit_free(v); } +static void overcommit_parse(const char *str) +{ + QemuOpts *opts; + const char *mem_lock_opt; + + opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"), + str, false); + if (!opts) { + exit(1); + } + + enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm); + + mem_lock_opt = qemu_opt_get(opts, "mem-lock"); + if (!mem_lock_opt) { + return; + } + + if (strcmp(mem_lock_opt, "on") == 0) { + mlock_state = MLOCK_ON; + return; + } + + if (strcmp(mem_lock_opt, "off") == 0) { + mlock_state = MLOCK_OFF; + return; + } + + if (strcmp(mem_lock_opt, "on-fault") == 0) { + mlock_state = MLOCK_ON_FAULT; + return; + } + + error_report("parameter 'mem-lock' expects one of " + "'on', 'off', 'on-fault'"); + exit(1); +} + /* * Very early object creation, before the sandbox options have been activated. */ @@ -1965,11 +2032,12 @@ static void qemu_create_early_backends(void) qemu_console_early_init(); - if (dpy.has_gl && dpy.gl != DISPLAYGL_MODE_OFF && display_opengl == 0) { + if (dpy.has_gl && dpy.gl != DISPLAY_GL_MODE_OFF && display_opengl == 0) { #if defined(CONFIG_OPENGL) - error_report("OpenGL is not supported by the display"); + error_report("OpenGL is not supported by display backend '%s'", + DisplayType_str(dpy.type)); #else - error_report("OpenGL support is disabled"); + error_report("OpenGL support was not enabled in this build of QEMU"); #endif exit(1); } @@ -2035,12 +2103,9 @@ static void qemu_create_late_backends(void) qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, &error_fatal); - if (foreach_device_config(DEV_SERIAL, serial_parse) < 0) - exit(1); - if (foreach_device_config(DEV_PARALLEL, parallel_parse) < 0) - exit(1); - if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) - exit(1); + foreach_device_config_or_exit(DEV_SERIAL, serial_parse); + foreach_device_config_or_exit(DEV_PARALLEL, parallel_parse); + foreach_device_config_or_exit(DEV_DEBUGCON, debugcon_parse); /* now chardevs have been created we may have semihosting to connect */ qemu_semihosting_chardev_init(); @@ -2104,6 +2169,19 @@ static void parse_memory_options(void) loc_pop(&loc); } +static void qemu_create_machine_containers(Object *machine) +{ + static const char *const containers[] = { + "unattached", + "peripheral", + "peripheral-anon", + }; + + for (unsigned i = 0; i < ARRAY_SIZE(containers); i++) { + object_property_add_new_container(machine, containers[i]); + } +} + static void qemu_create_machine(QDict *qdict) { MachineClass *machine_class = select_machine(qdict, &error_fatal); @@ -2112,8 +2190,8 @@ static void qemu_create_machine(QDict *qdict) current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class))); object_property_add_child(object_get_root(), "machine", OBJECT(current_machine)); - object_property_add_child(container_get(OBJECT(current_machine), - "/unattached"), + qemu_create_machine_containers(OBJECT(current_machine)); + object_property_add_child(machine_get_container("unattached"), "sysbus", OBJECT(sysbus_get_default())); if (machine_class->minimum_page_bits) { @@ -2354,6 +2432,7 @@ static void configure_accelerators(const char *progname) /* Select the default accelerator */ bool have_tcg = accel_find("tcg"); bool have_kvm = accel_find("kvm"); + bool have_hvf = accel_find("hvf"); if (have_tcg && have_kvm) { if (g_str_has_suffix(progname, "kvm")) { @@ -2366,6 +2445,8 @@ static void configure_accelerators(const char *progname) accelerators = "kvm"; } else if (have_tcg) { accelerators = "tcg"; + } else if (have_hvf) { + accelerators = "hvf"; } else { error_report("No accelerator selected and" " no default accelerator available"); @@ -2415,19 +2496,25 @@ static void configure_accelerators(const char *progname) static void qemu_validate_options(const QDict *machine_opts) { const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel"); + const char *shim_filename = qdict_get_try_str(machine_opts, "shim"); const char *initrd_filename = qdict_get_try_str(machine_opts, "initrd"); const char *kernel_cmdline = qdict_get_try_str(machine_opts, "append"); if (kernel_filename == NULL) { - if (kernel_cmdline != NULL) { - error_report("-append only allowed with -kernel option"); - exit(1); - } + if (kernel_cmdline != NULL) { + error_report("-append only allowed with -kernel option"); + exit(1); + } + + if (shim_filename != NULL) { + error_report("-shim only allowed with -kernel option"); + exit(1); + } - if (initrd_filename != NULL) { - error_report("-initrd only allowed with -kernel option"); - exit(1); - } + if (initrd_filename != NULL) { + error_report("-initrd only allowed with -kernel option"); + exit(1); + } } if (loadvm && incoming) { @@ -2614,12 +2701,27 @@ static void qemu_init_displays(void) static void qemu_init_board(void) { + MachineClass *machine_class = MACHINE_GET_CLASS(current_machine); + /* process plugin before CPUs are created, but once -smp has been parsed */ qemu_plugin_load_list(&plugin_list, &error_fatal); /* From here on we enter MACHINE_PHASE_INITIALIZED. */ machine_run_board_init(current_machine, mem_path, &error_fatal); + if (machine_class->auto_create_sdcard) { + bool ambigous; + + /* Ensure there is a SD bus available to create SD card on */ + Object *obj = object_resolve_path_type("", TYPE_SD_BUS, &ambigous); + if (!obj && !ambigous) { + fprintf(stderr, "Can not create sd-card on '%s' machine" + " because it lacks a sd-bus\n", + machine_class->name); + abort(); + } + } + drive_check_orphaned(); realtime_init(); @@ -2636,29 +2738,20 @@ static void qemu_create_cli_devices(void) /* init USB devices */ if (machine_usb(current_machine)) { - if (foreach_device_config(DEV_USB, usb_parse) < 0) - exit(1); + foreach_device_config_or_exit(DEV_USB, usb_parse); } /* init generic devices */ - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, &error_fatal); QTAILQ_FOREACH(opt, &device_opts, next) { - DeviceState *dev; + QObject *ret_data = NULL; + loc_push_restore(&opt->loc); - /* - * TODO Eventually we should call qmp_device_add() here to make sure it - * behaves the same, but QMP still has to accept incorrectly typed - * options until libvirt is fixed and we want to be strict on the CLI - * from the start, so call qdev_device_add_from_qdict() directly for - * now. - */ - dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); - object_unref(OBJECT(dev)); + qmp_device_add(opt->opts, &ret_data, &error_fatal); + assert(ret_data == NULL); /* error_fatal aborts */ loc_pop(&opt->loc); } - rom_reset_order_override(); } static bool qemu_machine_creation_done(Error **errp) @@ -2690,10 +2783,8 @@ static bool qemu_machine_creation_done(Error **errp) exit(1); } - if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) { - error_setg(errp, "could not start gdbserver"); - return false; - } + foreach_device_config_or_exit(DEV_GDB, gdbserver_start); + if (!vga_interface_created && !default_vga && vga_interface_type != VGA_NONE) { warn_report("A -vga option was passed but this machine " @@ -2728,8 +2819,11 @@ void qmp_x_exit_preconfig(Error **errp) if (incoming) { Error *local_err = NULL; if (strcmp(incoming, "defer") != 0) { - qmp_migrate_incoming(incoming, false, NULL, true, true, - &local_err); + g_autofree MigrationChannelList *channels = + g_new0(MigrationChannelList, 1); + + channels->value = incoming_channels[MIGRATION_CHANNEL_TYPE_MAIN]; + qmp_migrate_incoming(NULL, true, channels, true, true, &local_err); if (local_err) { error_reportf_err(local_err, "-incoming %s: ", incoming); exit(1); @@ -2790,7 +2884,10 @@ void qemu_init(int argc, char **argv) os_setup_limits(); - qemu_init_arch_modules(); +#ifdef CONFIG_MODULES + module_init_info(qemu_modinfo); + module_allow_arch(target_name()); +#endif qemu_init_subsystems(); @@ -2829,7 +2926,7 @@ void qemu_init(int argc, char **argv) const QEMUOption *popt; popt = lookup_opt(argc, argv, &optarg, &optind); - if (!(popt->arch_mask & arch_type)) { + if (!qemu_arch_available(popt->arch_mask)) { error_report("Option not supported for this target"); exit(1); } @@ -2903,20 +3000,12 @@ void qemu_init(int argc, char **argv) nographic = true; dpy.type = DISPLAY_TYPE_NONE; break; - case QEMU_OPTION_portrait: - graphic_rotate = 90; - break; - case QEMU_OPTION_rotate: - graphic_rotate = strtol(optarg, (char **) &optarg, 10); - if (graphic_rotate != 0 && graphic_rotate != 90 && - graphic_rotate != 180 && graphic_rotate != 270) { - error_report("only 90, 180, 270 deg rotation is available"); - exit(1); - } - break; case QEMU_OPTION_kernel: qdict_put_str(machine_opts_dict, "kernel", optarg); break; + case QEMU_OPTION_shim: + qdict_put_str(machine_opts_dict, "shim", optarg); + break; case QEMU_OPTION_initrd: qdict_put_str(machine_opts_dict, "initrd", optarg); break; @@ -3436,6 +3525,7 @@ void qemu_init(int argc, char **argv) nb_prom_envs++; break; case QEMU_OPTION_old_param: + warn_report("-old-param is deprecated"); old_param = 1; break; case QEMU_OPTION_rtc: @@ -3456,7 +3546,7 @@ void qemu_init(int argc, char **argv) if (!incoming) { runstate_set(RUN_STATE_INMIGRATE); } - incoming = optarg; + incoming_option_parse(optarg); break; case QEMU_OPTION_only_migratable: only_migratable = 1; @@ -3541,13 +3631,7 @@ void qemu_init(int argc, char **argv) object_option_parse(optarg); break; case QEMU_OPTION_overcommit: - opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"), - optarg, false); - if (!opts) { - exit(1); - } - enable_mlock = qemu_opt_get_bool(opts, "mem-lock", false); - enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false); + overcommit_parse(optarg); break; case QEMU_OPTION_compat: { @@ -3590,16 +3674,7 @@ void qemu_init(int argc, char **argv) case QEMU_OPTION_nouserconfig: /* Nothing to be parsed here. Especially, do not error out below. */ break; -#if defined(CONFIG_POSIX) - case QEMU_OPTION_runas: - warn_report("-runas is deprecated, use '-run-with user=...' instead"); - if (!os_set_runas(optarg)) { - error_report("User \"%s\" doesn't exist" - " (and is not <uid>:<gid>)", - optarg); - exit(1); - } - break; +#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN) case QEMU_OPTION_daemonize: os_set_daemonize(true); break; @@ -3683,6 +3758,12 @@ void qemu_init(int argc, char **argv) qemu_create_machine(machine_opts_dict); + /* + * Load incoming CPR state before any devices are created, because it + * contains file descriptors that are needed in device initialization code. + */ + cpr_state_load(incoming_channels[MIGRATION_CHANNEL_TYPE_CPR], &error_fatal); + suspend_mux_open(); qemu_disable_default_devices(); diff --git a/system/watchpoint.c b/system/watchpoint.c index 2aa2a9e..21d0bb3 100644 --- a/system/watchpoint.c +++ b/system/watchpoint.c @@ -19,7 +19,9 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "exec/exec-all.h" +#include "exec/cputlb.h" +#include "exec/target_page.h" +#include "exec/watchpoint.h" #include "hw/core/cpu.h" /* Add a watchpoint. */ |