aboutsummaryrefslogtreecommitdiff
path: root/system
diff options
context:
space:
mode:
Diffstat (limited to 'system')
-rw-r--r--system/arch_init.c26
-rw-r--r--system/async-teardown.c37
-rw-r--r--system/balloon.c4
-rw-r--r--system/bootdevice.c4
-rw-r--r--system/cpu-throttle.c128
-rw-r--r--system/cpu-timers.c14
-rw-r--r--system/cpus.c85
-rw-r--r--system/datadir.c5
-rw-r--r--system/device_tree-stub.c5
-rw-r--r--system/device_tree.c26
-rw-r--r--system/dirtylimit.c15
-rw-r--r--system/dma-helpers.c16
-rw-r--r--system/globals-target.c24
-rw-r--r--system/globals.c19
-rw-r--r--system/ioport.c9
-rw-r--r--system/main.c57
-rw-r--r--system/memory-internal.h57
-rw-r--r--system/memory.c217
-rw-r--r--system/memory_ldst.c.inc18
-rw-r--r--system/memory_mapping.c10
-rw-r--r--system/meson.build18
-rw-r--r--system/physmem.c652
-rw-r--r--system/qdev-monitor.c201
-rw-r--r--system/qemu-seccomp.c6
-rw-r--r--system/qtest.c94
-rw-r--r--system/ram-block-attributes.c444
-rw-r--r--system/rtc.c8
-rw-r--r--system/runstate-action.c4
-rw-r--r--system/runstate-hmp-cmds.c2
-rw-r--r--system/runstate.c167
-rw-r--r--system/tpm.c4
-rw-r--r--system/trace-events17
-rw-r--r--system/vl.c421
-rw-r--r--system/watchpoint.c4
34 files changed, 1920 insertions, 898 deletions
diff --git a/system/arch_init.c b/system/arch_init.c
index 79716f9..e857368 100644
--- a/system/arch_init.c
+++ b/system/arch_init.c
@@ -22,29 +22,9 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
-#include "qemu/module.h"
-#include "sysemu/arch_init.h"
+#include "system/arch_init.h"
-#ifdef TARGET_SPARC
-int graphic_width = 1024;
-int graphic_height = 768;
-int graphic_depth = 8;
-#elif defined(TARGET_M68K)
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 8;
-#else
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 32;
-#endif
-
-const uint32_t arch_type = QEMU_ARCH;
-
-void qemu_init_arch_modules(void)
+bool qemu_arch_available(unsigned qemu_arch_mask)
{
-#ifdef CONFIG_MODULES
- module_init_info(qemu_modinfo);
- module_allow_arch(TARGET_NAME);
-#endif
+ return qemu_arch_mask & QEMU_ARCH;
}
diff --git a/system/async-teardown.c b/system/async-teardown.c
index 396963c..9148ee8 100644
--- a/system/async-teardown.c
+++ b/system/async-teardown.c
@@ -26,40 +26,6 @@
static pid_t the_ppid;
-/*
- * Close all open file descriptors.
- */
-static void close_all_open_fd(void)
-{
- struct dirent *de;
- int fd, dfd;
- DIR *dir;
-
-#ifdef CONFIG_CLOSE_RANGE
- int r = close_range(0, ~0U, 0);
- if (!r) {
- /* Success, no need to try other ways. */
- return;
- }
-#endif
-
- dir = opendir("/proc/self/fd");
- if (!dir) {
- /* If /proc is not mounted, there is nothing that can be done. */
- return;
- }
- /* Avoid closing the directory. */
- dfd = dirfd(dir);
-
- for (de = readdir(dir); de; de = readdir(dir)) {
- fd = atoi(de->d_name);
- if (fd != dfd) {
- close(fd);
- }
- }
- closedir(dir);
-}
-
static void hup_handler(int signal)
{
/* Check every second if this process has been reparented. */
@@ -85,9 +51,8 @@ static int async_teardown_fn(void *arg)
/*
* Close all file descriptors that might have been inherited from the
* main qemu process when doing clone, needed to make libvirt happy.
- * Not using close_range for increased compatibility with older kernels.
*/
- close_all_open_fd();
+ qemu_close_all_open_fd(NULL, 0);
/* Set up a handler for SIGHUP and unblock SIGHUP. */
sigaction(SIGHUP, &sa, NULL);
diff --git a/system/balloon.c b/system/balloon.c
index fda7af8..311fa50 100644
--- a/system/balloon.c
+++ b/system/balloon.c
@@ -26,8 +26,8 @@
#include "qemu/osdep.h"
#include "qemu/atomic.h"
-#include "sysemu/kvm.h"
-#include "sysemu/balloon.h"
+#include "system/kvm.h"
+#include "system/balloon.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qmp/qerror.h"
diff --git a/system/bootdevice.c b/system/bootdevice.c
index 2579b26..1845be4 100644
--- a/system/bootdevice.c
+++ b/system/bootdevice.c
@@ -24,10 +24,10 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "sysemu/sysemu.h"
+#include "system/system.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
-#include "sysemu/reset.h"
+#include "system/reset.h"
#include "hw/qdev-core.h"
#include "hw/boards.h"
diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c
deleted file mode 100644
index c951a6c..0000000
--- a/system/cpu-throttle.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/thread.h"
-#include "hw/core/cpu.h"
-#include "qemu/main-loop.h"
-#include "sysemu/cpus.h"
-#include "sysemu/cpu-throttle.h"
-
-/* vcpu throttling controls */
-static QEMUTimer *throttle_timer;
-static unsigned int throttle_percentage;
-
-#define CPU_THROTTLE_PCT_MIN 1
-#define CPU_THROTTLE_PCT_MAX 99
-#define CPU_THROTTLE_TIMESLICE_NS 10000000
-
-static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
-{
- double pct;
- double throttle_ratio;
- int64_t sleeptime_ns, endtime_ns;
-
- if (!cpu_throttle_get_percentage()) {
- return;
- }
-
- pct = (double)cpu_throttle_get_percentage() / 100;
- throttle_ratio = pct / (1 - pct);
- /* Add 1ns to fix double's rounding error (like 0.9999999...) */
- sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
- endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
- while (sleeptime_ns > 0 && !cpu->stop) {
- if (sleeptime_ns > SCALE_MS) {
- qemu_cond_timedwait_bql(cpu->halt_cond,
- sleeptime_ns / SCALE_MS);
- } else {
- bql_unlock();
- g_usleep(sleeptime_ns / SCALE_US);
- bql_lock();
- }
- sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- }
- qatomic_set(&cpu->throttle_thread_scheduled, 0);
-}
-
-static void cpu_throttle_timer_tick(void *opaque)
-{
- CPUState *cpu;
- double pct;
-
- /* Stop the timer if needed */
- if (!cpu_throttle_get_percentage()) {
- return;
- }
- CPU_FOREACH(cpu) {
- if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
- async_run_on_cpu(cpu, cpu_throttle_thread,
- RUN_ON_CPU_NULL);
- }
- }
-
- pct = (double)cpu_throttle_get_percentage() / 100;
- timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
- CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
-}
-
-void cpu_throttle_set(int new_throttle_pct)
-{
- /*
- * boolean to store whether throttle is already active or not,
- * before modifying throttle_percentage
- */
- bool throttle_active = cpu_throttle_active();
-
- /* Ensure throttle percentage is within valid range */
- new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
- new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
-
- qatomic_set(&throttle_percentage, new_throttle_pct);
-
- if (!throttle_active) {
- cpu_throttle_timer_tick(NULL);
- }
-}
-
-void cpu_throttle_stop(void)
-{
- qatomic_set(&throttle_percentage, 0);
-}
-
-bool cpu_throttle_active(void)
-{
- return (cpu_throttle_get_percentage() != 0);
-}
-
-int cpu_throttle_get_percentage(void)
-{
- return qatomic_read(&throttle_percentage);
-}
-
-void cpu_throttle_init(void)
-{
- throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
- cpu_throttle_timer_tick, NULL);
-}
diff --git a/system/cpu-timers.c b/system/cpu-timers.c
index 0b31c9a..cb35fa6 100644
--- a/system/cpu-timers.c
+++ b/system/cpu-timers.c
@@ -27,16 +27,16 @@
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "sysemu/cpus.h"
+#include "system/cpus.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
-#include "sysemu/replay.h"
-#include "sysemu/runstate.h"
+#include "system/replay.h"
+#include "system/runstate.h"
#include "hw/core/cpu.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/cpu-throttle.h"
-#include "sysemu/cpu-timers-internal.h"
+#include "system/cpu-timers.h"
+#include "system/cpu-timers-internal.h"
+#include "exec/icount.h"
/* clock and ticks */
@@ -272,6 +272,4 @@ void cpu_timers_init(void)
seqlock_init(&timers_state.vm_clock_seqlock);
qemu_spin_init(&timers_state.vm_clock_lock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
-
- cpu_throttle_init();
}
diff --git a/system/cpus.c b/system/cpus.c
index d3640c9..d16b0df 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -31,18 +31,19 @@
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "exec/gdbstub.h"
-#include "sysemu/hw_accel.h"
+#include "system/accel-ops.h"
+#include "system/hw_accel.h"
#include "exec/cpu-common.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/plugin.h"
-#include "sysemu/cpus.h"
+#include "system/cpus.h"
#include "qemu/guest-random.h"
#include "hw/nmi.h"
-#include "sysemu/replay.h"
-#include "sysemu/runstate.h"
-#include "sysemu/cpu-timers.h"
-#include "sysemu/whpx.h"
+#include "system/replay.h"
+#include "system/runstate.h"
+#include "system/cpu-timers.h"
+#include "system/whpx.h"
#include "hw/boards.h"
#include "hw/hw.h"
#include "trace.h"
@@ -298,14 +299,18 @@ static int do_vm_stop(RunState state, bool send_stop)
if (oldstate == RUN_STATE_RUNNING) {
pause_all_vcpus();
}
- vm_state_notify(0, state);
+ ret = vm_state_notify(0, state);
if (send_stop) {
qapi_event_send_stop();
}
}
bdrv_drain_all();
- ret = bdrv_flush_all();
+ /*
+ * Even if vm_state_notify() return failure,
+ * it would be better to flush as before.
+ */
+ ret |= bdrv_flush_all();
trace_vm_stop_flush_all(ret);
return ret;
@@ -514,6 +519,20 @@ bool qemu_in_vcpu_thread(void)
QEMU_DEFINE_STATIC_CO_TLS(bool, bql_locked)
+static uint32_t bql_unlock_blocked;
+
+void bql_block_unlock(bool increase)
+{
+ uint32_t new_value;
+
+ assert(bql_locked());
+
+ /* check for overflow! */
+ new_value = bql_unlock_blocked + increase - !increase;
+ assert((new_value > bql_unlock_blocked) == increase);
+ bql_unlock_blocked = new_value;
+}
+
bool bql_locked(void)
{
return get_bql_locked();
@@ -524,6 +543,12 @@ bool qemu_in_main_thread(void)
return bql_locked();
}
+void rust_bql_mock_lock(void)
+{
+ error_report("This function should be used only from tests");
+ abort();
+}
+
/*
* The BQL is taken from so many places that it is worth profiling the
* callers directly, instead of funneling them all through a single function.
@@ -540,6 +565,7 @@ void bql_lock_impl(const char *file, int line)
void bql_unlock(void)
{
g_assert(bql_locked());
+ g_assert(!bql_unlock_blocked);
set_bql_locked(false);
qemu_mutex_unlock(&bql);
}
@@ -568,6 +594,22 @@ void cpu_thread_signal_destroyed(CPUState *cpu)
qemu_cond_signal(&qemu_cpu_cond);
}
+void cpu_pause(CPUState *cpu)
+{
+ if (qemu_cpu_is_self(cpu)) {
+ qemu_cpu_stop(cpu, true);
+ } else {
+ cpu->stop = true;
+ qemu_cpu_kick(cpu);
+ }
+}
+
+void cpu_resume(CPUState *cpu)
+{
+ cpu->stop = false;
+ cpu->stopped = false;
+ qemu_cpu_kick(cpu);
+}
static bool all_vcpus_paused(void)
{
@@ -588,12 +630,7 @@ void pause_all_vcpus(void)
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
- if (qemu_cpu_is_self(cpu)) {
- qemu_cpu_stop(cpu, true);
- } else {
- cpu->stop = true;
- qemu_cpu_kick(cpu);
- }
+ cpu_pause(cpu);
}
/* We need to drop the replay_lock so any vCPU threads woken up
@@ -613,13 +650,6 @@ void pause_all_vcpus(void)
bql_lock();
}
-void cpu_resume(CPUState *cpu)
-{
- cpu->stop = false;
- cpu->stopped = false;
- qemu_cpu_kick(cpu);
-}
-
void resume_all_vcpus(void)
{
CPUState *cpu;
@@ -662,7 +692,6 @@ void qemu_init_vcpu(CPUState *cpu)
{
MachineState *ms = MACHINE(qdev_get_machine());
- cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
cpu->nr_threads = ms->smp.threads;
cpu->stopped = true;
cpu->random_seed = qemu_guest_random_seed_thread_part1();
@@ -788,14 +817,14 @@ int vm_stop_force_state(RunState state)
}
}
-void qmp_memsave(int64_t addr, int64_t size, const char *filename,
+void qmp_memsave(uint64_t addr, uint64_t size, const char *filename,
bool has_cpu, int64_t cpu_index, Error **errp)
{
FILE *f;
- uint32_t l;
+ uint64_t l;
CPUState *cpu;
uint8_t buf[1024];
- int64_t orig_addr = addr, orig_size = size;
+ uint64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
@@ -819,7 +848,7 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename,
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
- error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
+ error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRIu64
" specified", orig_addr, orig_size);
goto exit;
}
@@ -836,11 +865,11 @@ exit:
fclose(f);
}
-void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
+void qmp_pmemsave(uint64_t addr, uint64_t size, const char *filename,
Error **errp)
{
FILE *f;
- uint32_t l;
+ uint64_t l;
uint8_t buf[1024];
f = fopen(filename, "wb");
diff --git a/system/datadir.c b/system/datadir.c
index c9237cb..f96f8fc 100644
--- a/system/datadir.c
+++ b/system/datadir.c
@@ -30,7 +30,7 @@
static const char *data_dir[16];
static int data_dir_idx;
-char *qemu_find_file(int type, const char *name)
+char *qemu_find_file(QemuFileType type, const char *name)
{
int i;
const char *subdir;
@@ -46,6 +46,9 @@ char *qemu_find_file(int type, const char *name)
case QEMU_FILE_TYPE_BIOS:
subdir = "";
break;
+ case QEMU_FILE_TYPE_DTB:
+ subdir = "dtb/";
+ break;
case QEMU_FILE_TYPE_KEYMAP:
subdir = "keymaps/";
break;
diff --git a/system/device_tree-stub.c b/system/device_tree-stub.c
index bddda6f..428330b 100644
--- a/system/device_tree-stub.c
+++ b/system/device_tree-stub.c
@@ -5,6 +5,9 @@
#ifdef CONFIG_FDT
void qmp_dumpdtb(const char *filename, Error **errp)
{
- error_setg(errp, "This machine doesn't have a FDT");
+ ERRP_GUARD();
+
+ error_setg(errp, "This machine doesn't have an FDT");
+ error_append_hint(errp, "(this machine type definitely doesn't use FDT)\n");
}
#endif
diff --git a/system/device_tree.c b/system/device_tree.c
index 2e38259..aa3fe95 100644
--- a/system/device_tree.c
+++ b/system/device_tree.c
@@ -23,12 +23,12 @@
#include "qemu/bswap.h"
#include "qemu/cutils.h"
#include "qemu/guest-random.h"
-#include "sysemu/device_tree.h"
+#include "system/device_tree.h"
#include "hw/loader.h"
#include "hw/boards.h"
#include "qemu/config-file.h"
#include "qapi/qapi-commands-machine.h"
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
#include "monitor/hmp.h"
#include <libfdt.h>
@@ -594,21 +594,6 @@ int qemu_fdt_add_path(void *fdt, const char *path)
return retval;
}
-void qemu_fdt_dumpdtb(void *fdt, int size)
-{
- const char *dumpdtb = current_machine->dumpdtb;
-
- if (dumpdtb) {
- /* Dump the dtb to a file and quit */
- if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
- info_report("dtb dumped to %s. Exiting.", dumpdtb);
- exit(0);
- }
- error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
- exit(1);
- }
-}
-
int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
const char *node_path,
const char *property,
@@ -650,11 +635,16 @@ out:
void qmp_dumpdtb(const char *filename, Error **errp)
{
+ ERRP_GUARD();
+
g_autoptr(GError) err = NULL;
uint32_t size;
if (!current_machine->fdt) {
- error_setg(errp, "This machine doesn't have a FDT");
+ error_setg(errp, "This machine doesn't have an FDT");
+ error_append_hint(errp,
+ "(Perhaps it doesn't support FDT at all, or perhaps "
+ "you need to provide an FDT with the -fdt option?)\n");
return;
}
diff --git a/system/dirtylimit.c b/system/dirtylimit.c
index ab20da3..b48c0d4 100644
--- a/system/dirtylimit.c
+++ b/system/dirtylimit.c
@@ -13,16 +13,16 @@
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
#include "qapi/error.h"
-#include "sysemu/dirtyrate.h"
-#include "sysemu/dirtylimit.h"
+#include "system/dirtyrate.h"
+#include "system/dirtylimit.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "exec/target_page.h"
#include "hw/boards.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "trace.h"
#include "migration/misc.h"
@@ -80,8 +80,7 @@ static void vcpu_dirty_rate_stat_collect(void)
int i = 0;
int64_t period = DIRTYLIMIT_CALC_TIME_MS;
- if (migrate_dirty_limit() &&
- migration_is_active()) {
+ if (migrate_dirty_limit() && migration_is_running()) {
period = migrate_vcpu_dirty_limit_period();
}
@@ -338,8 +337,6 @@ static void dirtylimit_adjust_throttle(CPUState *cpu)
if (!dirtylimit_done(quota, current)) {
dirtylimit_set_throttle(cpu, quota, current);
}
-
- return;
}
void dirtylimit_process(void)
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index 7401330..0d592f6 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -8,12 +8,12 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/block-backend.h"
-#include "sysemu/dma.h"
-#include "trace/trace-root.h"
+#include "system/block-backend.h"
+#include "system/dma.h"
+#include "trace.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
-#include "sysemu/cpu-timers.h"
+#include "exec/icount.h"
#include "qemu/range.h"
/* #define DEBUG_IOMMU */
@@ -211,7 +211,7 @@ static const AIOCBInfo dma_aiocb_info = {
.cancel_async = dma_aio_cancel,
};
-BlockAIOCB *dma_blk_io(AioContext *ctx,
+BlockAIOCB *dma_blk_io(
QEMUSGList *sg, uint64_t offset, uint32_t align,
DMAIOFunc *io_func, void *io_func_opaque,
BlockCompletionFunc *cb,
@@ -223,7 +223,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx,
dbs->acb = NULL;
dbs->sg = sg;
- dbs->ctx = ctx;
+ dbs->ctx = qemu_get_current_aio_context();
dbs->offset = offset;
dbs->align = align;
dbs->sg_cur_index = 0;
@@ -251,7 +251,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+ return dma_blk_io(sg, offset, align,
dma_blk_read_io_func, blk, cb, opaque,
DMA_DIRECTION_FROM_DEVICE);
}
@@ -269,7 +269,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
QEMUSGList *sg, uint64_t offset, uint32_t align,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+ return dma_blk_io(sg, offset, align,
dma_blk_write_io_func, blk, cb, opaque,
DMA_DIRECTION_TO_DEVICE);
}
diff --git a/system/globals-target.c b/system/globals-target.c
new file mode 100644
index 0000000..9897205
--- /dev/null
+++ b/system/globals-target.c
@@ -0,0 +1,24 @@
+/*
+ * Global variables that should not exist (target specific)
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "qemu/osdep.h"
+#include "system/system.h"
+
+#ifdef TARGET_SPARC
+int graphic_width = 1024;
+int graphic_height = 768;
+int graphic_depth = 8;
+#elif defined(TARGET_M68K)
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 8;
+#else
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 32;
+#endif
diff --git a/system/globals.c b/system/globals.c
index e353584..9640c95 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -28,19 +28,28 @@
#include "hw/loader.h"
#include "hw/xen/xen.h"
#include "net/net.h"
-#include "sysemu/cpus.h"
-#include "sysemu/sysemu.h"
+#include "system/cpus.h"
+#include "system/system.h"
+
+bool should_mlock(MlockState state)
+{
+ return state == MLOCK_ON || state == MLOCK_ON_FAULT;
+}
+
+bool is_mlock_on_fault(MlockState state)
+{
+ return state == MLOCK_ON_FAULT;
+}
enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
int display_opengl;
const char* keyboard_layout;
-bool enable_mlock;
+MlockState mlock_state;
bool enable_cpu_pm;
int autostart = 1;
int vga_interface_type = VGA_NONE;
bool vga_interface_created;
Chardev *parallel_hds[MAX_PARALLEL_PORTS];
-int graphic_rotate;
QEMUOptionRom option_rom[MAX_OPTION_ROMS];
int nb_option_roms;
int old_param;
@@ -49,7 +58,6 @@ unsigned int nb_prom_envs;
const char *prom_envs[MAX_PROM_ENVS];
uint8_t *boot_splash_filedata;
int only_migratable; /* turn it off unless user states otherwise */
-int icount_align_option;
/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
* little-endian "wire format" described in the SMBIOS 2.6 specification.
@@ -60,6 +68,7 @@ bool qemu_uuid_set;
uint32_t xen_domid;
enum xen_mode xen_mode = XEN_DISABLED;
bool xen_domid_restrict;
+bool xen_is_stubdomain;
struct evtchn_backend_ops *xen_evtchn_ops;
struct gnttab_backend_ops *xen_gnttab_ops;
struct foreignmem_backend_ops *xen_foreignmem_ops;
diff --git a/system/ioport.c b/system/ioport.c
index fd551d0..4f96e91 100644
--- a/system/ioport.c
+++ b/system/ioport.c
@@ -26,10 +26,9 @@
*/
#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/ioport.h"
-#include "exec/memory.h"
-#include "exec/address-spaces.h"
+#include "system/ioport.h"
+#include "system/memory.h"
+#include "system/address-spaces.h"
#include "trace.h"
struct MemoryRegionPortioList {
@@ -258,7 +257,7 @@ static void portio_list_add_1(PortioList *piolist,
object_ref(&mrpio->mr);
object_unparent(OBJECT(&mrpio->mr));
if (!piolist->owner) {
- owner = container_get(qdev_get_machine(), "/unattached");
+ owner = machine_get_container("unattached");
} else {
owner = piolist->owner;
}
diff --git a/system/main.c b/system/main.c
index 9b91d21..b8f7157 100644
--- a/system/main.c
+++ b/system/main.c
@@ -24,26 +24,73 @@
#include "qemu/osdep.h"
#include "qemu-main.h"
-#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "system/replay.h"
+#include "system/system.h"
#ifdef CONFIG_SDL
+/*
+ * SDL insists on wrapping the main() function with its own implementation on
+ * some platforms; it does so via a macro that renames our main function, so
+ * <SDL.h> must be #included here even with no SDL code called from this file.
+ */
#include <SDL.h>
#endif
-int qemu_default_main(void)
+#ifdef CONFIG_DARWIN
+#include <CoreFoundation/CoreFoundation.h>
+#endif
+
+static void *qemu_default_main(void *opaque)
{
int status;
+ replay_mutex_lock();
+ bql_lock();
status = qemu_main_loop();
qemu_cleanup(status);
+ bql_unlock();
+ replay_mutex_unlock();
- return status;
+ exit(status);
}
-int (*qemu_main)(void) = qemu_default_main;
+int (*qemu_main)(void);
+
+#ifdef CONFIG_DARWIN
+static int os_darwin_cfrunloop_main(void)
+{
+ CFRunLoopRun();
+ g_assert_not_reached();
+}
+int (*qemu_main)(void) = os_darwin_cfrunloop_main;
+#endif
int main(int argc, char **argv)
{
qemu_init(argc, argv);
- return qemu_main();
+
+ /*
+ * qemu_init acquires the BQL and replay mutex lock. BQL is acquired when
+ * initializing cpus, to block associated threads until initialization is
+ * complete. Replay_mutex lock is acquired on initialization, because it
+ * must be held when configuring icount_mode.
+ *
+ * On MacOS, qemu main event loop runs in a background thread, as main
+ * thread must be reserved for UI. Thus, we need to transfer lock ownership,
+ * and the simplest way to do that is to release them, and reacquire them
+ * from qemu_default_main.
+ */
+ bql_unlock();
+ replay_mutex_unlock();
+
+ if (qemu_main) {
+ QemuThread main_loop_thread;
+ qemu_thread_create(&main_loop_thread, "qemu_main",
+ qemu_default_main, NULL, QEMU_THREAD_DETACHED);
+ return qemu_main();
+ } else {
+ qemu_default_main(NULL);
+ g_assert_not_reached();
+ }
}
diff --git a/system/memory-internal.h b/system/memory-internal.h
new file mode 100644
index 0000000..46f758f
--- /dev/null
+++ b/system/memory-internal.h
@@ -0,0 +1,57 @@
+/*
+ * Declarations for functions which are internal to the memory subsystem.
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Avi Kivity <avi@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MEMORY_INTERNAL_H
+#define MEMORY_INTERNAL_H
+
+#ifndef CONFIG_USER_ONLY
+static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv)
+{
+ return fv->dispatch;
+}
+
+static inline AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as)
+{
+ return flatview_to_dispatch(address_space_to_flatview(as));
+}
+
+FlatView *address_space_get_flatview(AddressSpace *as);
+void flatview_unref(FlatView *view);
+
+extern const MemoryRegionOps unassigned_mem_ops;
+
+void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section);
+AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv);
+void address_space_dispatch_compact(AddressSpaceDispatch *d);
+void address_space_dispatch_free(AddressSpaceDispatch *d);
+
+void mtree_print_dispatch(struct AddressSpaceDispatch *d,
+ MemoryRegion *root);
+
+/* returns true if end is big endian. */
+static inline bool devend_big_endian(enum device_endian end)
+{
+ if (end == DEVICE_NATIVE_ENDIAN) {
+ return target_big_endian();
+ }
+ return end == DEVICE_BIG_ENDIAN;
+}
+
+/* enum device_endian to MemOp. */
+static inline MemOp devend_memop(enum device_endian end)
+{
+ return devend_big_endian(end) ? MO_BE : MO_LE;
+}
+
+#endif
+#endif
diff --git a/system/memory.c b/system/memory.c
index 47c600d..76b44b8 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -16,7 +16,7 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qapi/error.h"
-#include "exec/memory.h"
+#include "system/memory.h"
#include "qapi/visitor.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
@@ -24,16 +24,16 @@
#include "qemu/qemu-print.h"
#include "qom/object.h"
#include "trace.h"
-
-#include "exec/memory-internal.h"
-#include "exec/ram_addr.h"
-#include "sysemu/kvm.h"
-#include "sysemu/runstate.h"
-#include "sysemu/tcg.h"
+#include "system/ram_addr.h"
+#include "system/kvm.h"
+#include "system/runstate.h"
+#include "system/tcg.h"
#include "qemu/accel.h"
#include "hw/boards.h"
#include "migration/vmstate.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
+
+#include "memory-internal.h"
//#define DEBUG_UNASSIGNED
@@ -353,15 +353,6 @@ static void flatview_simplify(FlatView *view)
}
}
-static bool memory_region_big_endian(MemoryRegion *mr)
-{
-#if TARGET_BIG_ENDIAN
- return mr->ops->endianness != DEVICE_LITTLE_ENDIAN;
-#else
- return mr->ops->endianness == DEVICE_BIG_ENDIAN;
-#endif
-}
-
static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
{
if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) {
@@ -563,7 +554,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
/* FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
- if (memory_region_big_endian(mr)) {
+ if (devend_big_endian(mr->ops->endianness)) {
for (i = 0; i < size; i += access_size) {
r |= access_fn(mr, addr + i, value, access_size,
(size - access_size - i) * 8, access_mask, attrs);
@@ -941,6 +932,38 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as)
}
}
+static void
+flat_range_coalesced_io_notify_listener_add_del(FlatRange *fr,
+ MemoryRegionSection *mrs,
+ MemoryListener *listener,
+ AddressSpace *as, bool add)
+{
+ CoalescedMemoryRange *cmr;
+ MemoryRegion *mr = fr->mr;
+ AddrRange tmp;
+
+ QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
+ tmp = addrrange_shift(cmr->addr,
+ int128_sub(fr->addr.start,
+ int128_make64(fr->offset_in_region)));
+
+ if (!addrrange_intersects(tmp, fr->addr)) {
+ return;
+ }
+ tmp = addrrange_intersection(tmp, fr->addr);
+
+ if (add && listener->coalesced_io_add) {
+ listener->coalesced_io_add(listener, mrs,
+ int128_get64(tmp.start),
+ int128_get64(tmp.size));
+ } else if (!add && listener->coalesced_io_del) {
+ listener->coalesced_io_del(listener, mrs,
+ int128_get64(tmp.start),
+ int128_get64(tmp.size));
+ }
+ }
+}
+
static void address_space_update_topology_pass(AddressSpace *as,
const FlatView *old_view,
const FlatView *new_view,
@@ -1206,7 +1229,7 @@ static void memory_region_do_init(MemoryRegion *mr,
char *name_array = g_strdup_printf("%s[*]", escaped_name);
if (!owner) {
- owner = container_get(qdev_get_machine(), "/unattached");
+ owner = machine_get_container("unattached");
}
object_property_add_child(owner, name_array, OBJECT(mr));
@@ -1359,7 +1382,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr,
static const MemoryRegionOps ram_device_mem_ops = {
.read = memory_region_ram_device_read,
.write = memory_region_ram_device_write,
- .endianness = DEVICE_HOST_ENDIAN,
+ .endianness = HOST_BIG_ENDIAN ? DEVICE_BIG_ENDIAN : DEVICE_LITTLE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
@@ -1380,7 +1403,7 @@ bool memory_region_access_valid(MemoryRegion *mr,
{
if (mr->ops->valid.accepts
&& !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) {
- qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX
", size %u, region '%s', reason: rejected\n",
is_write ? "write" : "read",
addr, size, memory_region_name(mr));
@@ -1388,7 +1411,7 @@ bool memory_region_access_valid(MemoryRegion *mr,
}
if (!mr->ops->valid.unaligned && (addr & (size - 1))) {
- qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX
", size %u, region '%s', reason: unaligned\n",
is_write ? "write" : "read",
addr, size, memory_region_name(mr));
@@ -1402,7 +1425,7 @@ bool memory_region_access_valid(MemoryRegion *mr,
if (size > mr->ops->valid.max_access_size
|| size < mr->ops->valid.min_access_size) {
- qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ qemu_log_mask(LOG_INVALID_MEM, "Invalid %s at addr 0x%" HWADDR_PRIX
", size %u, region '%s', reason: invalid size "
"(min:%u max:%u)\n",
is_write ? "write" : "read",
@@ -1604,7 +1627,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
return true;
}
-#ifdef CONFIG_POSIX
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
bool memory_region_init_ram_from_file(MemoryRegion *mr,
Object *owner,
const char *name,
@@ -1648,8 +1671,8 @@ bool memory_region_init_ram_from_fd(MemoryRegion *mr,
mr->readonly = !!(ram_flags & RAM_READONLY);
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
- &err);
+ mr->ram_block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd,
+ offset, false, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@@ -1901,32 +1924,6 @@ static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr,
return ret;
}
-int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
- uint64_t page_size_mask,
- Error **errp)
-{
- IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
- int ret = 0;
-
- if (imrc->iommu_set_page_size_mask) {
- ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp);
- }
- return ret;
-}
-
-int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,
- GList *iova_ranges,
- Error **errp)
-{
- IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
- int ret = 0;
-
- if (imrc->iommu_set_iova_ranges) {
- ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp);
- }
- return ret;
-}
-
int memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n, Error **errp)
{
@@ -2109,12 +2106,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
return mr->rdm;
}
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
- RamDiscardManager *rdm)
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm)
{
g_assert(memory_region_is_ram(mr));
- g_assert(!rdm || !mr->rdm);
+ if (mr->rdm && rdm) {
+ return -EBUSY;
+ }
+
mr->rdm = rdm;
+ return 0;
}
uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
@@ -2137,7 +2138,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
@@ -2146,15 +2147,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
return rdmc->replay_populated(rdm, section, replay_fn, opaque);
}
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn,
- void *opaque)
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
g_assert(rdmc->replay_discarded);
- rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+ return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
}
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
@@ -2177,18 +2178,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
}
/* Called with rcu_read_lock held. */
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp)
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
MemoryRegion *mr;
hwaddr xlat;
hwaddr len = iotlb->addr_mask + 1;
bool writable = iotlb->perm & IOMMU_WO;
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = false;
- }
/*
* The IOMMU TLB entry we have just covers translation through
* this IOMMU to its immediate target. We need to translate
@@ -2198,7 +2195,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
&xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
if (!memory_region_is_ram(mr)) {
error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat);
- return false;
+ return NULL;
} else if (memory_region_has_ram_discard_manager(mr)) {
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
MemoryRegionSection tmp = {
@@ -2206,9 +2203,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
.offset_within_region = xlat,
.size = int128_make64(len),
};
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = true;
- }
/*
* Malicious VMs can map memory into the IOMMU, which is expected
* to remain discarded. vfio will pin all pages, populating memory.
@@ -2219,7 +2213,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_setg(errp, "iommu map to discarded memory (e.g., unplugged"
" via virtio-mem): %" HWADDR_PRIx "",
iotlb->translated_addr);
- return false;
+ return NULL;
}
}
@@ -2229,22 +2223,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*/
if (len & iotlb->addr_mask) {
error_setg(errp, "iommu has granularity incompatible with target AS");
- return false;
- }
-
- if (vaddr) {
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- }
-
- if (ram_addr) {
- *ram_addr = memory_region_get_ram_addr(mr) + xlat;
- }
-
- if (read_only) {
- *read_only = !writable || mr->readonly;
+ return NULL;
}
- return true;
+ *xlat_p = xlat;
+ return mr;
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
@@ -2578,7 +2561,8 @@ void memory_region_add_eventfd(MemoryRegion *mr,
unsigned i;
if (size) {
- adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE);
+ MemOp mop = (target_big_endian() ? MO_BE : MO_LE) | size_memop(size);
+ adjust_endianness(mr, &mrfd.data, mop);
}
memory_region_transaction_begin();
for (i = 0; i < mr->ioeventfd_nb; ++i) {
@@ -2613,7 +2597,8 @@ void memory_region_del_eventfd(MemoryRegion *mr,
unsigned i;
if (size) {
- adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE);
+ MemOp mop = (target_big_endian() ? MO_BE : MO_LE) | size_memop(size);
+ adjust_endianness(mr, &mrfd.data, mop);
}
memory_region_transaction_begin();
for (i = 0; i < mr->ioeventfd_nb; ++i) {
@@ -3041,8 +3026,10 @@ void memory_global_dirty_log_stop(unsigned int flags)
static void listener_add_address_space(MemoryListener *listener,
AddressSpace *as)
{
+ unsigned i;
FlatView *view;
FlatRange *fr;
+ MemoryRegionIoeventfd *fd;
if (listener->begin) {
listener->begin(listener);
@@ -3067,10 +3054,34 @@ static void listener_add_address_space(MemoryListener *listener,
if (listener->region_add) {
listener->region_add(listener, &section);
}
+
+ /* send coalesced io add notifications */
+ flat_range_coalesced_io_notify_listener_add_del(fr, &section,
+ listener, as, true);
+
if (fr->dirty_log_mask && listener->log_start) {
listener->log_start(listener, &section, 0, fr->dirty_log_mask);
}
}
+
+ /*
+ * register all eventfds for this address space for the newly registered
+ * listener.
+ */
+ for (i = 0; i < as->ioeventfd_nb; i++) {
+ fd = &as->ioeventfds[i];
+ MemoryRegionSection section = (MemoryRegionSection) {
+ .fv = view,
+ .offset_within_address_space = int128_get64(fd->addr.start),
+ .size = fd->addr.size,
+ };
+
+ if (listener->eventfd_add) {
+ listener->eventfd_add(listener, &section,
+ fd->match_data, fd->data, fd->e);
+ }
+ }
+
if (listener->commit) {
listener->commit(listener);
}
@@ -3080,8 +3091,10 @@ static void listener_add_address_space(MemoryListener *listener,
static void listener_del_address_space(MemoryListener *listener,
AddressSpace *as)
{
+ unsigned i;
FlatView *view;
FlatRange *fr;
+ MemoryRegionIoeventfd *fd;
if (listener->begin) {
listener->begin(listener);
@@ -3093,10 +3106,33 @@ static void listener_del_address_space(MemoryListener *listener,
if (fr->dirty_log_mask && listener->log_stop) {
listener->log_stop(listener, &section, fr->dirty_log_mask, 0);
}
+
+ /* send coalesced io del notifications */
+ flat_range_coalesced_io_notify_listener_add_del(fr, &section,
+ listener, as, false);
if (listener->region_del) {
listener->region_del(listener, &section);
}
}
+
+ /*
+ * de-register all eventfds for this address space for the current
+ * listener.
+ */
+ for (i = 0; i < as->ioeventfd_nb; i++) {
+ fd = &as->ioeventfds[i];
+ MemoryRegionSection section = (MemoryRegionSection) {
+ .fv = view,
+ .offset_within_address_space = int128_get64(fd->addr.start),
+ .size = fd->addr.size,
+ };
+
+ if (listener->eventfd_del) {
+ listener->eventfd_del(listener, &section,
+ fd->match_data, fd->data, fd->e);
+ }
+ }
+
if (listener->commit) {
listener->commit(listener);
}
@@ -3174,7 +3210,8 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
as->ioeventfds = NULL;
QTAILQ_INIT(&as->listeners);
QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
- as->bounce.in_use = false;
+ as->max_bounce_buffer_size = DEFAULT_MAX_BOUNCE_BUFFER_SIZE;
+ as->bounce_buffer_size = 0;
qemu_mutex_init(&as->map_client_list_lock);
QLIST_INIT(&as->map_client_list);
as->name = g_strdup(name ? name : "anonymous");
@@ -3184,7 +3221,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
static void do_address_space_destroy(AddressSpace *as)
{
- assert(!qatomic_read(&as->bounce.in_use));
+ assert(qatomic_read(&as->bounce_buffer_size) == 0);
assert(QLIST_EMPTY(&as->map_client_list));
qemu_mutex_destroy(&as->map_client_list_lock);
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 0e6f394..7f32d3d 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -34,7 +34,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 4 || !memory_access_is_direct(mr, false)) {
+ if (l < 4 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -103,7 +103,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 8 || !memory_access_is_direct(mr, false)) {
+ if (l < 8 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -170,7 +170,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (!memory_access_is_direct(mr, false)) {
+ if (!memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -207,7 +207,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, false, attrs);
- if (l < 2 || !memory_access_is_direct(mr, false)) {
+ if (l < 2 || !memory_access_is_direct(mr, false, attrs)) {
release_lock |= prepare_mmio_access(mr);
/* I/O case */
@@ -277,7 +277,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
+ if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val, MO_32, attrs);
@@ -314,7 +314,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
+ if (l < 4 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_32 | devend_memop(endian), attrs);
@@ -377,7 +377,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (!memory_access_is_direct(mr, true)) {
+ if (!memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val, MO_8, attrs);
} else {
@@ -410,7 +410,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 2 || !memory_access_is_direct(mr, true)) {
+ if (l < 2 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_16 | devend_memop(endian), attrs);
@@ -474,7 +474,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL,
RCU_READ_LOCK();
mr = TRANSLATE(addr, &addr1, &l, true, attrs);
- if (l < 8 || !memory_access_is_direct(mr, true)) {
+ if (l < 8 || !memory_access_is_direct(mr, true, attrs)) {
release_lock |= prepare_mmio_access(mr);
r = memory_region_dispatch_write(mr, addr1, val,
MO_64 | devend_memop(endian), attrs);
diff --git a/system/memory_mapping.c b/system/memory_mapping.c
index 6f884c5..da708a0 100644
--- a/system/memory_mapping.c
+++ b/system/memory_mapping.c
@@ -12,11 +12,12 @@
*/
#include "qemu/osdep.h"
+#include "qemu/range.h"
#include "qapi/error.h"
-#include "sysemu/memory_mapping.h"
-#include "exec/memory.h"
-#include "exec/address-spaces.h"
+#include "system/memory_mapping.h"
+#include "system/memory.h"
+#include "system/address-spaces.h"
#include "hw/core/cpu.h"
//#define DEBUG_GUEST_PHYS_REGION_ADD
@@ -353,8 +354,7 @@ void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
MemoryMapping *cur, *next;
QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
- if (cur->phys_addr >= begin + length ||
- cur->phys_addr + cur->length <= begin) {
+ if (!ranges_overlap(cur->phys_addr, cur->length, begin, length)) {
QTAILQ_REMOVE(&list->head, cur, next);
g_free(cur);
list->num--;
diff --git a/system/meson.build b/system/meson.build
index a296270..6d21ff9 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -1,22 +1,26 @@
specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files(
'arch_init.c',
- 'ioport.c',
- 'memory.c',
- 'physmem.c',
- 'watchpoint.c',
+ 'globals-target.c',
)])
system_ss.add(files(
+ 'vl.c',
+), sdl, libpmem, libdaxctl)
+
+system_ss.add(files(
'balloon.c',
'bootdevice.c',
'cpus.c',
- 'cpu-throttle.c',
'cpu-timers.c',
'datadir.c',
'dirtylimit.c',
'dma-helpers.c',
'globals.c',
+ 'ioport.c',
+ 'ram-block-attributes.c',
'memory_mapping.c',
+ 'memory.c',
+ 'physmem.c',
'qdev-monitor.c',
'qtest.c',
'rtc.c',
@@ -24,8 +28,8 @@ system_ss.add(files(
'runstate-hmp-cmds.c',
'runstate.c',
'tpm-hmp-cmds.c',
- 'vl.c',
-), sdl, libpmem, libdaxctl)
+ 'watchpoint.c',
+))
if have_tpm
system_ss.add(files('tpm.c'))
diff --git a/system/physmem.c b/system/physmem.c
index 33d09f7..ff0ca40 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -28,32 +28,35 @@
#include "qemu/lockable.h"
#ifdef CONFIG_TCG
-#include "hw/core/tcg-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
+#include "accel/tcg/iommu.h"
#endif /* CONFIG_TCG */
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
#include "exec/page-protection.h"
#include "exec/target_page.h"
+#include "exec/translation-block.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "hw/boards.h"
-#include "sysemu/xen.h"
-#include "sysemu/kvm.h"
-#include "sysemu/tcg.h"
-#include "sysemu/qtest.h"
+#include "system/xen.h"
+#include "system/kvm.h"
+#include "system/tcg.h"
+#include "system/qtest.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
-#include "exec/memory.h"
-#include "exec/ioport.h"
-#include "sysemu/dma.h"
-#include "sysemu/hostmem.h"
-#include "sysemu/hw_accel.h"
-#include "sysemu/xen-mapcache.h"
-#include "trace/trace-root.h"
+#include "qemu/memfd.h"
+#include "system/memory.h"
+#include "system/ioport.h"
+#include "system/dma.h"
+#include "system/hostmem.h"
+#include "system/hw_accel.h"
+#include "system/xen-mapcache.h"
+#include "trace.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
@@ -61,14 +64,16 @@
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
-#include "exec/translate-all.h"
-#include "sysemu/replay.h"
+#include "system/replay.h"
-#include "exec/memory-internal.h"
-#include "exec/ram_addr.h"
+#include "system/ram_addr.h"
#include "qemu/pmem.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/options.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -82,6 +87,8 @@
#include <daxctl/libdaxctl.h>
#endif
+#include "memory-internal.h"
+
//#define DEBUG_SUBPAGE
/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
@@ -152,6 +159,7 @@ static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_log_global_after_sync(MemoryListener *listener);
static void tcg_commit(MemoryListener *listener);
+static bool ram_is_cpr_compatible(RAMBlock *rb);
/**
* CPUAddressSpace: all the information a CPU needs about an AddressSpace
@@ -571,7 +579,7 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
is_write, true, &as, attrs);
mr = section.mr;
- if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
+ if (xen_enabled() && memory_access_is_direct(mr, is_write, attrs)) {
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
*plen = MIN(page, *plen);
}
@@ -579,6 +587,8 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
return mr;
}
+#ifdef CONFIG_TCG
+
typedef struct TCGIOMMUNotifier {
IOMMUNotifier n;
MemoryRegion *mr;
@@ -738,6 +748,33 @@ translate_fail:
return &d->map.sections[PHYS_SECTION_UNASSIGNED];
}
+MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+ hwaddr index, MemTxAttrs attrs)
+{
+ int asidx = cpu_asidx_from_attrs(cpu, attrs);
+ CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
+ AddressSpaceDispatch *d = cpuas->memory_dispatch;
+ int section_index = index & ~TARGET_PAGE_MASK;
+ MemoryRegionSection *ret;
+
+ assert(section_index < d->map.sections_nb);
+ ret = d->map.sections + section_index;
+ assert(ret->mr);
+ assert(ret->mr->ops);
+
+ return ret;
+}
+
+/* Called from RCU critical section */
+hwaddr memory_region_section_get_iotlb(CPUState *cpu,
+ MemoryRegionSection *section)
+{
+ AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
+ return section - d->map.sections;
+}
+
+#endif /* CONFIG_TCG */
+
void cpu_address_space_init(CPUState *cpu, int asidx,
const char *prefix, MemoryRegion *mr)
{
@@ -763,6 +800,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
if (!cpu->cpu_ases) {
cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
+ cpu->cpu_ases_count = cpu->num_ases;
}
newas = &cpu->cpu_ases[asidx];
@@ -776,6 +814,34 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
}
}
+void cpu_address_space_destroy(CPUState *cpu, int asidx)
+{
+ CPUAddressSpace *cpuas;
+
+ assert(cpu->cpu_ases);
+ assert(asidx >= 0 && asidx < cpu->num_ases);
+ /* KVM cannot currently support multiple address spaces. */
+ assert(asidx == 0 || !kvm_enabled());
+
+ cpuas = &cpu->cpu_ases[asidx];
+ if (tcg_enabled()) {
+ memory_listener_unregister(&cpuas->tcg_as_listener);
+ }
+
+ address_space_destroy(cpuas->as);
+ g_free_rcu(cpuas->as, rcu);
+
+ if (asidx == 0) {
+ /* reset the convenience alias for address space 0 */
+ cpu->as = NULL;
+ }
+
+ if (--cpu->cpu_ases_count == 0) {
+ g_free(cpu->cpu_ases);
+ cpu->cpu_ases = NULL;
+ }
+}
+
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
/* Return the AddressSpace corresponding to the specified index */
@@ -894,13 +960,19 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
(MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
{
DirtyMemoryBlocks *blocks;
- ram_addr_t start = memory_region_get_ram_addr(mr) + offset;
+ ram_addr_t start, first, last;
unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
- ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
- ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
DirtyBitmapSnapshot *snap;
unsigned long page, end, dest;
+ start = memory_region_get_ram_addr(mr);
+ /* We know we're only called for RAM MemoryRegions */
+ assert(start != RAM_ADDR_INVALID);
+ start += offset;
+
+ first = QEMU_ALIGN_DOWN(start, align);
+ last = QEMU_ALIGN_UP(start + length, align);
+
snap = g_malloc0(sizeof(*snap) +
((last - first) >> (TARGET_PAGE_BITS + 3)));
snap->start = first;
@@ -959,14 +1031,6 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
return false;
}
-/* Called from RCU critical section */
-hwaddr memory_region_section_get_iotlb(CPUState *cpu,
- MemoryRegionSection *section)
-{
- AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
- return section - d->map.sections;
-}
-
static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
uint16_t section);
static subpage_t *subpage_init(FlatView *fv, hwaddr base);
@@ -1200,7 +1264,7 @@ long qemu_maxrampagesize(void)
return pagesize;
}
-#ifdef CONFIG_POSIX
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
static int64_t get_file_size(int fd)
{
int64_t size;
@@ -1499,18 +1563,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
return offset;
}
-static unsigned long last_ram_page(void)
-{
- RAMBlock *block;
- ram_addr_t last = 0;
-
- RCU_READ_LOCK_GUARD();
- RAMBLOCK_FOREACH(block) {
- last = MAX(last, block->offset + block->max_length);
- }
- return last >> TARGET_PAGE_BITS;
-}
-
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
int ret;
@@ -1521,7 +1573,7 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
if (ret) {
perror("qemu_madvise");
fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
- "but dump_guest_core=off specified\n");
+ "but dump-guest-core=off specified\n");
}
}
}
@@ -1637,6 +1689,18 @@ void qemu_ram_unset_idstr(RAMBlock *block)
}
}
+static char *cpr_name(MemoryRegion *mr)
+{
+ const char *mr_name = memory_region_name(mr);
+ g_autofree char *id = mr->dev ? qdev_get_dev_path(mr->dev) : NULL;
+
+ if (id) {
+ return g_strdup_printf("%s/%s", id, mr_name);
+ } else {
+ return g_strdup(mr_name);
+ }
+}
+
size_t qemu_ram_pagesize(RAMBlock *rb)
{
return rb->page_size;
@@ -1764,13 +1828,11 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
}
/* Called with ram_list.mutex held */
-static void dirty_memory_extend(ram_addr_t old_ram_size,
- ram_addr_t new_ram_size)
+static void dirty_memory_extend(ram_addr_t new_ram_size)
{
- ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
- DIRTY_MEMORY_BLOCK_SIZE);
- ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
- DIRTY_MEMORY_BLOCK_SIZE);
+ unsigned int old_num_blocks = ram_list.num_dirty_blocks;
+ unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
+ DIRTY_MEMORY_BLOCK_SIZE);
int i;
/* Only need to extend if block count increased */
@@ -1802,6 +1864,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
g_free_rcu(old_blocks, rcu);
}
}
+
+ ram_list.num_dirty_blocks = new_num_blocks;
}
static void ram_block_add(RAMBlock *new_block, Error **errp)
@@ -1811,11 +1875,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
RAMBlock *block;
RAMBlock *last_block = NULL;
bool free_on_error = false;
- ram_addr_t old_ram_size, new_ram_size;
+ ram_addr_t ram_size;
Error *err = NULL;
- old_ram_size = last_ram_page();
-
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
@@ -1845,11 +1907,18 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
if (new_block->flags & RAM_GUEST_MEMFD) {
- assert(kvm_enabled());
+ int ret;
+
+ if (!kvm_enabled()) {
+ error_setg(errp, "cannot set up private guest memory for %s: KVM required",
+ object_get_typename(OBJECT(current_machine->cgs)));
+ goto out_free;
+ }
assert(new_block->guest_memfd < 0);
- if (ram_block_discard_require(true) < 0) {
- error_setg_errno(errp, errno,
+ ret = ram_block_coordinated_discard_require(true);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
"cannot set up private guest memory: discard currently blocked");
error_append_hint(errp, "Are you using assigned devices?\n");
goto out_free;
@@ -1861,13 +1930,41 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
goto out_free;
}
- }
- new_ram_size = MAX(old_ram_size,
- (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
- if (new_ram_size > old_ram_size) {
- dirty_memory_extend(old_ram_size, new_ram_size);
+ /*
+ * The attribute bitmap of the RamBlockAttributes defaults to
+ * discarded, which mimics the behavior of kvm_set_phys_mem() when it
+ * calls kvm_set_memory_attributes_private(). This leads to a brief
+ * period of inconsistency between the creation of the RAMBlock and its
+ * mapping into the physical address space. However, this is not
+ * problematic, as no users rely on the attribute status to perform
+ * any actions during this interval.
+ */
+ new_block->attributes = ram_block_attributes_create(new_block);
+ if (!new_block->attributes) {
+ error_setg(errp, "Failed to create ram block attribute");
+ close(new_block->guest_memfd);
+ ram_block_coordinated_discard_require(false);
+ qemu_mutex_unlock_ramlist();
+ goto out_free;
+ }
+
+ /*
+ * Add a specific guest_memfd blocker if a generic one would not be
+ * added by ram_block_add_cpr_blocker.
+ */
+ if (ram_is_cpr_compatible(new_block)) {
+ error_setg(&new_block->cpr_blocker,
+ "Memory region %s uses guest_memfd, "
+ "which is not supported with CPR.",
+ memory_region_name(new_block->mr));
+ migrate_add_blocker_modes(&new_block->cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1);
+ }
}
+
+ ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
+ dirty_memory_extend(ram_size);
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
@@ -1920,19 +2017,28 @@ out_free:
}
}
-#ifdef CONFIG_POSIX
-RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
+ qemu_ram_resize_cb resized, MemoryRegion *mr,
uint32_t ram_flags, int fd, off_t offset,
+ bool grow,
Error **errp)
{
+ ERRP_GUARD();
RAMBlock *new_block;
Error *local_err = NULL;
- int64_t file_size, file_align;
+ int64_t file_size, file_align, share_flags;
+
+ share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
+ assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
+ ram_flags &= ~RAM_PRIVATE;
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
- RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
+ RAM_READONLY_FD | RAM_GUEST_MEMFD |
+ RAM_RESIZEABLE)) == 0);
+ assert(max_size >= size);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@@ -1947,12 +2053,16 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
size = TARGET_PAGE_ALIGN(size);
size = REAL_HOST_PAGE_ALIGN(size);
+ max_size = TARGET_PAGE_ALIGN(max_size);
+ max_size = REAL_HOST_PAGE_ALIGN(max_size);
file_size = get_file_size(fd);
- if (file_size > offset && file_size < (offset + size)) {
- error_setg(errp, "backing store size 0x%" PRIx64
- " does not match 'size' option 0x" RAM_ADDR_FMT,
- file_size, size);
+ if (file_size && file_size < offset + max_size && !grow) {
+ error_setg(errp, "%s backing store size 0x%" PRIx64
+ " is too small for 'size' option 0x" RAM_ADDR_FMT
+ " plus 'offset' option 0x%" PRIx64,
+ memory_region_name(mr), file_size, max_size,
+ (uint64_t)offset);
return NULL;
}
@@ -1967,11 +2077,13 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block = g_malloc0(sizeof(*new_block));
new_block->mr = mr;
new_block->used_length = size;
- new_block->max_length = size;
+ new_block->max_length = max_size;
+ new_block->resized = resized;
new_block->flags = ram_flags;
new_block->guest_memfd = -1;
- new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
- errp);
+ new_block->host = file_ram_alloc(new_block, max_size, fd,
+ file_size < offset + max_size,
+ offset, errp);
if (!new_block->host) {
g_free(new_block);
return NULL;
@@ -2023,7 +2135,8 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
return NULL;
}
- block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
+ block = qemu_ram_alloc_from_fd(size, size, NULL, mr, ram_flags, fd, offset,
+ false, errp);
if (!block) {
if (created) {
unlink(mem_path);
@@ -2036,21 +2149,98 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
}
#endif
+#ifdef CONFIG_POSIX
+/*
+ * Create MAP_SHARED RAMBlocks by mmap'ing a file descriptor, so it can be
+ * shared with another process if CPR is being used. Use memfd if available
+ * because it has no size limits, else use POSIX shm.
+ */
+static int qemu_ram_get_shared_fd(const char *name, bool *reused, Error **errp)
+{
+ int fd = cpr_find_fd(name, 0);
+
+ if (fd >= 0) {
+ *reused = true;
+ return fd;
+ }
+
+ if (qemu_memfd_check(0)) {
+ fd = qemu_memfd_create(name, 0, 0, 0, 0, errp);
+ } else {
+ fd = qemu_shm_alloc(0, errp);
+ }
+
+ if (fd >= 0) {
+ cpr_save_fd(name, 0, fd);
+ }
+ *reused = false;
+ return fd;
+}
+#endif
+
static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
- void (*resized)(const char*,
- uint64_t length,
- void *host),
+ qemu_ram_resize_cb resized,
void *host, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
- int align;
+ int align, share_flags;
+
+ share_flags = ram_flags & (RAM_PRIVATE | RAM_SHARED);
+ assert(share_flags != (RAM_SHARED | RAM_PRIVATE));
+ ram_flags &= ~RAM_PRIVATE;
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
assert(!host ^ (ram_flags & RAM_PREALLOC));
+ assert(max_size >= size);
+
+ /* ignore RAM_SHARED for Windows and emscripten */
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
+ if (!host) {
+ if (!share_flags && current_machine->aux_ram_share) {
+ ram_flags |= RAM_SHARED;
+ }
+ if (ram_flags & RAM_SHARED) {
+ bool reused;
+ g_autofree char *name = cpr_name(mr);
+ int fd = qemu_ram_get_shared_fd(name, &reused, errp);
+
+ if (fd < 0) {
+ return NULL;
+ }
+
+ /* Use same alignment as qemu_anon_ram_alloc */
+ mr->align = QEMU_VMALLOC_ALIGN;
+
+ /*
+ * This can fail if the shm mount size is too small, or alloc from
+ * fd is not supported, but previous QEMU versions that called
+ * qemu_anon_ram_alloc for anonymous shared memory could have
+ * succeeded. Quietly fail and fall back.
+ *
+ * After cpr-transfer, new QEMU could create a memory region
+ * with a larger max size than old, so pass reused to grow the
+ * region if necessary. The extra space will be usable after a
+ * guest reset.
+ */
+ new_block = qemu_ram_alloc_from_fd(size, max_size, resized, mr,
+ ram_flags, fd, 0, reused, NULL);
+ if (new_block) {
+ trace_qemu_ram_alloc_shared(name, new_block->used_length,
+ new_block->max_length, fd,
+ new_block->host);
+ return new_block;
+ }
+
+ cpr_delete_fd(name, 0);
+ close(fd);
+ /* fall back to anon allocation */
+ }
+ }
+#endif
align = qemu_real_host_page_size();
align = MAX(align, TARGET_PAGE_SIZE);
@@ -2062,7 +2252,6 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
new_block->resized = resized;
new_block->used_length = size;
new_block->max_length = max_size;
- assert(max_size >= size);
new_block->fd = -1;
new_block->guest_memfd = -1;
new_block->page_size = qemu_real_host_page_size();
@@ -2087,15 +2276,14 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD |
+ RAM_PRIVATE)) == 0);
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
- void (*resized)(const char*,
- uint64_t length,
- void *host),
- MemoryRegion *mr, Error **errp)
+ qemu_ram_resize_cb resized,
+ MemoryRegion *mr, Error **errp)
{
return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
RAM_RESIZEABLE, mr, errp);
@@ -2107,7 +2295,7 @@ static void reclaim_ramblock(RAMBlock *block)
;
} else if (xen_enabled()) {
xen_invalidate_map_cache_entry(block->host);
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(EMSCRIPTEN)
} else if (block->fd >= 0) {
qemu_ram_munmap(block->fd, block->host, block->max_length);
close(block->fd);
@@ -2117,8 +2305,9 @@ static void reclaim_ramblock(RAMBlock *block)
}
if (block->guest_memfd >= 0) {
+ ram_block_attributes_destroy(block->attributes);
close(block->guest_memfd);
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
}
g_free(block);
@@ -2126,6 +2315,8 @@ static void reclaim_ramblock(RAMBlock *block)
void qemu_ram_free(RAMBlock *block)
{
+ g_autofree char *name = NULL;
+
if (!block) {
return;
}
@@ -2136,6 +2327,8 @@ void qemu_ram_free(RAMBlock *block)
}
qemu_mutex_lock_ramlist();
+ name = cpr_name(block->mr);
+ cpr_delete_fd(name, 0);
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
@@ -2146,45 +2339,80 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+/* Simply remap the given VM memory location from start to start+length */
+static int qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
+{
+ int flags, prot;
+ void *area;
+ void *host_startaddr = block->host + start;
+
+ assert(block->fd < 0);
+ flags = MAP_FIXED | MAP_ANONYMOUS;
+ flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+ prot = PROT_READ;
+ prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+ area = mmap(host_startaddr, length, prot, flags, -1, 0);
+ return area != host_startaddr ? -errno : 0;
+}
+
+/*
+ * qemu_ram_remap - remap a single RAM page
+ *
+ * @addr: address in ram_addr_t address space.
+ *
+ * This function will try remapping a single page of guest RAM identified by
+ * @addr, essentially discarding memory to recover from previously poisoned
+ * memory (MCE). The page size depends on the RAMBlock (i.e., hugetlb). @addr
+ * does not have to point at the start of the page.
+ *
+ * This function is only to be used during system resets; it will kill the
+ * VM if remapping failed.
+ */
+void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
- ram_addr_t offset;
- int flags;
- void *area, *vaddr;
- int prot;
+ uint64_t offset;
+ void *vaddr;
+ size_t page_size;
RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
if (offset < block->max_length) {
+ /* Respect the pagesize of our RAMBlock */
+ page_size = qemu_ram_pagesize(block);
+ offset = QEMU_ALIGN_DOWN(offset, page_size);
+
vaddr = ramblock_ptr(block, offset);
if (block->flags & RAM_PREALLOC) {
;
} else if (xen_enabled()) {
abort();
} else {
- flags = MAP_FIXED;
- flags |= block->flags & RAM_SHARED ?
- MAP_SHARED : MAP_PRIVATE;
- flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
- prot = PROT_READ;
- prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
- if (block->fd >= 0) {
- area = mmap(vaddr, length, prot, flags, block->fd,
- offset + block->fd_offset);
- } else {
- flags |= MAP_ANONYMOUS;
- area = mmap(vaddr, length, prot, flags, -1, 0);
- }
- if (area != vaddr) {
- error_report("Could not remap addr: "
- RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
- length, addr);
- exit(1);
+ if (ram_block_discard_range(block, offset, page_size) != 0) {
+ /*
+ * Fall back to mmap() only for anonymous mappings: if a backing
+ * file is associated, mmap() alone may not be able to recover
+ * the memory in all cases, so don't take that risk and fail
+ * now instead.
+ */
+ if (block->fd >= 0) {
+ error_report("Could not remap RAM %s:%" PRIx64 "+%"
+ PRIx64 " +%zx", block->idstr, offset,
+ block->fd_offset, page_size);
+ exit(1);
+ }
+ if (qemu_ram_remap_mmap(block, offset, page_size) != 0) {
+ error_report("Could not remap RAM %s:%" PRIx64 " +%zx",
+ block->idstr, offset, page_size);
+ exit(1);
+ }
}
- memory_try_enable_merging(vaddr, length);
- qemu_ram_setup_dump(vaddr, length);
+ memory_try_enable_merging(vaddr, page_size);
+ qemu_ram_setup_dump(vaddr, page_size);
}
+
+ break;
}
}
}
@@ -2277,6 +2505,10 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t ram_addr;
RCU_READ_LOCK_GUARD();
ram_addr = xen_ram_addr_from_mapcache(ptr);
+ if (ram_addr == RAM_ADDR_INVALID) {
+ return NULL;
+ }
+
block = qemu_get_ram_block(ram_addr);
if (block) {
*offset = ram_addr - block->offset;
@@ -2478,23 +2710,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
return phys_section_add(map, &section);
}
-MemoryRegionSection *iotlb_to_section(CPUState *cpu,
- hwaddr index, MemTxAttrs attrs)
-{
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
- CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
- AddressSpaceDispatch *d = cpuas->memory_dispatch;
- int section_index = index & ~TARGET_PAGE_MASK;
- MemoryRegionSection *ret;
-
- assert(section_index < d->map.sections_nb);
- ret = d->map.sections + section_index;
- assert(ret->mr);
- assert(ret->mr->ops);
-
- return ret;
-}
-
static void io_mem_init(void)
{
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
@@ -2623,7 +2838,11 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr length)
{
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
- addr += memory_region_get_ram_addr(mr);
+ ram_addr_t ramaddr = memory_region_get_ram_addr(mr);
+
+ /* We know we're only called for RAM MemoryRegions */
+ assert(ramaddr != RAM_ADDR_INVALID);
+ addr += ramaddr;
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
@@ -2635,7 +2854,7 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
}
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
assert(tcg_enabled());
- tb_invalidate_phys_range(addr, addr + length - 1);
+ tb_invalidate_phys_range(NULL, addr, addr + length - 1);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
@@ -2716,7 +2935,7 @@ static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
if (memory_region_is_ram(mr)) {
return true;
}
- qemu_log_mask(LOG_GUEST_ERROR,
+ qemu_log_mask(LOG_INVALID_MEM,
"Invalid access to non-RAM device at "
"addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
"region '%s'\n", addr, len, memory_region_name(mr));
@@ -2732,7 +2951,7 @@ static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
return MEMTX_ACCESS_ERROR;
}
- if (!memory_access_is_direct(mr, true)) {
+ if (!memory_access_is_direct(mr, true, attrs)) {
uint64_t val;
MemTxResult result;
bool release_lock = prepare_mmio_access(mr);
@@ -2828,7 +3047,7 @@ static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
return MEMTX_ACCESS_ERROR;
}
- if (!memory_access_is_direct(mr, false)) {
+ if (!memory_access_is_direct(mr, false, attrs)) {
/* I/O case */
uint64_t val;
MemTxResult result;
@@ -3000,8 +3219,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
l = len;
mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
- if (!(memory_region_is_ram(mr) ||
- memory_region_is_romd(mr))) {
+ if (!memory_region_supports_direct_access(mr)) {
l = memory_access_size(mr, l, addr1);
} else {
/* ROM/RAM case */
@@ -3049,6 +3267,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
NULL, len, FLUSH_CACHE);
}
+/*
+ * A magic value stored in the first 8 bytes of the bounce buffer struct. Used
+ * to detect illegal pointers passed to address_space_unmap.
+ */
+#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed
+
+typedef struct {
+ uint64_t magic;
+ MemoryRegion *mr;
+ hwaddr addr;
+ size_t len;
+ uint8_t buffer[];
+} BounceBuffer;
+
static void
address_space_unregister_map_client_do(AddressSpaceMapClient *client)
{
@@ -3074,9 +3306,9 @@ void address_space_register_map_client(AddressSpace *as, QEMUBH *bh)
QEMU_LOCK_GUARD(&as->map_client_list_lock);
client->bh = bh;
QLIST_INSERT_HEAD(&as->map_client_list, client, link);
- /* Write map_client_list before reading in_use. */
+ /* Write map_client_list before reading bounce_buffer_size. */
smp_mb();
- if (!qatomic_read(&as->bounce.in_use)) {
+ if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) {
address_space_notify_map_clients_locked(as);
}
}
@@ -3124,7 +3356,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
while (len > 0) {
l = len;
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
- if (!memory_access_is_direct(mr, is_write)) {
+ if (!memory_access_is_direct(mr, is_write, attrs)) {
l = memory_access_size(mr, l, addr);
if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
return false;
@@ -3193,6 +3425,8 @@ void *address_space_map(AddressSpace *as,
MemoryRegion *mr;
FlatView *fv;
+ trace_address_space_map(as, addr, len, is_write, *(uint32_t *) &attrs);
+
if (len == 0) {
return NULL;
}
@@ -3202,29 +3436,41 @@ void *address_space_map(AddressSpace *as,
fv = address_space_to_flatview(as);
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
- if (!memory_access_is_direct(mr, is_write)) {
- if (qatomic_xchg(&as->bounce.in_use, true)) {
+ if (!memory_access_is_direct(mr, is_write, attrs)) {
+ size_t used = qatomic_read(&as->bounce_buffer_size);
+ for (;;) {
+ hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
+ size_t new_size = used + alloc;
+ size_t actual =
+ qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size);
+ if (actual == used) {
+ l = alloc;
+ break;
+ }
+ used = actual;
+ }
+
+ if (l == 0) {
*plen = 0;
return NULL;
}
- /* Avoid unbounded allocations */
- l = MIN(l, TARGET_PAGE_SIZE);
- as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
- as->bounce.addr = addr;
- as->bounce.len = l;
+ BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer));
+ bounce->magic = BOUNCE_BUFFER_MAGIC;
memory_region_ref(mr);
- as->bounce.mr = mr;
+ bounce->mr = mr;
+ bounce->addr = addr;
+ bounce->len = l;
+
if (!is_write) {
- flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
- as->bounce.buffer, l);
+ flatview_read(fv, addr, attrs,
+ bounce->buffer, l);
}
*plen = l;
- return as->bounce.buffer;
+ return bounce->buffer;
}
-
memory_region_ref(mr);
*plen = flatview_extend_translation(fv, addr, len, mr, xlat,
l, is_write, attrs);
@@ -3239,12 +3485,11 @@ void *address_space_map(AddressSpace *as,
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
bool is_write, hwaddr access_len)
{
- if (buffer != as->bounce.buffer) {
- MemoryRegion *mr;
- ram_addr_t addr1;
+ MemoryRegion *mr;
+ ram_addr_t addr1;
- mr = memory_region_from_host(buffer, &addr1);
- assert(mr != NULL);
+ mr = memory_region_from_host(buffer, &addr1);
+ if (mr != NULL) {
if (is_write) {
invalidate_and_set_dirty(mr, addr1, access_len);
}
@@ -3254,15 +3499,22 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
memory_region_unref(mr);
return;
}
+
+
+ BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
+ assert(bounce->magic == BOUNCE_BUFFER_MAGIC);
+
if (is_write) {
- address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED,
- as->bounce.buffer, access_len);
- }
- qemu_vfree(as->bounce.buffer);
- as->bounce.buffer = NULL;
- memory_region_unref(as->bounce.mr);
- /* Clear in_use before reading map_client_list. */
- qatomic_set_mb(&as->bounce.in_use, false);
+ address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
+ bounce->buffer, access_len);
+ }
+
+ qatomic_sub(&as->bounce_buffer_size, bounce->len);
+ bounce->magic = ~BOUNCE_BUFFER_MAGIC;
+ memory_region_unref(bounce->mr);
+ g_free(bounce);
+ /* Write bounce_buffer_size before reading map_client_list. */
+ smp_mb();
address_space_notify_map_clients(as);
}
@@ -3317,7 +3569,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
mr = cache->mrs.mr;
memory_region_ref(mr);
- if (memory_access_is_direct(mr, is_write)) {
+ if (memory_access_is_direct(mr, is_write, MEMTXATTRS_UNSPECIFIED)) {
/* We don't care about the memory attributes here as we're only
* doing this if we found actual RAM, which behaves the same
* regardless of attributes; so UNSPECIFIED is fine.
@@ -3510,13 +3762,8 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
if (l > len)
l = len;
phys_addr += (addr & ~TARGET_PAGE_MASK);
- if (is_write) {
- res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
- attrs, buf, l);
- } else {
- res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
- attrs, buf, l);
- }
+ res = address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf,
+ l, is_write);
if (res != MEMTX_OK) {
return -1;
}
@@ -3626,18 +3873,19 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
}
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- start, length);
+ start + rb->fd_offset, length);
if (ret) {
ret = -errno;
- error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
- __func__, rb->idstr, start, length, ret);
+ error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
+ " +%zx (%d)", __func__, rb->idstr, start,
+ rb->fd_offset, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("%s: fallocate not available/file"
- "%s:%" PRIx64 " +%zx (%d)",
- __func__, rb->idstr, start, length, ret);
+ "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
+ rb->idstr, start, rb->fd_offset, length, ret);
goto err;
#endif
}
@@ -3684,6 +3932,7 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
int ret = -1;
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ /* ignore fd_offset with guest_memfd */
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
@@ -3888,3 +4137,58 @@ bool ram_block_discard_is_required(void)
return qatomic_read(&ram_block_discard_required_cnt) ||
qatomic_read(&ram_block_coordinated_discard_required_cnt);
}
+
+/*
+ * Return true if ram is compatible with CPR. Do not exclude rom,
+ * because the rom file could change in new QEMU.
+ */
+static bool ram_is_cpr_compatible(RAMBlock *rb)
+{
+ MemoryRegion *mr = rb->mr;
+
+ if (!mr || !memory_region_is_ram(mr)) {
+ return true;
+ }
+
+ /* Ram device is remapped in new QEMU */
+ if (memory_region_is_ram_device(mr)) {
+ return true;
+ }
+
+ /*
+ * A file descriptor is passed to new QEMU and remapped, or its backing
+ * file is reopened and mapped. It must be shared to avoid COW.
+ */
+ if (rb->fd >= 0 && qemu_ram_is_shared(rb)) {
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Add a blocker for each volatile ram block. This function should only be
+ * called after we know that the block is migratable. Non-migratable blocks
+ * are either re-created in new QEMU, or are handled specially, or are covered
+ * by a device-level CPR blocker.
+ */
+void ram_block_add_cpr_blocker(RAMBlock *rb, Error **errp)
+{
+ assert(qemu_ram_is_migratable(rb));
+
+ if (ram_is_cpr_compatible(rb)) {
+ return;
+ }
+
+ error_setg(&rb->cpr_blocker,
+ "Memory region %s is not compatible with CPR. share=on is "
+ "required for memory-backend objects, and aux-ram-share=on is "
+ "required.", memory_region_name(rb->mr));
+ migrate_add_blocker_modes(&rb->cpr_blocker, errp, MIG_MODE_CPR_TRANSFER,
+ -1);
+}
+
+void ram_block_del_cpr_blocker(RAMBlock *rb)
+{
+ migrate_del_blocker(&rb->cpr_blocker);
+}
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 6af6ef7..5588ed2 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -22,13 +22,14 @@
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "monitor/qdev.h"
-#include "sysemu/arch_init.h"
+#include "system/arch_init.h"
+#include "system/runstate.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-qdev.h"
-#include "qapi/qmp/dispatch.h"
-#include "qapi/qmp/qdict.h"
+#include "qapi/qmp-registry.h"
+#include "qobject/qdict.h"
#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qstring.h"
+#include "qobject/qstring.h"
#include "qapi/qobject-input-visitor.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
@@ -36,7 +37,7 @@
#include "qemu/option.h"
#include "qemu/qemu-print.h"
#include "qemu/option_int.h"
-#include "sysemu/block-backend.h"
+#include "system/block-backend.h"
#include "migration/misc.h"
#include "qemu/cutils.h"
#include "hw/qdev-properties.h"
@@ -55,12 +56,18 @@ typedef struct QDevAlias
} QDevAlias;
/* default virtio transport per architecture */
-#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \
- QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \
- QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \
- QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \
- QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA | \
- QEMU_ARCH_LOONGARCH)
+#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | \
+ QEMU_ARCH_ARM | \
+ QEMU_ARCH_HPPA | \
+ QEMU_ARCH_I386 | \
+ QEMU_ARCH_LOONGARCH | \
+ QEMU_ARCH_MIPS | \
+ QEMU_ARCH_OPENRISC | \
+ QEMU_ARCH_PPC | \
+ QEMU_ARCH_RISCV | \
+ QEMU_ARCH_SH4 | \
+ QEMU_ARCH_SPARC | \
+ QEMU_ARCH_XTENSA)
#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X)
#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K)
@@ -125,7 +132,7 @@ static const char *qdev_class_get_alias(DeviceClass *dc)
for (i = 0; qdev_alias_table[i].typename; i++) {
if (qdev_alias_table[i].arch_mask &&
- !(qdev_alias_table[i].arch_mask & arch_type)) {
+ !qemu_arch_available(qdev_alias_table[i].arch_mask)) {
continue;
}
@@ -211,7 +218,7 @@ static const char *find_typename_by_alias(const char *alias)
for (i = 0; qdev_alias_table[i].alias; i++) {
if (qdev_alias_table[i].arch_mask &&
- !(qdev_alias_table[i].arch_mask & arch_type)) {
+ !qemu_arch_available(qdev_alias_table[i].arch_mask)) {
continue;
}
@@ -256,8 +263,7 @@ static DeviceClass *qdev_get_device_class(const char **driver, Error **errp)
}
dc = DEVICE_CLASS(oc);
- if (!dc->user_creatable ||
- (phase_check(PHASE_MACHINE_READY) && !dc->hotpluggable)) {
+ if (!dc->user_creatable) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
"a pluggable device type");
return NULL;
@@ -341,7 +347,7 @@ static Object *qdev_get_peripheral(void)
static Object *dev;
if (dev == NULL) {
- dev = container_get(qdev_get_machine(), "/peripheral");
+ dev = machine_get_container("peripheral");
}
return dev;
@@ -352,7 +358,7 @@ static Object *qdev_get_peripheral_anon(void)
static Object *dev;
if (dev == NULL) {
- dev = container_get(qdev_get_machine(), "/peripheral-anon");
+ dev = machine_get_container("peripheral-anon");
}
return dev;
@@ -624,6 +630,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
char *id;
DeviceState *dev = NULL;
BusState *bus = NULL;
+ QDict *properties;
driver = qdict_get_try_str(opts, "driver");
if (!driver) {
@@ -668,12 +675,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
return NULL;
}
- if (phase_check(PHASE_MACHINE_READY) && bus && !qbus_is_hotpluggable(bus)) {
- error_setg(errp, "Bus '%s' does not support hotplugging", bus->name);
- return NULL;
- }
-
- if (!migration_is_idle()) {
+ if (migration_is_running()) {
error_setg(errp, "device_add not allowed while migrating");
return NULL;
}
@@ -682,17 +684,9 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
dev = qdev_new(driver);
/* Check whether the hotplug is allowed by the machine */
- if (phase_check(PHASE_MACHINE_READY)) {
- if (!qdev_hotplug_allowed(dev, errp)) {
- goto err_del_dev;
- }
-
- if (!bus && !qdev_get_machine_hotplug_handler(dev)) {
- /* No bus, no machine hotplug handler --> device is not hotpluggable */
- error_setg(errp, "Device '%s' can not be hotplugged on this machine",
- driver);
- goto err_del_dev;
- }
+ if (phase_check(PHASE_MACHINE_READY) &&
+ !qdev_hotplug_allowed(dev, bus, errp)) {
+ goto err_del_dev;
}
/*
@@ -705,13 +699,14 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
}
/* set properties */
- dev->opts = qdict_clone_shallow(opts);
- qdict_del(dev->opts, "driver");
- qdict_del(dev->opts, "bus");
- qdict_del(dev->opts, "id");
+ properties = qdict_clone_shallow(opts);
+ qdict_del(properties, "driver");
+ qdict_del(properties, "bus");
+ qdict_del(properties, "id");
- object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json,
+ object_set_properties_from_keyval(&dev->parent_obj, properties, from_json,
errp);
+ qobject_unref(properties);
if (*errp) {
goto err_del_dev;
}
@@ -745,19 +740,18 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
#define qdev_printf(fmt, ...) monitor_printf(mon, "%*s" fmt, indent, "", ## __VA_ARGS__)
-static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props,
+static void qdev_print_props(Monitor *mon, DeviceState *dev, DeviceClass *dc,
int indent)
{
- if (!props)
- return;
- for (; props->name; props++) {
+ for (int i = 0, n = dc->props_count_; i < n; ++i) {
+ const Property *prop = &dc->props_[i];
char *value;
- char *legacy_name = g_strdup_printf("legacy-%s", props->name);
+ char *legacy_name = g_strdup_printf("legacy-%s", prop->name);
if (object_property_get_type(OBJECT(dev), legacy_name, NULL)) {
value = object_property_get_str(OBJECT(dev), legacy_name, NULL);
} else {
- value = object_property_print(OBJECT(dev), props->name, true,
+ value = object_property_print(OBJECT(dev), prop->name, true,
NULL);
}
g_free(legacy_name);
@@ -765,7 +759,7 @@ static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props,
if (!value) {
continue;
}
- qdev_printf("%s = %s\n", props->name,
+ qdev_printf("%s = %s\n", prop->name,
*value ? value : "<null>");
g_free(value);
}
@@ -805,7 +799,7 @@ static void qdev_print(Monitor *mon, DeviceState *dev, int indent)
}
class = object_get_class(OBJECT(dev));
do {
- qdev_print_props(mon, dev, DEVICE_CLASS(class)->props_, indent);
+ qdev_print_props(mon, dev, DEVICE_CLASS(class), indent);
class = object_class_get_parent(class);
} while (class != object_class_by_name(TYPE_DEVICE));
bus_print_dev(dev->parent_bus, mon, dev, indent);
@@ -849,18 +843,9 @@ void hmp_info_qdm(Monitor *mon, const QDict *qdict)
void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp)
{
- QemuOpts *opts;
DeviceState *dev;
- opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, errp);
- if (!opts) {
- return;
- }
- if (!monitor_cur_is_qmp() && qdev_device_help(opts)) {
- qemu_opts_del(opts);
- return;
- }
- dev = qdev_device_add(opts, errp);
+ dev = qdev_device_add_from_qdict(qdict, true, errp);
if (!dev) {
/*
* Drain all pending RCU callbacks. This is done because
@@ -872,20 +857,24 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp)
* to the user
*/
drain_call_rcu();
-
- qemu_opts_del(opts);
- return;
}
object_unref(OBJECT(dev));
}
-static DeviceState *find_device_state(const char *id, Error **errp)
+/*
+ * Note that creating new APIs using error classes other than GenericError is
+ * not recommended. Set use_generic_error=true for new interfaces.
+ */
+static DeviceState *find_device_state(const char *id, bool use_generic_error,
+ Error **errp)
{
Object *obj = object_resolve_path_at(qdev_get_peripheral(), id);
DeviceState *dev;
if (!obj) {
- error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ error_set(errp,
+ (use_generic_error ?
+ ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND),
"Device '%s' not found", id);
return NULL;
}
@@ -901,28 +890,15 @@ static DeviceState *find_device_state(const char *id, Error **errp)
void qdev_unplug(DeviceState *dev, Error **errp)
{
- DeviceClass *dc = DEVICE_GET_CLASS(dev);
HotplugHandler *hotplug_ctrl;
HotplugHandlerClass *hdc;
Error *local_err = NULL;
- if (qdev_unplug_blocked(dev, errp)) {
- return;
- }
-
- if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
- error_setg(errp, "Bus '%s' does not support hotplugging",
- dev->parent_bus->name);
- return;
- }
-
- if (!dc->hotpluggable) {
- error_setg(errp, "Device '%s' does not support hotplugging",
- object_get_typename(OBJECT(dev)));
+ if (!qdev_hotunplug_allowed(dev, errp)) {
return;
}
- if (!migration_is_idle() && !dev->allow_unplug_during_migration) {
+ if (migration_is_running() && !dev->allow_unplug_during_migration) {
error_setg(errp, "device_del not allowed while migrating");
return;
}
@@ -950,7 +926,7 @@ void qdev_unplug(DeviceState *dev, Error **errp)
void qmp_device_del(const char *id, Error **errp)
{
- DeviceState *dev = find_device_state(id, errp);
+ DeviceState *dev = find_device_state(id, false, errp);
if (dev != NULL) {
if (dev->pending_deleted_event &&
(dev->pending_deleted_expires_ms == 0 ||
@@ -964,11 +940,74 @@ void qmp_device_del(const char *id, Error **errp)
}
}
+int qdev_sync_config(DeviceState *dev, Error **errp)
+{
+ DeviceClass *dc = DEVICE_GET_CLASS(dev);
+
+ if (!dc->sync_config) {
+ error_setg(errp, "device-sync-config is not supported for '%s'",
+ object_get_typename(OBJECT(dev)));
+ return -ENOTSUP;
+ }
+
+ return dc->sync_config(dev, errp);
+}
+
+void qmp_device_sync_config(const char *id, Error **errp)
+{
+ DeviceState *dev;
+
+ /*
+ * During migration there is a race between syncing the configuration
+ * and migrating it (if migration happens first, the target would get
+ * an outdated version), so let's just not allow it.
+ */
+
+ if (migration_is_running()) {
+ error_setg(errp, "Config synchronization is not allowed "
+ "during migration");
+ return;
+ }
+
+ dev = find_device_state(id, true, errp);
+ if (!dev) {
+ return;
+ }
+
+ qdev_sync_config(dev, errp);
+}
+
void hmp_device_add(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
+ QemuOpts *opts;
+ DeviceState *dev;
+
+ opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, &err);
+ if (!opts) {
+ goto out;
+ }
+ if (qdev_device_help(opts)) {
+ qemu_opts_del(opts);
+ return;
+ }
+ dev = qdev_device_add(opts, &err);
+ if (!dev) {
+ /*
+ * Drain all pending RCU callbacks. This is done because
+ * some bus-related operations can defer a device removal
+ * (here this can happen if the device is added and then
+ * removed due to a configuration error)
+ * to an RCU callback, but the user might expect that this interface
+ * will finish its job completely once the monitor command returns
+ * its result to the user.
+ */
+ drain_call_rcu();
- qmp_device_add((QDict *)qdict, NULL, &err);
+ qemu_opts_del(opts);
+ }
+ object_unref(dev);
+out:
hmp_handle_error(mon, err);
}
@@ -1034,7 +1073,7 @@ static GSList *qdev_build_hotpluggable_device_list(Object *peripheral)
static void peripheral_device_del_completion(ReadLineState *rs,
const char *str)
{
- Object *peripheral = container_get(qdev_get_machine(), "/peripheral");
+ Object *peripheral = machine_get_container("peripheral");
GSList *list, *item;
list = qdev_build_hotpluggable_device_list(peripheral);
@@ -1070,7 +1109,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
GLOBAL_STATE_CODE();
- dev = find_device_state(id, errp);
+ dev = find_device_state(id, false, errp);
if (dev == NULL) {
return NULL;
}
diff --git a/system/qemu-seccomp.c b/system/qemu-seccomp.c
index 98ffce0..f8e1238 100644
--- a/system/qemu-seccomp.c
+++ b/system/qemu-seccomp.c
@@ -20,7 +20,7 @@
#include "qemu/module.h"
#include <sys/prctl.h>
#include <seccomp.h>
-#include "sysemu/seccomp.h"
+#include "system/seccomp.h"
#include <linux/seccomp.h>
/* For some architectures (notably ARM) cacheflush is not supported until
@@ -47,10 +47,10 @@ const struct scmp_arg_cmp sched_setscheduler_arg[] = {
};
/*
- * See 'NOTES' in 'man 2 clone' - s390 & cross have 'flags' in
+ * See 'NOTES' in 'man 2 clone' - s390 has 'flags' in
* different position to other architectures
*/
-#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS)
+#if defined(HOST_S390X) || defined(HOST_S390)
#define CLONE_FLAGS_ARG 1
#else
#define CLONE_FLAGS_ARG 0
diff --git a/system/qtest.c b/system/qtest.c
index 12703a2..301b03b 100644
--- a/system/qtest.c
+++ b/system/qtest.c
@@ -13,17 +13,17 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "sysemu/qtest.h"
-#include "sysemu/runstate.h"
+#include "system/qtest.h"
+#include "system/runstate.h"
#include "chardev/char-fe.h"
-#include "exec/ioport.h"
-#include "exec/memory.h"
+#include "system/ioport.h"
+#include "system/memory.h"
#include "exec/tswap.h"
#include "hw/qdev-core.h"
#include "hw/irq.h"
#include "hw/core/cpu.h"
#include "qemu/accel.h"
-#include "sysemu/cpu-timers.h"
+#include "system/cpu-timers.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/error-report.h"
@@ -78,6 +78,11 @@ static void *qtest_server_send_opaque;
* let you adjust the value of the clock (monotonically). All the commands
* return the current value of the clock in nanoseconds.
*
+ * If the commands FAIL then time wasn't advanced which is likely
+ * because the machine was in a paused state or no timer events exist
+ * in the future. This will cause qtest to abort and the test will
+ * need to check its assumptions.
+ *
* .. code-block:: none
*
* > clock_step
@@ -260,7 +265,7 @@ static int hex2nib(char ch)
}
}
-void qtest_send_prefix(CharBackend *chr)
+static void qtest_log_timestamp(void)
{
if (!qtest_log_fp || !qtest_opened) {
return;
@@ -277,7 +282,7 @@ static void G_GNUC_PRINTF(1, 2) qtest_log_send(const char *fmt, ...)
return;
}
- qtest_send_prefix(NULL);
+ qtest_log_timestamp();
va_start(ap, fmt);
vfprintf(qtest_log_fp, fmt, ap);
@@ -296,6 +301,7 @@ static void qtest_server_char_be_send(void *opaque, const char *str)
static void qtest_send(CharBackend *chr, const char *str)
{
+ qtest_log_timestamp();
qtest_server_send(qtest_server_send_opaque, str);
}
@@ -319,7 +325,6 @@ static void qtest_irq_handler(void *opaque, int n, int level)
if (irq_levels[n] != level) {
CharBackend *chr = &qtest->qtest_chr;
irq_levels[n] = level;
- qtest_send_prefix(chr);
qtest_sendf(chr, "IRQ %s %d\n",
level ? "raise" : "lower", n);
}
@@ -375,19 +380,16 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
is_outbound = words[0][14] == 'o';
dev = DEVICE(object_resolve_path(words[1], NULL));
if (!dev) {
- qtest_send_prefix(chr);
qtest_send(chr, "FAIL Unknown device\n");
return;
}
if (is_named && !is_outbound) {
- qtest_send_prefix(chr);
qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n");
return;
}
if (irq_intercept_dev) {
- qtest_send_prefix(chr);
if (irq_intercept_dev != dev) {
qtest_send(chr, "FAIL IRQ intercept already enabled\n");
} else {
@@ -414,7 +416,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
}
}
- qtest_send_prefix(chr);
if (interception_succeeded) {
irq_intercept_dev = dev;
qtest_send(chr, "OK\n");
@@ -433,7 +434,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
dev = DEVICE(object_resolve_path(words[1], NULL));
if (!dev) {
- qtest_send_prefix(chr);
qtest_send(chr, "FAIL Unknown device\n");
return;
}
@@ -452,7 +452,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
irq = qdev_get_gpio_in_named(dev, name, num);
qemu_set_irq(irq, level);
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "outb") == 0 ||
strcmp(words[0], "outw") == 0 ||
@@ -475,7 +474,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
} else if (words[0][3] == 'l') {
cpu_outl(addr, value);
}
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "inb") == 0 ||
strcmp(words[0], "inw") == 0 ||
@@ -496,7 +494,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
} else if (words[0][2] == 'l') {
value = cpu_inl(addr);
}
- qtest_send_prefix(chr);
qtest_sendf(chr, "OK 0x%04x\n", value);
} else if (strcmp(words[0], "writeb") == 0 ||
strcmp(words[0], "writew") == 0 ||
@@ -532,7 +529,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
&data, 8);
}
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "readb") == 0 ||
strcmp(words[0], "readw") == 0 ||
@@ -566,7 +562,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
&value, 8);
tswap64s(&value);
}
- qtest_send_prefix(chr);
qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value);
} else if (strcmp(words[0], "read") == 0) {
g_autoptr(GString) enc = NULL;
@@ -588,7 +583,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
enc = qemu_hexdump_line(NULL, data, len, 0, 0);
- qtest_send_prefix(chr);
qtest_sendf(chr, "OK 0x%s\n", enc->str);
g_free(data);
@@ -608,7 +602,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
len);
b64_data = g_base64_encode(data, len);
- qtest_send_prefix(chr);
qtest_sendf(chr, "OK %s\n", b64_data);
g_free(data);
@@ -644,7 +637,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
len);
g_free(data);
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "memset") == 0) {
uint64_t addr, len;
@@ -668,7 +660,6 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
g_free(data);
}
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "b64write") == 0) {
uint64_t addr, len;
@@ -700,17 +691,16 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
len);
- qtest_send_prefix(chr);
qtest_send(chr, "OK\n");
} else if (strcmp(words[0], "endianness") == 0) {
- qtest_send_prefix(chr);
- if (target_words_bigendian()) {
+ if (target_big_endian()) {
qtest_sendf(chr, "OK big\n");
} else {
qtest_sendf(chr, "OK little\n");
}
} else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) {
- int64_t ns;
+ int64_t old_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ int64_t ns, new_ns;
if (words[1]) {
int ret = qemu_strtoi64(words[1], NULL, 0, &ns);
@@ -718,18 +708,24 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
} else {
ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
QEMU_TIMER_ATTR_ALL);
+ if (ns < 0) {
+ qtest_send(chr, "FAIL "
+ "cannot advance clock to the next deadline "
+ "because there is no pending deadline\n");
+ return;
+ }
+ }
+ new_ns = qemu_clock_advance_virtual_time(old_ns + ns);
+ if (new_ns > old_ns) {
+ qtest_sendf(chr, "OK %"PRIi64"\n", new_ns);
+ } else {
+ qtest_sendf(chr, "FAIL could not advance time\n");
}
- qemu_clock_advance_virtual_time(
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns);
- qtest_send_prefix(chr);
- qtest_sendf(chr, "OK %"PRIi64"\n",
- (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
} else if (strcmp(words[0], "module_load") == 0) {
Error *local_err = NULL;
int rv;
g_assert(words[1] && words[2]);
- qtest_send_prefix(chr);
rv = module_load(words[1], words[2], &local_err);
if (rv > 0) {
qtest_sendf(chr, "OK\n");
@@ -740,43 +736,37 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
qtest_sendf(chr, "FAIL\n");
}
} else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) {
- int64_t ns;
+ int64_t ns, new_ns;
int ret;
g_assert(words[1]);
ret = qemu_strtoi64(words[1], NULL, 0, &ns);
g_assert(ret == 0);
- qemu_clock_advance_virtual_time(ns);
- qtest_send_prefix(chr);
- qtest_sendf(chr, "OK %"PRIi64"\n",
- (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+ new_ns = qemu_clock_advance_virtual_time(ns);
+ qtest_sendf(chr, "%s %"PRIi64"\n",
+ new_ns == ns ? "OK" : "FAIL", new_ns);
} else if (process_command_cb && process_command_cb(chr, words)) {
/* Command got consumed by the callback handler */
} else {
- qtest_send_prefix(chr);
qtest_sendf(chr, "FAIL Unknown command '%s'\n", words[0]);
}
}
+/*
+ * Process as much of @inbuf as we can in newline terminated chunks.
+ * Remove the processed commands from @inbuf as we go.
+ */
static void qtest_process_inbuf(CharBackend *chr, GString *inbuf)
{
char *end;
while ((end = strchr(inbuf->str, '\n')) != NULL) {
- size_t offset;
- GString *cmd;
- gchar **words;
-
- offset = end - inbuf->str;
+ size_t len = end - inbuf->str;
+ g_autofree char *cmd = g_strndup(inbuf->str, len);
+ g_auto(GStrv) words = g_strsplit(cmd, " ", 0);
- cmd = g_string_new_len(inbuf->str, offset);
- g_string_erase(inbuf, 0, offset + 1);
-
- words = g_strsplit(cmd->str, " ", 0);
+ g_string_erase(inbuf, 0, len + 1);
qtest_process_command(chr, words);
- g_strfreev(words);
-
- g_string_free(cmd, TRUE);
}
}
@@ -1004,7 +994,7 @@ static char *qtest_get_chardev(Object *obj, Error **errp)
return g_strdup(q->chr_name);
}
-static void qtest_class_init(ObjectClass *oc, void *data)
+static void qtest_class_init(ObjectClass *oc, const void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -1022,7 +1012,7 @@ static const TypeInfo qtest_info = {
.parent = TYPE_OBJECT,
.class_init = qtest_class_init,
.instance_size = sizeof(QTest),
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c
new file mode 100644
index 0000000..68e8a02
--- /dev/null
+++ b/system/ram-block-attributes.c
@@ -0,0 +1,444 @@
+/*
+ * QEMU ram block attributes
+ *
+ * Copyright Intel
+ *
+ * Author:
+ * Chenyi Qiang <chenyi.qiang@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/ramblock.h"
+#include "trace.h"
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
+ ram_block_attributes,
+ RAM_BLOCK_ATTRIBUTES,
+ OBJECT,
+ { TYPE_RAM_DISCARD_MANAGER },
+ { })
+
+/*
+ * Return the granularity, in bytes, at which @attr tracks memory state.
+ *
+ * Page conversions may be requested with a size or alignment as small as 4K,
+ * so the page size of the RAM block is used as the tracking unit. That page
+ * size is asserted to be the real host page size.
+ */
+static size_t
+ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
+{
+    const RAMBlock *rb;
+
+    g_assert(attr && attr->ram_block);
+    rb = attr->ram_block;
+    g_assert(rb->page_size == qemu_real_host_page_size());
+
+    return rb->page_size;
+}
+
+
+/*
+ * RamDiscardManager is_populated hook: report whether every block covered by
+ * @section has its bit set in the attribute bitmap.
+ */
+static bool
+ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
+                                      const MemoryRegionSection *section)
+{
+    const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    const uint64_t nr_blocks = int128_get64(section->size) / block_size;
+    const uint64_t first_bit = section->offset_within_region / block_size;
+    const uint64_t last_bit = first_bit + nr_blocks - 1;
+    unsigned long first_zero;
+
+    /* The search stops at last_bit + 1, which is returned if no zero exists. */
+    first_zero = find_next_zero_bit(attr->bitmap, last_bit + 1, first_bit);
+
+    return first_zero > last_bit;
+}
+
+/*
+ * Callback invoked by the for_each_{populated,discarded}_section() walkers
+ * for each sub-section they find. @arg is the opaque pointer handed to the
+ * walker; a non-zero return value stops the walk.
+ */
+typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
+                                               void *arg);
+
+/*
+ * Section callback: hand @section to the notify_populate() hook of the
+ * RamDiscardListener passed in @arg and propagate its result.
+ */
+static int
+ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
+                                        void *arg)
+{
+    RamDiscardListener *listener = arg;
+    int ret = listener->notify_populate(listener, section);
+
+    return ret;
+}
+
+/*
+ * Section callback: hand @section to the notify_discard() hook of the
+ * RamDiscardListener passed in @arg. Always returns 0.
+ */
+static int
+ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
+                                       void *arg)
+{
+    RamDiscardListener *listener = arg;
+
+    listener->notify_discard(listener, section);
+
+    return 0;
+}
+
+/*
+ * Walk the runs of set (populated) bits starting at the block that contains
+ * @section->offset_within_region and call @cb(sub-section, @arg) for the part
+ * of each run that intersects @section.
+ *
+ * The walk stops at the first run that no longer intersects @section, or when
+ * @cb returns non-zero (which is also reported with error_report()).
+ *
+ * Returns 0, or the non-zero value returned by @cb.
+ */
+static int
+ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
+                                                MemoryRegionSection *section,
+                                                void *arg,
+                                                ram_block_attributes_section_cb cb)
+{
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    unsigned long run_start, run_end;
+    int ret = 0;
+
+    for (run_start = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                   section->offset_within_region / block_size);
+         run_start < attr->bitmap_size;
+         run_start = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                   run_end + 2)) {
+        MemoryRegionSection tmp = *section;
+        uint64_t offset, size;
+
+        /* The run ends right before the next clear bit (or the bitmap end). */
+        run_end = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+                                     run_start + 1) - 1;
+        offset = run_start * block_size;
+        size = (run_end - run_start + 1) * block_size;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            break;
+        }
+
+        ret = cb(&tmp, arg);
+        if (ret) {
+            error_report("%s: Failed to notify RAM discard listener: %s",
+                         __func__, strerror(-ret));
+            break;
+        }
+
+        /*
+         * Bit run_end + 1 is known to be clear (or past the end), so the
+         * next search resumes at run_end + 2.
+         */
+    }
+
+    return ret;
+}
+
+/*
+ * Walk the runs of clear (discarded) bits starting at the block that contains
+ * @section->offset_within_region and call @cb(sub-section, @arg) for the part
+ * of each run that intersects @section.
+ *
+ * The walk stops at the first run that no longer intersects @section, or when
+ * @cb returns non-zero (which is also reported with error_report()).
+ *
+ * Returns 0, or the non-zero value returned by @cb.
+ */
+static int
+ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
+                                                MemoryRegionSection *section,
+                                                void *arg,
+                                                ram_block_attributes_section_cb cb)
+{
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    unsigned long run_start, run_end;
+    int ret = 0;
+
+    for (run_start = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+                                        section->offset_within_region /
+                                        block_size);
+         run_start < attr->bitmap_size;
+         run_start = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+                                        run_end + 2)) {
+        MemoryRegionSection tmp = *section;
+        uint64_t offset, size;
+
+        /* The run ends right before the next set bit (or the bitmap end). */
+        run_end = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                run_start + 1) - 1;
+        offset = run_start * block_size;
+        size = (run_end - run_start + 1) * block_size;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            break;
+        }
+
+        ret = cb(&tmp, arg);
+        if (ret) {
+            error_report("%s: Failed to notify RAM discard listener: %s",
+                         __func__, strerror(-ret));
+            break;
+        }
+
+        /*
+         * Bit run_end + 1 is known to be set (or past the end), so the next
+         * search resumes at run_end + 2.
+         */
+    }
+
+    return ret;
+}
+
+/*
+ * RamDiscardManager get_min_granularity hook: the tracking block size.
+ * @mr must be the memory region of the RAM block this object is attached to.
+ */
+static uint64_t
+ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
+                                             const MemoryRegion *mr)
+{
+    const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    uint64_t granularity;
+
+    g_assert(mr == attr->ram_block->mr);
+    granularity = ram_block_attributes_get_block_size(attr);
+
+    return granularity;
+}
+
+/*
+ * RamDiscardManager register_listener hook.
+ *
+ * Stores a copy of @section in @rdl, links @rdl into the listener list and
+ * then replays every currently populated range of @section to it through
+ * notify_populate(). A failing notification terminates the process.
+ */
+static void
+ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
+                                           RamDiscardListener *rdl,
+                                           MemoryRegionSection *section)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    int err;
+
+    g_assert(section->mr == attr->ram_block->mr);
+
+    rdl->section = memory_region_section_new_copy(section);
+    QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
+
+    err = ram_block_attributes_for_each_populated_section(attr, section, rdl,
+                                    ram_block_attributes_notify_populate_cb);
+    if (!err) {
+        return;
+    }
+
+    error_report("%s: Failed to register RAM discard listener: %s",
+                 __func__, strerror(-err));
+    exit(1);
+}
+
+/*
+ * RamDiscardManager unregister_listener hook.
+ *
+ * Tells @rdl that its section is going away: listeners that tolerate double
+ * discards get one notify_discard() for the whole section, all others get
+ * one per populated range. Afterwards the section copy is freed and @rdl is
+ * unlinked. A failing walk terminates the process.
+ */
+static void
+ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
+                                             RamDiscardListener *rdl)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    int err;
+
+    g_assert(rdl->section);
+    g_assert(rdl->section->mr == attr->ram_block->mr);
+
+    if (!rdl->double_discard_supported) {
+        err = ram_block_attributes_for_each_populated_section(attr,
+                  rdl->section, rdl, ram_block_attributes_notify_discard_cb);
+        if (err) {
+            error_report("%s: Failed to unregister RAM discard listener: %s",
+                         __func__, strerror(-err));
+            exit(1);
+        }
+    } else {
+        rdl->notify_discard(rdl, rdl->section);
+    }
+
+    memory_region_section_free_copy(rdl->section);
+    rdl->section = NULL;
+    QLIST_REMOVE(rdl, next);
+}
+
+/*
+ * Bundles a replay function with its opaque argument so that both can be
+ * passed through the single void pointer of a section callback.
+ */
+typedef struct RamBlockAttributesReplayData {
+    /* Replay function supplied by the caller of replay_{populated,discarded} */
+    ReplayRamDiscardState fn;
+    /* Opaque pointer passed back to @fn unchanged */
+    void *opaque;
+} RamBlockAttributesReplayData;
+
+/*
+ * Section callback: unpack the RamBlockAttributesReplayData in @arg and call
+ * its replay function on @section, propagating the result.
+ */
+static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
+                                              void *arg)
+{
+    const RamBlockAttributesReplayData *replay = arg;
+    int ret = replay->fn(section, replay->opaque);
+
+    return ret;
+}
+
+/*
+ * RamDiscardManager replay_populated hook: call @replay_fn(sub-section,
+ * @opaque) for the populated ranges of @section.
+ *
+ * Returns 0, or the first non-zero value returned by @replay_fn.
+ */
+static int
+ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscardState replay_fn,
+                                          void *opaque)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    RamBlockAttributesReplayData replay = {
+        .fn = replay_fn,
+        .opaque = opaque,
+    };
+    int ret;
+
+    g_assert(section->mr == attr->ram_block->mr);
+    ret = ram_block_attributes_for_each_populated_section(attr, section,
+                                                          &replay,
+                                    ram_block_attributes_rdm_replay_cb);
+    return ret;
+}
+
+/*
+ * RamDiscardManager replay_discarded hook: call @replay_fn(sub-section,
+ * @opaque) for the discarded ranges of @section.
+ *
+ * Returns 0, or the first non-zero value returned by @replay_fn.
+ */
+static int
+ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscardState replay_fn,
+                                          void *opaque)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    RamBlockAttributesReplayData replay = {
+        .fn = replay_fn,
+        .opaque = opaque,
+    };
+    int ret;
+
+    g_assert(section->mr == attr->ram_block->mr);
+    ret = ram_block_attributes_for_each_discarded_section(attr, section,
+                                                          &replay,
+                                    ram_block_attributes_rdm_replay_cb);
+    return ret;
+}
+
+/*
+ * Check that [@offset, @offset + @size) is a range a state change may act on:
+ * both values are multiples of the block size, the range is neither empty nor
+ * wrapping around, and it ends within the RAM block's memory region.
+ */
+static bool
+ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
+                                    uint64_t size)
+{
+    MemoryRegion *mr = attr->ram_block->mr;
+
+    g_assert(mr);
+
+    const uint64_t region_size = memory_region_size(mr);
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    const uint64_t end = offset + size;
+    const bool aligned = QEMU_IS_ALIGNED(offset, block_size) &&
+                         QEMU_IS_ALIGNED(size, block_size);
+
+    /* end <= offset catches both a zero size and unsigned wrap-around. */
+    return aligned && end > offset && end <= region_size;
+}
+
+/*
+ * Call notify_discard() on every registered listener whose section overlaps
+ * [@offset, @offset + @size), passing only the overlapping part.
+ */
+static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
+                                                uint64_t offset,
+                                                uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+        MemoryRegionSection overlap = *rdl->section;
+
+        if (memory_region_section_intersect_range(&overlap, offset, size)) {
+            rdl->notify_discard(rdl, &overlap);
+        }
+    }
+}
+
+/*
+ * Call notify_populate() on every registered listener whose section overlaps
+ * [@offset, @offset + @size), passing only the overlapping part.
+ *
+ * Stops at the first listener that fails and returns its error; listeners
+ * notified before that point are not rolled back here. Returns 0 otherwise.
+ */
+static int
+ram_block_attributes_notify_populate(RamBlockAttributes *attr,
+                                     uint64_t offset, uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+        MemoryRegionSection overlap = *rdl->section;
+        int err;
+
+        if (!memory_region_section_intersect_range(&overlap, offset, size)) {
+            continue;
+        }
+
+        err = rdl->notify_populate(rdl, &overlap);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Switch the blocks in [@offset, @offset + @size) to discarded (private) when
+ * @to_discard is true, or to populated (shared) otherwise, and notify the
+ * registered listeners about the blocks whose state actually changed.
+ *
+ * A range that is already entirely in the target state is a no-op. A range
+ * entirely in the opposite state is converted with one bitmap update and one
+ * notification. A mixed range is processed block by block.
+ *
+ * Returns 0 on success, -EINVAL if the range is not valid, or the error of
+ * the first listener that failed to populate. On such a failure the bitmap
+ * bits set so far are left set.
+ */
+int ram_block_attributes_state_change(RamBlockAttributes *attr,
+                                      uint64_t offset, uint64_t size,
+                                      bool to_discard)
+{
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    const unsigned long first_bit = offset / block_size;
+    const unsigned long nbits = size / block_size;
+    const unsigned long last_bit = first_bit + nbits - 1;
+    const bool all_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                             first_bit) > last_bit;
+    const bool all_populated = find_next_zero_bit(attr->bitmap,
+                                                  attr->bitmap_size,
+                                                  first_bit) > last_bit;
+    const char *old_state, *new_state;
+    unsigned long bit;
+    int ret = 0;
+
+    if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
+        error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
+                     "0x%" PRIx64, __func__, offset, size);
+        return -EINVAL;
+    }
+
+    if (all_discarded) {
+        old_state = "discarded";
+    } else if (all_populated) {
+        old_state = "populated";
+    } else {
+        old_state = "mixture";
+    }
+    new_state = to_discard ? "discarded" : "populated";
+    trace_ram_block_attributes_state_change(offset, size, old_state,
+                                            new_state);
+
+    if (to_discard) {
+        if (all_discarded) {
+            /* Already private */
+            return 0;
+        }
+        if (all_populated) {
+            /* Completely shared */
+            bitmap_clear(attr->bitmap, first_bit, nbits);
+            ram_block_attributes_notify_discard(attr, offset, size);
+            return 0;
+        }
+        /* Unexpected mixture: process individual blocks */
+        for (bit = first_bit; bit < first_bit + nbits; bit++) {
+            if (test_bit(bit, attr->bitmap)) {
+                clear_bit(bit, attr->bitmap);
+                ram_block_attributes_notify_discard(attr, bit * block_size,
+                                                    block_size);
+            }
+        }
+        return 0;
+    }
+
+    if (all_populated) {
+        /* Already shared */
+        return 0;
+    }
+    if (all_discarded) {
+        /* Completely private */
+        bitmap_set(attr->bitmap, first_bit, nbits);
+        return ram_block_attributes_notify_populate(attr, offset, size);
+    }
+    /* Unexpected mixture: process individual blocks */
+    for (bit = first_bit; bit < first_bit + nbits; bit++) {
+        if (test_bit(bit, attr->bitmap)) {
+            continue;
+        }
+        set_bit(bit, attr->bitmap);
+        ret = ram_block_attributes_notify_populate(attr, bit * block_size,
+                                                   block_size);
+        if (ret) {
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Create a RamBlockAttributes object for @ram_block and install it as the
+ * RAM discard manager of the block's memory region.
+ *
+ * The bitmap holds one bit per host page of the region and comes from
+ * bitmap_new().
+ *
+ * Returns the new object, or NULL if the region refused the discard manager
+ * (the object is released in that case).
+ */
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
+{
+    const int block_size = qemu_real_host_page_size();
+    MemoryRegion *mr = ram_block->mr;
+    RamBlockAttributes *attr =
+        RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
+
+    attr->ram_block = ram_block;
+
+    if (memory_region_set_ram_discard_manager(mr,
+                                              RAM_DISCARD_MANAGER(attr)) != 0) {
+        object_unref(OBJECT(attr));
+        return NULL;
+    }
+
+    attr->bitmap_size =
+        ROUND_UP(int128_get64(mr->size), block_size) / block_size;
+    attr->bitmap = bitmap_new(attr->bitmap_size);
+
+    return attr;
+}
+
+/*
+ * Tear down an object returned by ram_block_attributes_create(): free the
+ * bitmap, detach the object from the memory region as its RAM discard
+ * manager, and drop the reference. @attr must not be NULL.
+ */
+void ram_block_attributes_destroy(RamBlockAttributes *attr)
+{
+    MemoryRegion *mr;
+
+    g_assert(attr);
+    mr = attr->ram_block->mr;
+
+    g_free(attr->bitmap);
+    memory_region_set_ram_discard_manager(mr, NULL);
+    object_unref(OBJECT(attr));
+}
+
+/* QOM instance init: start with an empty list of RAM discard listeners. */
+static void ram_block_attributes_init(Object *obj)
+{
+    QLIST_INIT(&RAM_BLOCK_ATTRIBUTES(obj)->rdl_list);
+}
+
+/*
+ * QOM instance finalize: intentionally empty. The bitmap is freed by
+ * ram_block_attributes_destroy() before it drops its reference.
+ */
+static void ram_block_attributes_finalize(Object *obj)
+{
+}
+
+/*
+ * QOM class init: install this file's implementations of the
+ * RamDiscardManager interface methods.
+ */
+static void ram_block_attributes_class_init(ObjectClass *klass,
+                                            const void *data)
+{
+    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+
+    /* State queries */
+    rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;
+    rdmc->is_populated = ram_block_attributes_rdm_is_populated;
+
+    /* Listener management */
+    rdmc->register_listener = ram_block_attributes_rdm_register_listener;
+    rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;
+
+    /* Replay of the current state */
+    rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
+    rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
+}
diff --git a/system/rtc.c b/system/rtc.c
index dc44576..5695128 100644
--- a/system/rtc.c
+++ b/system/rtc.c
@@ -29,9 +29,9 @@
#include "qemu/option.h"
#include "qemu/timer.h"
#include "qom/object.h"
-#include "sysemu/replay.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/rtc.h"
+#include "system/replay.h"
+#include "system/system.h"
+#include "system/rtc.h"
#include "hw/rtc/mc146818rtc.h"
static enum {
@@ -62,7 +62,7 @@ static time_t qemu_ref_timedate(QEMUClockType clock)
}
break;
default:
- assert(0);
+ g_assert_not_reached();
}
return value;
}
diff --git a/system/runstate-action.c b/system/runstate-action.c
index ae0761a..f912bc8 100644
--- a/system/runstate-action.c
+++ b/system/runstate-action.c
@@ -7,8 +7,8 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/runstate-action.h"
-#include "sysemu/watchdog.h"
+#include "system/runstate-action.h"
+#include "system/watchdog.h"
#include "qemu/config-file.h"
#include "qapi/error.h"
#include "qemu/option_int.h"
diff --git a/system/runstate-hmp-cmds.c b/system/runstate-hmp-cmds.c
index 2df670f..be1d676 100644
--- a/system/runstate-hmp-cmds.c
+++ b/system/runstate-hmp-cmds.c
@@ -19,7 +19,7 @@
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
#include "qemu/accel.h"
void hmp_info_status(Monitor *mon, const QDict *qdict)
diff --git a/system/runstate.c b/system/runstate.c
index ec32e27..38900c9 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -32,6 +32,7 @@
#include "exec/cpu-common.h"
#include "gdbstub/syscalls.h"
#include "hw/boards.h"
+#include "hw/resettable.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
@@ -50,14 +51,14 @@
#include "qemu/thread.h"
#include "qom/object.h"
#include "qom/object_interfaces.h"
-#include "sysemu/cpus.h"
-#include "sysemu/qtest.h"
-#include "sysemu/replay.h"
-#include "sysemu/reset.h"
-#include "sysemu/runstate.h"
-#include "sysemu/runstate-action.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/tpm.h"
+#include "system/cpus.h"
+#include "system/qtest.h"
+#include "system/replay.h"
+#include "system/reset.h"
+#include "system/runstate.h"
+#include "system/runstate-action.h"
+#include "system/system.h"
+#include "system/tpm.h"
#include "trace.h"
static NotifierList exit_notifiers =
@@ -181,6 +182,12 @@ static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE__MAX, RUN_STATE__MAX },
};
+static const RunStateTransition replay_play_runstate_transitions_def[] = {
+ { RUN_STATE_SHUTDOWN, RUN_STATE_RUNNING},
+
+ { RUN_STATE__MAX, RUN_STATE__MAX },
+};
+
static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
bool runstate_check(RunState state)
@@ -188,14 +195,33 @@ bool runstate_check(RunState state)
return current_run_state == state;
}
-static void runstate_init(void)
+static void transitions_set_valid(const RunStateTransition *rst)
{
const RunStateTransition *p;
- memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
- for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
+ for (p = rst; p->from != RUN_STATE__MAX; p++) {
runstate_valid_transitions[p->from][p->to] = true;
}
+}
+
+/*
+ * Enable the extra run state transitions needed by record/replay.
+ * Must only be called once a replay mode has been selected.
+ */
+void runstate_replay_enable(void)
+{
+    assert(replay_mode != REPLAY_MODE_NONE);
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        return;
+    }
+
+    /*
+     * When reverse-debugging, it is possible to move state from
+     * shutdown to running.
+     */
+    transitions_set_valid(replay_play_runstate_transitions_def);
+}
+
+static void runstate_init(void)
+{
+ memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
+
+ transitions_set_valid(&runstate_transitions_def[0]);
qemu_mutex_init(&vmstop_lock);
}
@@ -271,6 +297,7 @@ void qemu_system_vmstop_request(RunState state)
struct VMChangeStateEntry {
VMChangeStateHandler *cb;
VMChangeStateHandler *prepare_cb;
+ VMChangeStateHandlerWithRet *cb_ret;
void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority;
@@ -294,14 +321,15 @@ static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority)
{
- return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
- priority);
+ return qemu_add_vm_change_state_handler_prio_full(cb, NULL, NULL,
+ opaque, priority);
}
/**
* qemu_add_vm_change_state_handler_prio_full:
* @cb: the main callback to invoke
* @prepare_cb: a callback to invoke before the main callback
+ * @cb_ret: the main callback to invoke with return value
* @opaque: user data passed to the callbacks
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
@@ -318,6 +346,7 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateEntry *
qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
VMChangeStateHandler *prepare_cb,
+ VMChangeStateHandlerWithRet *cb_ret,
void *opaque, int priority)
{
VMChangeStateEntry *e;
@@ -326,6 +355,7 @@ qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
e = g_malloc0(sizeof(*e));
e->cb = cb;
e->prepare_cb = prepare_cb;
+ e->cb_ret = cb_ret;
e->opaque = opaque;
e->priority = priority;
@@ -353,9 +383,10 @@ void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
g_free(e);
}
-void vm_state_notify(bool running, RunState state)
+int vm_state_notify(bool running, RunState state)
{
VMChangeStateEntry *e, *next;
+ int ret = 0;
trace_vm_state_notify(running, state, RunState_str(state));
@@ -367,7 +398,17 @@ void vm_state_notify(bool running, RunState state)
}
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
- e->cb(e->opaque, running, state);
+ if (e->cb) {
+ e->cb(e->opaque, running, state);
+ } else if (e->cb_ret) {
+                /*
+                 * The return value of cb_ret is deliberately ignored here:
+                 * only a failure while stopping the device during VM live
+                 * migration matters, because it indicates whether the
+                 * connection between QEMU and the backend is healthy.
+                 */
+ e->cb_ret(e->opaque, running, state);
+ }
}
} else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
@@ -377,9 +418,19 @@ void vm_state_notify(bool running, RunState state)
}
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
- e->cb(e->opaque, running, state);
+ if (e->cb) {
+ e->cb(e->opaque, running, state);
+ } else if (e->cb_ret) {
+ /*
+ * We should execute all registered callbacks even if
+ * one of them returns failure, otherwise, some cleanup
+ * work of the device will be skipped.
+ */
+ ret |= e->cb_ret(e->opaque, running, state);
+ }
}
}
+ return ret;
}
static ShutdownCause reset_requested;
@@ -482,15 +533,23 @@ static int qemu_debug_requested(void)
void qemu_system_reset(ShutdownCause reason)
{
MachineClass *mc;
+ ResetType type;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
cpu_synchronize_all_states();
+ switch (reason) {
+ case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
+ type = RESET_TYPE_SNAPSHOT_LOAD;
+ break;
+ default:
+ type = RESET_TYPE_COLD;
+ }
if (mc && mc->reset) {
- mc->reset(current_machine, reason);
+ mc->reset(current_machine, type);
} else {
- qemu_devices_reset(reason);
+ qemu_devices_reset(type);
}
switch (reason) {
case SHUTDOWN_CAUSE_NONE:
@@ -531,6 +590,58 @@ static void qemu_system_wakeup(void)
}
}
+/*
+ * Convert the panic @message reported by a TDX guest into a string that is
+ * safe to log.
+ *
+ * Although the message is defined as a JSON string, it is generated by the
+ * guest and therefore not trusted. The caller guarantees that a non-NULL
+ * @message is NUL-terminated.
+ *
+ *  - NULL or empty message: returns an empty string.
+ *  - Only printable ASCII (0x20..0x7e): returns a copy of the message.
+ *  - Anything else: returns a hex dump, two lowercase hex digits per byte,
+ *    bytes separated by a single space.
+ *
+ * Returns a newly allocated string that the caller must release with
+ * g_free().
+ */
+static char *tdx_parse_panic_message(char *message)
+{
+    static const char hex_digits[] = "0123456789abcdef";
+    size_t len, i;
+    char *buf;
+
+    if (!message || message[0] == '\0') {
+        return g_strdup("");
+    }
+
+    len = strlen(message);
+
+    /* Find the first byte outside the printable ASCII range, if any. */
+    for (i = 0; i < len; i++) {
+        if (message[i] < 0x20 || message[i] > 0x7e) {
+            break;
+        }
+    }
+    if (i == len) {
+        return g_strdup(message);
+    }
+
+    /*
+     * Each byte takes three characters: two hex digits plus a separator.
+     * The separator slot of the last byte holds the terminating NUL, so
+     * len * 3 bytes are exactly enough.
+     */
+    buf = g_malloc(len * 3);
+    for (i = 0; i < len; i++) {
+        /*
+         * Go through unsigned char: where plain char is signed, a byte
+         * >= 0x80 would otherwise sign-extend to more than two hex digits.
+         */
+        const unsigned char byte = (unsigned char)message[i];
+
+        buf[3 * i] = hex_digits[byte >> 4];
+        buf[3 * i + 1] = hex_digits[byte & 0xf];
+        buf[3 * i + 2] = (i + 1 < len) ? ' ' : '\0';
+    }
+
+    return buf;
+}
+
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -572,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
+ } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
+ char *message = tdx_parse_panic_message(info->u.tdx.message);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "\nTDX guest reports fatal error."
+ " error code: 0x%" PRIx32 " error message:\"%s\"\n",
+ info->u.tdx.error_code, message);
+ g_free(message);
+ if (info->u.tdx.gpa != -1ull) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
+ "can be found at gpa page: 0x%" PRIx64 "\n",
+ info->u.tdx.gpa);
+ }
}
+
qapi_free_GuestPanicInformation(info);
}
}
@@ -584,6 +708,12 @@ void qemu_system_guest_crashloaded(GuestPanicInformation *info)
qapi_free_GuestPanicInformation(info);
}
+/*
+ * Handle a paravirtual shutdown: send the guest-pvshutdown QAPI event, then
+ * request a system shutdown with cause SHUTDOWN_CAUSE_GUEST_SHUTDOWN.
+ */
+void qemu_system_guest_pvshutdown(void)
+{
+    qapi_event_send_guest_pvshutdown();
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+
void qemu_system_reset_request(ShutdownCause reason)
{
if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
@@ -810,6 +940,7 @@ void qemu_remove_exit_notifier(Notifier *notify)
static void qemu_run_exit_notifiers(void)
{
+ BQL_LOCK_GUARD();
notifier_list_notify(&exit_notifiers, NULL);
}
diff --git a/system/tpm.c b/system/tpm.c
index 7164ea7..8df0f6e 100644
--- a/system/tpm.c
+++ b/system/tpm.c
@@ -17,8 +17,8 @@
#include "qapi/error.h"
#include "qapi/qapi-commands-tpm.h"
#include "qapi/qmp/qerror.h"
-#include "sysemu/tpm_backend.h"
-#include "sysemu/tpm.h"
+#include "system/tpm_backend.h"
+#include "system/tpm.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
diff --git a/system/trace-events b/system/trace-events
index 69c9044..82856e4 100644
--- a/system/trace-events
+++ b/system/trace-events
@@ -4,6 +4,13 @@
# Since requests are raised via monitor, not many tracepoints are needed.
balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
+# dma-helpers.c
+dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
+dma_aio_cancel(void *dbs) "dbs=%p"
+dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
+dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
+dma_map_wait(void *dbs) "dbs=%p"
+
# ioport.c
cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
@@ -21,6 +28,13 @@ flatview_destroy(void *view, void *root) "%p (root %p)"
flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32
+# physmem.c
+address_space_map(void *as, uint64_t addr, uint64_t len, bool is_write, uint32_t attrs) "as:%p addr 0x%"PRIx64":%"PRIx64" write:%d attrs:0x%x"
+find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64
+find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64
+ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d"
+qemu_ram_alloc_shared(const char *name, size_t size, size_t max_size, int fd, void *host) "%s size %zu max_size %zu fd %d host %p"
+
# cpus.c
vm_stop_flush_all(int ret) "ret %d"
@@ -38,3 +52,6 @@ dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+
+# ram-block-attributes.c
+ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'"
diff --git a/system/vl.c b/system/vl.c
index cfcb674..3b7057e 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -26,25 +26,28 @@
#include "qemu/help-texts.h"
#include "qemu/datadir.h"
#include "qemu/units.h"
+#include "qemu/module.h"
+#include "qemu/target-info.h"
#include "exec/cpu-common.h"
#include "exec/page-vary.h"
#include "hw/qdev-properties.h"
#include "qapi/compat-policy.h"
#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi/qmp/qjson.h"
+#include "qobject/qdict.h"
+#include "qobject/qstring.h"
+#include "qobject/qjson.h"
#include "qemu-version.h"
#include "qemu/cutils.h"
#include "qemu/help_option.h"
#include "qemu/hw-version.h"
#include "qemu/uuid.h"
-#include "sysemu/reset.h"
-#include "sysemu/runstate.h"
-#include "sysemu/runstate-action.h"
-#include "sysemu/seccomp.h"
-#include "sysemu/tcg.h"
-#include "sysemu/xen.h"
+#include "qemu/target-info.h"
+#include "system/reset.h"
+#include "system/runstate.h"
+#include "system/runstate-action.h"
+#include "system/seccomp.h"
+#include "system/tcg.h"
+#include "system/xen.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
@@ -53,6 +56,7 @@
#include "hw/usb.h"
#include "hw/isa/isa.h"
#include "hw/scsi/scsi.h"
+#include "hw/sd/sd.h"
#include "hw/display/vga.h"
#include "hw/firmware/smbios.h"
#include "hw/acpi/acpi.h"
@@ -64,30 +68,32 @@
#include "monitor/monitor.h"
#include "ui/console.h"
#include "ui/input.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/numa.h"
-#include "sysemu/hostmem.h"
+#include "system/system.h"
+#include "system/numa.h"
+#include "system/hostmem.h"
#include "exec/gdbstub.h"
#include "gdbstub/enums.h"
#include "qemu/timer.h"
#include "chardev/char.h"
#include "qemu/bitmap.h"
#include "qemu/log.h"
-#include "sysemu/blockdev.h"
+#include "system/blockdev.h"
#include "hw/block/block.h"
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
+#include "migration/cpr.h"
#include "migration/misc.h"
#include "migration/snapshot.h"
-#include "sysemu/tpm.h"
-#include "sysemu/dma.h"
+#include "system/tpm.h"
+#include "system/dma.h"
#include "hw/audio/soundhw.h"
#include "audio/audio.h"
-#include "sysemu/cpus.h"
-#include "sysemu/cpu-timers.h"
+#include "system/cpus.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "migration/colo.h"
#include "migration/postcopy-ram.h"
-#include "sysemu/kvm.h"
+#include "system/kvm.h"
#include "qapi/qobject-input-visitor.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
@@ -95,7 +101,7 @@
#ifdef CONFIG_VIRTFS
#include "fsdev/qemu-fsdev.h"
#endif
-#include "sysemu/qtest.h"
+#include "system/qtest.h"
#ifdef CONFIG_TCG
#include "tcg/perf.h"
#endif
@@ -106,8 +112,8 @@
#include "trace/control.h"
#include "qemu/plugin.h"
#include "qemu/queue.h"
-#include "sysemu/arch_init.h"
-#include "exec/confidential-guest-support.h"
+#include "system/arch_init.h"
+#include "system/confidential-guest-support.h"
#include "ui/qemu-spice.h"
#include "qapi/string-input-visitor.h"
@@ -116,13 +122,14 @@
#include "qom/object_interfaces.h"
#include "semihosting/semihost.h"
#include "crypto/init.h"
-#include "sysemu/replay.h"
+#include "system/replay.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qapi-types-audio.h"
#include "qapi/qapi-visit-audio.h"
#include "qapi/qapi-visit-block-core.h"
#include "qapi/qapi-visit-compat.h"
#include "qapi/qapi-visit-machine.h"
+#include "qapi/qapi-visit-migration.h"
#include "qapi/qapi-visit-ui.h"
#include "qapi/qapi-commands-block-core.h"
#include "qapi/qapi-commands-migration.h"
@@ -131,7 +138,7 @@
#include "qapi/qapi-commands-ui.h"
#include "block/qdict.h"
#include "qapi/qmp/qerror.h"
-#include "sysemu/iothread.h"
+#include "system/iothread.h"
#include "qemu/guest-random.h"
#include "qemu/keyval.h"
@@ -159,6 +166,8 @@ typedef struct DeviceOption {
static const char *cpu_option;
static const char *mem_path;
static const char *incoming;
+static const char *incoming_str[MIGRATION_CHANNEL_TYPE__MAX];
+static MigrationChannel *incoming_channels[MIGRATION_CHANNEL_TYPE__MAX];
static const char *loadvm;
static const char *accelerators;
static bool have_custom_ram_size;
@@ -190,7 +199,7 @@ static int default_parallel = 1;
static int default_monitor = 1;
static int default_floppy = 1;
static int default_cdrom = 1;
-static int default_sdcard = 1;
+static bool auto_create_sdcard = true;
static int default_vga = 1;
static int default_net = 1;
@@ -347,7 +356,7 @@ static QemuOptsList qemu_overcommit_opts = {
.desc = {
{
.name = "mem-lock",
- .type = QEMU_OPT_BOOL,
+ .type = QEMU_OPT_STRING,
},
{
.name = "cpu-pm",
@@ -714,7 +723,7 @@ static void configure_blockdev(BlockdevOptionsQueue *bdo_queue,
default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2,
CDROM_OPTS);
default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
- default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
+ default_drive(auto_create_sdcard, snapshot, IF_SD, 0, SD_OPTS);
}
@@ -759,7 +768,7 @@ static QemuOptsList qemu_smp_opts = {
},
};
-#if defined(CONFIG_POSIX)
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
static QemuOptsList qemu_run_with_opts = {
.name = "run-with",
.head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
@@ -792,8 +801,8 @@ static QemuOptsList qemu_run_with_opts = {
static void realtime_init(void)
{
- if (enable_mlock) {
- if (os_mlock() < 0) {
+ if (should_mlock(mlock_state)) {
+ if (os_mlock(is_mlock_on_fault(mlock_state)) < 0) {
error_report("locking memory failed");
exit(1);
}
@@ -811,29 +820,15 @@ static void configure_msg(QemuOpts *opts)
/***********************************************************/
/* USB devices */
-static int usb_device_add(const char *devname)
+static bool usb_parse(const char *cmdline, Error **errp)
{
- USBDevice *dev = NULL;
-
- if (!machine_usb(current_machine)) {
- return -1;
- }
-
- dev = usbdevice_create(devname);
- if (!dev)
- return -1;
-
- return 0;
-}
+ g_assert(machine_usb(current_machine));
-static int usb_parse(const char *cmdline)
-{
- int r;
- r = usb_device_add(cmdline);
- if (r < 0) {
- error_report("could not add USB device '%s'", cmdline);
+ if (!usbdevice_create(cmdline)) {
+ error_setg(errp, "could not add USB device '%s'", cmdline);
+ return false;
}
- return r;
+ return true;
}
/***********************************************************/
@@ -885,11 +880,11 @@ static void help(int exitcode)
g_get_prgname());
#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
- if ((arch_mask) & arch_type) \
+ if (qemu_arch_available(arch_mask)) \
fputs(opt_help, stdout);
#define ARCHHEADING(text, arch_mask) \
- if ((arch_mask) & arch_type) \
+ if (qemu_arch_available(arch_mask)) \
puts(stringify(text));
#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
@@ -1000,9 +995,16 @@ static bool vga_interface_available(VGAInterfaceType t)
const VGAInterfaceInfo *ti = &vga_interfaces[t];
assert(t < VGA_TYPE_MAX);
- return !ti->class_names[0] ||
- module_object_class_by_name(ti->class_names[0]) ||
- module_object_class_by_name(ti->class_names[1]);
+
+ if (!ti->class_names[0] || module_object_class_by_name(ti->class_names[0])) {
+ return true;
+ }
+
+ if (ti->class_names[1] && module_object_class_by_name(ti->class_names[1])) {
+ return true;
+ }
+
+ return false;
}
static const char *
@@ -1177,7 +1179,8 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp)
size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */
buf = g_memdup(str, size);
} else if (nonempty_str(gen_id)) {
- if (!fw_cfg_add_from_generator(fw_cfg, name, gen_id, errp)) {
+ if (!fw_cfg_add_file_from_generator(fw_cfg, object_get_objects_root(),
+ gen_id, name, errp)) {
return -1;
}
return 0;
@@ -1189,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp)
return -1;
}
}
- /* For legacy, keep user files in a specific global order. */
- fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER);
fw_cfg_add_file(fw_cfg, name, buf, size);
- fw_cfg_reset_order_override(fw_cfg);
return 0;
}
@@ -1299,22 +1299,27 @@ static void add_device_config(int type, const char *cmdline)
QTAILQ_INSERT_TAIL(&device_configs, conf, next);
}
-static int foreach_device_config(int type, int (*func)(const char *cmdline))
+/**
+ * foreach_device_config_or_exit(): process per-device configs
+ * @type: device_config type
+ * @func: device specific config function, returning pass/fail
+ *
+ * @func is called with the &error_fatal handler so device specific
+ * error messages can be reported on failure.
+ */
+static void foreach_device_config_or_exit(int type,
+ bool (*func)(const char *cmdline,
+ Error **errp))
{
struct device_config *conf;
- int rc;
QTAILQ_FOREACH(conf, &device_configs, next) {
if (conf->type != type)
continue;
loc_push_restore(&conf->loc);
- rc = func(conf->cmdline);
+ func(conf->cmdline, &error_fatal);
loc_pop(&conf->loc);
- if (rc) {
- return rc;
- }
}
- return 0;
}
static void qemu_disable_default_devices(void)
@@ -1343,8 +1348,8 @@ static void qemu_disable_default_devices(void)
if (!has_defaults || machine_class->no_cdrom) {
default_cdrom = 0;
}
- if (!has_defaults || machine_class->no_sdcard) {
- default_sdcard = 0;
+ if (!has_defaults || !machine_class->auto_create_sdcard) {
+ auto_create_sdcard = false;
}
if (!has_defaults) {
default_audio = 0;
@@ -1444,7 +1449,7 @@ static void qemu_create_default_devices(void)
}
}
-static int serial_parse(const char *devname)
+static bool serial_parse(const char *devname, Error **errp)
{
int index = num_serial_hds;
@@ -1459,13 +1464,13 @@ static int serial_parse(const char *devname)
serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
if (!serial_hds[index]) {
- error_report("could not connect serial device"
- " to character backend '%s'", devname);
- return -1;
+ error_setg(errp, "could not connect serial device"
+ " to character backend '%s'", devname);
+ return false;
}
}
num_serial_hds++;
- return 0;
+ return true;
}
Chardev *serial_hd(int i)
@@ -1477,47 +1482,47 @@ Chardev *serial_hd(int i)
return NULL;
}
-static int parallel_parse(const char *devname)
+static bool parallel_parse(const char *devname, Error **errp)
{
static int index = 0;
char label[32];
if (strcmp(devname, "none") == 0)
- return 0;
+ return true;
if (index == MAX_PARALLEL_PORTS) {
- error_report("too many parallel ports");
- exit(1);
+ error_setg(errp, "too many parallel ports");
+ return false;
}
snprintf(label, sizeof(label), "parallel%d", index);
parallel_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
if (!parallel_hds[index]) {
- error_report("could not connect parallel device"
- " to character backend '%s'", devname);
- return -1;
+ error_setg(errp, "could not connect parallel device"
+ " to character backend '%s'", devname);
+ return false;
}
index++;
- return 0;
+ return true;
}
-static int debugcon_parse(const char *devname)
+static bool debugcon_parse(const char *devname, Error **errp)
{
QemuOpts *opts;
if (!qemu_chr_new_mux_mon("debugcon", devname, NULL)) {
- error_report("invalid character backend '%s'", devname);
- exit(1);
+ error_setg(errp, "invalid character backend '%s'", devname);
+ return false;
}
opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL);
if (!opts) {
- error_report("already have a debugcon device");
- exit(1);
+ error_setg(errp, "already have a debugcon device");
+ return false;
}
qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort);
qemu_opt_set(opts, "chardev", "debugcon", &error_abort);
- return 0;
+ return true;
}
-static gint machine_class_cmp(gconstpointer a, gconstpointer b)
+static gint machine_class_cmp(gconstpointer a, gconstpointer b, gpointer d)
{
const MachineClass *mc1 = a, *mc2 = b;
int res;
@@ -1557,7 +1562,7 @@ static void machine_help_func(const QDict *qdict)
GSList *el;
const char *type = qdict_get_try_str(qdict, "type");
- machines = object_class_get_list(TYPE_MACHINE, false);
+ machines = object_class_get_list(target_machine_typename(), false);
if (type) {
ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, machines));
if (machine_class) {
@@ -1567,7 +1572,7 @@ static void machine_help_func(const QDict *qdict)
}
printf("Supported machines are:\n");
- machines = g_slist_sort(machines, machine_class_cmp);
+ machines = g_slist_sort_with_data(machines, machine_class_cmp, NULL);
for (el = machines; el; el = el->next) {
MachineClass *mc = el->data;
if (mc->alias) {
@@ -1665,28 +1670,27 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
static MachineClass *select_machine(QDict *qdict, Error **errp)
{
+ ERRP_GUARD();
const char *machine_type = qdict_get_try_str(qdict, "type");
- GSList *machines = object_class_get_list(TYPE_MACHINE, false);
- MachineClass *machine_class;
- Error *local_err = NULL;
+ g_autoptr(GSList) machines = object_class_get_list(TYPE_MACHINE, false);
+ MachineClass *machine_class = NULL;
if (machine_type) {
machine_class = find_machine(machine_type, machines);
- qdict_del(qdict, "type");
if (!machine_class) {
- error_setg(&local_err, "unsupported machine type");
+ error_setg(errp, "unsupported machine type: \"%s\"", machine_type);
}
+ qdict_del(qdict, "type");
} else {
machine_class = find_default_machine(machines);
if (!machine_class) {
- error_setg(&local_err, "No machine specified, and there is no default");
+ error_setg(errp, "No machine specified, and there is no default");
}
}
- g_slist_free(machines);
- if (local_err) {
- error_append_hint(&local_err, "Use -machine help to list supported machines\n");
- error_propagate(errp, local_err);
+ if (!machine_class) {
+ error_append_hint(errp,
+ "Use -machine help to list supported machines\n");
}
return machine_class;
}
@@ -1815,6 +1819,30 @@ static void object_option_add_visitor(Visitor *v)
QTAILQ_INSERT_TAIL(&object_opts, opt, next);
}
+static void incoming_option_parse(const char *str)
+{
+ MigrationChannelType type = MIGRATION_CHANNEL_TYPE_MAIN;
+ MigrationChannel *channel;
+ Visitor *v;
+
+ if (!strcmp(str, "defer")) {
+ channel = NULL;
+ } else if (migrate_is_uri(str)) {
+ migrate_uri_parse(str, &channel, &error_fatal);
+ } else {
+ v = qobject_input_visitor_new_str(str, "channel-type", &error_fatal);
+ visit_type_MigrationChannel(v, NULL, &channel, &error_fatal);
+ visit_free(v);
+ type = channel->channel_type;
+ }
+
+ /* New incoming spec replaces the previous */
+ qapi_free_MigrationChannel(incoming_channels[type]);
+ incoming_channels[type] = channel;
+ incoming_str[type] = str;
+ incoming = incoming_str[MIGRATION_CHANNEL_TYPE_MAIN];
+}
+
static void object_option_parse(const char *str)
{
QemuOpts *opts;
@@ -1835,7 +1863,8 @@ static void object_option_parse(const char *str)
type = qemu_opt_get(opts, "qom-type");
if (!type) {
- error_setg(&error_fatal, QERR_MISSING_PARAMETER, "qom-type");
+ error_report(QERR_MISSING_PARAMETER, "qom-type");
+ exit(1);
}
if (user_creatable_print_help(type, opts)) {
exit(0);
@@ -1848,6 +1877,44 @@ static void object_option_parse(const char *str)
visit_free(v);
}
+static void overcommit_parse(const char *str)
+{
+ QemuOpts *opts;
+ const char *mem_lock_opt;
+
+ opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
+ str, false);
+ if (!opts) {
+ exit(1);
+ }
+
+ enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", enable_cpu_pm);
+
+ mem_lock_opt = qemu_opt_get(opts, "mem-lock");
+ if (!mem_lock_opt) {
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "on") == 0) {
+ mlock_state = MLOCK_ON;
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "off") == 0) {
+ mlock_state = MLOCK_OFF;
+ return;
+ }
+
+ if (strcmp(mem_lock_opt, "on-fault") == 0) {
+ mlock_state = MLOCK_ON_FAULT;
+ return;
+ }
+
+ error_report("parameter 'mem-lock' expects one of "
+ "'on', 'off', 'on-fault'");
+ exit(1);
+}
+
/*
* Very early object creation, before the sandbox options have been activated.
*/
@@ -1965,11 +2032,12 @@ static void qemu_create_early_backends(void)
qemu_console_early_init();
- if (dpy.has_gl && dpy.gl != DISPLAYGL_MODE_OFF && display_opengl == 0) {
+ if (dpy.has_gl && dpy.gl != DISPLAY_GL_MODE_OFF && display_opengl == 0) {
#if defined(CONFIG_OPENGL)
- error_report("OpenGL is not supported by the display");
+ error_report("OpenGL is not supported by display backend '%s'",
+ DisplayType_str(dpy.type));
#else
- error_report("OpenGL support is disabled");
+ error_report("OpenGL support was not enabled in this build of QEMU");
#endif
exit(1);
}
@@ -2035,12 +2103,9 @@ static void qemu_create_late_backends(void)
qemu_opts_foreach(qemu_find_opts("mon"),
mon_init_func, NULL, &error_fatal);
- if (foreach_device_config(DEV_SERIAL, serial_parse) < 0)
- exit(1);
- if (foreach_device_config(DEV_PARALLEL, parallel_parse) < 0)
- exit(1);
- if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0)
- exit(1);
+ foreach_device_config_or_exit(DEV_SERIAL, serial_parse);
+ foreach_device_config_or_exit(DEV_PARALLEL, parallel_parse);
+ foreach_device_config_or_exit(DEV_DEBUGCON, debugcon_parse);
/* now chardevs have been created we may have semihosting to connect */
qemu_semihosting_chardev_init();
@@ -2104,6 +2169,19 @@ static void parse_memory_options(void)
loc_pop(&loc);
}
+static void qemu_create_machine_containers(Object *machine)
+{
+ static const char *const containers[] = {
+ "unattached",
+ "peripheral",
+ "peripheral-anon",
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(containers); i++) {
+ object_property_add_new_container(machine, containers[i]);
+ }
+}
+
static void qemu_create_machine(QDict *qdict)
{
MachineClass *machine_class = select_machine(qdict, &error_fatal);
@@ -2112,8 +2190,8 @@ static void qemu_create_machine(QDict *qdict)
current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class)));
object_property_add_child(object_get_root(), "machine",
OBJECT(current_machine));
- object_property_add_child(container_get(OBJECT(current_machine),
- "/unattached"),
+ qemu_create_machine_containers(OBJECT(current_machine));
+ object_property_add_child(machine_get_container("unattached"),
"sysbus", OBJECT(sysbus_get_default()));
if (machine_class->minimum_page_bits) {
@@ -2354,6 +2432,7 @@ static void configure_accelerators(const char *progname)
/* Select the default accelerator */
bool have_tcg = accel_find("tcg");
bool have_kvm = accel_find("kvm");
+ bool have_hvf = accel_find("hvf");
if (have_tcg && have_kvm) {
if (g_str_has_suffix(progname, "kvm")) {
@@ -2366,6 +2445,8 @@ static void configure_accelerators(const char *progname)
accelerators = "kvm";
} else if (have_tcg) {
accelerators = "tcg";
+ } else if (have_hvf) {
+ accelerators = "hvf";
} else {
error_report("No accelerator selected and"
" no default accelerator available");
@@ -2415,19 +2496,25 @@ static void configure_accelerators(const char *progname)
static void qemu_validate_options(const QDict *machine_opts)
{
const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel");
+ const char *shim_filename = qdict_get_try_str(machine_opts, "shim");
const char *initrd_filename = qdict_get_try_str(machine_opts, "initrd");
const char *kernel_cmdline = qdict_get_try_str(machine_opts, "append");
if (kernel_filename == NULL) {
- if (kernel_cmdline != NULL) {
- error_report("-append only allowed with -kernel option");
- exit(1);
- }
+ if (kernel_cmdline != NULL) {
+ error_report("-append only allowed with -kernel option");
+ exit(1);
+ }
+
+ if (shim_filename != NULL) {
+ error_report("-shim only allowed with -kernel option");
+ exit(1);
+ }
- if (initrd_filename != NULL) {
- error_report("-initrd only allowed with -kernel option");
- exit(1);
- }
+ if (initrd_filename != NULL) {
+ error_report("-initrd only allowed with -kernel option");
+ exit(1);
+ }
}
if (loadvm && incoming) {
@@ -2614,12 +2701,27 @@ static void qemu_init_displays(void)
static void qemu_init_board(void)
{
+ MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
+
/* process plugin before CPUs are created, but once -smp has been parsed */
qemu_plugin_load_list(&plugin_list, &error_fatal);
/* From here on we enter MACHINE_PHASE_INITIALIZED. */
machine_run_board_init(current_machine, mem_path, &error_fatal);
+ if (machine_class->auto_create_sdcard) {
+ bool ambigous;
+
+ /* Ensure there is a SD bus available to create SD card on */
+ Object *obj = object_resolve_path_type("", TYPE_SD_BUS, &ambigous);
+ if (!obj && !ambigous) {
+ fprintf(stderr, "Can not create sd-card on '%s' machine"
+ " because it lacks a sd-bus\n",
+ machine_class->name);
+ abort();
+ }
+ }
+
drive_check_orphaned();
realtime_init();
@@ -2636,29 +2738,20 @@ static void qemu_create_cli_devices(void)
/* init USB devices */
if (machine_usb(current_machine)) {
- if (foreach_device_config(DEV_USB, usb_parse) < 0)
- exit(1);
+ foreach_device_config_or_exit(DEV_USB, usb_parse);
}
/* init generic devices */
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE);
qemu_opts_foreach(qemu_find_opts("device"),
device_init_func, NULL, &error_fatal);
QTAILQ_FOREACH(opt, &device_opts, next) {
- DeviceState *dev;
+ QObject *ret_data = NULL;
+
loc_push_restore(&opt->loc);
- /*
- * TODO Eventually we should call qmp_device_add() here to make sure it
- * behaves the same, but QMP still has to accept incorrectly typed
- * options until libvirt is fixed and we want to be strict on the CLI
- * from the start, so call qdev_device_add_from_qdict() directly for
- * now.
- */
- dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal);
- object_unref(OBJECT(dev));
+ qmp_device_add(opt->opts, &ret_data, &error_fatal);
+ assert(ret_data == NULL); /* error_fatal aborts */
loc_pop(&opt->loc);
}
- rom_reset_order_override();
}
static bool qemu_machine_creation_done(Error **errp)
@@ -2690,10 +2783,8 @@ static bool qemu_machine_creation_done(Error **errp)
exit(1);
}
- if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) {
- error_setg(errp, "could not start gdbserver");
- return false;
- }
+ foreach_device_config_or_exit(DEV_GDB, gdbserver_start);
+
if (!vga_interface_created && !default_vga &&
vga_interface_type != VGA_NONE) {
warn_report("A -vga option was passed but this machine "
@@ -2728,8 +2819,11 @@ void qmp_x_exit_preconfig(Error **errp)
if (incoming) {
Error *local_err = NULL;
if (strcmp(incoming, "defer") != 0) {
- qmp_migrate_incoming(incoming, false, NULL, true, true,
- &local_err);
+ g_autofree MigrationChannelList *channels =
+ g_new0(MigrationChannelList, 1);
+
+ channels->value = incoming_channels[MIGRATION_CHANNEL_TYPE_MAIN];
+ qmp_migrate_incoming(NULL, true, channels, true, true, &local_err);
if (local_err) {
error_reportf_err(local_err, "-incoming %s: ", incoming);
exit(1);
@@ -2790,7 +2884,10 @@ void qemu_init(int argc, char **argv)
os_setup_limits();
- qemu_init_arch_modules();
+#ifdef CONFIG_MODULES
+ module_init_info(qemu_modinfo);
+ module_allow_arch(target_name());
+#endif
qemu_init_subsystems();
@@ -2829,7 +2926,7 @@ void qemu_init(int argc, char **argv)
const QEMUOption *popt;
popt = lookup_opt(argc, argv, &optarg, &optind);
- if (!(popt->arch_mask & arch_type)) {
+ if (!qemu_arch_available(popt->arch_mask)) {
error_report("Option not supported for this target");
exit(1);
}
@@ -2903,20 +3000,12 @@ void qemu_init(int argc, char **argv)
nographic = true;
dpy.type = DISPLAY_TYPE_NONE;
break;
- case QEMU_OPTION_portrait:
- graphic_rotate = 90;
- break;
- case QEMU_OPTION_rotate:
- graphic_rotate = strtol(optarg, (char **) &optarg, 10);
- if (graphic_rotate != 0 && graphic_rotate != 90 &&
- graphic_rotate != 180 && graphic_rotate != 270) {
- error_report("only 90, 180, 270 deg rotation is available");
- exit(1);
- }
- break;
case QEMU_OPTION_kernel:
qdict_put_str(machine_opts_dict, "kernel", optarg);
break;
+ case QEMU_OPTION_shim:
+ qdict_put_str(machine_opts_dict, "shim", optarg);
+ break;
case QEMU_OPTION_initrd:
qdict_put_str(machine_opts_dict, "initrd", optarg);
break;
@@ -3436,6 +3525,7 @@ void qemu_init(int argc, char **argv)
nb_prom_envs++;
break;
case QEMU_OPTION_old_param:
+ warn_report("-old-param is deprecated");
old_param = 1;
break;
case QEMU_OPTION_rtc:
@@ -3456,7 +3546,7 @@ void qemu_init(int argc, char **argv)
if (!incoming) {
runstate_set(RUN_STATE_INMIGRATE);
}
- incoming = optarg;
+ incoming_option_parse(optarg);
break;
case QEMU_OPTION_only_migratable:
only_migratable = 1;
@@ -3541,13 +3631,7 @@ void qemu_init(int argc, char **argv)
object_option_parse(optarg);
break;
case QEMU_OPTION_overcommit:
- opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
- optarg, false);
- if (!opts) {
- exit(1);
- }
- enable_mlock = qemu_opt_get_bool(opts, "mem-lock", false);
- enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false);
+ overcommit_parse(optarg);
break;
case QEMU_OPTION_compat:
{
@@ -3590,16 +3674,7 @@ void qemu_init(int argc, char **argv)
case QEMU_OPTION_nouserconfig:
/* Nothing to be parsed here. Especially, do not error out below. */
break;
-#if defined(CONFIG_POSIX)
- case QEMU_OPTION_runas:
- warn_report("-runas is deprecated, use '-run-with user=...' instead");
- if (!os_set_runas(optarg)) {
- error_report("User \"%s\" doesn't exist"
- " (and is not <uid>:<gid>)",
- optarg);
- exit(1);
- }
- break;
+#if defined(CONFIG_POSIX) && !defined(EMSCRIPTEN)
case QEMU_OPTION_daemonize:
os_set_daemonize(true);
break;
@@ -3683,6 +3758,12 @@ void qemu_init(int argc, char **argv)
qemu_create_machine(machine_opts_dict);
+ /*
+ * Load incoming CPR state before any devices are created, because it
+ * contains file descriptors that are needed in device initialization code.
+ */
+ cpr_state_load(incoming_channels[MIGRATION_CHANNEL_TYPE_CPR], &error_fatal);
+
suspend_mux_open();
qemu_disable_default_devices();
diff --git a/system/watchpoint.c b/system/watchpoint.c
index 2aa2a9e..21d0bb3 100644
--- a/system/watchpoint.c
+++ b/system/watchpoint.c
@@ -19,7 +19,9 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
-#include "exec/exec-all.h"
+#include "exec/cputlb.h"
+#include "exec/target_page.h"
+#include "exec/watchpoint.h"
#include "hw/core/cpu.h"
/* Add a watchpoint. */